1、文件配置

    在core下面新建lib文件夹,存放相关的jar包,如图所示:

    

    

    修改solrconfig.xml

   

<!--
  Classpath directives for this core. Each <lib> adds the jars found in `dir`
  whose file names match `regex`. `${solr.install.dir:../../../..}` resolves to
  the solr.install.dir property, or to ../../../.. when that property is unset.
-->
<!-- Extraction contrib: its dependency jars plus the solr-cell jar from dist/. -->
<lib dir="${solr.install.dir:../../../..}/contrib/extraction/lib" regex=".*\.jar" /> 
  <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-cell-\d.*\.jar" /> 
 
  <!-- Clustering contrib. -->
  <lib dir="${solr.install.dir:../../../..}/contrib/clustering/lib/" regex=".*\.jar" /> 
  <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-clustering-\d.*\.jar" /> 
 
  <!-- Language-identification contrib. -->
  <lib dir="${solr.install.dir:../../../..}/contrib/langid/lib/" regex=".*\.jar" /> 
  <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-langid-\d.*\.jar" /> 
 
  <!-- Velocity contrib. -->
  <lib dir="${solr.install.dir:../../../..}/contrib/velocity/lib" regex=".*\.jar" /> 
  <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-velocity-\d.*\.jar" /> 
  <!-- Data import handler jars from dist/. -->
  <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-dataimporthandler-.*\.jar" /> 
  <!-- NOTE(review): the next line is an exact duplicate of the previous one; it looks redundant. -->
  <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-dataimporthandler-.*\.jar" /> 
  <!-- Every jar placed in the core-local ./lib folder created in step 1. -->
  <lib dir="./lib" regex=".*\.jar"/>

    增加以下配置(如果已存在则无需重复添加):

    

 <!--
   Registers the extracting request handler at /update/extract.
   startup="lazy" defers loading the handler until it is first requested.
   The <lst name="defaults"> values apply unless a request supplies its own
   parameters; the Java client in this article sends fmap.content=attr_content,
   which overrides the "text" default below.
 -->
 <requestHandler name="/update/extract" 
                  startup="lazy" 
                  class="solr.extraction.ExtractingRequestHandler" > 
    <lst name="defaults"> 
      <!-- fmap.<source>=<target>: store the extracted "content" field as "text". -->
      <str name="fmap.content">text</str> 
      <!-- Map the extracted "meta" field to "ignored_". -->
      <str name="fmap.meta">ignored_</str> 
      <!-- Lower-case extracted field names before mapping. -->
      <str name="lowernames">true</str> 
      <!-- Prefix fields that are not defined in the schema with "attr_". -->
      <str name="uprefix">attr_</str> 
      <!-- Also index attributes of the extracted markup elements into separate fields. -->
      <str name="captureAttr">true</str> 
    </lst> 
  </requestHandler>

   配置managed-schema文件:

   

  

  修改managed-schema文件,增加字段:

  <!--
    Fields used by the indexing code in this article. All are single-valued,
    indexed and stored.
  -->
  <!-- Receives literal.path: the path of the uploaded file. -->
  <field name="path"      type="string"   indexed="true"  stored="true"  multiValued="false" /> 
  <!-- Receives literal.pathftype: the file extension (e.g. doc, pdf). -->
  <field name="pathftype"      type="string"   indexed="true"  stored="true"  multiValued="false" /> 
  <!-- Receives literal.pathuploaddate: the upload date, stored as a plain string. -->
  <field name="pathuploaddate"      type="string"   indexed="true"  stored="true"  multiValued="false" /> 
  <!-- NOTE(review): the Java code shown in this article never sets pathsummary. -->
  <field name="pathsummary"      type="string"   indexed="true"  stored="true"  multiValued="false" /> 
  <!-- Receives the extracted document body via fmap.content=attr_content. -->
  <field name="attr_content"      type="text_general"   indexed="true"  stored="true"  multiValued="false" />

  2、Java代码solrj操作(6.6.0版本) 

import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.time.LocalDate;
import java.util.Date;
import java.util.Locale;

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest.ACTION;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
 
/**
 * Indexes rich-text files (PDF, Word, Excel, PowerPoint, plain text) into Solr
 * by posting them to the {@code /update/extract} request handler.
 *
 * <p>Created 2017/12/13 15:16.
 *
 * @author sks
 */
public class solr_pdf {

    /** Base URL of the Solr core that receives the documents. */
    private static final String SOLR_URL = "http://localhost:8983/solr/test";

    /** Request handler path the file is posted to. */
    private static final String EXTRACT_HANDLER = "/update/extract";

    /** Value returned by {@link #getFileContentType(String)} for unrecognised extensions. */
    private static final String UNKNOWN_CONTENT_TYPE = "othertype";

    /**
     * Demo entry point: indexes one sample PDF, using the bare file name both as
     * the file name and as the Solr document id.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String filePath = "D:/work/Solr/ImportData/20160229001cn.pdf";
        String solrId = "20160229001cn.pdf";

        try {
            indexFilesSolrCell(solrId, solrId, filePath);
        } catch (IOException | SolrServerException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns today's date in the system default time zone, formatted as
     * {@code yyyy-MM-dd} (ISO-8601 local date).
     */
    private static String currentDate() {
        // LocalDate#toString is always ISO yyyy-MM-dd, independent of the default locale.
        return LocalDate.now().toString();
    }

    /**
     * Returns the text after the last {@code '.'} in {@code fileName}, or the
     * whole name when it contains no dot.
     */
    private static String extensionOf(String fileName) {
        return fileName.substring(fileName.lastIndexOf('.') + 1);
    }

    /**
     * Sends one file to the extracting request handler and commits it.
     *
     * <p>The request carries these parameters: {@code literal.id},
     * {@code literal.path}, {@code literal.pathuploaddate} (today's date),
     * {@code literal.pathftype} (the extension of {@code fileName}) and
     * {@code fmap.content=attr_content}.
     *
     * @param fileName file name including its extension; used to derive the content
     *                 type and file type, and as the document id when {@code solrId}
     *                 is {@code null} or empty
     * @param solrId   document id sent as {@code literal.id}
     * @param path     location of the file on disk; also sent as {@code literal.path}
     * @throws IOException         if the file cannot be read or the HTTP exchange fails
     * @throws SolrServerException if Solr reports an error for the request
     */
    public static void indexFilesSolrCell(String fileName, String solrId, String path)
            throws IOException, SolrServerException {
        // Earlier revisions ignored solrId and always used fileName as the id;
        // keep that as the fallback so callers passing no id behave as before.
        String documentId = (solrId == null || solrId.isEmpty()) ? fileName : solrId;

        ContentStreamUpdateRequest up = new ContentStreamUpdateRequest(EXTRACT_HANDLER);
        up.addFile(new File(path), getFileContentType(fileName));
        up.setParam("literal.id", documentId);
        up.setParam("literal.path", path);                       // file path
        up.setParam("literal.pathuploaddate", currentDate());    // upload date
        up.setParam("literal.pathftype", extensionOf(fileName)); // file type, e.g. doc, pdf
        up.setParam("fmap.content", "attr_content");             // extracted body text
        // Commit as part of this request, with both wait flags set to true.
        up.setAction(ACTION.COMMIT, true, true);

        // The client holds HTTP resources; close it even when the request fails.
        try (SolrClient solr = new HttpSolrClient.Builder(SOLR_URL).build()) {
            solr.request(up);
        }
    }

    /**
     * Maps a file name to a content type based on its extension. The extension
     * is matched case-insensitively.
     *
     * @param filename file name including its extension
     * @return the MIME type for xlsx, pdf, doc, txt, xls, docx, ppt and pptx
     *         files, or {@code "othertype"} for anything else
     */
    public static String getFileContentType(String filename) {
        // Locale.ROOT keeps the lower-casing independent of the default locale.
        String extension = extensionOf(filename).toLowerCase(Locale.ROOT);
        switch (extension) {
            case "xlsx":
                return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
            case "pdf":
                return "application/pdf";
            case "doc":
                return "application/msword";
            case "txt":
                return "text/plain";
            case "xls":
                return "application/vnd.ms-excel";
            case "docx":
                return "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
            case "ppt":
                return "application/vnd.ms-powerpoint";
            case "pptx":
                return "application/vnd.openxmlformats-officedocument.presentationml.presentation";
            default:
                return UNKNOWN_CONTENT_TYPE;
        }
    }
}

发布评论
IT序号网

微信公众号:IT虾米(左侧二维码扫一扫),欢迎添加!

Java solr 索引数据增删改查知识解答
你是第一个吃螃蟹的人
发表评论

◎欢迎参与讨论,请在这里发表您的看法、交流您的观点。