Documentation Center

Configuring Solr Cores

Perform the configuration procedure for each of your Solr Cores.

Procedure

  1. In <core>/conf/schema.xml, verify that the following fields and field types are present:
    Fields:
    <!-- SI4T Mandatory fields -->
    <field name="id"       type="string"    indexed="true"  stored="true"  multiValued="false" required="true"/>
    <field name="url"     type="string"    indexed="true"  stored="true"  multiValued="false" />
    <field name="pubdate"     type="tdate"    indexed="true"  stored="true"  multiValued="false" />
    <!-- Fields configured in the standard SI4T TBB -->
    <field name="title"     type="string"    indexed="true"  stored="true"  multiValued="false" />
    <field name="publicationid"     type="tint"    indexed="true"  stored="true"  multiValued="false" />
    <field name="schemaid"     type="tint"    indexed="true"  stored="true"  multiValued="false" />
    <field name="itemtype"     type="tint"    indexed="true"  stored="true"  multiValued="false" />
    <field name="parentsgid"     type="tint"    indexed="true"  stored="true"  multiValued="false" />
    <field name="sgid"     type="tint"    indexed="true"  stored="true"  multiValued="true" />
    <field name="type"     type="tint"    indexed="true"  stored="true"  multiValued="false" />
    <field name="body" type="text_general"    indexed="true"  stored="true"  multiValued="true" />
    <field name="summary" type="text_general"    indexed="true"  stored="true"  multiValued="true" />
    <field name="_version_" type="long" indexed="true" stored="true"/>
    <!-- Binary extraction storage fields -->
    <field name="fileType" type="string"    indexed="true"  stored="true"  multiValued="false" />
    <field name="fileSize" type="string"    indexed="true"  stored="true"  multiValued="false" />
    <!-- Ignore any other field, including unmapped binary extraction fields (prefixed with binary_) -->
    <dynamicField name="*" type="ignored" />
    <dynamicField name="binary_*" type="ignored" />
    Field types:
    <fieldtype name="string"  class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
    <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
    <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
    <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
    <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>
  2. In <core>/conf/solrconfig.xml, update the /update/extract requestHandler element so that extracted binary content and titles are mapped to the body and title fields in the index:
    <requestHandler name="/update/extract"
                    startup="lazy"
                    class="solr.extraction.ExtractingRequestHandler" >
      <lst name="defaults">
          <str name="lowernames">true</str>
          <str name="uprefix">binary_</str>
          <str name="fmap.content">body</str>
          <str name="fmap.title">title</str>
      </lst>
    </requestHandler>
  3. In <core>/conf/solrconfig.xml, add library references to the dist and contrib folders provided with the Solr installation files to enable the core to perform binary content extraction:
    <config>
        <lib dir="../contrib/extraction/lib" regex=".*\.jar" />
        <lib dir="../dist/" regex=".*\.jar" />
        <lib dir="../contrib/velocity/lib" regex=".*\.jar" />
  4. Restart your Solr web application.

What to do next

You can start configuring your deployer for Solr.