Diffstat (limited to 'isuifang_solr/example-DIH/solr/mail/conf')
6 files changed, 1286 insertions, 0 deletions
| diff --git a/isuifang_solr/example-DIH/solr/mail/conf/data-config.xml b/isuifang_solr/example-DIH/solr/mail/conf/data-config.xml new file mode 100644 index 0000000..eed4c2b --- /dev/null +++ b/isuifang_solr/example-DIH/solr/mail/conf/data-config.xml @@ -0,0 +1,11 @@ +<dataConfig> +  <document> +      <!-- +        Note - In order to index attachments, set processAttachement="true" and drop +        Tika and its dependencies to example-DIH/solr/mail/lib directory +       --> +      <entity processor="MailEntityProcessor" user="email@gmail.com" +            password="password" host="imap.gmail.com" protocol="imaps" +            fetchMailsSince="2009-09-20 00:00:00" batchSize="20" folders="inbox" processAttachement="false"/> +  </document> +</dataConfig> diff --git a/isuifang_solr/example-DIH/solr/mail/conf/protwords.txt b/isuifang_solr/example-DIH/solr/mail/conf/protwords.txt new file mode 100644 index 0000000..1dfc0ab --- /dev/null +++ b/isuifang_solr/example-DIH/solr/mail/conf/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License.  You may obtain a copy of the License at +# +#     http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. + +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. +dontstems +zwhacky + diff --git a/isuifang_solr/example-DIH/solr/mail/conf/schema.xml b/isuifang_solr/example-DIH/solr/mail/conf/schema.xml new file mode 100644 index 0000000..406ce3b --- /dev/null +++ b/isuifang_solr/example-DIH/solr/mail/conf/schema.xml @@ -0,0 +1,373 @@ +<?xml version="1.0" encoding="UTF-8" ?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements.  See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License.  You may obtain a copy of the License at + +     http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<!-- + This is the Solr schema file. This file should be named "schema.xml" and + should be in the conf directory under the solr home + (i.e. ./solr/conf/schema.xml by default) + or located where the classloader for the Solr webapp can find it. + + This example schema is the recommended starting point for users. + It should be kept correct and concise, usable out-of-the-box. 
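The data-config.xml above only describes the mail source; nothing is fetched until the DataImportHandler registered in the solrconfig.xml later in this change is told to run. A minimal sketch of triggering a full import from plain Java, assuming the stock example URL http://localhost:8983/solr/mail (adjust host, port and core name for your setup); command=status can be polled the same way to watch progress:

    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.net.URL;

    public class MailFullImport {
        public static void main(String[] args) throws Exception {
            // Assumed example URL; /dataimport is the handler name registered in this solrconfig.xml.
            URL url = new URL("http://localhost:8983/solr/mail/dataimport?command=full-import");
            BufferedReader in = new BufferedReader(new InputStreamReader(url.openStream(), "UTF-8"));
            String line;
            while ((line = in.readLine()) != null) {
                System.out.println(line); // DIH answers with an XML status document
            }
            in.close();
        }
    }

Before running it, replace the placeholder user, password and host in data-config.xml; with processAttachement left at "false", attachment bodies are skipped unless Tika and its dependencies are dropped into example-DIH/solr/mail/lib as the comment above notes.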
+ + For more information, on how to customize this file, please see + http://wiki.apache.org/solr/SchemaXml + + NOTE: this schema includes many optional features and should not + be used for benchmarking. +--> + +<schema name="example" version="1.2"> +  <!-- attribute "name" is the name of this schema and is only used for display purposes. +       Applications should change this to reflect the nature of the search collection. +       version="1.2" is Solr's version number for the schema syntax and semantics.  It should +       not normally be changed by applications. +       1.0: multiValued attribute did not exist, all fields are multiValued by nature +       1.1: multiValued attribute introduced, false by default +       1.2: omitTf attribute introduced, true by default --> + +  <types> +    <!-- field type definitions. The "name" attribute is +       just a label to be used by field definitions.  The "class" +       attribute and any other attributes determine the real +       behavior of the fieldType. +         Class names starting with "solr" refer to java classes in the +       org.apache.solr.analysis package. +    --> + +    <!-- The StrField type is not analyzed, but indexed/stored verbatim. +       - StrField and TextField support an optional compressThreshold which +       limits compression (if enabled in the derived fields) to values which +       exceed a certain size (in characters). +    --> +    <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/> + +    <!-- boolean type: "true" or "false" --> +    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/> + +    <!-- The optional sortMissingLast and sortMissingFirst attributes are +         currently supported on types that are sorted internally as strings. +       - If sortMissingLast="true", then a sort on this field will cause documents +         without the field to come after documents with the field, +         regardless of the requested sort order (asc or desc). +       - If sortMissingFirst="true", then a sort on this field will cause documents +         without the field to come before documents with the field, +         regardless of the requested sort order. +       - If sortMissingLast="false" and sortMissingFirst="false" (the default), +         then default lucene sorting will be used which places docs without the +         field first in an ascending sort and last in a descending sort. +    --> + + +    <!-- numeric field types that store and index the text +         value verbatim (and hence don't support range queries, since the +         lexicographic ordering isn't equal to the numeric ordering) --> +    <fieldType name="integer" class="solr.IntField" omitNorms="true"/> +    <fieldType name="long" class="solr.LongField" omitNorms="true"/> +    <fieldType name="float" class="solr.FloatField" omitNorms="true"/> +    <fieldType name="double" class="solr.DoubleField" omitNorms="true"/> + + +    <!-- Numeric field types that manipulate the value into +         a string value that isn't human-readable in its internal form, +         but with a lexicographic ordering the same as the numeric ordering, +         so that range queries work correctly. 
--> +    <fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/> +    <fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/> +    <fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/> +    <fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/> + + +    <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and +         is a more restricted form of the canonical representation of dateTime +         http://www.w3.org/TR/xmlschema-2/#dateTime +         The trailing "Z" designates UTC time and is mandatory. +         Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z +         All other components are mandatory. + +         Expressions can also be used to denote calculations that should be +         performed relative to "NOW" to determine the value, ie... + +               NOW/HOUR +                  ... Round to the start of the current hour +               NOW-1DAY +                  ... Exactly 1 day prior to now +               NOW/DAY+6MONTHS+3DAYS +                  ... 6 months and 3 days in the future from the start of +                      the current day + +         Consult the DateField javadocs for more information. +      --> +    <fieldType name="date" class="solr.DateField" sortMissingLast="true" omitNorms="true"/> + +    <!-- +          Numeric field types that manipulate the value into trie encoded strings which are not +          human readable in the internal form. Range searches on such fields use the fast Trie Range Queries +          which are much faster than range searches on the SortableNumberField types. + +          For the fast range search to work, trie fields must be indexed. Trie fields are <b>not</b> sortable +          in numerical order. Also, they cannot be used in function queries. If one needs sorting as well as +          fast range search, one should create a copy field specifically for sorting. Same workaround is +          suggested for using trie fields in function queries as well. + +          For each number being added to this field, multiple terms are generated as per the algorithm described in +          org.apache.lucene.search.trie package description. The possible number of terms depend on the precisionStep +          attribute and increase dramatically with higher precision steps (factor 2**precisionStep). The default +          value of precisionStep is 8. + +          Note that if you use a precisionStep of 32 for int/float and 64 for long/double, then multiple terms +          will not be generated, range search will be no faster than any other number field, +          but sorting will be possible. 
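The date type above (used later for the sentDate field) accepts both literal 1995-12-31T23:59:59Z-style timestamps and the NOW-relative date-math expressions described in the comment, which makes mail-date range queries straightforward. A hedged sketch, again assuming the stock example URL:

    import java.net.URL;
    import java.net.URLEncoder;
    import java.util.Scanner;

    public class RecentMail {
        public static void main(String[] args) throws Exception {
            // Mail sent in the last 30 days, newest first; sentDate is declared below in <fields>.
            String q = URLEncoder.encode("sentDate:[NOW/DAY-30DAYS TO NOW]", "UTF-8");
            String sort = URLEncoder.encode("sentDate desc", "UTF-8");
            URL url = new URL("http://localhost:8983/solr/mail/select?q=" + q
                    + "&sort=" + sort + "&fl=subject,from,sentDate&rows=10");
            Scanner s = new Scanner(url.openStream(), "UTF-8").useDelimiter("\\A");
            System.out.println(s.hasNext() ? s.next() : ""); // raw XML response
            s.close();
        }
    }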
+     --> +    <fieldType name="tint" class="solr.TrieField" type="integer" omitNorms="true" positionIncrementGap="0" indexed="true" stored="false" /> +    <fieldType name="tfloat" class="solr.TrieField" type="float" omitNorms="true" positionIncrementGap="0" indexed="true" stored="false" /> +    <fieldType name="tlong" class="solr.TrieField" type="long" omitNorms="true" positionIncrementGap="0" indexed="true" stored="false" /> +    <fieldType name="tdouble" class="solr.TrieField" type="double" omitNorms="true" positionIncrementGap="0" indexed="true" stored="false" /> + +    <fieldType name="tdouble4" class="solr.TrieField" type="double" precisionStep="4" omitNorms="true" positionIncrementGap="0" indexed="true" stored="false" /> + +    <!-- +          This date field manipulates the value into a trie encoded strings for fast range searches. They follow the +          same format and semantics as the normal DateField and support the date math syntax except that they are +          not sortable and cannot be used in function queries. +    --> +    <fieldType name="tdate" class="solr.TrieField" type="date" omitNorms="true" positionIncrementGap="0" indexed="true" stored="false" /> + + +    <!-- The "RandomSortField" is not used to store or search any +         data.  You can declare fields of this type it in your schema +         to generate psuedo-random orderings of your docs for sorting +         purposes.  The ordering is generated based on the field name +         and the version of the index, As long as the index version +         remains unchanged, and the same field name is reused, +         the ordering of the docs will be consistent. +         If you want differend psuedo-random orderings of documents, +         for the same version of the index, use a dynamicField and +         change the name +     --> +    <fieldType name="random" class="solr.RandomSortField" indexed="true" /> + +    <!-- solr.TextField allows the specification of custom text analyzers +         specified as a tokenizer and a list of token filters. Different +         analyzers may be specified for indexing and querying. + +         The optional positionIncrementGap puts space between multiple fields of +         this type on the same document, with the purpose of preventing false phrase +         matching across fields. + +         For more info on customizing your analyzer chain, please see +         http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters +     --> + +    <!-- One can also specify an existing Analyzer class that has a +         default constructor via the class attribute on the analyzer element +    <fieldType name="text_greek" class="solr.TextField"> +      <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/> +    </fieldType> +    --> + +    <!-- A text field that only splits on whitespace for exact matching of words --> +    <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> +      <analyzer> +        <tokenizer class="solr.WhitespaceTokenizerFactory"/> +      </analyzer> +    </fieldType> + +    <!-- A text field that uses WordDelimiterFilter to enable splitting and matching of +        words on case-change, alpha numeric boundaries, and non-alphanumeric chars, +        so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi". +        Synonyms and stopwords are customized by external files, and stemming is enabled. 
+        Duplicate tokens at the same position (which may result from Stemmed Synonyms or +        WordDelim parts) are removed. +        --> +    <fieldType name="text" class="solr.TextField" positionIncrementGap="100"> +      <analyzer type="index"> +        <tokenizer class="solr.WhitespaceTokenizerFactory"/> +        <!-- in this example, we will only use synonyms at query time +        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> +        --> +        <!-- Case insensitive stop word removal. +          add enablePositionIncrements=true in both the index and query +          analyzers to leave a 'gap' for more accurate phrase queries. +        --> +        <filter class="solr.StopFilterFactory" +                ignoreCase="true" +                words="stopwords.txt" +                enablePositionIncrements="true" +                /> +        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> +        <filter class="solr.LowerCaseFilterFactory"/> +        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> +        <filter class="solr.PorterStemFilterFactory"/> +        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> +      </analyzer> +      <analyzer type="query"> +        <tokenizer class="solr.WhitespaceTokenizerFactory"/> +        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> +        <filter class="solr.StopFilterFactory" +                ignoreCase="true" +                words="stopwords.txt" +                enablePositionIncrements="true" +                /> +        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> +        <filter class="solr.LowerCaseFilterFactory"/> +        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> +        <filter class="solr.PorterStemFilterFactory"/> +        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> +      </analyzer> +    </fieldType> + + +    <!-- Less flexible matching, but less false matches.  Probably not ideal for product names, +         but may be good for SKUs.  Can insert dashes in the wrong place and still match. 
--> +    <fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" > +      <analyzer> +        <tokenizer class="solr.WhitespaceTokenizerFactory"/> +        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> +        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/> +        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> +        <filter class="solr.LowerCaseFilterFactory"/> +        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> +        <filter class="solr.EnglishMinimalStemFilterFactory"/> +        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> +      </analyzer> +    </fieldType> + +    <!-- +     Setup simple analysis for spell checking +     --> +    <fieldType name="textSpell" class="solr.TextField" positionIncrementGap="100" > +      <analyzer> +        <tokenizer class="solr.StandardTokenizerFactory"/> +        <filter class="solr.LowerCaseFilterFactory"/> +        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> +      </analyzer> +    </fieldType> + +    <!-- charFilter + "CharStream aware" WhitespaceTokenizer  --> +    <!-- +    <fieldType name="textCharNorm" class="solr.TextField" positionIncrementGap="100" > +      <analyzer> +        <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/> +        <tokenizer class="solr.CharStreamAwareWhitespaceTokenizerFactory"/> +      </analyzer> +    </fieldType> +    --> + +    <!-- This is an example of using the KeywordTokenizer along +         With various TokenFilterFactories to produce a sortable field +         that does not include some properties of the source text +      --> +    <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true"> +      <analyzer> +        <!-- KeywordTokenizer does no actual tokenizing, so the entire +             input string is preserved as a single token +          --> +        <tokenizer class="solr.KeywordTokenizerFactory"/> +        <!-- The LowerCase TokenFilter does what you expect, which can be +             when you want your sorting to be case insensitive +          --> +        <filter class="solr.LowerCaseFilterFactory" /> +        <!-- The TrimFilter removes any leading or trailing whitespace --> +        <filter class="solr.TrimFilterFactory" /> +        <!-- The PatternReplaceFilter gives you the flexibility to use +             Java Regular expression to replace any sequence of characters +             matching a pattern with an arbitrary replacement string, +             which may include back refrences to portions of the orriginal +             string matched by the pattern. + +             See the Java Regular Expression documentation for more +             infomation on pattern and replacement string syntax. 
+ +             http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html +          --> +        <filter class="solr.PatternReplaceFilterFactory" +                pattern="([^a-z])" replacement="" replace="all" +        /> +      </analyzer> +    </fieldType> + +    <fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" > +      <analyzer> +        <tokenizer class="solr.StandardTokenizerFactory"/> +        <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/> +      </analyzer> +    </fieldtype> + + +    <!-- since fields of this type are by default not stored or indexed, any data added to +         them will be ignored outright +     --> +    <fieldtype name="ignored" stored="false" indexed="false" class="solr.StrField" /> + + </types> + + + <fields> +   <!-- Valid attributes for fields: +     name: mandatory - the name for the field +     type: mandatory - the name of a previously defined type from the <types> section +     indexed: true if this field should be indexed (searchable or sortable) +     stored: true if this field should be retrievable +     compressed: [false] if this field should be stored using gzip compression +       (this will only apply if the field type is compressable; among +       the standard field types, only TextField and StrField are) +     multiValued: true if this field may contain multiple values per document +     omitNorms: (expert) set to true to omit the norms associated with +       this field (this disables length normalization and index-time +       boosting for the field, and saves some memory).  Only full-text +       fields or fields that need an index-time boost need norms. +     termVectors: [false] set to true to store the term vector for a given field. +       When using MoreLikeThis, fields used for similarity should be stored for +       best performance. +     termPositions: Store position information with the term vector.  This will increase storage costs. +     termOffsets: Store offset information with the term vector. This will increase storage costs. 
+   --> + +   <field name="messageId" type="string" indexed="true" stored="true" required="true" omitNorms="true" /> +   <field name="subject" type="string" indexed="true" stored="true" omitNorms="true" /> +   <field name="from" type="string" indexed="true" stored="true" omitNorms="true"/> +   <field name="sentDate" type="date" indexed="true" stored="true"/> +   <field name="xMailer" type="string" indexed="true" stored="true" omitNorms="true"/> + +   <field name="allTo" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/> +   <field name="flags" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/> +   <field name="content" type="text" indexed="true" stored="true" omitNorms="true" multiValued="true"/> +   <field name="attachment" type="text" indexed="true" stored="true" omitNorms="true" multiValued="true"/> +   <field name="attachmentNames" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/> + +   <field name="catchAllField" type="text" indexed="true" stored="true" omitNorms="true" multiValued="true"/> + + </fields> + +  <copyField source="content" dest="catchAllField"/> +  <copyField source="attachmentNames" dest="catchAllField"/> +  <copyField source="attachment" dest="catchAllField"/> +  <copyField source="subject" dest="catchAllField"/> +  <copyField source="allTo" dest="catchAllField"/> + +  <!-- The unique key, Note that some mail servers may not send the message-id or they may send duplicate ones --> +  <uniqueKey>messageId</uniqueKey> + + <!-- field for the QueryParser to use when an explicit fieldname is absent --> + <defaultSearchField>catchAllField</defaultSearchField> + + <!-- SolrQueryParser configuration: defaultOperator="AND|OR" --> + <solrQueryParser defaultOperator="OR"/> + +</schema> diff --git a/isuifang_solr/example-DIH/solr/mail/conf/solrconfig.xml b/isuifang_solr/example-DIH/solr/mail/conf/solrconfig.xml new file mode 100644 index 0000000..c0f35b5 --- /dev/null +++ b/isuifang_solr/example-DIH/solr/mail/conf/solrconfig.xml @@ -0,0 +1,792 @@ +<?xml version="1.0" encoding="UTF-8" ?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements.  See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License.  You may obtain a copy of the License at + +     http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<config> + +  <luceneMatchVersion>LUCENE_36</luceneMatchVersion> + +  <!-- Set this to 'false' if you want solr to continue working after it has +       encountered an severe configuration error.  In a production environment, +       you may want solr to keep working even if one handler is mis-configured. 
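Back in the schema just above, every searchable mail field (content, attachment, attachmentNames, subject, allTo) is copied into catchAllField, and catchAllField is the defaultSearchField, so a bare keyword query searches all of them without a field prefix. A sketch under the same assumed example URL; the keyword is arbitrary:

    import java.net.URL;
    import java.net.URLEncoder;
    import java.util.Scanner;

    public class KeywordSearch {
        public static void main(String[] args) throws Exception {
            // No field prefix: the query runs against defaultSearchField (catchAllField).
            String q = URLEncoder.encode("invoice", "UTF-8");
            URL url = new URL("http://localhost:8983/solr/mail/select?q=" + q
                    + "&fl=messageId,subject,from,sentDate&rows=10");
            Scanner s = new Scanner(url.openStream(), "UTF-8").useDelimiter("\\A");
            System.out.println(s.hasNext() ? s.next() : "");
            s.close();
        }
    }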
+ +       You may also set this to false using by setting the system property: +         -Dsolr.abortOnConfigurationError=false +     --> +  <abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError> + +  <lib dir="../../../../contrib/dataimporthandler/lib/" regex=".*jar$" /> +  <lib dir="../../../../dist/" regex="apache-solr-dataimporthandler-.*\.jar" /> +   +  <indexDefaults> +   <!-- Values here affect all index writers and act as a default unless overridden. --> +    <useCompoundFile>false</useCompoundFile> + +    <mergeFactor>10</mergeFactor> +    <!-- +     If both ramBufferSizeMB and maxBufferedDocs is set, then Lucene will flush based on whichever limit is hit first. + +     --> +    <!--<maxBufferedDocs>1000</maxBufferedDocs>--> +    <!-- Tell Lucene when to flush documents to disk. +    Giving Lucene more memory for indexing means faster indexing at the cost of more RAM + +    If both ramBufferSizeMB and maxBufferedDocs is set, then Lucene will flush based on whichever limit is hit first. + +    --> +    <ramBufferSizeMB>32</ramBufferSizeMB> +    <maxMergeDocs>2147483647</maxMergeDocs> +    <maxFieldLength>10000</maxFieldLength> +    <writeLockTimeout>1000</writeLockTimeout> + +    <!-- +     Expert: +     The Merge Policy in Lucene controls how merging is handled by Lucene.  The default in 2.3 is the LogByteSizeMergePolicy, previous +     versions used LogDocMergePolicy. + +     LogByteSizeMergePolicy chooses segments to merge based on their size.  The Lucene 2.2 default, LogDocMergePolicy chose when +     to merge based on number of documents + +     Other implementations of MergePolicy must have a no-argument constructor +     --> +    <!--<mergePolicy class="org.apache.lucene.index.LogByteSizeMergePolicy"/>--> + +    <!-- +     Expert: +     The Merge Scheduler in Lucene controls how merges are performed.  The ConcurrentMergeScheduler (Lucene 2.3 default) +      can perform merges in the background using separate threads.  The SerialMergeScheduler (Lucene 2.2 default) does not. +     --> +    <!--<mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/>--> + +    <!-- +      This option specifies which Lucene LockFactory implementation to use. + +      single = SingleInstanceLockFactory - suggested for a read-only index +               or when there is no possibility of another process trying +               to modify the index. +      native = NativeFSLockFactory +      simple = SimpleFSLockFactory + +      (For backwards compatibility with Solr 1.2, 'simple' is the default +       if not specified.) +    --> +    <lockType>single</lockType> +  </indexDefaults> + +  <mainIndex> +    <!-- options specific to the main on-disk lucene index --> +    <useCompoundFile>false</useCompoundFile> +    <ramBufferSizeMB>32</ramBufferSizeMB> +    <mergeFactor>10</mergeFactor> +    <!-- Deprecated --> +    <!--<maxBufferedDocs>1000</maxBufferedDocs>--> +    <maxMergeDocs>2147483647</maxMergeDocs> +    <maxFieldLength>10000</maxFieldLength> + +    <!-- If true, unlock any held write or commit locks on startup. +         This defeats the locking mechanism that allows multiple +         processes to safely access a lucene index, and should be +         used with care. +         This is not needed if lock type is 'none' or 'single' +     --> +    <unlockOnStartup>false</unlockOnStartup> + +    <!-- +        Custom deletion policies can specified here. The class must +        implement org.apache.lucene.index.IndexDeletionPolicy. 
+ +        http://lucene.apache.org/java/2_3_2/api/org/apache/lucene/index/IndexDeletionPolicy.html + +        The standard Solr IndexDeletionPolicy implementation supports deleting +        index commit points on number of commits, age of commit point and +        optimized status. + +        The latest commit point should always be preserved regardless +        of the criteria. +    --> +    <deletionPolicy class="solr.SolrDeletionPolicy"> +      <!-- Keep only optimized commit points --> +      <str name="keepOptimizedOnly">false</str> +      <!-- The maximum number of commit points to be kept --> +      <str name="maxCommitsToKeep">1</str> +      <!-- +          Delete all commit points once they have reached the given age. +          Supports DateMathParser syntax e.g. + +          <str name="maxCommitAge">30MINUTES</str> +          <str name="maxCommitAge">1DAY</str> +      --> +    </deletionPolicy> + +  </mainIndex> + +  <!--	Enables JMX if and only if an existing MBeanServer is found, use +  		this if you want to configure JMX through JVM parameters. Remove +  		this to disable exposing Solr configuration and statistics to JMX. + +		If you want to connect to a particular server, specify the agentId +		e.g. <jmx agentId="myAgent" /> + +		If you want to start a new MBeanServer, specify the serviceUrl +		e.g <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr" /> + +		For more details see http://wiki.apache.org/solr/SolrJmx +  --> +  <jmx /> + +  <!-- the default high-performance update handler --> +  <updateHandler class="solr.DirectUpdateHandler2"> + +    <!-- A prefix of "solr." for class names is an alias that +         causes solr to search appropriate packages, including +         org.apache.solr.(search|update|request|core|analysis) +     --> + +    <!-- Perform a <commit/> automatically under certain conditions: +         maxDocs - number of updates since last commit is greater than this +         maxTime - oldest uncommited update (in ms) is this long ago +    <autoCommit> +      <maxDocs>10000</maxDocs> +      <maxTime>1000</maxTime> +    </autoCommit> +    --> + +    <!-- The RunExecutableListener executes an external command. +         exe - the name of the executable to run +         dir - dir to use as the current working directory. default="." +         wait - the calling thread waits until the executable returns. default="true" +         args - the arguments to pass to the program.  default=nothing +         env - environment variables to set.  default=nothing +      --> +    <!-- A postCommit event is fired after every commit or optimize command +    <listener event="postCommit" class="solr.RunExecutableListener"> +      <str name="exe">solr/bin/snapshooter</str> +      <str name="dir">.</str> +      <bool name="wait">true</bool> +      <arr name="args"> <str>arg1</str> <str>arg2</str> </arr> +      <arr name="env"> <str>MYVAR=val1</str> </arr> +    </listener> +    --> +    <!-- A postOptimize event is fired only after every optimize command, useful +         in conjunction with index distribution to only distribute optimized indicies +    <listener event="postOptimize" class="solr.RunExecutableListener"> +      <str name="exe">snapshooter</str> +      <str name="dir">solr/bin</str> +      <bool name="wait">true</bool> +    </listener> +    --> + +  </updateHandler> + + +  <query> +    <!-- Maximum number of clauses in a boolean query... can affect +        range or prefix queries that expand to big boolean +        queries.  
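Note that <autoCommit> above is left commented out, so nothing commits on a timer: a DataImportHandler full-import commits when it finishes (its default), but documents added through any other channel stay invisible to searchers until an explicit commit reaches the /update handler registered later in this file. A sketch, same assumed URL, sending the Content-type header this config's own comments call out:

    import java.io.OutputStream;
    import java.net.HttpURLConnection;
    import java.net.URL;

    public class ExplicitCommit {
        public static void main(String[] args) throws Exception {
            URL url = new URL("http://localhost:8983/solr/mail/update"); // assumed example URL
            HttpURLConnection conn = (HttpURLConnection) url.openConnection();
            conn.setDoOutput(true);
            conn.setRequestMethod("POST");
            conn.setRequestProperty("Content-Type", "text/xml; charset=utf-8");
            OutputStream out = conn.getOutputStream();
            out.write("<commit/>".getBytes("UTF-8"));
            out.close();
            System.out.println("HTTP " + conn.getResponseCode()); // 200 when the commit is accepted
            conn.disconnect();
        }
    }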
An exception is thrown if exceeded.  --> +    <maxBooleanClauses>1024</maxBooleanClauses> + + +    <!-- There are two implementations of cache available for Solr, +         LRUCache, based on a synchronized LinkedHashMap, and +         FastLRUCache, based on a ConcurrentHashMap.  FastLRUCache has faster gets +         and slower puts in single threaded operation and thus is generally faster +         than LRUCache when the hit ratio of the cache is high (> 75%), and may be +         faster under other scenarios on multi-cpu systems. --> +    <!-- Cache used by SolrIndexSearcher for filters (DocSets), +         unordered sets of *all* documents that match a query. +         When a new searcher is opened, its caches may be prepopulated +         or "autowarmed" using data from caches in the old searcher. +         autowarmCount is the number of items to prepopulate.  For LRUCache, +         the autowarmed items will be the most recently accessed items. +       Parameters: +         class - the SolrCache implementation LRUCache or FastLRUCache +         size - the maximum number of entries in the cache +         initialSize - the initial capacity (number of entries) of +           the cache.  (seel java.util.HashMap) +         autowarmCount - the number of entries to prepopulate from +           and old cache. +         --> +    <filterCache +      class="solr.FastLRUCache" +      size="512" +      initialSize="512" +      autowarmCount="128"/> + +    <!-- Cache used to hold field values that are quickly accessible +         by document id.  The fieldValueCache is created by default +         even if not configured here. +      <fieldValueCache +        class="solr.FastLRUCache" +        size="512" +        autowarmCount="128" +        showItems="32" +      /> +    --> + +   <!-- queryResultCache caches results of searches - ordered lists of +         document ids (DocList) based on a query, a sort, and the range +         of documents requested.  --> +    <queryResultCache +      class="solr.LRUCache" +      size="512" +      initialSize="512" +      autowarmCount="32"/> + +  <!-- documentCache caches Lucene Document objects (the stored fields for each document). +       Since Lucene internal document ids are transient, this cache will not be autowarmed.  --> +    <documentCache +      class="solr.LRUCache" +      size="512" +      initialSize="512" +      autowarmCount="0"/> + +    <!-- If true, stored fields that are not requested will be loaded lazily. + +    This can result in a significant speed improvement if the usual case is to +    not load all stored fields, especially if the skipped fields are large compressed +    text fields. +    --> +    <enableLazyFieldLoading>true</enableLazyFieldLoading> + +    <!-- Example of a generic cache.  These caches may be accessed by name +         through SolrIndexSearcher.getCache(),cacheLookup(), and cacheInsert(). +         The purpose is to enable easy caching of user/application level data. +         The regenerator argument should be specified as an implementation +         of solr.search.CacheRegenerator if autowarming is desired.  --> +    <!-- +    <cache name="myUserCache" +      class="solr.LRUCache" +      size="4096" +      initialSize="1024" +      autowarmCount="1024" +      regenerator="org.mycompany.mypackage.MyRegenerator" +      /> +    --> + +   <!-- An optimization that attempts to use a filter to satisfy a search. 
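The practical upshot of the filterCache described above: constraints that recur across many searches (a date window, a folder, a sender) are cheaper as fq parameters than as extra clauses in q, because each filter's DocSet is cached and reused independently of the scored query. A sketch with a date-window filter, same assumed URL:

    import java.net.URL;
    import java.net.URLEncoder;
    import java.util.Scanner;

    public class FilteredSearch {
        public static void main(String[] args) throws Exception {
            String q  = URLEncoder.encode("meeting", "UTF-8");                          // scored keyword query
            String fq = URLEncoder.encode("sentDate:[NOW/DAY-7DAYS TO NOW]", "UTF-8");  // cached filter, no scoring
            URL url = new URL("http://localhost:8983/solr/mail/select?q=" + q + "&fq=" + fq + "&rows=10");
            Scanner s = new Scanner(url.openStream(), "UTF-8").useDelimiter("\\A");
            System.out.println(s.hasNext() ? s.next() : "");
            s.close();
        }
    }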
+         If the requested sort does not include score, then the filterCache +         will be checked for a filter matching the query. If found, the filter +         will be used as the source of document ids, and then the sort will be +         applied to that. +    <useFilterForSortedQuery>true</useFilterForSortedQuery> +   --> + +   <!-- An optimization for use with the queryResultCache.  When a search +         is requested, a superset of the requested number of document ids +         are collected.  For example, if a search for a particular query +         requests matching documents 10 through 19, and queryWindowSize is 50, +         then documents 0 through 49 will be collected and cached.  Any further +         requests in that range can be satisfied via the cache.  --> +    <queryResultWindowSize>50</queryResultWindowSize> + +    <!-- Maximum number of documents to cache for any entry in the +         queryResultCache. --> +    <queryResultMaxDocsCached>200</queryResultMaxDocsCached> + +    <!-- This entry enables an int hash representation for filters (DocSets) +         when the number of items in the set is less than maxSize.  For smaller +         sets, this representation is more memory efficient, more efficient to +         iterate over, and faster to take intersections.  --> +    <HashDocSet maxSize="3000" loadFactor="0.75"/> + +    <!-- a newSearcher event is fired whenever a new searcher is being prepared +         and there is a current searcher handling requests (aka registered). --> +    <!-- QuerySenderListener takes an array of NamedList and executes a +         local query request for each NamedList in sequence. --> +    <listener event="newSearcher" class="solr.QuerySenderListener"> +      <arr name="queries"> +        <lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst> +        <lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst> +        <lst><str name="q">static newSearcher warming query from solrconfig.xml</str></lst> +      </arr> +    </listener> + +    <!-- a firstSearcher event is fired whenever a new searcher is being +         prepared but there is no current registered searcher to handle +         requests or to gain autowarming data from. --> +    <listener event="firstSearcher" class="solr.QuerySenderListener"> +      <arr name="queries"> +        <lst> <str name="q">fast_warm</str> <str name="start">0</str> <str name="rows">10</str> </lst> +        <lst><str name="q">static firstSearcher warming query from solrconfig.xml</str></lst> +      </arr> +    </listener> + +    <!-- If a search request comes in and there is no current registered searcher, +         then immediately register the still warming searcher and use it.  If +         "false" then all requests will block until the first searcher is done +         warming. --> +    <useColdSearcher>false</useColdSearcher> + +    <!-- Maximum number of searchers that may be warming in the background +      concurrently.  An error is returned if this limit is exceeded. Recommend +      1-2 for read-only slaves, higher for masters w/o cache warming. 
--> +    <maxWarmingSearchers>2</maxWarmingSearchers> + +  </query> + +  <!-- +    Let the dispatch filter handler /select?qt=XXX +    handleSelect=true will use consistent error handling for /select and /update +    handleSelect=false will use solr1.1 style error formatting +    --> +  <requestDispatcher handleSelect="true" > +    <!--Make sure your system has some authentication before enabling remote streaming!  --> +    <requestParsers enableRemoteStreaming="true" multipartUploadLimitInKB="2048000" /> + +    <!-- Set HTTP caching related parameters (for proxy caches and clients). + +         To get the behaviour of Solr 1.2 (ie: no caching related headers) +         use the never304="true" option and do not specify a value for +         <cacheControl> +    --> +    <!-- <httpCaching never304="true"> --> +    <httpCaching lastModifiedFrom="openTime" +                 etagSeed="Solr"> +       <!-- lastModFrom="openTime" is the default, the Last-Modified value +            (and validation against If-Modified-Since requests) will all be +            relative to when the current Searcher was opened. +            You can change it to lastModFrom="dirLastMod" if you want the +            value to exactly corrispond to when the physical index was last +            modified. + +            etagSeed="..." is an option you can change to force the ETag +            header (and validation against If-None-Match requests) to be +            differnet even if the index has not changed (ie: when making +            significant changes to your config file) + +            lastModifiedFrom and etagSeed are both ignored if you use the +            never304="true" option. +       --> +       <!-- If you include a <cacheControl> directive, it will be used to +            generate a Cache-Control header, as well as an Expires header +            if the value contains "max-age=" + +            By default, no Cache-Control header is generated. + +            You can use the <cacheControl> option even if you have set +            never304="true" +       --> +       <!-- <cacheControl>max-age=30, public</cacheControl> --> +    </httpCaching> +  </requestDispatcher> + + +  <!-- requestHandler plugins... incoming queries will be dispatched to the +     correct handler based on the path or the qt (query type) param. +     Names starting with a '/' are accessed with the a path equal to the +     registered name.  Names without a leading '/' are accessed with: +      http://host/app/select?qt=name +     If no qt is defined, the requestHandler that declares default="true" +     will be used. 
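As the comment above says, a handler whose name has no leading '/' is reached through /select with the qt parameter, and the one declaring default="true" (the "standard" handler registered next) answers when qt is absent. A short sketch of both addressing styles, same assumed URL:

    import java.net.URL;
    import java.net.URLEncoder;
    import java.util.Scanner;

    public class HandlerDispatch {
        public static void main(String[] args) throws Exception {
            String q = URLEncoder.encode("solr", "UTF-8");
            // No qt: dispatched to the handler declared default="true" ("standard").
            String viaDefault = "http://localhost:8983/solr/mail/select?q=" + q;
            // Explicit qt: dispatched to the handler registered under that name;
            // here the same "standard" handler, but "dismax" etc. are addressed the same way.
            String viaName = "http://localhost:8983/solr/mail/select?qt=standard&q=" + q;
            for (String u : new String[] { viaDefault, viaName }) {
                Scanner s = new Scanner(new URL(u).openStream(), "UTF-8").useDelimiter("\\A");
                System.out.println(s.hasNext() ? s.next() : "");
                s.close();
            }
        }
    }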
+  --> +  <requestHandler name="standard" class="solr.SearchHandler" default="true"> +    <!-- default values for query parameters --> +     <lst name="defaults"> +       <str name="echoParams">explicit</str> +       <!-- +       <int name="rows">10</int> +       <str name="fl">*</str> +       <str name="version">2.1</str> +        --> +     </lst> +  </requestHandler> + +<!-- Please refer to http://wiki.apache.org/solr/SolrReplication for details on configuring replication --> +<!--Master config--> +<!-- +<requestHandler name="/replication" class="solr.ReplicationHandler" > +    <lst name="master"> +        <str name="replicateAfter">commit</str> +         <str name="confFiles">schema.xml,stopwords.txt</str> +    </lst> +</requestHandler> +--> +<!-- Slave config--> +<!-- +<requestHandler name="/replication" class="solr.ReplicationHandler"> +    <lst name="slave"> +        <str name="masterUrl">http://localhost:8983/solr/replication</str> +        <str name="pollInterval">00:00:60</str> +     </lst> +</requestHandler> +--> + +  <!-- DisMaxRequestHandler allows easy searching across multiple fields +       for simple user-entered phrases.  It's implementation is now +       just the standard SearchHandler with a default query type +       of "dismax". +       see http://wiki.apache.org/solr/DisMaxRequestHandler +   --> +  <requestHandler name="dismax" class="solr.SearchHandler" > +    <lst name="defaults"> +     <str name="defType">dismax</str> +     <str name="echoParams">explicit</str> +     <float name="tie">0.01</float> +     <str name="qf"> +        text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 +     </str> +     <str name="pf"> +        text^0.2 features^1.1 name^1.5 manu^1.4 manu_exact^1.9 +     </str> +     <str name="bf"> +        ord(popularity)^0.5 recip(rord(price),1,1000,1000)^0.3 +     </str> +     <str name="fl"> +        id,name,price,score +     </str> +     <str name="mm"> +        2<-1 5<-2 6<90% +     </str> +     <int name="ps">100</int> +     <str name="q.alt">*:*</str> +     <!-- example highlighter config, enable per-query with hl=true --> +     <str name="hl.fl">text features name</str> +     <!-- for this field, we want no fragmenting, just highlighting --> +     <str name="f.name.hl.fragsize">0</str> +     <!-- instructs Solr to return the field itself if no query terms are +          found --> +     <str name="f.name.hl.alternateField">name</str> +     <str name="f.text.hl.fragmenter">regex</str> <!-- defined below --> +    </lst> +  </requestHandler> + +  <!-- Note how you can register the same handler multiple times with +       different names (and different init parameters) +    --> +  <requestHandler name="partitioned" class="solr.SearchHandler" > +    <lst name="defaults"> +     <str name="defType">dismax</str> +     <str name="echoParams">explicit</str> +     <str name="qf">text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0</str> +     <str name="mm">2<-1 5<-2 6<90%</str> +     <!-- This is an example of using Date Math to specify a constantly +          moving date range in a config... +       --> +     <str name="bq">incubationdate_dt:[* TO NOW/DAY-1MONTH]^2.2</str> +    </lst> +    <!-- In addition to defaults, "appends" params can be specified +         to identify values which should be appended to the list of +         multi-val params from the query (or the existing "defaults"). 
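One caveat worth flagging: the dismax handler registered above (and the partitioned one below) carries defaults copied from the stock product example, so its qf, pf, bf, fl and hl.fl values reference fields such as text, features, name, sku, manu, cat, popularity and price that do not exist in this mail schema; requests through it will fail or match nothing until those defaults are edited for the mail fields or overridden per request. A hedged sketch of a per-request override, assuming a blank bf suppresses the unusable boost-function default (the dismax parser skips empty boost functions, but verify on your Solr version); the field boosts are illustrative:

    import java.net.URL;
    import java.net.URLEncoder;
    import java.util.Scanner;

    public class DismaxOverMailFields {
        public static void main(String[] args) throws Exception {
            String q  = URLEncoder.encode("quarterly report", "UTF-8");
            String qf = URLEncoder.encode("subject^2.0 content^1.0 attachment^0.5", "UTF-8"); // mail fields
            String pf = URLEncoder.encode("content^1.5", "UTF-8");
            URL url = new URL("http://localhost:8983/solr/mail/select?qt=dismax&q=" + q
                    + "&qf=" + qf + "&pf=" + pf
                    + "&bf="                                  // blank out the product-schema boost-function default
                    + "&fl=" + URLEncoder.encode("messageId,subject,from,score", "UTF-8"));
            Scanner s = new Scanner(url.openStream(), "UTF-8").useDelimiter("\\A");
            System.out.println(s.hasNext() ? s.next() : "");
            s.close();
        }
    }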
+ +         In this example, the param "fq=instock:true" will be appended to +         any query time fq params the user may specify, as a mechanism for +         partitioning the index, independent of any user selected filtering +         that may also be desired (perhaps as a result of faceted searching). + +         NOTE: there is *absolutely* nothing a client can do to prevent these +         "appends" values from being used, so don't use this mechanism +         unless you are sure you always want it. +      --> +    <lst name="appends"> +      <str name="fq">inStock:true</str> +    </lst> +    <!-- "invariants" are a way of letting the Solr maintainer lock down +         the options available to Solr clients.  Any params values +         specified here are used regardless of what values may be specified +         in either the query, the "defaults", or the "appends" params. + +         In this example, the facet.field and facet.query params are fixed, +         limiting the facets clients can use.  Faceting is not turned on by +         default - but if the client does specify facet=true in the request, +         these are the only facets they will be able to see counts for; +         regardless of what other facet.field or facet.query params they +         may specify. + +         NOTE: there is *absolutely* nothing a client can do to prevent these +         "invariants" values from being used, so don't use this mechanism +         unless you are sure you always want it. +      --> +    <lst name="invariants"> +      <str name="facet.field">cat</str> +      <str name="facet.field">manu_exact</str> +      <str name="facet.query">price:[* TO 500]</str> +      <str name="facet.query">price:[500 TO *]</str> +    </lst> +  </requestHandler> + + +  <!-- +   Search components are registered to SolrCore and used by Search Handlers + +   By default, the following components are avaliable: + +   <searchComponent name="query"     class="org.apache.solr.handler.component.QueryComponent" /> +   <searchComponent name="facet"     class="org.apache.solr.handler.component.FacetComponent" /> +   <searchComponent name="mlt"       class="org.apache.solr.handler.component.MoreLikeThisComponent" /> +   <searchComponent name="highlight" class="org.apache.solr.handler.component.HighlightComponent" /> +   <searchComponent name="stats"     class="org.apache.solr.handler.component.StatsComponent" /> +   <searchComponent name="debug"     class="org.apache.solr.handler.component.DebugComponent" /> + +   Default configuration in a requestHandler would look like: +    <arr name="components"> +      <str>query</str> +      <str>facet</str> +      <str>mlt</str> +      <str>highlight</str> +      <str>stats</str> +      <str>debug</str> +    </arr> + +    If you register a searchComponent to one of the standard names, that will be used instead. +    To insert components before or after the 'standard' components, use: + +    <arr name="first-components"> +      <str>myFirstComponentName</str> +    </arr> + +    <arr name="last-components"> +      <str>myLastComponentName</str> +    </arr> +  --> + +   <!-- The spell check component can return a list of alternative spelling +  suggestions.  
--> +  <searchComponent name="spellcheck" class="solr.SpellCheckComponent"> + +    <str name="queryAnalyzerFieldType">textSpell</str> + +    <lst name="spellchecker"> +      <str name="name">default</str> +      <str name="field">spell</str> +      <str name="spellcheckIndexDir">./spellchecker1</str> + +    </lst> +    <lst name="spellchecker"> +      <str name="name">jarowinkler</str> +      <str name="field">spell</str> +      <!-- Use a different Distance Measure --> +      <str name="distanceMeasure">org.apache.lucene.search.spell.JaroWinklerDistance</str> +      <str name="spellcheckIndexDir">./spellchecker2</str> + +    </lst> + +    <lst name="spellchecker"> +      <str name="classname">solr.FileBasedSpellChecker</str> +      <str name="name">file</str> +      <str name="sourceLocation">spellings.txt</str> +      <str name="characterEncoding">UTF-8</str> +      <str name="spellcheckIndexDir">./spellcheckerFile</str> +    </lst> +  </searchComponent> + +  <!-- A request handler utilizing the spellcheck component. +  ################################################################################################ +  NOTE: This is purely as an example.  The whole purpose of the SpellCheckComponent is to hook it into +  the request handler that handles (i.e. the standard or dismax SearchHandler) +  queries such that a separate request is not needed to get suggestions. + +  IN OTHER WORDS, THERE IS REALLY GOOD CHANCE THE SETUP BELOW IS NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM! +  ################################################################################################ +  --> +  <requestHandler name="/spellCheckCompRH" class="solr.SearchHandler"> +    <lst name="defaults"> +      <!-- omp = Only More Popular --> +      <str name="spellcheck.onlyMorePopular">false</str> +      <!-- exr = Extended Results --> +      <str name="spellcheck.extendedResults">false</str> +      <!--  The number of suggestions to return --> +      <str name="spellcheck.count">1</str> +    </lst> +    <arr name="last-components"> +      <str>spellcheck</str> +    </arr> +  </requestHandler> + +  <requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler"> +    <lst name="defaults"> +      <str name="config">data-config.xml</str> +    </lst> +  </requestHandler> + +  <searchComponent name="tvComponent" class="org.apache.solr.handler.component.TermVectorComponent"/> +  <!-- A Req Handler for working with the tvComponent.  This is purely as an example. +  You will likely want to add the component to your already specified request handlers. --> +  <requestHandler name="tvrh" class="org.apache.solr.handler.component.SearchHandler"> +    <lst name="defaults"> +      <bool name="tv">true</bool> +    </lst> +    <arr name="last-components"> +      <str>tvComponent</str> +    </arr> +  </requestHandler> + +<!-- +  <requestHandler name="/update/extract" class="org.apache.solr.handler.extraction.ExtractingRequestHandler"> +    <lst name="defaults"> +      <str name="ext.map.Last-Modified">last_modified</str> +      <bool name="ext.ignore.und.fl">true</bool> +    </lst> +  </requestHandler> +--> + + + +  <searchComponent name="termsComp" class="org.apache.solr.handler.component.TermsComponent"/> + +  <requestHandler name="/autoSuggest" class="org.apache.solr.handler.component.SearchHandler"> +    <arr name="components"> +      <str>termsComp</str> +    </arr> +  </requestHandler> + + +  <!-- Update request handler. 
+ +       Note: Since solr1.1 requestHandlers requires a valid content type header if posted in +       the body. For example, curl now requires: -H 'Content-type:text/xml; charset=utf-8' +       The response format differs from solr1.1 formatting and returns a standard error code. + +       To enable solr1.1 behavior, remove the /update handler or change its path +    --> +  <requestHandler name="/update" class="solr.XmlUpdateRequestHandler" /> + + +  <requestHandler name="/update/javabin" class="solr.BinaryUpdateRequestHandler" /> + +  <!-- +   Analysis request handler.  Since Solr 1.3.  Use to returnhow a document is analyzed.  Useful +   for debugging and as a token server for other types of applications +   --> +  <requestHandler name="/analysis" class="solr.AnalysisRequestHandler" /> + + +  <!-- CSV update handler, loaded on demand --> +  <requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy" /> + + +  <!-- +   Admin Handlers - This will register all the standard admin RequestHandlers.  Adding +   this single handler is equivalent to registering: + +  <requestHandler name="/admin/luke"       class="org.apache.solr.handler.admin.LukeRequestHandler" /> +  <requestHandler name="/admin/system"     class="org.apache.solr.handler.admin.SystemInfoHandler" /> +  <requestHandler name="/admin/plugins"    class="org.apache.solr.handler.admin.PluginInfoHandler" /> +  <requestHandler name="/admin/threads"    class="org.apache.solr.handler.admin.ThreadDumpHandler" /> +  <requestHandler name="/admin/properties" class="org.apache.solr.handler.admin.PropertiesRequestHandler" /> +  <requestHandler name="/admin/file"       class="org.apache.solr.handler.admin.ShowFileRequestHandler" > + +  If you wish to hide files under ${solr.home}/conf, explicitly register the ShowFileRequestHandler using: +  <requestHandler name="/admin/file" class="org.apache.solr.handler.admin.ShowFileRequestHandler" > +    <lst name="invariants"> +     <str name="hidden">synonyms.txt</str> +     <str name="hidden">anotherfile.txt</str> +    </lst> +  </requestHandler> +  --> +  <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" /> + +  <!-- Echo the request contents back to the client --> +  <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" > +    <lst name="defaults"> +     <str name="echoParams">explicit</str> <!-- for all params (including the default etc) use: 'all' --> +     <str name="echoHandler">true</str> +    </lst> +  </requestHandler> + +  <highlighting> +   <!-- Configure the standard fragmenter --> +   <!-- This could most likely be commented out in the "default" case --> +   <fragmenter name="gap" class="org.apache.solr.highlight.GapFragmenter" default="true"> +    <lst name="defaults"> +     <int name="hl.fragsize">100</int> +    </lst> +   </fragmenter> + +   <!-- A regular-expression-based fragmenter (f.i., for sentence extraction) --> +   <fragmenter name="regex" class="org.apache.solr.highlight.RegexFragmenter"> +    <lst name="defaults"> +      <!-- slightly smaller fragsizes work better because of slop --> +      <int name="hl.fragsize">70</int> +      <!-- allow 50% slop on fragment sizes --> +      <float name="hl.regex.slop">0.5</float> +      <!-- a basic sentence pattern --> +      <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str> +    </lst> +   </fragmenter> + +   <!-- Configure the standard formatter --> +   <formatter name="html" class="org.apache.solr.highlight.HtmlFormatter" default="true"> +    <lst 
name="defaults"> +     <str name="hl.simple.pre"><![CDATA[<em>]]></str> +     <str name="hl.simple.post"><![CDATA[</em>]]></str> +    </lst> +   </formatter> +  </highlighting> + +  <!-- An example dedup update processor that creates the "id" field on the fly +       based on the hash code of some other fields.  This example has overwriteDupes +       set to false since we are using the id field as the signatureField and Solr +       will maintain uniqueness based on that anyway. --> +  <!-- +  <updateRequestProcessorChain name="dedupe"> +    <processor class="org.apache.solr.update.processor.SignatureUpdateProcessorFactory"> +      <bool name="enabled">true</bool> +      <str name="signatureField">id</str> +      <bool name="overwriteDupes">false</bool> +      <str name="fields">name,features,cat</str> +      <str name="signatureClass">org.apache.solr.update.processor.Lookup3Signature</str> +    </processor> +    <processor class="solr.LogUpdateProcessorFactory" /> +    <processor class="solr.RunUpdateProcessorFactory" /> +  </updateRequestProcessorChain> +  --> + + +  <!-- queryResponseWriter plugins... query responses will be written using the +    writer specified by the 'wt' request parameter matching the name of a registered +    writer. +    The "default" writer is the default and will be used if 'wt' is not specified +    in the request. XMLResponseWriter will be used if nothing is specified here. +    The json, python, and ruby writers are also available by default. + +    <queryResponseWriter name="xml" class="solr.XMLResponseWriter" default="true"/> +    <queryResponseWriter name="json" class="solr.JSONResponseWriter"/> +    <queryResponseWriter name="python" class="solr.PythonResponseWriter"/> +    <queryResponseWriter name="ruby" class="solr.RubyResponseWriter"/> +    <queryResponseWriter name="php" class="solr.PHPResponseWriter"/> +    <queryResponseWriter name="phps" class="solr.PHPSerializedResponseWriter"/> + +    <queryResponseWriter name="custom" class="com.example.MyResponseWriter"/> +  --> + +  <!-- XSLT response writer transforms the XML output by any xslt file found +       in Solr's conf/xslt directory.  Changes to xslt files are checked for +       every xsltCacheLifetimeSeconds. +   --> +  <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter"> +    <int name="xsltCacheLifetimeSeconds">5</int> +  </queryResponseWriter> + + +  <!-- example of registering a query parser +  <queryParser name="lucene" class="org.apache.solr.search.LuceneQParserPlugin"/> +  --> + +  <!-- example of registering a custom function parser +  <valueSourceParser name="myfunc" class="com.mycompany.MyValueSourceParser" /> +  --> + +  <!-- config for the admin interface --> +  <admin> +    <defaultQuery>solr</defaultQuery> + +    <!-- configure a healthcheck file for servers behind a loadbalancer +    <healthcheck type="file">server-enabled</healthcheck> +    --> +  </admin> + +</config> diff --git a/isuifang_solr/example-DIH/solr/mail/conf/stopwords.txt b/isuifang_solr/example-DIH/solr/mail/conf/stopwords.txt new file mode 100644 index 0000000..b5824da --- /dev/null +++ b/isuifang_solr/example-DIH/solr/mail/conf/stopwords.txt @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements.  See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License.  You may obtain a copy of the License at +# +#     http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +#Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +s +such +t +that +the +their +then +there +these +they +this +to +was +will +with + diff --git a/isuifang_solr/example-DIH/solr/mail/conf/synonyms.txt b/isuifang_solr/example-DIH/solr/mail/conf/synonyms.txt new file mode 100644 index 0000000..b0e31cb --- /dev/null +++ b/isuifang_solr/example-DIH/solr/mail/conf/synonyms.txt @@ -0,0 +1,31 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License.  You may obtain a copy of the License at +# +#     http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaa => aaaa +bbb => bbbb1 bbbb2 +ccc => cccc1,cccc2 +a\=>a => b\=>b +a\,a => b\,b +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. + +# Synonym mappings can be used for spelling correction too +pixima => pixma + | 
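Finally, synonyms.txt above is applied only by the query-time analyzer of the text field type (the index-time SynonymFilterFactory is deliberately commented out in the schema), and content and attachment use that type, so searching for one member of a group such as Television,Televisions,TV,TVs should also match mail that mentions the others. A last sketch under the same assumed URL, once some mail has been imported:

    import java.net.URL;
    import java.net.URLEncoder;
    import java.util.Scanner;

    public class SynonymSearch {
        public static void main(String[] args) throws Exception {
            // "TV" expands at query time to the Television/Televisions/TV/TVs group from synonyms.txt.
            String q = URLEncoder.encode("content:TV", "UTF-8");
            URL url = new URL("http://localhost:8983/solr/mail/select?q=" + q + "&fl=subject,from,sentDate&rows=10");
            Scanner s = new Scanner(url.openStream(), "UTF-8").useDelimiter("\\A");
            System.out.println(s.hasNext() ? s.next() : "");
            s.close();
        }
    }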
