diff options
Diffstat (limited to 'isuifang_solr/example-DIH/solr/tika/conf/solrconfig.xml')
-rw-r--r-- | isuifang_solr/example-DIH/solr/tika/conf/solrconfig.xml | 397 |
1 files changed, 397 insertions, 0 deletions
diff --git a/isuifang_solr/example-DIH/solr/tika/conf/solrconfig.xml b/isuifang_solr/example-DIH/solr/tika/conf/solrconfig.xml new file mode 100644 index 0000000..794c6fd --- /dev/null +++ b/isuifang_solr/example-DIH/solr/tika/conf/solrconfig.xml @@ -0,0 +1,397 @@ +<?xml version="1.0" encoding="UTF-8" ?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<config> + + <luceneMatchVersion>LUCENE_36</luceneMatchVersion> + + <!-- Set this to 'false' if you want solr to continue working after it has + encountered an severe configuration error. In a production environment, + you may want solr to keep working even if one handler is mis-configured. + + You may also set this to false using by setting the system property: + -Dsolr.abortOnConfigurationError=false + --> + <abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError> + + <lib dir="../../../../contrib/extraction/lib" /> + <lib dir="../../../../dist/" regex="apache-solr-dataimporthandler-.*\.jar" /> + + <indexDefaults> + <!-- Values here affect all index writers and act as a default unless overridden. --> + <useCompoundFile>false</useCompoundFile> + + <mergeFactor>10</mergeFactor> + <!-- + If both ramBufferSizeMB and maxBufferedDocs is set, then Lucene will flush based on whichever limit is hit first. + + --> + <!--<maxBufferedDocs>1000</maxBufferedDocs>--> + <!-- Tell Lucene when to flush documents to disk. + Giving Lucene more memory for indexing means faster indexing at the cost of more RAM + + If both ramBufferSizeMB and maxBufferedDocs is set, then Lucene will flush based on whichever limit is hit first. + + --> + <ramBufferSizeMB>32</ramBufferSizeMB> + <maxMergeDocs>2147483647</maxMergeDocs> + <maxFieldLength>10000</maxFieldLength> + <writeLockTimeout>1000</writeLockTimeout> + + <!-- + Expert: + The Merge Policy in Lucene controls how merging is handled by Lucene. The default in 2.3 is the LogByteSizeMergePolicy, previous + versions used LogDocMergePolicy. + + LogByteSizeMergePolicy chooses segments to merge based on their size. The Lucene 2.2 default, LogDocMergePolicy chose when + to merge based on number of documents + + Other implementations of MergePolicy must have a no-argument constructor + --> + <!--<mergePolicy class="org.apache.lucene.index.LogByteSizeMergePolicy"/>--> + + <!-- + Expert: + The Merge Scheduler in Lucene controls how merges are performed. The ConcurrentMergeScheduler (Lucene 2.3 default) + can perform merges in the background using separate threads. The SerialMergeScheduler (Lucene 2.2 default) does not. + --> + <!--<mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/>--> + + <!-- + As long as Solr is the only process modifying your index, it is + safe to use Lucene's in process locking mechanism. But you may + specify one of the other Lucene LockFactory implementations in + the event that you have a custom situation. + + none = NoLockFactory (typically only used with read only indexes) + single = SingleInstanceLockFactory (suggested) + native = NativeFSLockFactory + simple = SimpleFSLockFactory + + ('simple' is the default for backwards compatibility with Solr 1.2) + --> + <lockType>single</lockType> + </indexDefaults> + + <mainIndex> + <!-- options specific to the main on-disk lucene index --> + <useCompoundFile>false</useCompoundFile> + <ramBufferSizeMB>32</ramBufferSizeMB> + <mergeFactor>10</mergeFactor> + <!-- Deprecated --> + <!--<maxBufferedDocs>1000</maxBufferedDocs>--> + <maxMergeDocs>2147483647</maxMergeDocs> + <maxFieldLength>10000</maxFieldLength> + + <!-- If true, unlock any held write or commit locks on startup. + This defeats the locking mechanism that allows multiple + processes to safely access a lucene index, and should be + used with care. + This is not needed if lock type is 'none' or 'single' + --> + <unlockOnStartup>false</unlockOnStartup> + </mainIndex> + + <!-- the default high-performance update handler --> + <updateHandler class="solr.DirectUpdateHandler2"> + + <!-- A prefix of "solr." for class names is an alias that + causes solr to search appropriate packages, including + org.apache.solr.(search|update|request|core|analysis) + --> + + <!-- Limit the number of deletions Solr will buffer during doc updating. + + Setting this lower can help bound memory use during indexing. + --> + <maxPendingDeletes>100000</maxPendingDeletes> + + </updateHandler> + + + <query> + <!-- Maximum number of clauses in a boolean query... can affect + range or prefix queries that expand to big boolean + queries. An exception is thrown if exceeded. --> + <maxBooleanClauses>1024</maxBooleanClauses> + + + <!-- Cache used by SolrIndexSearcher for filters (DocSets), + unordered sets of *all* documents that match a query. + When a new searcher is opened, its caches may be prepopulated + or "autowarmed" using data from caches in the old searcher. + autowarmCount is the number of items to prepopulate. For LRUCache, + the autowarmed items will be the most recently accessed items. + Parameters: + class - the SolrCache implementation (currently only LRUCache) + size - the maximum number of entries in the cache + initialSize - the initial capacity (number of entries) of + the cache. (seel java.util.HashMap) + autowarmCount - the number of entries to prepopulate from + and old cache. + --> + <filterCache + class="solr.LRUCache" + size="512" + initialSize="512" + autowarmCount="256"/> + + <!-- queryResultCache caches results of searches - ordered lists of + document ids (DocList) based on a query, a sort, and the range + of documents requested. --> + <queryResultCache + class="solr.LRUCache" + size="512" + initialSize="512" + autowarmCount="256"/> + + <!-- documentCache caches Lucene Document objects (the stored fields for each document). + Since Lucene internal document ids are transient, this cache will not be autowarmed. --> + <documentCache + class="solr.LRUCache" + size="512" + initialSize="512" + autowarmCount="0"/> + + <!-- If true, stored fields that are not requested will be loaded lazily. + + This can result in a significant speed improvement if the usual case is to + not load all stored fields, especially if the skipped fields are large compressed + text fields. + --> + <enableLazyFieldLoading>true</enableLazyFieldLoading> + + <!-- Example of a generic cache. These caches may be accessed by name + through SolrIndexSearcher.getCache(),cacheLookup(), and cacheInsert(). + The purpose is to enable easy caching of user/application level data. + The regenerator argument should be specified as an implementation + of solr.search.CacheRegenerator if autowarming is desired. --> + <!-- + <cache name="myUserCache" + class="solr.LRUCache" + size="4096" + initialSize="1024" + autowarmCount="1024" + regenerator="org.mycompany.mypackage.MyRegenerator" + /> + --> + + <!-- An optimization that attempts to use a filter to satisfy a search. + If the requested sort does not include score, then the filterCache + will be checked for a filter matching the query. If found, the filter + will be used as the source of document ids, and then the sort will be + applied to that. + <useFilterForSortedQuery>true</useFilterForSortedQuery> + --> + + <!-- An optimization for use with the queryResultCache. When a search + is requested, a superset of the requested number of document ids + are collected. For example, if a search for a particular query + requests matching documents 10 through 19, and queryWindowSize is 50, + then documents 0 through 49 will be collected and cached. Any further + requests in that range can be satisfied via the cache. --> + <queryResultWindowSize>50</queryResultWindowSize> + + <!-- Maximum number of documents to cache for any entry in the + queryResultCache. --> + <queryResultMaxDocsCached>200</queryResultMaxDocsCached> + + <!-- This entry enables an int hash representation for filters (DocSets) + when the number of items in the set is less than maxSize. For smaller + sets, this representation is more memory efficient, more efficient to + iterate over, and faster to take intersections. --> + <HashDocSet maxSize="3000" loadFactor="0.75"/> + + <!-- a newSearcher event is fired whenever a new searcher is being prepared + and there is a current searcher handling requests (aka registered). --> + <!-- QuerySenderListener takes an array of NamedList and executes a + local query request for each NamedList in sequence. --> + <listener event="newSearcher" class="solr.QuerySenderListener"> + <arr name="queries"> + <lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst> + <lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst> + <lst><str name="q">static newSearcher warming query from solrconfig.xml</str></lst> + </arr> + </listener> + + <!-- a firstSearcher event is fired whenever a new searcher is being + prepared but there is no current registered searcher to handle + requests or to gain autowarming data from. --> + <listener event="firstSearcher" class="solr.QuerySenderListener"> + <arr name="queries"> + </arr> + </listener> + + <!-- If a search request comes in and there is no current registered searcher, + then immediately register the still warming searcher and use it. If + "false" then all requests will block until the first searcher is done + warming. --> + <useColdSearcher>false</useColdSearcher> + + <!-- Maximum number of searchers that may be warming in the background + concurrently. An error is returned if this limit is exceeded. Recommend + 1-2 for read-only slaves, higher for masters w/o cache warming. --> + <maxWarmingSearchers>4</maxWarmingSearchers> + + </query> + + <!-- + Let the dispatch filter handler /select?qt=XXX + handleSelect=true will use consistent error handling for /select and /update + handleSelect=false will use solr1.1 style error formatting + --> + <requestDispatcher handleSelect="true" > + <!--Make sure your system has some authentication before enabling remote streaming! --> + <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" /> + + <!-- Set HTTP caching related parameters (for proxy caches and clients). + + To get the behaviour of Solr 1.2 (ie: no caching related headers) + use the never304="true" option and do not specify a value for + <cacheControl> + --> + <httpCaching never304="true"> + <!--httpCaching lastModifiedFrom="openTime" + etagSeed="Solr"--> + <!-- lastModFrom="openTime" is the default, the Last-Modified value + (and validation against If-Modified-Since requests) will all be + relative to when the current Searcher was opened. + You can change it to lastModFrom="dirLastMod" if you want the + value to exactly corrispond to when the physical index was last + modified. + + etagSeed="..." is an option you can change to force the ETag + header (and validation against If-None-Match requests) to be + differnet even if the index has not changed (ie: when making + significant changes to your config file) + + lastModifiedFrom and etagSeed are both ignored if you use the + never304="true" option. + --> + <!-- If you include a <cacheControl> directive, it will be used to + generate a Cache-Control header, as well as an Expires header + if the value contains "max-age=" + + By default, no Cache-Control header is generated. + + You can use the <cacheControl> option even if you have set + never304="true" + --> + <!-- <cacheControl>max-age=30, public</cacheControl> --> + </httpCaching> + </requestDispatcher> + + + <!-- requestHandler plugins... incoming queries will be dispatched to the + correct handler based on the path or the qt (query type) param. + Names starting with a '/' are accessed with the a path equal to the + registered name. Names without a leading '/' are accessed with: + http://host/app/select?qt=name + If no qt is defined, the requestHandler that declares default="true" + will be used. + --> + <requestHandler name="standard" class="solr.StandardRequestHandler" default="true"> + <!-- default values for query parameters --> + <lst name="defaults"> + <str name="echoParams">explicit</str> + <!-- + <int name="rows">10</int> + <str name="fl">*</str> + <str name="version">2.1</str> + --> + </lst> + </requestHandler> + + <requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler"> + <lst name="defaults"> + <str name="config">tika-data-config.xml</str> + </lst> + </requestHandler> + + <!-- + + Search components are registered to SolrCore and used by Search Handlers + + By default, the following components are avaliable: + + <searchComponent name="query" class="org.apache.solr.handler.component.QueryComponent" /> + <searchComponent name="facet" class="org.apache.solr.handler.component.FacetComponent" /> + <searchComponent name="mlt" class="org.apache.solr.handler.component.MoreLikeThisComponent" /> + <searchComponent name="highlight" class="org.apache.solr.handler.component.HighlightComponent" /> + <searchComponent name="debug" class="org.apache.solr.handler.component.DebugComponent" /> + + If you register a searchComponent to one of the standard names, that will be used instead. + + --> + + <requestHandler name="/search" class="org.apache.solr.handler.component.SearchHandler"> + <lst name="defaults"> + <str name="echoParams">explicit</str> + </lst> + <!-- + By default, this will register the following components: + + <arr name="components"> + <str>query</str> + <str>facet</str> + <str>mlt</str> + <str>highlight</str> + <str>debug</str> + </arr> + + To insert handlers before or after the 'standard' components, use: + + <arr name="first-components"> + <str>first</str> + </arr> + + <arr name="last-components"> + <str>last</str> + </arr> + + --> + </requestHandler> + + <!-- Update request handler. + + Note: Since solr1.1 requestHandlers requires a valid content type header if posted in + the body. For example, curl now requires: -H 'Content-type:text/xml; charset=utf-8' + The response format differs from solr1.1 formatting and returns a standard error code. + + To enable solr1.1 behavior, remove the /update handler or change its path + + "update.processor.class" is the class name for the UpdateRequestProcessor. It is initalized + only once. This can not be changed for each request. + --> + <requestHandler name="/update" class="solr.XmlUpdateRequestHandler" > + <!-- + <str name="update.processor.class">org.apache.solr.handler.UpdateRequestProcessor</str> + --> + </requestHandler> + + <!-- config for the admin interface --> + <admin> + <defaultQuery>*:*</defaultQuery> + + <!-- configure a healthcheck file for servers behind a loadbalancer + <healthcheck type="file">server-enabled</healthcheck> + --> + </admin> + +</config> + |