| [11663] | 1 | <?xml version="1.0" encoding="UTF-8" ?> |
|---|
| 2 | <!-- |
|---|
| 3 | Licensed to the Apache Software Foundation (ASF) under one or more |
|---|
| 4 | contributor license agreements. See the NOTICE file distributed with |
|---|
| 5 | this work for additional information regarding copyright ownership. |
|---|
| 6 | The ASF licenses this file to You under the Apache License, Version 2.0 |
|---|
| 7 | (the "License"); you may not use this file except in compliance with |
|---|
| 8 | the License. You may obtain a copy of the License at |
|---|
| 9 | |
|---|
| 10 | http://www.apache.org/licenses/LICENSE-2.0 |
|---|
| 11 | |
|---|
| 12 | Unless required by applicable law or agreed to in writing, software |
|---|
| 13 | distributed under the License is distributed on an "AS IS" BASIS, |
|---|
| 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|---|
| 15 | See the License for the specific language governing permissions and |
|---|
| 16 | limitations under the License. |
|---|
| 17 | --> |
|---|
| 18 | |
|---|
| 19 | <!-- |
|---|
| 20 | This is the Solr schema file. This file should be named "schema.xml" and |
|---|
| 21 | should be in the conf directory under the solr home |
|---|
| 22 | (i.e. ./solr/conf/schema.xml by default) |
|---|
| 23 | or located where the classloader for the Solr webapp can find it. |
|---|
| 24 | |
|---|
| 25 | This example schema is the recommended starting point for users. |
|---|
| 26 | It should be kept correct and concise, usable out-of-the-box. |
|---|
| 27 | |
|---|
| 28 | For more information on how to customize this file, please see |
|---|
| 29 | http://wiki.apache.org/solr/SchemaXml |
|---|
| 30 | |
|---|
| 31 | PERFORMANCE NOTE: this schema includes many optional features and should not |
|---|
| 32 | be used for benchmarking. To improve performance one could |
|---|
| 33 | - set stored="false" for all fields possible (esp large fields) when you |
|---|
| 34 | only need to search on the field but don't need to return the original |
|---|
| 35 | value. |
|---|
| 36 | - set indexed="false" if you don't need to search on the field, but only |
|---|
| 37 | return the field as a result of searching on other indexed fields. |
|---|
| 38 | - remove all unneeded copyField statements |
|---|
| 39 | - for best index size and searching performance, set "indexed" to false |
|---|
| 40 | for all general text fields, use copyField to copy them to the |
|---|
| 41 | catchall "text" field, and use that for searching. |
|---|
| 42 | - For maximum indexing performance, use the StreamingUpdateSolrServer |
|---|
| 43 | java client. |
|---|
| 44 | - Remember to run the JVM in server mode, and use a higher logging level |
|---|
| 45 | that avoids logging every request |
|---|
| 46 | --> |
|---|
| 47 | |
|---|
| 48 | <schema name="defineItems" version="1.4"> |
|---|
| 49 | <!-- attribute "name" is the name of this schema and is only used for display purposes. |
|---|
| 50 | Applications should change this to reflect the nature of the search collection. |
|---|
| 51 | version="1.4" is Solr's version number for the schema syntax and semantics. It should |
|---|
| 52 | not normally be changed by applications. |
|---|
| 53 | 1.0: multiValued attribute did not exist, all fields are multiValued by nature |
|---|
| 54 | 1.1: multiValued attribute introduced, false by default |
|---|
| 55 | 1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields. |
|---|
| 56 | 1.3: removed optional field compress feature |
|---|
| 57 | 1.4: default auto-phrase (QueryParser feature) to off |
|---|
| 58 | --> |
|---|
| 59 | |
|---|
| 60 | <types> |
|---|
| 61 | <!-- field type definitions. The "name" attribute is |
|---|
| 62 | just a label to be used by field definitions. The "class" |
|---|
| 63 | attribute and any other attributes determine the real |
|---|
| 64 | behavior of the fieldType. |
|---|
| 65 | Class names starting with "solr" are shorthand resolved against Solr's |
|---|
| 66 | standard packages (e.g. org.apache.solr.schema, org.apache.solr.analysis). |
|---|
| 67 | --> |
|---|
| 68 | |
|---|
| 69 | <!-- Verbatim string type: StrField values are not analyzed; they are indexed/stored exactly as supplied. --> |
|---|
| 70 | <fieldType class="solr.StrField" name="string" omitNorms="true" sortMissingLast="true"/> |
|---|
| 71 | |
|---|
| 72 | <!-- Boolean type; values are "true" or "false". --> |
|---|
| 73 | <fieldType class="solr.BoolField" name="boolean" omitNorms="true" sortMissingLast="true"/> |
|---|
| 74 | <!-- Binary data type. The data should be sent and retrieved as Base64-encoded strings. --> |
|---|
| 75 | <fieldType name="binary" class="solr.BinaryField"/> |
|---|
| 76 | |
|---|
| 77 | <!-- The optional sortMissingLast and sortMissingFirst attributes are |
|---|
| 78 | currently supported on types that are sorted internally as strings |
|---|
| 79 | and on numeric types. |
|---|
| 80 | This includes "string","boolean", and, as of 3.5 (and 4.x), |
|---|
| 81 | int, float, long, date, double, including the "Trie" variants. |
|---|
| 82 | - If sortMissingLast="true", then a sort on this field will cause documents |
|---|
| 83 | without the field to come after documents with the field, |
|---|
| 84 | regardless of the requested sort order (asc or desc). |
|---|
| 85 | - If sortMissingFirst="true", then a sort on this field will cause documents |
|---|
| 86 | without the field to come before documents with the field, |
|---|
| 87 | regardless of the requested sort order. |
|---|
| 88 | - If sortMissingLast="false" and sortMissingFirst="false" (the default), |
|---|
| 89 | then default lucene sorting will be used which places docs without the |
|---|
| 90 | field first in an ascending sort and last in a descending sort. |
|---|
| 91 | --> |
|---|
| 92 | |
|---|
| 93 | <!-- |
|---|
| 94 | Default numeric field types (precisionStep="0"). When range queries need to be faster, consider the tint/tfloat/tlong/tdouble types instead. |
|---|
| 95 | --> |
|---|
| 96 | <fieldType class="solr.TrieIntField" name="int" precisionStep="0" positionIncrementGap="0" omitNorms="true"/> |
|---|
| 97 | <fieldType class="solr.TrieFloatField" name="float" precisionStep="0" positionIncrementGap="0" omitNorms="true"/> |
|---|
| 98 | <fieldType class="solr.TrieLongField" name="long" precisionStep="0" positionIncrementGap="0" omitNorms="true"/> |
|---|
| 99 | <fieldType class="solr.TrieDoubleField" name="double" precisionStep="0" positionIncrementGap="0" omitNorms="true"/> |
|---|
| 100 | |
|---|
| 101 | <!-- |
|---|
| 102 | Numeric field types that index each value at various levels of precision |
|---|
| 103 | to accelerate range queries when the number of values between the range |
|---|
| 104 | endpoints is large. See the javadoc for NumericRangeQuery for internal |
|---|
| 105 | implementation details. |
|---|
| 106 | |
|---|
| 107 | Smaller precisionStep values (specified in bits) will lead to more tokens |
|---|
| 108 | indexed per value, slightly larger index size, and faster range queries. |
|---|
| 109 | A precisionStep of 0 disables indexing at different precision levels. |
|---|
| 110 | --> |
|---|
| 111 | <fieldType class="solr.TrieIntField" name="tint" precisionStep="8" positionIncrementGap="0" omitNorms="true"/> |
|---|
| 112 | <fieldType class="solr.TrieFloatField" name="tfloat" precisionStep="8" positionIncrementGap="0" omitNorms="true"/> |
|---|
| 113 | <fieldType class="solr.TrieLongField" name="tlong" precisionStep="8" positionIncrementGap="0" omitNorms="true"/> |
|---|
| 114 | <fieldType class="solr.TrieDoubleField" name="tdouble" precisionStep="8" positionIncrementGap="0" omitNorms="true"/> |
|---|
| 115 | |
|---|
| 116 | <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and |
|---|
| 117 | is a more restricted form of the canonical representation of dateTime |
|---|
| 118 | http://www.w3.org/TR/xmlschema-2/#dateTime |
|---|
| 119 | The trailing "Z" designates UTC time and is mandatory. |
|---|
| 120 | Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z |
|---|
| 121 | All other components are mandatory. |
|---|
| 122 | |
|---|
| 123 | Expressions can also be used to denote calculations that should be |
|---|
| 124 | performed relative to "NOW" to determine the value, ie... |
|---|
| 125 | |
|---|
| 126 | NOW/HOUR |
|---|
| 127 | ... Round to the start of the current hour |
|---|
| 128 | NOW-1DAY |
|---|
| 129 | ... Exactly 1 day prior to now |
|---|
| 130 | NOW/DAY+6MONTHS+3DAYS |
|---|
| 131 | ... 6 months and 3 days in the future from the start of |
|---|
| 132 | the current day |
|---|
| 133 | |
|---|
| 134 | Consult the DateField javadocs for more information. |
|---|
| 135 | |
|---|
| 136 | Note: For faster range queries, consider the tdate type |
|---|
| 137 | --> |
|---|
| 138 | <fieldType class="solr.TrieDateField" name="date" precisionStep="0" positionIncrementGap="0" omitNorms="true"/> |
|---|
| 139 | |
|---|
| 140 | <!-- Trie-based date type (precisionStep="6") intended for faster date range queries and date faceting. --> |
|---|
| 141 | <fieldType class="solr.TrieDateField" name="tdate" precisionStep="6" positionIncrementGap="0" omitNorms="true"/> |
|---|
| 142 | |
|---|
| 143 | |
|---|
| 144 | <!-- |
|---|
| 145 | Note: |
|---|
| 146 | These should only be used for compatibility with existing indexes (created with older Solr versions) |
|---|
| 147 | or if "sortMissingFirst" or "sortMissingLast" functionality is needed. Use Trie based fields instead. |
|---|
| 148 | |
|---|
| 149 | Plain numeric field types that store and index the text |
|---|
| 150 | value verbatim (and hence don't support range queries, since the |
|---|
| 151 | lexicographic ordering isn't equal to the numeric ordering) |
|---|
| 152 | --> |
|---|
| 153 | <fieldType class="solr.IntField" name="pint" omitNorms="true"/> |
|---|
| 154 | <fieldType class="solr.LongField" name="plong" omitNorms="true"/> |
|---|
| 155 | <fieldType class="solr.FloatField" name="pfloat" omitNorms="true"/> |
|---|
| 156 | <fieldType class="solr.DoubleField" name="pdouble" omitNorms="true"/> |
|---|
| 157 | <fieldType class="solr.DateField" name="pdate" omitNorms="true" sortMissingLast="true"/> |
|---|
| 158 | |
|---|
| 159 | |
|---|
| 160 | <!-- |
|---|
| 161 | Note: |
|---|
| 162 | These should only be used for compatibility with existing indexes (created with older Solr versions). |
|---|
| 163 | Use Trie based fields instead. As of Solr 3.5 and 4.x, Trie based fields support sortMissingFirst/Last |
|---|
| 164 | |
|---|
| 165 | Numeric field types that manipulate the value into |
|---|
| 166 | a string value that isn't human-readable in its internal form, |
|---|
| 167 | but with a lexicographic ordering the same as the numeric ordering, |
|---|
| 168 | so that range queries work correctly. |
|---|
| 169 | --> |
|---|
| 170 | <fieldType class="solr.SortableIntField" name="sint" omitNorms="true" sortMissingLast="true"/> |
|---|
| 171 | <fieldType class="solr.SortableLongField" name="slong" omitNorms="true" sortMissingLast="true"/> |
|---|
| 172 | <fieldType class="solr.SortableFloatField" name="sfloat" omitNorms="true" sortMissingLast="true"/> |
|---|
| 173 | <fieldType class="solr.SortableDoubleField" name="sdouble" omitNorms="true" sortMissingLast="true"/> |
|---|
| 174 | |
|---|
| 175 | |
|---|
| 176 | <!-- The "RandomSortField" is not used to store or search any |
|---|
| 177 | data. You can declare fields of this type in your schema |
|---|
| 178 | to generate pseudo-random orderings of your docs for sorting |
|---|
| 179 | purposes. The ordering is generated based on the field name |
|---|
| 180 | and the version of the index. As long as the index version |
|---|
| 181 | remains unchanged, and the same field name is reused, |
|---|
| 182 | the ordering of the docs will be consistent. |
|---|
| 183 | If you want different pseudo-random orderings of documents, |
|---|
| 184 | for the same version of the index, use a dynamicField and |
|---|
| 185 | change the name |
|---|
| 186 | --> |
|---|
| 187 | <fieldType class="solr.RandomSortField" name="random" indexed="true"/> |
|---|
| 188 | |
|---|
| 189 | <!-- solr.TextField allows the specification of custom text analyzers |
|---|
| 190 | specified as a tokenizer and a list of token filters. Different |
|---|
| 191 | analyzers may be specified for indexing and querying. |
|---|
| 192 | |
|---|
| 193 | The optional positionIncrementGap puts space between multiple fields of |
|---|
| 194 | this type on the same document, with the purpose of preventing false phrase |
|---|
| 195 | matching across fields. |
|---|
| 196 | |
|---|
| 197 | For more info on customizing your analyzer chain, please see |
|---|
| 198 | http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters |
|---|
| 199 | --> |
|---|
| 200 | |
|---|
| 201 | <!-- One can also specify an existing Analyzer class that has a |
|---|
| 202 | default constructor via the class attribute on the analyzer element |
|---|
| 203 | <fieldType name="text_greek" class="solr.TextField"> |
|---|
| 204 | <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/> |
|---|
| 205 | </fieldType> |
|---|
| 206 | --> |
|---|
| 207 | |
|---|
| 208 | <!-- Text type that tokenizes on whitespace only, for exact matching of words. --> |
|---|
| 209 | <fieldType class="solr.TextField" name="text_ws" positionIncrementGap="100"> |
|---|
| 210 | <analyzer> |
|---|
| 211 | <tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|---|
| 212 | </analyzer> |
|---|
| 213 | </fieldType> |
|---|
| 214 | |
|---|
| 215 | <!-- General-purpose text type with reasonable, generic |
|---|
| 216 | cross-language defaults. Tokens come from UAX29URLEmailTokenizerFactory; |
|---|
| 217 | CamelCase, dash-words and domain.tld forms are split while the original token is preserved; |
|---|
| 218 | stop words from the case-insensitive "stopwords.txt" are removed |
|---|
| 219 | (that file is empty by default); tokens are then lowercased. |
|---|
| 220 | Synonyms are applied by the query analyzer only. --> |
|---|
| 221 | <fieldType class="solr.TextField" name="text_general" autoGeneratePhraseQueries="true" positionIncrementGap="100"> |
|---|
| 222 | <analyzer type="index"> |
|---|
| 223 | <tokenizer class="solr.UAX29URLEmailTokenizerFactory"/> |
|---|
| 224 | <filter class="solr.WordDelimiterFilterFactory" |
|---|
| 225 | generateWordParts="1" |
|---|
| 226 | splitOnCaseChange="1" |
|---|
| 227 | splitOnNumerics="0" |
|---|
| 228 | stemEnglishPossessive="0" |
|---|
| 229 | preserveOriginal="1"/> |
|---|
| 230 | <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true" enablePositionIncrements="true"/> |
|---|
| 231 | <!-- Index-time synonyms are disabled in this example; synonyms are used at query time only |
|---|
| 232 | <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> |
|---|
| 233 | --> |
|---|
| 234 | <filter class="solr.LowerCaseFilterFactory"/> |
|---|
| 235 | </analyzer> |
|---|
| 236 | <analyzer type="query"> |
|---|
| 237 | <tokenizer class="solr.UAX29URLEmailTokenizerFactory"/> |
|---|
| 238 | <filter class="solr.WordDelimiterFilterFactory" |
|---|
| 239 | generateWordParts="1" |
|---|
| 240 | splitOnCaseChange="1" |
|---|
| 241 | splitOnNumerics="0" |
|---|
| 242 | stemEnglishPossessive="0" |
|---|
| 243 | preserveOriginal="1"/> |
|---|
| 244 | <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true" enablePositionIncrements="true"/> |
|---|
| 245 | <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/> |
|---|
| 246 | <filter class="solr.LowerCaseFilterFactory"/> |
|---|
| 247 | </analyzer> |
|---|
| 248 | </fieldType> |
|---|
| 249 | |
|---|
| 250 | <!-- English text type. Both analyzers tokenize with |
|---|
| 251 | StandardTokenizer, remove English stop words (stopwords_en.txt), |
|---|
| 252 | lowercase, apply EnglishPossessiveFilterFactory, protect the words listed in protwords.txt, |
|---|
| 253 | and finish with Porter stemming. The query analyzer additionally |
|---|
| 254 | applies synonyms from synonyms.txt right after tokenizing. --> |
|---|
| 255 | <fieldType class="solr.TextField" name="text_en" positionIncrementGap="100"> |
|---|
| 256 | <analyzer type="index"> |
|---|
| 257 | <tokenizer class="solr.StandardTokenizerFactory"/> |
|---|
| 258 | <!-- Index-time synonyms are disabled in this example; synonyms are used at query time only |
|---|
| 259 | <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> |
|---|
| 260 | --> |
|---|
| 261 | <!-- Stop word removal, ignoring case. |
|---|
| 262 | enablePositionIncrements=true is set in both the index and query |
|---|
| 263 | analyzers so that a 'gap' is left for more accurate phrase queries. |
|---|
| 264 | --> |
|---|
| 265 | <filter class="solr.StopFilterFactory" |
|---|
| 266 | words="stopwords_en.txt" |
|---|
| 267 | ignoreCase="true" |
|---|
| 268 | enablePositionIncrements="true" |
|---|
| 269 | /> |
|---|
| 270 | <filter class="solr.LowerCaseFilterFactory"/> |
|---|
| 271 | <filter class="solr.EnglishPossessiveFilterFactory"/> |
|---|
| 272 | <filter protected="protwords.txt" class="solr.KeywordMarkerFilterFactory"/> |
|---|
| 273 | <!-- A less aggressive stemmer can optionally be used in place of PorterStemFilterFactory: |
|---|
| 274 | <filter class="solr.EnglishMinimalStemFilterFactory"/> |
|---|
| 275 | --> |
|---|
| 276 | <filter class="solr.PorterStemFilterFactory"/> |
|---|
| 277 | </analyzer> |
|---|
| 278 | <analyzer type="query"> |
|---|
| 279 | <tokenizer class="solr.StandardTokenizerFactory"/> |
|---|
| 280 | <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/> |
|---|
| 281 | <filter class="solr.StopFilterFactory" |
|---|
| 282 | words="stopwords_en.txt" |
|---|
| 283 | ignoreCase="true" |
|---|
| 284 | enablePositionIncrements="true" |
|---|
| 285 | /> |
|---|
| 286 | <filter class="solr.LowerCaseFilterFactory"/> |
|---|
| 287 | <filter class="solr.EnglishPossessiveFilterFactory"/> |
|---|
| 288 | <filter protected="protwords.txt" class="solr.KeywordMarkerFilterFactory"/> |
|---|
| 289 | <!-- A less aggressive stemmer can optionally be used in place of PorterStemFilterFactory: |
|---|
| 290 | <filter class="solr.EnglishMinimalStemFilterFactory"/> |
|---|
| 291 | --> |
|---|
| 292 | <filter class="solr.PorterStemFilterFactory"/> |
|---|
| 293 | </analyzer> |
|---|
| 294 | </fieldType> |
|---|
| 295 | |
|---|
| 296 | <!-- A text field with defaults appropriate for English, plus |
|---|
| 297 | aggressive word-splitting and autophrase features enabled. |
|---|
| 298 | This field is just like text_en, except it adds |
|---|
| 299 | WordDelimiterFilter to enable splitting and matching of |
|---|
| 300 | words on case-change, alpha numeric boundaries, and |
|---|
| 301 | non-alphanumeric chars. This means certain compound word |
|---|
| 302 | cases will work, for example query "wi fi" will match |
|---|
| 303 | document "WiFi" or "wi-fi". However, other cases will still |
|---|
| 304 | not match, for example if the query is "wifi" and the |
|---|
| 305 | document is "wi fi" or if the query is "wi-fi" and the |
|---|
| 306 | document is "wifi". |
|---|
| 307 | --> |
|---|
| 308 | <fieldType class="solr.TextField" name="text_en_splitting" autoGeneratePhraseQueries="true" positionIncrementGap="100"> |
|---|
| 309 | <analyzer type="index"> |
|---|
| 310 | <tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|---|
| 311 | <!-- Index-time synonyms are disabled in this example; synonyms are used at query time only |
|---|
| 312 | <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> |
|---|
| 313 | --> |
|---|
| 314 | <!-- Stop word removal, ignoring case. |
|---|
| 315 | enablePositionIncrements=true is set in both the index and query |
|---|
| 316 | analyzers so that a 'gap' is left for more accurate phrase queries. |
|---|
| 317 | --> |
|---|
| 318 | <filter class="solr.StopFilterFactory" |
|---|
| 319 | words="stopwords_en.txt" |
|---|
| 320 | ignoreCase="true" |
|---|
| 321 | enablePositionIncrements="true" |
|---|
| 322 | /> |
|---|
| 323 | <filter class="solr.WordDelimiterFilterFactory" splitOnCaseChange="1" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/> |
|---|
| 324 | <filter class="solr.LowerCaseFilterFactory"/> |
|---|
| 325 | <filter protected="protwords.txt" class="solr.KeywordMarkerFilterFactory"/> |
|---|
| 326 | <filter class="solr.PorterStemFilterFactory"/> |
|---|
| 327 | </analyzer> |
|---|
| 328 | <analyzer type="query"> |
|---|
| 329 | <tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|---|
| 330 | <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/> |
|---|
| 331 | <filter class="solr.StopFilterFactory" |
|---|
| 332 | words="stopwords_en.txt" |
|---|
| 333 | ignoreCase="true" |
|---|
| 334 | enablePositionIncrements="true" |
|---|
| 335 | /> |
|---|
| 336 | <filter class="solr.WordDelimiterFilterFactory" splitOnCaseChange="1" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/> |
|---|
| 337 | <filter class="solr.LowerCaseFilterFactory"/> |
|---|
| 338 | <filter protected="protwords.txt" class="solr.KeywordMarkerFilterFactory"/> |
|---|
| 339 | <filter class="solr.PorterStemFilterFactory"/> |
|---|
| 340 | </analyzer> |
|---|
| 341 | </fieldType> |
|---|
| 342 | |
|---|
| 343 | <!-- Less flexible matching, in exchange for fewer false matches. Probably not ideal for product names, |
|---|
| 344 | but may be good for SKUs. A dash inserted in the wrong place will still match. --> |
|---|
| 345 | <fieldType class="solr.TextField" name="text_en_splitting_tight" autoGeneratePhraseQueries="true" positionIncrementGap="100"> |
|---|
| 346 | <analyzer> |
|---|
| 347 | <tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|---|
| 348 | <filter class="solr.SynonymFilterFactory" expand="false" ignoreCase="true" synonyms="synonyms.txt"/> |
|---|
| 349 | <filter class="solr.StopFilterFactory" words="stopwords_en.txt" ignoreCase="true"/> |
|---|
| 350 | <filter class="solr.WordDelimiterFilterFactory" catenateWords="1" catenateNumbers="1" catenateAll="0" generateWordParts="0" generateNumberParts="0"/> |
|---|
| 351 | <filter class="solr.LowerCaseFilterFactory"/> |
|---|
| 352 | <filter protected="protwords.txt" class="solr.KeywordMarkerFilterFactory"/> |
|---|
| 353 | <filter class="solr.EnglishMinimalStemFilterFactory"/> |
|---|
| 354 | <!-- Removes duplicate tokens that appear at the same position, which is sometimes |
|---|
| 355 | possible with WordDelimiterFilter in conjunction with stemming. --> |
|---|
| 356 | <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> |
|---|
| 357 | </analyzer> |
|---|
| 358 | </fieldType> |
|---|
| 359 | |
|---|
| 360 | <!-- Like text_general, but the index analyzer also reverses the characters of |
|---|
| 361 | each token (ReversedWildcardFilterFactory) to enable more efficient leading wildcard queries. --> |
|---|
| 362 | <fieldType class="solr.TextField" name="text_general_rev" autoGeneratePhraseQueries="true" positionIncrementGap="100"> |
|---|
| 363 | <analyzer type="index"> |
|---|
| 364 | <tokenizer class="solr.UAX29URLEmailTokenizerFactory"/> |
|---|
| 365 | <filter class="solr.WordDelimiterFilterFactory" |
|---|
| 366 | generateWordParts="1" |
|---|
| 367 | splitOnCaseChange="1" |
|---|
| 368 | splitOnNumerics="0" |
|---|
| 369 | stemEnglishPossessive="0" |
|---|
| 370 | preserveOriginal="1"/> |
|---|
| 371 | <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true" enablePositionIncrements="true"/> |
|---|
| 372 | <filter class="solr.LowerCaseFilterFactory"/> |
|---|
| 373 | <filter class="solr.ReversedWildcardFilterFactory" maxPosAsterisk="3" |
|---|
| 374 | maxPosQuestion="2" maxFractionAsterisk="0.33" withOriginal="true"/> |
|---|
| 375 | </analyzer> |
|---|
| 376 | <analyzer type="query"> |
|---|
| 377 | <tokenizer class="solr.UAX29URLEmailTokenizerFactory"/> |
|---|
| 378 | <filter class="solr.WordDelimiterFilterFactory" |
|---|
| 379 | generateWordParts="1" |
|---|
| 380 | splitOnCaseChange="1" |
|---|
| 381 | splitOnNumerics="0" |
|---|
| 382 | stemEnglishPossessive="0" |
|---|
| 383 | preserveOriginal="1"/> |
|---|
| 384 | <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/> |
|---|
| 385 | <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true" enablePositionIncrements="true"/> |
|---|
| 386 | <filter class="solr.LowerCaseFilterFactory"/> |
|---|
| 387 | </analyzer> |
|---|
| 388 | </fieldType> |
|---|
| 389 | |
|---|
| 390 | <!-- charFilter + WhitespaceTokenizer --> |
|---|
| 391 | <!-- |
|---|
| 392 | <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" > |
|---|
| 393 | <analyzer> |
|---|
| 394 | <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/> |
|---|
| 395 | <tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|---|
| 396 | </analyzer> |
|---|
| 397 | </fieldType> |
|---|
| 398 | --> |
|---|
| 399 | |
|---|
| 400 | <!-- Example of combining the KeywordTokenizer with |
|---|
| 401 | several TokenFilterFactories to produce a sortable field |
|---|
| 402 | that leaves out some properties of the source text |
|---|
| 403 | --> |
|---|
| 404 | <fieldType class="solr.TextField" name="alphaOnlySort" omitNorms="true" sortMissingLast="true"> |
|---|
| 405 | <analyzer> |
|---|
| 406 | <!-- KeywordTokenizer performs no actual tokenizing, so the whole |
|---|
| 407 | input string is kept as a single token |
|---|
| 408 | --> |
|---|
| 409 | <tokenizer class="solr.KeywordTokenizerFactory"/> |
|---|
| 410 | <!-- The LowerCase TokenFilter lowercases the token, which is useful |
|---|
| 411 | when you want your sorting to be case insensitive |
|---|
| 412 | --> |
|---|
| 413 | <filter class="solr.LowerCaseFilterFactory"/> |
|---|
| 414 | <!-- The TrimFilter strips leading and trailing whitespace --> |
|---|
| 415 | <filter class="solr.TrimFilterFactory"/> |
|---|
| 416 | <!-- The PatternReplaceFilter replaces any sequence of characters |
|---|
| 417 | matching a Java regular expression with an arbitrary |
|---|
| 418 | replacement string, which may include back references |
|---|
| 419 | to portions of the original string matched by the pattern. |
|---|
| 420 | Here every character other than a-z is replaced with the empty string. |
|---|
| 421 | |
|---|
| 422 | See the Java Regular Expression documentation for more |
|---|
| 423 | information on pattern and replacement string syntax. |
|---|
| 424 | |
|---|
| 425 | http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html |
|---|
| 426 | --> |
|---|
| 427 | <filter class="solr.PatternReplaceFilterFactory" |
|---|
| 428 | replace="all" pattern="([^a-z])" replacement="" |
|---|
| 429 | /> |
|---|
| 430 | </analyzer> |
|---|
| 431 | </fieldType> |
|---|
| 432 | |
|---|
| 433 | <!-- Phonetic type: StandardTokenizer followed by DoubleMetaphoneFilterFactory (inject="false"); indexed but not stored. --> <fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField"> |
|---|
| 434 | <analyzer> |
|---|
| 435 | <tokenizer class="solr.StandardTokenizerFactory"/> |
|---|
| 436 | <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/> |
|---|
| 437 | </analyzer> |
|---|
| 438 | </fieldType> |
|---|
| 439 | |
|---|
| 440 | <fieldType name="payloads" stored="false" indexed="true" class="solr.TextField"> |
|---|
| 441 | <analyzer> |
|---|
| 442 | <tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|---|
| 443 | <!-- |
|---|
| 444 | The DelimitedPayloadTokenFilter can put payloads on tokens... for example, |
|---|
| 445 | a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f |
|---|
| 446 | Attributes of the DelimitedPayloadTokenFilterFactory : |
|---|
| 447 | "delimiter" - a one character delimiter. Default is | (pipe) |
|---|
| 448 | "encoder" - how to encode the following value into a payload |
|---|
| 449 | float -> org.apache.lucene.analysis.payloads.FloatEncoder, |
|---|
| 450 | integer -> o.a.l.a.p.IntegerEncoder |
|---|
| 451 | identity -> o.a.l.a.p.IdentityEncoder |
|---|
| 452 | Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor. |
|---|
| 453 | --> |
|---|
| 454 | <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/> |
|---|
| 455 | </analyzer> |
|---|
| 456 | </fieldType> |
|---|
| 457 | |
|---|
| 458 | <!-- Keeps the whole field value as one token (KeywordTokenizer) and lowercases it. --> |
|---|
| 459 | <fieldType class="solr.TextField" name="lowercase" positionIncrementGap="100"> |
|---|
| 460 | <analyzer> |
|---|
| 461 | <tokenizer class="solr.KeywordTokenizerFactory"/> |
|---|
| 462 | <filter class="solr.LowerCaseFilterFactory"/> |
|---|
| 463 | </analyzer> |
|---|
| 464 | </fieldType> |
|---|
| 465 | |
|---|
| 466 | <fieldType class="solr.TextField" name="text_path" positionIncrementGap="100"> |
|---|
| 467 | <analyzer> |
|---|
| 468 | <tokenizer class="solr.PathHierarchyTokenizerFactory" /> |
|---|
| 469 | </analyzer> |
|---|
| 470 | </fieldType> |
|---|
| 471 | |
|---|
| 472 | <!-- Fields of this type are by default neither stored nor indexed, |
|---|
| 473 | so any data added to them will be ignored outright. --> |
|---|
| 474 | <fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField"/> |
|---|
| 475 | |
|---|
| 476 | <!-- This point type indexes the coordinates as separate fields (subFields) |
|---|
| 477 | If subFieldType is defined, it references a type, and a dynamic field |
|---|
| 478 | definition is created matching *___<typename>. Alternately, if |
|---|
| 479 | subFieldSuffix is defined, that is used to create the subFields. |
|---|
| 480 | Example: if subFieldType="double", then the coordinates would be |
|---|
| 481 | indexed in fields myloc_0___double,myloc_1___double. |
|---|
| 482 | Example: if subFieldSuffix="_d" then the coordinates would be indexed |
|---|
| 483 | in fields myloc_0_d,myloc_1_d |
|---|
| 484 | The subFields are an implementation detail of the fieldType, and end |
|---|
| 485 | users normally should not need to know about them. |
|---|
| 486 | --> |
|---|
| 487 | <fieldType class="solr.PointType" name="point" subFieldSuffix="_d" dimension="2"/> |
|---|
| 488 | |
|---|
| 489 | <!-- Specialized type for geospatial search. When indexed, this fieldType must not be multivalued. --> |
|---|
| 490 | <fieldType class="solr.LatLonType" name="location" subFieldSuffix="_coordinate"/> |
|---|
| 491 | |
|---|
| 492 | <!-- |
|---|
| 493 | A Geohash is a compact representation of a latitude/longitude pair in a single field. |
|---|
| 494 | See http://wiki.apache.org/solr/SpatialSearch |
|---|
| 495 | --> |
|---|
| 496 | <fieldType name="geohash" class="solr.GeoHashField"/> |
|---|
| 497 | </types> |
|---|
| 498 | |
|---|
| 499 | |
|---|
| 500 | <fields> |
|---|
| 501 | <!-- Field declarations for this index. Valid attributes for fields: |
|---|
| 502 | name: mandatory - the name for the field |
|---|
| 503 | type: mandatory - the name of a previously defined type from the |
|---|
| 504 | <types> section |
|---|
| 505 | indexed: true if this field should be indexed (searchable or sortable) |
|---|
| 506 | stored: true if this field should be retrievable |
|---|
| 507 | multiValued: true if this field may contain multiple values per document |
|---|
| 508 | omitNorms: (expert) set to true to omit the norms associated with |
|---|
| 509 | this field (this disables length normalization and index-time |
|---|
| 510 | boosting for the field, and saves some memory). Only full-text |
|---|
| 511 | fields or fields that need an index-time boost need norms. |
|---|
| 512 | termVectors: [false] set to true to store the term vector for a |
|---|
| 513 | given field. |
|---|
| 514 | When using MoreLikeThis, fields used for similarity should be |
|---|
| 515 | stored for best performance. |
|---|
| 516 | termPositions: Store position information with the term vector. |
|---|
| 517 | This will increase storage costs. |
|---|
| 518 | termOffsets: Store offset information with the term vector. This |
|---|
| 519 | will increase storage costs. |
|---|
| 520 | default: a value that should be used if no value is specified |
|---|
| 521 | when adding a document (the timestamp field below uses default="NOW"). |
|---|
| 522 | required: true if a value for this field is mandatory (required="false" is the default). --> |
|---|
| 523 | <field name="doc_id" type="string" indexed="true" stored="true" required="true"/> <!-- unique key of the index (see the uniqueKey element after this section) --> |
|---|
| 524 | <field name="project" type="string" indexed="true" stored="true" required="true"/> |
|---|
| 525 | <field name="realm" type="string" indexed="true" stored="true" required="true"/> |
|---|
| 526 | <field name="id" type="string" indexed="true" stored="true" required="true"/> |
|---|
| 527 | <field name="parent_realm" type="string" indexed="true" stored="true"/> |
|---|
| 528 | <field name="parent_id" type="string" indexed="true" stored="true"/> |
|---|
| 529 | <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/> |
|---|
| 530 | <field name="author" type="text_general" indexed="true" stored="true" multiValued="true"/> |
|---|
| 531 | <field name="changed" type="date" indexed="true" stored="true"/> |
|---|
| 532 | <field name="created" type="date" indexed="true" stored="true"/> |
|---|
| 533 | <field name="oneline" type="text_general_rev" indexed="true" stored="true"/> |
|---|
| 534 | <field name="tags" type="text_general" indexed="true" stored="true" multiValued="true"/> |
|---|
| 535 | <field name="involved" type="text_general" indexed="true" stored="true" multiValued="true"/> |
|---|
| 536 | <field name="popularity" type="int" indexed="true" stored="true"/> |
|---|
| 537 | <field name="body" type="text_general" indexed="true" stored="false"/> <!-- indexed only: searchable, but not retrievable (stored="false") --> |
|---|
| 538 | <field name="body_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/> <!-- catch-all search field: destination of the copyField source="*" rule and the defaultSearchField; indexed only --> |
|---|
| 539 | <field name="comments" type="text_general" indexed="true" stored="true" multiValued="true"/> |
|---|
| 540 | <field name="timestamp" type="date" indexed="true" stored="true" default="NOW"/> |
|---|
| 541 | |
|---|
| 542 | <!-- Dublin Core terms, mapped from ExtractingRequestHandler. |
|---|
| 543 | The DC schema allows any field to be omitted or repeated, so they are all declared multiValued="true". |
|---|
| 544 | None is marked required, because required="false" is the default. |
|---|
| 545 | --> |
|---|
| 546 | <field name="dc_contributor" type="text_general" indexed="true" stored="true" multiValued="true"/> |
|---|
| 547 | <field name="dc_coverage" type="text_general" indexed="true" stored="true" multiValued="true"/> |
|---|
| 548 | <field name="dc_creator" type="text_general" indexed="true" stored="true" multiValued="true"/> |
|---|
| 549 | <field name="dc_date" type="text_general" indexed="true" stored="true" multiValued="true"/> |
|---|
| 550 | <field name="dc_description" type="text_general" indexed="true" stored="true" multiValued="true"/> |
|---|
| 551 | <field name="dc_format" type="text_general" indexed="true" stored="true" multiValued="true"/> |
|---|
| 552 | <field name="dc_identifier" type="text_general" indexed="true" stored="true" multiValued="true"/> |
|---|
| 553 | <field name="dc_language" type="text_general" indexed="true" stored="true" multiValued="true"/> |
|---|
| 554 | <field name="dc_publisher" type="text_general" indexed="true" stored="true" multiValued="true"/> |
|---|
| 555 | <field name="dc_relation" type="text_general" indexed="true" stored="true" multiValued="true"/> |
|---|
| 556 | <field name="dc_rights" type="text_general" indexed="true" stored="true" multiValued="true"/> |
|---|
| 557 | <field name="dc_source" type="text_general" indexed="true" stored="true" multiValued="true"/> |
|---|
| 558 | <field name="dc_subject" type="text_general" indexed="true" stored="true" multiValued="true"/> |
|---|
| 559 | <field name="dc_title" type="text_general" indexed="true" stored="true" multiValued="true"/> |
|---|
| 560 | <field name="dc_type" type="text_general" indexed="true" stored="true" multiValued="true"/> |
|---|
| 561 | |
|---|
| 562 | <dynamicField name="ignored_*" type="ignored"/> <!-- the only dynamic field in this section; the "ignored" type is defined outside it. NOTE(review): presumably that type is neither indexed nor stored - confirm against its definition. --> |
|---|
| 563 | </fields> |
|---|
| 564 | |
|---|
| 565 | <!-- Field used to determine and enforce document uniqueness. |
|---|
| 566 | Unless this field is marked with required="false", it will be a required field; doc_id is also declared required="true" explicitly in the fields section. |
|---|
| 567 | --> |
|---|
| 568 | <uniqueKey>doc_id</uniqueKey> |
|---|
| 569 | |
|---|
| 570 | <!-- Field for the QueryParser to use when an explicit fieldname is absent. body_rev is the catch-all field filled by the copyField source="*" rule in this file; it is indexed but not stored. --> |
|---|
| 571 | <defaultSearchField>body_rev</defaultSearchField> |
|---|
| 572 | |
|---|
| 573 | <!-- SolrQueryParser configuration: defaultOperator="AND|OR" --> |
|---|
| 574 | <!-- This behaviour is better configured in solrconfig.xml using q.op |
|---|
| 575 | http://wiki.apache.org/solr/SchemaXml#Default_query_parser_operator |
|---|
| 576 | http://wiki.apache.org/solr/ExtendedDisMax#mm_.28Minimum_.27Should.27_Match.29 |
|---|
| 577 | --> |
|---|
| 578 | <!-- solrQueryParser defaultOperator="OR"/--> |
|---|
| 579 | |
|---|
| 580 | <!-- copyField commands copy one field to another at the time a document |
|---|
| 581 | is added to the index. They are used either to index the same field differently, |
|---|
| 582 | or to add multiple fields to the same field for easier/faster searching. The rule below copies every field (source="*") into body_rev, the defaultSearchField, which is declared multiValued so it can hold all of the copied values. --> |
|---|
| 583 | |
|---|
| 584 | <copyField source="*" dest="body_rev" /> |
|---|
| 585 | |
|---|
| 586 | <!-- Similarity is the scoring routine for each document vs. a query. |
|---|
| 587 | A custom similarity may be specified here, but the default is fine |
|---|
| 588 | for most applications. --> |
|---|
| 589 | <!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> --> |
|---|
| 590 | <!-- ... OR ... |
|---|
| 591 | Specify a SimilarityFactory class name implementation |
|---|
| 592 | allowing parameters to be used. |
|---|
| 593 | --> |
|---|
| 594 | <!-- |
|---|
| 595 | <similarity class="com.example.solr.CustomSimilarityFactory"> |
|---|
| 596 | <str name="paramkey">param value</str> |
|---|
| 597 | </similarity> |
|---|
| 598 | --> |
|---|
| 599 | |
|---|
| 600 | |
|---|
| 601 | </schema> |
|---|