Lucene 中获取没有存储的字段值的几种方法

一般来说,如果想要从Lucene索引中获取Field的值,需要在索引阶段设置Field.Store.YES,然后在搜索阶段得到TopDocs对象之后,遍历其中的ScoreDoc,根据文档编号取出Document,再通过Document获取存储在索引中的值。但是我们都知道,存储字段是需要占用硬盘空间的。如果既想追求极致的存储空间,又想获取Field的值,那么在不存储的情况下该如何获取呢?仔细思索一下,在只索引不存储的情况下,Lucene依然可以判断搜索是否命中,这说明Lucene索引中依然以词项(Term)的形式保存着Field的内容,否则搜索阶段无法判断是否匹配。需要注意的是,这样取回的是分词之后的词项,只有对不分词的字段(例如StringField)才等同于原始值。本文就是探讨在这种情形下,使用Lucene的核心包获取没有存储的Field的值的几种方法,如果你还有其它不同的方法请留言。

  • testGetFieldByStore 演示存储Field值时如何获取
  • testGetFieldByTerms 演示通过Terms获取没有存储的Field值
  • testGetFieldByFieldDocWithSorted 演示通过排序结果中的FieldDoc获取没有存储的Field值
  • testGetFieldByTermVector 演示通过TermVector获取没有存储的值
  • testGetFieldByTermVectors 演示通过TermVectors获取没有存储的值

这里补充一下,在lucene-suggest包中有LuceneDictionary类,通过该类的getEntryIterator方法也能获取没有存储的Field的值,不过其本质和通过Terms获取的方式一样,在此不再列举。源码示例如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import org.junit.Test;
import java.io.IOException;
import static org.apache.lucene.search.SortField.Type.STRING;
/**
 * Demonstrates several ways of reading a field's value back from a Lucene index,
 * including cases where the field was indexed without {@code Field.Store.YES}.
 * <p>
 * Each test builds a four-document in-memory index (IDX = TEST01..TEST04,
 * title = TITLE01..TITLE04) and prints one {@code IDX=>title} line per document.
 * Every test closes the directory when it finishes, so an instance of this class
 * is only good for a single test (JUnit creates a fresh instance per test method).
 * </p>
 * <p>
 * Created by wangxu on 2017/10/10 17:33.
 * </p>
 * <p>
 * Description: Lucene 6.5.0
 * </p>
 *
 * @author Wang Xu
 * @version V1.0.0
 * @since V1.0.0 <br/>
 * WebSite: http://codepub.cn <br>
 * Licence: Apache v2 License
 */
public class GetNonStoredFieldDemo {
    /** Name of the field whose value the demos try to recover. */
    private static final String FIELD_IDX = "IDX";
    /** Name of the companion field printed on the right-hand side of each line. */
    private static final String FIELD_TITLE = "title";
    /** Number of sample documents written by every {@code initIndexFor*} method. */
    private static final int DOC_COUNT = 4;

    private final RAMDirectory ramDirectory = new RAMDirectory();
    private final IndexWriter indexWriter =
            new IndexWriter(ramDirectory, new IndexWriterConfig(new WhitespaceAnalyzer()));

    /**
     * @throws IOException if the {@link IndexWriter} field initializer fails
     */
    public GetNonStoredFieldDemo() throws IOException {
    }

    /**
     * Baseline: both fields are stored, so they are read from the stored document.
     *
     * @throws IOException on any index read/write failure
     */
    @Test
    public void testGetFieldByStore() throws IOException {
        // Resources are closed in reverse order (reader first, then directory),
        // even when an exception is thrown in between.
        try (RAMDirectory directory = ramDirectory) {
            initIndexForStore();
            try (DirectoryReader reader = DirectoryReader.open(directory)) {
                IndexSearcher indexSearcher = new IndexSearcher(reader);
                for (ScoreDoc scoreDoc : searchAll(indexSearcher).scoreDocs) {
                    Document doc = indexSearcher.doc(scoreDoc.doc);
                    System.out.println(doc.get(FIELD_IDX) + "=>" + doc.get(FIELD_TITLE));
                }
            }
        }
    }

    /**
     * Neither field is stored; the values are read from the term dictionaries.
     * <p>
     * NOTE: the two dictionaries are walked in parallel, so the pairing is purely
     * positional (each dictionary is in sorted term order). It lines up here only
     * because TESTnn and TITLEnn sort the same way; it is not a per-document lookup.
     * </p>
     *
     * @throws IOException on any index read/write failure
     */
    @Test
    public void testGetFieldByTerms() throws IOException {
        try (RAMDirectory directory = ramDirectory) {
            initIndexForTerms();
            try (DirectoryReader reader = DirectoryReader.open(directory)) {
                Fields fields = MultiFields.getFields(reader);
                Terms idx = fields == null ? null : fields.terms(FIELD_IDX);
                Terms title = fields == null ? null : fields.terms(FIELD_TITLE);
                // or you can use like this:
                //   MultiFields.getTerms(reader, "IDX").iterator()
                if (idx == null || title == null) {
                    // terms(...) returns null when the field has no indexed terms.
                    return;
                }
                TermsEnum idxIter = idx.iterator();
                TermsEnum titleIter = title.iterator();
                BytesRef idxRef;
                BytesRef titleRef;
                // Stop as soon as either dictionary is exhausted instead of
                // dereferencing a null BytesRef.
                while ((idxRef = idxIter.next()) != null && (titleRef = titleIter.next()) != null) {
                    System.out.println(idxRef.utf8ToString() + "=>" + titleRef.utf8ToString());
                }
            }
        }
    }

    /**
     * IDX is only a {@link SortedDocValuesField}; sorting on it makes the search
     * return {@link FieldDoc}s whose {@code fields[0]} carries the sort value.
     *
     * @throws IOException on any index read/write failure
     */
    @Test
    public void testGetFieldByFieldDocWithSorted() throws IOException {
        try (RAMDirectory directory = ramDirectory) {
            initIndexForFieldDocWithSorted();
            try (DirectoryReader reader = DirectoryReader.open(directory)) {
                IndexSearcher indexSearcher = new IndexSearcher(reader);
                Query query = new MatchAllDocsQuery();
                // Lucene rejects numHits == 0, so ask for at least one hit.
                int limit = Math.max(1, indexSearcher.count(query));
                // must use the overload which returns TopFieldDocs
                TopFieldDocs search =
                        indexSearcher.search(query, limit, new Sort(new SortField(FIELD_IDX, STRING)));
                for (ScoreDoc scoreDoc : search.scoreDocs) {
                    Object sortValue = ((FieldDoc) scoreDoc).fields[0];
                    if (sortValue instanceof BytesRef) {
                        String title = indexSearcher.doc(scoreDoc.doc).get(FIELD_TITLE);
                        System.out.println(((BytesRef) sortValue).utf8ToString() + "=>" + title);
                    }
                }
            }
        }
    }

    /**
     * IDX is indexed with term vectors; the terms are read per document through
     * {@code IndexReader.getTermVector(doc, field)}.
     *
     * @throws IOException on any index read/write failure
     */
    @Test
    public void testGetFieldByTermVector() throws IOException {
        try (RAMDirectory directory = ramDirectory) {
            initIndexForTermVector();
            try (DirectoryReader reader = DirectoryReader.open(directory)) {
                IndexSearcher indexSearcher = new IndexSearcher(reader);
                for (ScoreDoc scoreDoc : searchAll(indexSearcher).scoreDocs) {
                    int doc = scoreDoc.doc;
                    Terms idx = reader.getTermVector(doc, FIELD_IDX);
                    printTermsWithTitle(idx, indexSearcher.doc(doc).get(FIELD_TITLE));
                }
            }
        }
    }

    /**
     * Same as {@link #testGetFieldByTermVector()} but goes through
     * {@code IndexReader.getTermVectors(doc)} and then picks the IDX field.
     *
     * @throws IOException on any index read/write failure
     */
    @Test
    public void testGetFieldByTermVectors() throws IOException {
        try (RAMDirectory directory = ramDirectory) {
            initIndexForTermVector();
            try (DirectoryReader reader = DirectoryReader.open(directory)) {
                IndexSearcher indexSearcher = new IndexSearcher(reader);
                for (ScoreDoc scoreDoc : searchAll(indexSearcher).scoreDocs) {
                    int doc = scoreDoc.doc;
                    Fields termVectors = reader.getTermVectors(doc);
                    // getTermVectors returns null for a document without term vectors.
                    Terms idx = termVectors == null ? null : termVectors.terms(FIELD_IDX);
                    printTermsWithTitle(idx, indexSearcher.doc(doc).get(FIELD_TITLE));
                }
            }
        }
    }

    /** Runs a match-all query sized to return every document in the index. */
    private static TopDocs searchAll(IndexSearcher searcher) throws IOException {
        Query query = new MatchAllDocsQuery();
        // Lucene rejects numHits == 0, so ask for at least one hit on an empty index.
        return searcher.search(query, Math.max(1, searcher.count(query)));
    }

    /** Prints one {@code term=>title} line per term; does nothing when {@code terms} is null. */
    private static void printTermsWithTitle(Terms terms, String title) throws IOException {
        if (terms == null) {
            return;
        }
        TermsEnum iterator = terms.iterator();
        BytesRef bytesRef;
        while ((bytesRef = iterator.next()) != null) {
            System.out.println(bytesRef.utf8ToString() + "=>" + title);
        }
    }

    /** Returns the IDX value of the n-th sample document, e.g. {@code TEST01}. */
    private static String idxValue(int n) {
        return "TEST0" + n;
    }

    /** Returns the title value of the n-th sample document, e.g. {@code TITLE01}. */
    private static String titleValue(int n) {
        return "TITLE0" + n;
    }

    /** Writes the sample documents with both fields indexed and stored, then closes the writer. */
    private void initIndexForStore() throws IOException {
        try (IndexWriter writer = indexWriter) {
            for (int n = 1; n <= DOC_COUNT; n++) {
                Document document = new Document();
                document.add(new StringField(FIELD_IDX, idxValue(n), Field.Store.YES));
                document.add(new StringField(FIELD_TITLE, titleValue(n), Field.Store.YES));
                writer.addDocument(document);
            }
        }
    }

    /** Writes the sample documents with both fields indexed but not stored, then closes the writer. */
    private void initIndexForTerms() throws IOException {
        try (IndexWriter writer = indexWriter) {
            for (int n = 1; n <= DOC_COUNT; n++) {
                Document document = new Document();
                document.add(new StringField(FIELD_IDX, idxValue(n), Field.Store.NO));
                document.add(new StringField(FIELD_TITLE, titleValue(n), Field.Store.NO));
                writer.addDocument(document);
            }
        }
    }

    /**
     * Writes the sample documents with IDX indexed (docs only) with term vectors and
     * title stored, then closes the writer.
     */
    private void initIndexForTermVector() throws IOException {
        FieldType fieldType = new FieldType();
        fieldType.setStoreTermVectors(true);
        fieldType.setIndexOptions(IndexOptions.DOCS);
        try (IndexWriter writer = indexWriter) {
            for (int n = 1; n <= DOC_COUNT; n++) {
                Document document = new Document();
                document.add(new Field(FIELD_IDX, idxValue(n), fieldType));
                document.add(new StringField(FIELD_TITLE, titleValue(n), Field.Store.YES));
                writer.addDocument(document);
            }
        }
    }

    /**
     * Writes the sample documents with IDX as a sorted doc-values field and title
     * stored, then closes the writer.
     */
    private void initIndexForFieldDocWithSorted() throws IOException {
        try (IndexWriter writer = indexWriter) {
            for (int n = 1; n <= DOC_COUNT; n++) {
                Document document = new Document();
                document.add(new SortedDocValuesField(FIELD_IDX, new BytesRef(idxValue(n))));
                document.add(new StringField(FIELD_TITLE, titleValue(n), Field.Store.YES));
                writer.addDocument(document);
            }
        }
    }
}

坚持原创技术分享,您的支持将鼓励我继续创作!