System.out.println("Indexing " + numIndexed + " files took "
+ (end - start) + " milliseconds");
}
// Index writer that every document is added to; assigned in the constructor and closed by close().
private IndexWriter writer;
public Indexer(String indexDir) throws IOException {
Directory dir = FSDirectory.open(new File(indexDir));
writer = new IndexWriter(dir,indexWriterConfig());
//在这里进行索引的调试
}
/**
 * Closes the underlying index writer.
 *
 * @throws IOException if the writer reports an error while closing
 */
public void close() throws IOException {
    this.writer.close();
}
/**
 * Builds the configuration used for the index writer: a
 * {@code SmartChineseAnalyzer}, with both the analyzer and the configuration
 * created for {@code Version.LUCENE_47}.
 *
 * @return a new writer configuration
 */
private IndexWriterConfig indexWriterConfig()
{
    return new IndexWriterConfig(
            Version.LUCENE_47,
            new SmartChineseAnalyzer(Version.LUCENE_47));
}
/**
 * Indexes every eligible file found directly inside {@code dataDir}
 * (subdirectories are skipped, not descended into).
 *
 * <p>A file is eligible when it is not a directory, is not hidden, exists,
 * is readable, and is accepted by {@code filter}. A {@code null} filter
 * accepts every file.
 *
 * @param dataDir path of the directory whose files are indexed
 * @param filter  optional filter selecting which files to index; may be {@code null}
 * @return the value of {@code writer.numDocs()} after indexing
 *         (NOTE(review): presumably this also counts documents that were
 *         already in the index before this call; confirm against the Lucene docs)
 * @throws IOException if {@code dataDir} is not a directory or its contents cannot be listed
 * @throws Exception   if indexing one of the files fails
 */
public int index(String dataDir, FileFilter filter)
        throws Exception {
    File[] files = new File(dataDir).listFiles();
    if (files == null) {
        // listFiles() signals "not a directory" or an I/O error by returning null
        // instead of throwing; fail with a clear message rather than an NPE below.
        throw new IOException("Cannot list files in directory: " + dataDir);
    }

    for (File f : files) {
        boolean indexable = !f.isDirectory()
                && !f.isHidden()
                && f.exists()
                && f.canRead()
                && (filter == null || filter.accept(f));
        if (indexable) {
            indexFile(f);
        }
    }

    return writer.numDocs();
}
/**
 * File filter that accepts only files whose name ends in {@code .txt},
 * compared case-insensitively.
 */
private static class TextFilesFilter implements FileFilter {
    /**
     * @param path the candidate file
     * @return {@code true} when the lower-cased file name ends with ".txt"
     */
    @Override
    public boolean accept(File path) {
        String lowerCaseName = path.getName().toLowerCase();
        return lowerCaseName.endsWith(".txt");
    }
}
/**
 * Indexes a single file line by line: the file's distinct lines are read via
 * {@link #readFileNoDup(File)} and each one is added to the index as its own
 * document, stored in the "contents" field.
 *
 * @param f the file to index
 * @throws Exception if the file cannot be read or a document cannot be added
 */
private void indexFile(File f) throws Exception {
    System.out.println("Indexing " + f.getCanonicalPath());
    for (String line : readFileNoDup(f)) {
        Document lineDoc = new Document();
        Field contents = new Field("contents", line, TextField.TYPE_STORED);
        lineDoc.add(contents);
        writer.addDocument(lineDoc);
    }
}
/**
 * Reads a UTF-8 text file and returns its lines in file order.
 *
 * <p>Lines shorter than two characters (including empty lines) are skipped.
 * Lines are returned as read: not trimmed and not de-duplicated.
 *
 * @param filePathAndName the file to read
 * @return the retained lines, in the order they appear in the file
 * @throws IOException if the file cannot be opened or read
 */
private List<String> readFile(File filePathAndName) throws IOException {
    BufferedReader reader = new BufferedReader(
            new InputStreamReader(new FileInputStream(filePathAndName), "UTF-8"));
    try {
        List<String> lines = new ArrayList<String>();
        String line;
        while ((line = reader.readLine()) != null) {
            if (line.length() >= 2) {
                lines.add(line);
            }
        }
        return lines;
    } finally {
        // Closing the outermost reader closes the whole stream chain, and doing
        // it in finally releases the file handle even when a read fails.
        reader.close();
    }
}
/**
 * Reads a UTF-8 text file and returns its distinct lines.
 *
 * <p>Each line is trimmed first; lines whose trimmed text is shorter than two
 * characters (including blank and whitespace-only lines) are skipped, so the
 * result never contains an empty string. Duplicates are removed by comparing
 * the trimmed text. Because a {@link HashSet} is used for de-duplication, the
 * order of the returned list is unspecified and need not match the file.
 *
 * @param filePathAndName the file to read
 * @return the distinct, trimmed lines of the file
 * @throws IOException if the file cannot be opened or read
 */
public static List<String> readFileNoDup(File filePathAndName)
        throws IOException {
    BufferedReader reader = new BufferedReader(
            new InputStreamReader(new FileInputStream(filePathAndName), "UTF-8"));
    try {
        Set<String> uniqueLines = new HashSet<String>();
        String rawLine;
        while ((rawLine = reader.readLine()) != null) {
            // Trim before the length check: otherwise a whitespace-only line
            // passes the check and is stored as an empty string.
            String line = rawLine.trim();
            if (line.length() >= 2) {
                uniqueLines.add(line);
            }
        }
        return new ArrayList<String>(uniqueLines);
    } finally {
        // Closing the outermost reader closes the whole stream chain, and doing
        // it in finally releases the file handle even when a read fails.
        reader.close();
    }
}
}
// Search the index that was just built
package lucene.home.clq;
/**
* Copyright Manning Publications Co.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;