需求描述:为某个文件夹A下的所有后缀名为.txt的文件创建索引,索引文件存放于文件夹B下
开发环境:Lucene 3.4.0 + Eclipse Indigo + JDK 1.7.0,配置如下:
为文件创建索引的是mytest包下的Indexer类,具体代码如下:
View Code
package mytest;

import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.io.FileReader;

// From chapter 1

/**
 * Builds a Lucene index for the {@code .txt} files found directly inside a
 * data directory (sub-directories are not traversed).
 *
 * <p>Each indexed file becomes one {@link Document} with three fields:
 * {@code contents} (read from the file through a {@link FileReader}),
 * {@code filename} and {@code fullpath} (both stored, not analyzed).
 */
public class Indexer {

  /** Index directory used when no command-line argument is supplied. */
  private static final String DEFAULT_INDEX_DIR =
      "E://eclipse//javaProject//lucene-3.4.0//indexDir";

  /** Data directory used when fewer than two command-line arguments are supplied. */
  private static final String DEFAULT_DATA_DIR =
      "E://eclipse//javaProject//lucene-3.4.0//dataDir";

  /** Writer that receives every document; opened in the constructor, released by {@link #close()}. */
  private final IndexWriter writer;

  /**
   * Indexes the data directory and prints how many documents the index holds
   * and how long the run took.
   *
   * @param args optional overrides: {@code args[0]} is the index directory,
   *             {@code args[1]} is the data directory; missing values fall
   *             back to the built-in defaults
   * @throws Exception if the index cannot be opened or a file cannot be indexed
   */
  public static void main(String[] args) throws Exception {
    String indexDir = args.length > 0 ? args[0] : DEFAULT_INDEX_DIR; // create index in this directory
    String dataDir = args.length > 1 ? args[1] : DEFAULT_DATA_DIR;   // index *.txt files from this directory

    long start = System.currentTimeMillis();
    Indexer indexer = new Indexer(indexDir);
    int numIndexed;
    try {
      numIndexed = indexer.index(dataDir, new TextFilesFilter());
    } finally {
      // Always release the writer, even when indexing fails part-way.
      indexer.close();
    }
    long end = System.currentTimeMillis();

    System.out.println("Indexing " + numIndexed + " files took "
        + (end - start) + " milliseconds");
  }

  /**
   * Opens an {@link IndexWriter} on the given directory.
   *
   * @param indexDir file-system path of the directory that will hold the index
   * @throws IOException if the directory or the writer cannot be opened
   */
  public Indexer(String indexDir) throws IOException {
    Directory dir = FSDirectory.open(new File(indexDir));
    // The boolean "create" argument is true, and no field-length limit is applied.
    writer = new IndexWriter(dir,
        new StandardAnalyzer(Version.LUCENE_34),
        true,
        IndexWriter.MaxFieldLength.UNLIMITED);
  }

  /**
   * Closes the underlying {@link IndexWriter}.
   *
   * @throws IOException if the writer fails to close
   */
  public void close() throws IOException {
    writer.close();
  }

  /**
   * Indexes every regular, visible, readable file directly inside
   * {@code dataDir} that the filter accepts.
   *
   * @param dataDir path of the directory whose files are indexed
   * @param filter  decides which files are indexed; {@code null} accepts all
   * @return the value of {@code writer.numDocs()} after indexing
   * @throws IOException if {@code dataDir} cannot be listed (for example it
   *                     does not exist or is not a directory)
   * @throws Exception   if reading or indexing a file fails
   */
  public int index(String dataDir, FileFilter filter) throws Exception {
    File[] files = new File(dataDir).listFiles();
    if (files == null) {
      // File.listFiles() signals "not a directory" or an I/O error with null;
      // report that clearly instead of failing with a NullPointerException.
      throw new IOException("Cannot list files in data directory: " + dataDir);
    }

    for (File f : files) {
      if (!f.isDirectory()
          && !f.isHidden()
          && f.exists()
          && f.canRead()
          && (filter == null || filter.accept(f))) {
        indexFile(f);
      }
    }
    return writer.numDocs();
  }

  /** Accepts only files whose name ends with {@code .txt}, ignoring case. */
  private static class TextFilesFilter implements FileFilter {
    @Override
    public boolean accept(File path) {
      return path.getName().toLowerCase().endsWith(".txt");
    }
  }

  /**
   * Builds the Lucene document that represents one file.
   *
   * @param f the file to describe
   * @return a document with {@code contents}, {@code filename} and
   *         {@code fullpath} fields
   * @throws Exception if the file cannot be opened or its canonical path
   *                   cannot be resolved
   */
  protected Document getDocument(File f) throws Exception {
    Document doc = new Document();
    // File content; note that FileReader decodes with the platform default charset.
    doc.add(new Field("contents", new FileReader(f)));
    // File name, stored verbatim.
    doc.add(new Field("filename", f.getName(),
        Field.Store.YES, Field.Index.NOT_ANALYZED));
    // Canonical full path, stored verbatim.
    doc.add(new Field("fullpath", f.getCanonicalPath(),
        Field.Store.YES, Field.Index.NOT_ANALYZED));
    return doc;
  }

  /**
   * Logs the file's canonical path and adds its document to the index.
   *
   * @param f the file to index
   * @throws Exception if the document cannot be built or added
   */
  private void indexFile(File f) throws Exception {
    System.out.println("Indexing " + f.getCanonicalPath());
    Document doc = getDocument(f);
    writer.addDocument(doc);
  }
}
程序运行结果如下:
程序运行结束后,在指定目录下生成索引文件:
推荐一个自己业余时间开发的引擎,()