Lucene3.6第一篇--创建索引
Lucene 3.6 source and binary code download
import java.io.File; import java.io.FileReader; import java.io.IOException; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.Directory; import org.apache.lucene.store.SimpleFSDirectory; import org.apache.lucene.util.Version; public class Indexer { public static void main(String[] args) throws Exception { // args=new String[2]; // args[0]="D:/workspace/LuceneAction/lib/IndexDir"; // args[1]="D:/workspace/HaiduShare/src/com/weishangye/share"; if (args.length != 2) { throw new Exception("Usege Java: " + Indexer.class.getName() + "<indexDir> <dataDir>"); } //索引文件存放路径 Directory indexDir = new SimpleFSDirectory(new File(args[0])); //原始数据文件 File dataDir = new File(args[1]); long start = System.currentTimeMillis(); //遍历文件夹创建索引文件 int numIndexed = Index(indexDir, dataDir); long end = System.currentTimeMillis(); System.out.println("Index " + numIndexed + "files took " + (end - start) + " milliseconds"); } public static int Index(Directory indexDir, File dataDir) throws IOException { if (!dataDir.exists() || !dataDir.isDirectory()) { throw new IOException(dataDir+"is not exist or a directory"); } IndexWriterConfig config=new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36)); IndexWriter writer=new IndexWriter(indexDir,config); //是否合并索引文件(过时) //writer.setUseCompoundFile(false); IndexDirectory(writer,dataDir); int numIndexed=writer.numDocs(); //优化索引(过时) //writer.optimize(); writer.close(); return numIndexed; } private static void IndexDirectory(IndexWriter writer,File dir) throws IOException{ File[] files =dir.listFiles(); for (int i = 0; i < files.length; i++) { File f=files[i]; if (f.isDirectory()) { IndexDirectory(writer, f); } else { IndexFile(writer,f); } } } private static void IndexFile(IndexWriter writer,File f) throws 
IOException { if (f.isHidden()||!f.exists()||!f.canRead()) { return; } System.out.println("Indexing "+f.getCanonicalPath()); Document doc=new Document(); doc.add(new Field("contents", new FileReader(f))); doc.add(new Field("filename",new FileReader(f.getCanonicalPath()))); writer.addDocument(doc); } }
package com.meetlucene; import java.io.File; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.SimpleFSDirectory; import org.apache.lucene.util.Version; public class Searcher { public static final String FIELD_FILENAME = "filename"; public static final String FIELD_CONTENTS = "contents"; public static void main(String[] args) throws Exception { args=new String[2]; args[0]="E:/workspace/LuceneAction/lib/IndexDir"; args[1]="Solr"; if (args.length!=2) { throw new Exception("Usage:Java "+Searcher.class.getName() +" <indexDir><query>"); } //Indexer所创建的索引目录 File indexDir=new File(args[0]); //Directory indexDir=new SimpleFSDirectory(); String q=args[1]; if (!indexDir.exists()||!indexDir.isDirectory()) { throw new Exception(indexDir+ " dose not exist or is not a directory"); } search(indexDir,q); } public static void search(File indexDir,String q) throws Exception{ Directory faDir=new SimpleFSDirectory(indexDir); IndexReader indexReader=IndexReader.open(faDir); IndexSearcher indexSearcher=new IndexSearcher(indexReader); Analyzer analyzer=new StandardAnalyzer(Version.LUCENE_36); QueryParser queryParser =new QueryParser(Version.LUCENE_36, FIELD_CONTENTS, analyzer); Query query=queryParser.parse(q); long start =System.currentTimeMillis(); //@deprecated Use {@link TopScoreDocCollector} and {@link TopDocs} instead. Hits will be removed in Lucene 3.0. 
//org.apache.lucene.search.Hit TopDocs topDocs=indexSearcher.search(query, 10); long end =System.currentTimeMillis(); System.out.println("Found " + topDocs.totalHits + " document(s)(in " + (end - start) + " milliseconds) that matched query \'" + q + "\':"); for (ScoreDoc sd : topDocs.scoreDocs) { Document doc=indexSearcher.doc(sd.doc); System.out.println(sd.doc+"-->"+doc.get(FIELD_FILENAME)+"-->"+doc.get(FIELD_CONTENTS)); } } }
版权声明:本文为a282421083原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。