免费注册 查看新帖 |

Chinaunix

  平台 论坛 博客 文库
论坛 程序设计 Java lucene
最近访问板块 发新帖
查看: 1790 | 回复: 0
打印 上一主题 下一主题

lucene [复制链接]

论坛徽章:
0
跳转到指定楼层
1 [收藏(0)] [报告]
发表于 2011-09-06 10:02 |只看该作者 |倒序浏览
  package com.lucene.search;

  import java.io.IOException;

  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.document.Document;
  import org.apache.lucene.document.Field;
  import org.apache.lucene.document.Field.Index;
  import org.apache.lucene.document.Field.Store;
  import org.apache.lucene.document.Field.TermVector;
  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.index.IndexWriter;
  import org.apache.lucene.index.Term;
  import org.apache.lucene.index.TermFreqVector;
  import org.apache.lucene.search.BooleanClause;
  import org.apache.lucene.search.BooleanQuery;
  import org.apache.lucene.search.Hits;
  import org.apache.lucene.search.IndexSearcher;
  import org.apache.lucene.search.TermQuery;
  import org.apache.lucene.store.RAMDirectory;

  /**
   * Small demo of term vectors: indexes three documents into an in-memory
   * directory, looks one up by title, and then searches for documents whose
   * "subject" field shares terms with the one that was found.
   */
  public class TermVectorTest {

      Analyzer analyzer = new StandardAnalyzer();

      RAMDirectory directory = new RAMDirectory();

      /**
       * Builds the index from scratch with three documents. Each document has
       * "title", "author" and "subject" fields; only "subject" is indexed with
       * a term vector, which {@link #docsLike(int)} later reads.
       *
       * @throws IOException if writing to the index fails
       */
      public void index() throws IOException {
          // "true" asks the writer to create a new index, replacing any existing one.
          IndexWriter indexWriter = new IndexWriter(directory, analyzer, true);
          try {
              Document doc1 = new Document();
              doc1.add(new Field("title", "java", Store.YES, Index.TOKENIZED));
              doc1.add(new Field("author", "John", Store.YES, Index.TOKENIZED));
              doc1.add(new Field("subject", "java一门编程语言", Store.YES, Index.TOKENIZED,
                      TermVector.WITH_POSITIONS_OFFSETS));
              indexWriter.addDocument(doc1);

              Document doc2 = new Document();
              doc2.add(new Field("title", "english", Store.YES, Index.TOKENIZED));
              doc2.add(new Field("author", "Lucy", Store.YES, Index.TOKENIZED));
              doc2.add(new Field("subject", "英语用的人很多", Store.YES, Index.TOKENIZED,
                      TermVector.WITH_POSITIONS_OFFSETS));
              indexWriter.addDocument(doc2);

              Document doc3 = new Document();
              doc3.add(new Field("title", "asp", Store.YES, Index.TOKENIZED));
              doc3.add(new Field("author", "Lily", Store.YES, Index.TOKENIZED));
              doc3.add(new Field("subject", "asp很多人用", Store.YES, Index.TOKENIZED,
                      TermVector.WITH_POSITIONS_OFFSETS));
              indexWriter.addDocument(doc3);

              indexWriter.optimize();
          } finally {
              // Close the writer even when adding or optimizing fails.
              indexWriter.close();
          }
      }

      /**
       * Searches for documents whose title is "java", prints each hit, and then
       * prints the documents related to it via {@link #docsLike(int)}.
       *
       * @throws IOException if reading the index fails
       */
      public void searcher() throws IOException {
          IndexSearcher searcher = new IndexSearcher(directory);
          try {
              // Look up the entries whose title is "java".
              TermQuery query = new TermQuery(new Term("title", "java"));
              Hits hits = searcher.search(query);
              // Of the documents added by index(), only doc1 carries this title.
              for (int i = 0; i < hits.length(); i++) {
                  Document doc = hits.doc(i);
                  System.out.println(describe(doc));
                  System.out.println("相关的书:");
                  docsLike(hits.id(i));
              }
          } finally {
              // The hits are only read inside the try block, so closing here is safe.
              searcher.close();
          }
      }

      /**
       * Prints the documents whose "subject" field contains at least one of the
       * terms stored in the "subject" term vector of the given document. The
       * given document itself is not excluded from the query. Prints nothing
       * when the document has no term vector for "subject".
       *
       * @param id internal Lucene document number of the reference document
       * @throws IOException if reading the index fails
       */
      public void docsLike(int id) throws IOException {
          IndexReader reader = IndexReader.open(directory);
          try {
              TermFreqVector vector = reader.getTermFreqVector(id, "subject");
              if (vector == null) {
                  // No term vector available for this document/field: nothing to compare.
                  return;
              }

              // Fetch the term array once instead of on every loop iteration.
              String[] terms = vector.getTerms();
              BooleanQuery query = new BooleanQuery();
              for (int j = 0; j < terms.length; j++) {
                  TermQuery tq = new TermQuery(new Term("subject", terms[j]));
                  query.add(tq, BooleanClause.Occur.SHOULD);
              }

              // Search through the reader that is already open rather than opening another.
              IndexSearcher searcher = new IndexSearcher(reader);
              try {
                  // Print while the searcher is still open; the hits are read from it.
                  printResult(searcher.search(query));
              } finally {
                  searcher.close();
              }
          } finally {
              reader.close();
          }
      }

      /**
       * Prints title, author and subject of every hit, one line per document.
       *
       * @param hits search results to print
       * @throws IOException if a hit document cannot be loaded
       */
      public void printResult(Hits hits) throws IOException {
          for (int i = 0; i < hits.length(); i++) {
              System.out.println(describe(hits.doc(i)));
          }
      }

      /** Formats the stored title, author and subject of a document as one output line. */
      private static String describe(Document doc) {
          return "书名:" + doc.get("title") + " 作者: " + doc.get("author") + " 简介: "
                  + doc.get("subject");
      }

      /**
       * Builds the in-memory index and runs the demo search.
       *
       * @param args unused
       * @throws IOException if indexing or searching fails
       */
      public static void main(String[] args) throws IOException {
          TermVectorTest test = new TermVectorTest();
          test.index();
          test.searcher();
      }
  }
复制代码
您需要登录后才可以回帖 登录 | 注册

本版积分规则 发表回复

  

北京盛拓优讯信息技术有限公司. 版权所有 京ICP备16024965号-6 北京市公安局海淀分局网监中心备案编号:11010802020122 niuxiaotong@pcpop.com 17352615567
未成年举报专区
中国互联网协会会员  联系我们:huangweiwei@itpub.net
感谢所有关心和支持过ChinaUnix的朋友们 转载本站内容请注明原作者名及出处

清除 Cookies - ChinaUnix - Archiver - WAP - TOP