以前做的都是一些应用系统,这次还是第一次接触搜索引擎开发。这个搜索引擎我用的是比较通用的 Lucene 来实现的,自己也不是很懂,反正是马马虎虎弄出来了,欢迎高手拍砖。上代码:
1. 添加 Lucene 核心 jar 包(我这里是用 Maven 管理项目的,所以直接截图吧;Lucene 用的是 2.4.0 版本)
2. 编写 Lucene 工具类:
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.MultiFieldQueryParser; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Filter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Searcher; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.WildcardQuery;public class Digest {
private static String indexPath = null; /** * 构造lucence的document集合,将数据以document对象写入lucence 索引库目录 * * @param rs * @return * @throws SQLException */ public static void goodsListToLucence(List<Goods> goodslist,//这个goodslist是从数据库里查询出来的Model的列表,也就是要放入索引库的数据(可以随便弄) ServletContext context) { indexPath = context.getRealPath("") + "/lucence/"; /* lucence 索引库目录 */ File fileDir = new File(indexPath); if (!fileDir.exists()) { fileDir.mkdirs(); }List<Document> list = new ArrayList<Document>();
Document doc = null; //将数据便利放入Document中,并创建List<Document> for (Goods goods : goodslist) { doc = new Document(); doc.add(new Field("id", goods.getId() + "", Store.COMPRESS, Index.ANALYZED)); doc.add(new Field("code", goods.getCode(), Store.COMPRESS, Index.ANALYZED)); doc.add(new Field("goodstitle", goods.getGoodstitle(), Store.COMPRESS, Index.ANALYZED)); list.add(doc); }/**
*上面的都是在构造Document的list数据,因为Lucence好像只会去解析Document *下面的才是真正的重头戏把数据写入lucence 索引库目录 * */ IndexWriter indexWriter; try { indexWriter = new IndexWriter(indexPath, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED); for (Document lucendoc : list) { indexWriter.addDocument(lucendoc); } // optimize()方法是对索引进行优化 indexWriter.optimize(); indexWriter.close(); } catch (Exception e) { e.printStackTrace(); } } /** * 测试 写入lucence和从lucence查询含有关键词的数据,并将关键词高亮显示 * (这里搜索的是goods表中的goodstitle和code,也就是拿着关键词到这两个字段里的数据去找,其中goodstitle要求对其关键字全模糊并且关键字高亮(这里高亮我没有用那个什么highar插件,自己随便写的字符串截取的方式实现的)) * 这里List<goods>就胡乱写些数据 * */ public static void main(String args[]) { /* *测试把数据写入lucence索引库目录 * */ List<Goods> goodsList = new ArrayList<Goods>(); Goods goods = new Goods(); goods.setId(1);goods.setGoodstitle("sdsdsdsddsdsds");goods.setCode("1233code"); goodsList.add(goods); goodsListToLucence(goodsList,ServletContext context);//第二个数据是lucence索引的目录路径(写入完成) /* *测试把数据从lucence索引库目录里根据关键字拿出来 * */ String[] fields={"code", "goodstitle"};//要参与关键字查询的字段 String querie="关键字"; BooleanClause.Occur[] clauses = { BooleanClause.Occur.SHOULD,BooleanClause.Occur.SHOULD}; Query query; try { Query query = MultiFieldQueryParser.parse(querie, fields, clauses, new StandardAnalyzer());//多字段查询 Query mquery = new WildcardQuery(new Term("goodstitle", "*"+querie+"*"));//需要进行模糊查询的字段 BooleanQuery bquery = new BooleanQuery();//构造多重符合查询方式的query bquery.add(query, BooleanClause.Occur.SHOULD);//SHOULD或的意思 bquery.add(mquery, BooleanClause.Occur.SHOULD); Searcher searcher = new IndexSearcher(indexPath); Filter filter = null; TopDocs topDocs = searcher.search(query, filter, 10);Goods goods2 = null;
// 打印结果 for (ScoreDoc scoreDoc : topDocs.scoreDocs) { Document document = indexSearcher.doc(scoreDoc.doc); // 根据编号取出相应的文档 goods2 = new Goods(); goods2.setId(Long.parseLong(document.get("id"))); if(document.get("goodsId") !=null && !document.get("goodsId").equals("")) { goods2.setGoodsId(Long.parseLong(document.get("goodsId"))); } if(document.get("code") !=null && !document.get("code").equals("")) { goods2.setCode(document.get("code")); } if(document.get("goodstitle") !=null && !document.get("goodstitle").equals("")) { goods2.setGoodstitle(document.get("goodstitle").replaceAll(querie,"<font color='red'>"+ querie+ "</font>"));//高亮显示,我不想用那网上的,太复杂 } System.out.println(goods2);//查看从lucence索引目录中获得的数据 } } catch (Exception e) { e.printStackTrace(); } }