搜索引擎之全文搜索算法功能实现(基于Lucene)

之前做去转盘网的时候,我已经公开了非全文搜索的代码,需要的朋友希望能够前去阅读我的博客。本文主要讨论如何进行全文搜索,由于本人花了很长时间设计了新作:观点,观点对全文搜索的要求还是很高的,所以我又花了不少时间研究全文搜索,你可以先体验下:点我搜索。废话也不多说了,直接上代码:

public Map<String,Object> articleSearchAlgorithms(SearchCondition condition,IndexSearcher searcher) throws ParseException, IOException{ Map<String,Object> map =new HashMap<String,Object>(); String[] filedsList=condition.getFiledsList(); String keyWord=condition.getKeyWord(); int currentPage=condition.getCurrentPage(); int pageSize=condition.getPageSize(); String sortField=condition.getSortField(); boolean isASC=condition.isDESC(); String sDate=condition.getsDate(); String eDate=condition.geteDate(); String classify=condition.getClassify(); //过滤终结字符 keyWord=escapeExprSpecialWord(keyWord); BooleanQuery q1 = new BooleanQuery(); BooleanQuery q2 = new BooleanQuery(); BooleanQuery booleanQuery = new BooleanQuery(); //boolean查询 if(classify!=null&&(classify.equals("guanzhi")||classify.equals("opinion")||classify.equals("write"))){ String typeId="1";//默认言论 if(classify.equals("guanzhi")){ typeId="2"; } if(classify.equals("opinion")){ typeId="3"; } Query termQuery = new TermQuery(new Term("typeId",typeId)); q1.add(termQuery,BooleanClause.Occur.MUST); } if(sDate!=null&&eDate!=null){//是否范围查询由这两个参数决定 Query rangeQuery = new TermRangeQuery("writingTime", new BytesRef(sDate), new BytesRef(eDate),true, true); q1.add(rangeQuery,BooleanClause.Occur.MUST); } Sort sort = new Sort(); // 排序 sort.setSort(SortField.FIELD_SCORE); if(sortField!=null){ sort.setSort(new SortField(sortField, SortField.Type.STRING, isASC)); } int start = (currentPage - 1) * pageSize; int hm = start + pageSize; TopFieldCollector res = TopFieldCollector.create(sort,hm,false, false, false, false); //完全匹配查询 Term t0=new Term(filedsList[1],keyWord); TermQuery termQuery = new TermQuery(t0);//两种高度匹配的查询 q2.add(termQuery,BooleanClause.Occur.SHOULD); //前缀匹配 Term t1=new Term(filedsList[1],keyWord); PrefixQuery prefixQuery=new PrefixQuery(t1); q2.add(prefixQuery,BooleanClause.Occur.SHOULD); //短语,相似度匹配,适用于分词的内容 for(int i=0;i<filedsList.length;i++){ //多字段term查询算法 if(i!=1){ PhraseQuery phraseQuery=new PhraseQuery(); Term ts0=new Term(filedsList[i],keyWord); phraseQuery.add(ts0); FuzzyQuery fQuery=new FuzzyQuery(new Term(filedsList[i],keyWord),2);//最后相似度查询 q2.add(phraseQuery,BooleanClause.Occur.SHOULD); q2.add(fQuery,BooleanClause.Occur.SHOULD);//后缀相似的拿出来 } } MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_47,filedsList,analyzer); queryParser.setDefaultOperator(QueryParser.AND_OPERATOR); Query query = queryParser.parse(keyWord); q2.add(query,BooleanClause.Occur.SHOULD); //必须加逻辑判断,否则结果是不同的 if(q1!=null && q1.toString().length()>0){ booleanQuery.add(q1,BooleanClause.Occur.MUST); } if(q2!=null && q2.toString().length()>0){ booleanQuery.add(q2,BooleanClause.Occur.MUST); } searcher.search(booleanQuery, res); long amount = res.getTotalHits(); TopDocs tds = res.topDocs(start, pageSize); map.put("amount",amount); map.put("tds",tds); map.put("query",booleanQuery); return map; }

内容版权声明:除非注明,否则皆为本站原创文章。

转载注明出处:https://www.heiqu.com/zyzwzj.html