直接上代码:
1 public class UserIndexService { 2 3 private final Log logger = LogFactory.getLog(UserIndexService. class); 4 private final String dirPath = "d:/temp/user"; 5 6 Analyzer analyzer = new IKAnalyzer(); 7 Directory directory = null; 8 IndexWriter writer = null; 9 IndexSearcher indexSearcher = null; 10 11 private void confirmDirs() { 12 File indexFile = new File(dirPath); 13 if (!indexFile.exists()) { 14 indexFile.mkdirs(); 15 } 16 if (!indexFile.exists() || !indexFile.canWrite()) { 17 if (logger.isDebugEnabled()) 18 logger.error("索引文件目录创建失败或不可写入!"); 19 } 20 } 21 22 public void init() { 23 confirmDirs(); 24 try { 25 File f = new File(dirPath); 26 directory = FSDirectory.open(f); 27 28 } catch (Exception e) { 29 if (logger.isDebugEnabled()) { 30 logger.error("解除索引文件锁定失败!" + e.getCause()); 31 } 32 } 33 } 34 35 public void createIndex(List<User> userList) { 36 init(); 37 try { 38 39 // 第一个参数是存放索引目录有FSDirectory(存储到磁盘上)和RAMDirectory(存储到内存中), 40 // 第二个参数是使用的分词器, 第三个:true,建立全新的索引,false,建立增量索引,第四个是建立的索引的最大长度 41 writer = new IndexWriter(directory, analyzer, true,IndexWriter.MaxFieldLength.LIMITED); 42 writer.setMergeFactor(500); 43 writer.setMaxBufferedDocs(155); 44 writer.setMaxFieldLength(Integer.MAX_VALUE); 45 writeIndex(writer, userList); 46 writer.optimize(); 47 writer.close(); 48 } catch (IOException e) { 49 // TODO Auto-generated catch block 50 e.printStackTrace(); 51 } 52 } 53 54 public List<User> search(String keyword) { 55 56 File indexFile = new File(dirPath); 57 if (!indexFile.exists()) { 58 return null; 59 } 60 Directory dir; 61 try { 62 dir = FSDirectory.open(indexFile); 63 indexSearcher = new IndexSearcher(dir); 64 indexSearcher.setSimilarity( new IKSimilarity()); 65 // 单字段查询,单条件查询 66 // Query query = IKQueryParser.parse("userInfo", keyword); 67 68 // 多字段,单条件查询 69 String[] fields = new String[] { "userInfo", "parameter1" }; 70 Query query = IKQueryParser.parseMultiField(fields, keyword); 71 72 // 多字体,单条件,多BooleanClause.Occur[] flags , 查询条件的组合方式(Or/And) 73 // BooleanClause.Occur[]数组,它表示多个条件之间的关系, 74 // BooleanClause.Occur.MUST表示 and, 75 // BooleanClause.Occur.MUST_NOT表示not, 76 // BooleanClause.Occur.SHOULD表示or. 77 // String[] fields =new String[]{"userInfo","parameter1"}; 78 // BooleanClause.Occur[] flags=new 79 // BooleanClause.Occur[]{BooleanClause.Occur.MUST,BooleanClause.Occur.SHOULD}; 80 // Query query = IKQueryParser.parseMultiField(fields, 81 // keyword,flags); 82 83 // // 多Field,多条件查询分析 84 // String[] fields =new String[]{"userInfo","parameter1"}; 85 // String[] queries = new String[]{keyword,keyword}; 86 // Query query = IKQueryParser.parseMultiField(fields,queries); 87 88 // 多Field,多条件,多Occur 查询 89 // String[] fields =new String[]{"userInfo","parameter1"}; 90 // String[] queries = new String[]{keyword,keyword}; 91 // BooleanClause.Occur[] flags=new 92 // BooleanClause.Occur[]{BooleanClause.Occur.MUST,BooleanClause.Occur.SHOULD}; 93 // Query query = 94 // IKQueryParser.parseMultiField(fields,queries,flags); 95 96 // 搜索相似度最高的20条记录 97 TopDocs topDocs = indexSearcher.search(query, 20); 98 ScoreDoc[] hits = topDocs.scoreDocs; 99 return hitsToQuery(hits, query); 100 101 } catch (IOException e) { 102 // TODO Auto-generated catch block 103 e.printStackTrace(); 104 } 105 106 return null; 107 } 108 109 private List<User> hitsToQuery(ScoreDoc[] hits, Query query) { 110 List<User> list = new ArrayList<User>(); 111 try { 112 for ( int i = 0; i < hits.length; i++) { 113 User u = new User(); 114 Document doc = indexSearcher.doc(hits[i].doc); 115 u.setUserId(Integer.parseInt(doc.get("userId"))); 116 u.setUserName(doc.get("userName")); 117 u.setUserAge(Integer.parseInt(doc.get("userAge"))); 118 // 高亮设置 119 SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter( 120 "<font color=\"red\">", "</font>"); 121 Highlighter highlighter = new Highlighter(simpleHtmlFormatter, 122 new QueryScorer(query)); 123 TokenStream tokenStream = analyzer.tokenStream("text", 124 new StringReader(doc.get("userInfo"))); 125 String userInfo = highlighter.getBestFragment(tokenStream, doc 126 .get("userInfo")); 127 if (userInfo != null) { 128 u.setUserInfo(userInfo); 129 } else { 130 u.setUserInfo(doc.get("userInfo")); 131 } 132 133 SimpleHTMLFormatter simpleHtmlFormatter1 = new SimpleHTMLFormatter( 134 "<font color=\"red\">", "</font>"); 135 Highlighter highlighter1 = new Highlighter( 136 simpleHtmlFormatter1, new QueryScorer(query)); 137 TokenStream tokenStream1 = analyzer.tokenStream("text1", 138 new StringReader(doc.get("parameter1"))); 139 String p1 = highlighter1.getBestFragment(tokenStream1, doc 140 .get("parameter1")); 141 if (p1 != null) { 142 u.setParameter1(p1); 143 } else { 144 u.setParameter1(doc.get("parameter1")); 145 } 146 147 u.setParameter2(doc.get("parameter2")); 148 u.setParameter3(doc.get("parameter3")); 149 u.setParameter4(doc.get("parameter4")); 150 list.add(u); 151 } 152 153 indexSearcher.close(); 154 return list; 155 } catch (CorruptIndexException e) { 156 // TODO Auto-generated catch block 157 e.printStackTrace(); 158 } catch (IOException e) { 159 // TODO Auto-generated catch block 160 e.printStackTrace(); 161 } catch (InvalidTokenOffsetsException e) { 162 // TODO Auto-generated catch block 163 e.printStackTrace(); 164 } 165 return null; 166 } 167 168 public void writeIndex(IndexWriter writer, List<User> userList) { 169 170 try { 171 for (User u : userList) { 172 Document doc = getDoc(u); 173 writer.addDocument(doc); 174 } 175 } catch (IOException e) { 176 // TODO Auto-generated catch block 177 e.printStackTrace(); 178 } 179 180 } 181 182 private Document getDoc(User user) { 183 System.out.println("用户ID 为" + user.getUserId() + " 索引被创建"); 184 Document doc = new Document(); 185 addField2Doc(doc, user, "userId", Store.YES, Index.NOT_ANALYZED); 186 addField2Doc(doc, user, "userName", Store.YES, Index.NOT_ANALYZED); // Index.NOT_ANALYZED 187 // 不分词,但建立索引 188 addField2Doc(doc, user, "userAge", Store.YES, Index.NOT_ANALYZED); // Index.ANALYZED 189 // 分词并且建立索引 190 addField2Doc(doc, user, "userInfo", Store.YES, Index.ANALYZED); 191 addField2Doc(doc, user, "parameter1", Store.YES, Index.ANALYZED); 192 addField2Doc(doc, user, "parameter2", Store.YES, Index.ANALYZED); 193 addField2Doc(doc, user, "parameter3", Store.YES, Index.ANALYZED); 194 addField2Doc(doc, user, "parameter4", Store.YES, Index.ANALYZED); 195 return doc; 196 } 197 198 private void addField2Doc(Document doc, Object bean, String name, Store s, 199 Index i) { 200 String value; 201 try { 202 value = BeanUtils.getProperty(bean, name); 203 if (value != null) { 204 doc.add( new Field(name, value, s, i, 205 Field.TermVector.WITH_POSITIONS_OFFSETS)); 206 } 207 } catch (IllegalAccessException e) { 208 logger.error("get bean property error", e); 209 } catch (InvocationTargetException e) { 210 logger.error("get bean property error", e); 211 } catch (NoSuchMethodException e) { 212 logger.error("get bean property error", e); 213 }
214 }
下面继续:
1 /** 2 * 没有排序,有高亮,有分页 3 * 4 * @param pageNo 5 * @param pageSize 6 * @param keyword 7 * @return 8 */ 9 public PageBean getPageQuery( int pageNo, int pageSize, String keyword) { 10 List result = new ArrayList(); 11 File indexFile = new File(dirPath); 12 if (!indexFile.exists()) { 13 return null; 14 } 15 Directory dir; 16 try { 17 dir = FSDirectory.open(indexFile); 18 indexSearcher = new IndexSearcher(dir); 19 indexSearcher.setSimilarity( new IKSimilarity()); 20 21 String[] fields = new String[] { "userInfo", "parameter1" }; 22 BooleanClause.Occur[] flags = new BooleanClause.Occur[] { 23 BooleanClause.Occur.MUST, BooleanClause.Occur.SHOULD }; 24 Query query = IKQueryParser.parseMultiField(fields, keyword, flags); 25 26 TopScoreDocCollector topCollector = TopScoreDocCollector.create( 27 indexSearcher.maxDoc(), true); 28 indexSearcher.search(query, topCollector); 29 // 查询当页的记录 30 ScoreDoc[] docs = topCollector.topDocs((pageNo - 1) * pageSize, 31 pageSize).scoreDocs; 32 33 // String[] highlightCol = {"userInfo", "parameter1"}; 34 // 高亮设置 35 SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter( 36 "<font color=\"red\">", "</font>"); 37 Highlighter highlighter = new Highlighter(simpleHtmlFormatter, 38 new QueryScorer(query)); 39 40 for (ScoreDoc scdoc : docs) { 41 User u = new User(); 42 Document doc = indexSearcher.doc(scdoc.doc); 43 // 44 // for (Fieldable fa : doc.getFields()) { 45 // System.out.println(fa.name()); 46 // String value = doc.get(fa.name()); 47 // for (String col : highlightCol) { 48 // if(fa.name().equals(col)) { 49 // // 设置高显内容 50 // TokenStream tokenStream = analyzer.tokenStream("text",new 51 // StringReader(value)); 52 // value = highlighter.getBestFragment(tokenStream, value); 53 // } 54 // } 55 // 56 // } 57 58 u.setUserId(Integer.parseInt(doc.get("userId"))); 59 u.setUserName(doc.get("userName")); 60 u.setUserAge(Integer.parseInt(doc.get("userAge"))); 61 62 TokenStream tokenStream = analyzer.tokenStream("text", 63 new StringReader(doc.get("userInfo"))); 64 String userInfo = highlighter.getBestFragment(tokenStream, doc 65 .get("userInfo")); 66 if (userInfo != null) { 67 u.setUserInfo(userInfo); 68 } else { 69 u.setUserInfo(doc.get("userInfo")); 70 } 71 72 TokenStream tokenStream1 = analyzer.tokenStream("text1", 73 new StringReader(doc.get("parameter1"))); 74 String p1 = highlighter.getBestFragment(tokenStream1, doc 75 .get("parameter1")); 76 if (p1 != null) { 77 u.setParameter1(p1); 78 } else { 79 u.setParameter1(doc.get("parameter1")); 80 } 81 82 u.setParameter2(doc.get("parameter2")); 83 u.setParameter3(doc.get("parameter3")); 84 u.setParameter4(doc.get("parameter4")); 85 result.add(u); 86 87 } 88 PageBean pb = new PageBean(); 89 pb.setCurrentPage(pageNo); // 当前页 90 pb.setPageSize(pageSize); 91 pb.setAllRow(topCollector.getTotalHits()); // hit中的记录数目 92 pb.setList(result); 93 return pb; 94 95 } catch (IOException e) { 96 // TODO Auto-generated catch block 97 e.printStackTrace(); 98 } catch (InvalidTokenOffsetsException e) { 99 // TODO Auto-generated catch block 100 e.printStackTrace(); 101 } 102 103 return null;
104 }
再来:
1 /** 2 * 排序,有高亮,有分页 3 * 4 * @param pageNo 5 * @param pageSize 6 * @param keyword 7 * @return 8 */ 9 public PageBean getPageQuery2( int pageNo, int pageSize, String keyword) { 10 List result = new ArrayList(); 11 File indexFile = new File(dirPath); 12 if (!indexFile.exists()) { 13 return null; 14 } 15 Directory dir; 16 try { 17 dir = FSDirectory.open(indexFile); 18 indexSearcher = new IndexSearcher(dir); 19 indexSearcher.setSimilarity( new IKSimilarity()); 20 21 String[] fields = new String[] { "userInfo", "parameter1" }; 22 BooleanClause.Occur[] flags = new BooleanClause.Occur[] { 23 BooleanClause.Occur.MUST, BooleanClause.Occur.SHOULD }; 24 Query query = IKQueryParser.parseMultiField(fields, keyword, flags); 25 26 // 多字段排序,设置在前面的会优先排序 27 SortField[] sortFields = new SortField[2]; 28 SortField sortField = new SortField("userId", SortField.INT, false); // false升序,true降序 29 SortField FIELD_SEX = new SortField("userAge", SortField.INT, true); 30 sortFields[0] = sortField; 31 sortFields[1] = FIELD_SEX; 32 Sort sort = new Sort(sortFields); 33 34 TopDocs topDocs = indexSearcher.search(query, null, 50, sort); 35 36 if (topDocs.totalHits != 0) { 37 // for(ScoreDoc sd : topDocs.scoreDocs) { 38 // 39 // } 40 // 高亮设置 41 SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>"); 42 Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query)); 43 44 for ( int i = (pageNo - 1) * pageSize; i < pageSize * pageNo; i++) { 45 ScoreDoc scdoc = topDocs.scoreDocs[i]; 46 User u = new User(); 47 Document doc = indexSearcher.doc(scdoc.doc); 48 u.setUserId(Integer.parseInt(doc.get("userId"))); 49 u.setUserName(doc.get("userName")); 50 u.setUserAge(Integer.parseInt(doc.get("userAge"))); 51 TokenStream tokenStream = analyzer.tokenStream("text", new StringReader(doc.get("userInfo"))); 52 String userInfo = highlighter.getBestFragment(tokenStream,doc.get("userInfo")); 53 if (userInfo != null) { 54 u.setUserInfo(userInfo); 55 } else { 56 u.setUserInfo(doc.get("userInfo")); 57 } 58 59 TokenStream tokenStream1 = analyzer.tokenStream("text1", new StringReader(doc.get("parameter1"))); 60 String p1 = highlighter.getBestFragment(tokenStream1, doc.get("parameter1")); 61 if (p1 != null) { 62 u.setParameter1(p1); 63 } else { 64 u.setParameter1(doc.get("parameter1")); 65 } 66 67 u.setParameter2(doc.get("parameter2")); 68 u.setParameter3(doc.get("parameter3")); 69 u.setParameter4(doc.get("parameter4")); 70 result.add(u); 71 72 } 73 PageBean pb = new PageBean(); 74 pb.setCurrentPage(pageNo); // 当前页 75 pb.setPageSize(pageSize); 76 pb.setAllRow(topDocs.totalHits); // hit中的记录数目 77 pb.setList(result); 78 return pb; 79 80 } 81 } catch (IOException e) { 82 // TODO Auto-generated catch block 83 e.printStackTrace(); 84 } catch (InvalidTokenOffsetsException e) { 85 // TODO Auto-generated catch block 86 e.printStackTrace(); 87 } 88 89 return null; 90 } 91 92 /** 93 * 删除索引 94 * @param userId 95 */ 96 public void deleIndex(String userId){ 97 98 try { 99 File f = new File(dirPath); 100 directory = FSDirectory.open(f); 101 IndexReader reader = IndexReader.open(directory, false); 102 Term term = new Term("userId", userId); 103 reader.deleteDocuments(term); 104 reader.close(); 105 } catch (IOException e) { 106 // TODO Auto-generated catch block 107 e.printStackTrace(); 108 } 109 110 111 } 112 113 } 114 115 116 高亮设置集成抽取成一个方法 117 118 public String toHighlighter(Query query,Document doc,String field){ 119 try { 120 SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>"); 121 Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query)); 122 TokenStream tokenStream1 = analyzer.tokenStream("text", new StringReader(doc.get(field))); 123 String highlighterStr = highlighter.getBestFragment(tokenStream1, doc.get(field)); 124 125 return highlighterStr == null ? doc.get(field):highlighterStr; 126 } catch (IOException e) { 127 // TODO Auto-generated catch block 128 e.printStackTrace(); 129 } catch (InvalidTokenOffsetsException e) { 130 // TODO Auto-generated catch block 131 e.printStackTrace(); 132 } 133 return null;
134 }