lucene全文检索功能示例代码

2026-04-23 06:26:15

在一次使用百度编辑器插件完成公司开发任务的过程中，我们需要根据高级搜索的关键字，快速检索已录入文章的标题和正文。最初的做法是把文章标题和正文合并后存入 MySQL 数据库，再通过模糊查询（LIKE）检索，结果发现速度太慢。后来领导要求优化，我偶然发现 Lucene 很适合做全文检索，于是在此记录，以备不时之需，也欢迎大家借鉴！

工具/原料

Java + Spring + Spring MVC + MyBatis + Maven 仓库 + MySQL 数据库

方法/步骤

首先需要在本地创建两个文件夹用于测试：一个是源文件夹（对应代码中的 DATA_DIR，即 D:\luceneData），存放需要检索的文章，代码支持 txt、doc、xls 三种格式；另一个是目标文件夹（对应代码中的 INDEX_DIR，即 D:\luceneIndex，最初为空），创建索引后会在该文件夹下生成索引文件。

下面是具体代码：

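创建一个 Lucene 类，把下面所有代码放在类里面即可。代码依赖 Lucene（StandardAnalyzer、QueryParser 等）、Apache POI（HWPFDocument）和 jxl（Workbook），需要先在 Maven 中引入相应依赖。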

// Manager instance handed out by getManager(); created there on first use.
private static IndexManager indexManager;

// Scratch buffer for the extracted text of the file currently being indexed;
// createIndex resets it to "" before and after every file.
private static String content="";

// Directory holding the Lucene index; opened through FSDirectory in
// createIndex and searchIndex, and wiped by main before indexing.
private static String INDEX_DIR = "D:\\luceneIndex";

// Directory holding the source documents; main passes it to createIndex.
private static String DATA_DIR = "D:\\luceneData";

// Analyzer used for both indexing and query parsing; assigned in
// createIndex and searchIndex.
private static Analyzer analyzer = null;

// Handle on the index directory; assigned in createIndex and searchIndex.
private static Directory directory = null;

// Writer assigned in createIndex and closed by closeWriter().
private static IndexWriter indexWriter = null;

/**

* 创建索引管理器

* @return 返回索引管理器对象

public IndexManager getManager(){

if(indexManager == null){

this.indexManager = new IndexManager();

}

return indexManager;

}

/**

* 创建当前文件目录的索引

* @param path 当前文件目录

* @return 是否成功

public static boolean createIndex(String path){

Date date1 = new Date();

List<File> fileList = getFileList(path);

for (File file : fileList) {

content = "";

//获取文件后缀

String type = file.getName().substring(file.getName().lastIndexOf(".")+1);

if("txt".equalsIgnoreCase(type)){

content += txt2String(file);

}else if("doc".equalsIgnoreCase(type)){

content += doc2String(file);

}else if("xls".equalsIgnoreCase(type)){

content += xls2String(file);

}

System.out.println("name :"+file.getName());

System.out.println("path :"+file.getPath());

// System.out.println("content :"+content);

System.out.println();

try{

analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

directory = FSDirectory.open(new File(INDEX_DIR));

File indexFile = new File(INDEX_DIR);

if (!indexFile.exists()) {

indexFile.mkdirs();

}

IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer);

indexWriter = new IndexWriter(directory, config);

Document document = new Document();

document.add(new TextField("filename", file.getName(), Store.YES));

document.add(new TextField("content", content, Store.YES));

document.add(new TextField("path", file.getPath(), Store.YES));

indexWriter.addDocument(document);

indexWriter.commit();

closeWriter();

}catch(Exception e){

e.printStackTrace();

}

content = "";

}

Date date2 = new Date();

System.out.println("创建索引-----耗时：" + (date2.getTime() - date1.getTime()) + "ms\n");

return true;

}

/**

* 读取txt文件的内容

* @param file 想要读取的文件对象

* @return 返回文件内容

public static String txt2String(File file){

String result = "";

try{

BufferedReader br = new BufferedReader(new FileReader(file));//构造一个BufferedReader类来读取文件

String s = null;

while((s = br.readLine())!=null){//使用readLine方法，一次读一行

result = result + "\n" +s;

}

br.close();

}catch(Exception e){

e.printStackTrace();

}

return result;

}

/**

* 读取doc文件内容

* @param file 想要读取的文件对象

* @return 返回文件内容

public static String doc2String(File file){

String result = "";

try{

FileInputStream fis = new FileInputStream(file);

HWPFDocument doc = new HWPFDocument(fis);

Range rang = doc.getRange();

result += rang.text();

fis.close();

}catch(Exception e){

e.printStackTrace();

}

return result;

}

/**

* 读取xls文件内容

* @param file 想要读取的文件对象

* @return 返回文件内容

public static String xls2String(File file){

String result = "";

try{

FileInputStream fis = new FileInputStream(file);

StringBuilder sb = new StringBuilder();

jxl.Workbook rwb = Workbook.getWorkbook(fis);

Sheet[] sheet = rwb.getSheets();

for (int i = 0; i < sheet.length; i++) {

Sheet rs = rwb.getSheet(i);

for (int j = 0; j < rs.getRows(); j++) {

Cell[] cells = rs.getRow(j);

for(int k=0;k<cells.length;k++)

sb.append(cells[k].getContents());

}

fis.close();

result += sb.toString();

}catch(Exception e){

e.printStackTrace();

}

return result;

}

/**

* 查找索引，返回符合条件的文件

* @param text 查找的字符串

* @return 符合条件的文件List

public static void searchIndex(String text){

Date date1 = new Date();

try{

directory = FSDirectory.open(new File(INDEX_DIR));

analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

DirectoryReader ireader = DirectoryReader.open(directory);

IndexSearcher isearcher = new IndexSearcher(ireader);

QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "content", analyzer);

Query query = parser.parse(text);

ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;

for (int i = 0; i < hits.length; i++) {

Document hitDoc = isearcher.doc(hits[i].doc);

System.out.println("____________________________");

System.out.println(hitDoc.get("filename"));

System.out.println(hitDoc.get("content"));

System.out.println(hitDoc.get("path"));

System.out.println("____________________________");

}

ireader.close();

directory.close();

}catch(Exception e){

e.printStackTrace();

}

Date date2 = new Date();

System.out.println("查看索引-----耗时：" + (date2.getTime() - date1.getTime()) + "ms\n");

}

/**

* 过滤目录下的文件

* @param dirPath 想要获取文件的目录

* @return 返回文件list

public static List<File> getFileList(String dirPath) {

File[] files = new File(dirPath).listFiles();

List<File> fileList = new ArrayList<File>();

for (File file : files) {

if (isTxtFile(file.getName())) {

fileList.add(file);

}

return fileList;

}

/**

* 判断是否为目标文件，目前支持txt xls doc格式

* @param fileName 文件名称

* @return 如果是文件类型满足过滤条件，返回true；否则返回false

public static boolean isTxtFile(String fileName) {

if (fileName.lastIndexOf(".txt") > 0) {

return true;

}else if (fileName.lastIndexOf(".xls") > 0) {

return true;

}else if (fileName.lastIndexOf(".doc") > 0) {

return true;

}

return false;

}

public static void closeWriter() throws Exception {

if (indexWriter != null) {

indexWriter.close();

}

/**

* 删除文件目录下的所有文件

* @param file 要删除的文件目录

* @return 如果成功，返回true.

public static boolean deleteDir(File file){

if(file.isDirectory()){

File[] files = file.listFiles();

for(int i=0; i<files.length; i++){

deleteDir(files[i]);

}

file.delete();

return true;

}

写main方法测试上面的代码

/**
 * Demo entry point: clears the index directory, indexes the files under
 * {@code DATA_DIR}, then runs one sample query.
 *
 * @param args unused
 */
public static void main(String[] args){
    // The index folder is recreated whatever deleteDir reports.
    File indexRoot = new File(INDEX_DIR);
    deleteDir(indexRoot);
    indexRoot.mkdir();

    createIndex(DATA_DIR);
    searchIndex("好的");
}

声明：本网站引用、摘录或转载内容仅供网站访问者交流或参考，不代表本站立场，如存在版权或非法内容，请联系站长删除，联系邮箱：site.kefu@qq.com。