IKAnalyzer 是一个功能强大、易于使用、性能优异的中文分词工具包，适用于各种需要进行中文分词的场景，如搜索引擎、自然语言处理、文本挖掘等。
Spring Boot 如何集成 IKAnalyzer 分词组件
引入IKAnalyzer分词组件包
<!-- Lucene core library, pinned to 8.11.1. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>8.11.1</version>
</dependency>
<!-- Lucene common analyzers module; kept on the same version as lucene-core. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>8.11.1</version>
</dependency>
<!-- IK Analyzer artifact published under com.github.magese. -->
<!-- NOTE(review): ik-analyzer 8.5.0 is presumably built against Lucene 8.5.x; confirm it is compatible with the 8.11.1 Lucene artifacts declared above. -->
<dependency>
<groupId>com.github.magese</groupId>
<artifactId>ik-analyzer</artifactId>
<version>8.5.0</version>
</dependency>
IKAnalyzer 配置
package com.example.demo.config;
import org.apache.lucene.analysis.Analyzer;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Spring configuration that registers an {@link IKAnalyzer} instance as a bean
 * exposed through the Lucene {@link Analyzer} type.
 */
@Configuration
public class IkAnalyzerConfig {

    /**
     * Creates the analyzer bean.
     *
     * @return a new {@link IKAnalyzer} constructed with the flag {@code true}
     */
    @Bean
    public Analyzer ikAnalyzer() {
        // NOTE(review): the boolean presumably selects IK's "smart" (coarse-grained)
        // segmentation mode; confirm against the IKAnalyzer constructor documentation.
        final boolean smartMode = true;
        final Analyzer ik = new IKAnalyzer(smartMode);
        return ik;
    }
}
自定义IkAnalyzerService
package com.example.demo.service;
import lombok.extern.slf4j.Slf4j;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.springframework.stereotype.Service;
import javax.annotation.Resource;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
/**
 * Service that splits text into terms using the injected Lucene {@link Analyzer}.
 */
@Slf4j
@Service
public class IkAnalyzerService {

    @Resource
    private Analyzer analyzer;

    /**
     * Tokenizes the given text and returns the terms in the order the analyzer emits them.
     *
     * <p>An {@link IOException} raised while reading tokens is logged and not rethrown;
     * the terms collected up to that point are returned.
     *
     * @param text the text to tokenize; {@code null} or empty yields an empty list
     * @return a mutable list of terms, never {@code null}
     */
    public List<String> textHandler(String text) {
        List<String> result = new ArrayList<>();
        // Guard: new StringReader(null) would throw a NullPointerException.
        if (text == null || text.isEmpty()) {
            return result;
        }
        // Wrap the text in a Reader for the analyzer.
        StringReader reader = new StringReader(text);
        // try-with-resources guarantees close() even when reset()/incrementToken() throws;
        // an unclosed stream would otherwise break the analyzer's reuse on later calls.
        try (TokenStream tokenStream = analyzer.tokenStream("content", reader)) {
            // Attribute through which the current term text is read.
            CharTermAttribute charTermAttr = tokenStream.addAttribute(CharTermAttribute.class);
            // reset() must be called before the first incrementToken().
            tokenStream.reset();
            // Consume tokens until the stream is exhausted.
            while (tokenStream.incrementToken()) {
                result.add(charTermAttr.toString());
            }
            // Complete the consumer workflow: end() after the last token, before close().
            tokenStream.end();
        } catch (IOException e) {
            log.error("IkAnalyzerService.textHandler() IOException:{}", e.getMessage(), e);
        }
        return result;
    }
}
验证
测试代码
package com.example.demo;
import com.alibaba.fastjson.JSON;
import com.example.demo.service.IkAnalyzerService;
import lombok.extern.slf4j.Slf4j;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;
import javax.annotation.Resource;
import java.util.List;
/**
 * Spring Boot test that runs a few sample sentences through
 * {@link IkAnalyzerService} and logs the resulting terms as JSON.
 */
@Slf4j
@SpringBootTest
public class ApplicationTests {

    @Resource
    private IkAnalyzerService ikAnalyzerService;

    /**
     * Tokenizes each sample sentence in order and logs the term list.
     * No assertions are made; the output is inspected in the log.
     */
    @Test
    public void contextLoads() {
        final String[] samples = {
            "这个电影很精彩,推荐!!!",
            "他在家正发呆呢",
            "人家都说他是个好领导,我们可以相信他吗"
        };
        for (String sample : samples) {
            List<String> terms = ikAnalyzerService.textHandler(sample);
            String json = JSON.toJSONString(terms);
            log.info("{}", json);
        }
    }
}