很多人都用过正则,但这篇文章或许会给你一种全新的认识(思考)。
以下内容适合高效率正则匹配(比较适合正则匹配场景较多的情况)
效率提升精华:本地缓存 + 减少编译次数。这一思路来自对《Effective Java》的思考,以及对数据库连接中 TCP 建连耗时的思考:昂贵的对象应当创建一次、反复复用。如果条件允许,可以收集系统中经常使用的正则表达式,在系统初始化时预先编译并加载到内存中。(可选项:收集使用数据,通过 job 或线程提前加载到正则工具中。)
Maven 依赖:version 部分请自行到 Maven 中央仓库查询合适的版本后补上
<dependency> <groupId>com.github.ben-manes.caffeine</groupId> <artifactId>caffeine</artifactId> </dependency>
import com.github.benmanes.caffeine.cache.Cache; import com.github.benmanes.caffeine.cache.Caffeine; import org.apache.commons.lang3.StringUtils; import org.springframework.util.StopWatch; import java.util.ArrayList; import java.util.List; import java.util.Objects; import java.util.regex.Pattern; /** * description: 正则表达式工具类 */ public class RegUtils { private RegUtils() { } public static final Cache<String, Pattern> REG_PATTERNS = Caffeine.newBuilder().maximumSize(512).build(); /** * 获取pattern * @param expression * @return */ public static Pattern getRegPattern(String expression) { if (StringUtils.isEmpty(expression)) { throw new IllegalArgumentException("expression is empty"); } Pattern ifPresent = REG_PATTERNS.getIfPresent(expression); if (Objects.nonNull(ifPresent)) { return ifPresent; } Pattern compile = null; try { compile = Pattern.compile(expression); } catch (Exception e) { throw new IllegalArgumentException("expression error:" + expression + " " + e.getMessage()); } if (Objects.nonNull(compile)) { //数据放到mq中,并写入到db中后续进行分析,进行提前加载 REG_PATTERNS.put(expression, compile); } return compile; } public static void main(String[] args) { String expression = "[0-9]{3}[-]{1}[0-9]{4}"; String data = "333-8889"; StopWatch stopWatch = new StopWatch(expression); stopWatch.start(); boolean matches = getRegPattern(expression).matcher(data).matches(); stopWatch.stop(); System.out.println(stopWatch.prettyPrint()); List<String> strings = new ArrayList<>(); strings.add("333-8889");// true for (String string : strings) { StopWatch stopWatch1 = new StopWatch(expression); stopWatch1.start(); boolean matches1 = getRegPattern(expression).matcher(string).matches(); stopWatch1.stop(); System.out.println(matches1); System.out.println(stopWatch1.prettyPrint()); } } }
对比结果
第一次调用时需要编译正则并放入缓存,第二次则直接从缓存中获取已编译的 Pattern,执行效率有质的提升。