MySQL 表的编码格式为 utf8(即 utf8mb3,单个字符最多占 3 字节),无法存储 emoji 等 4 字节字符;而表内数据量巨大,已无法通过新建表或修改表的编码格式(例如改为 utf8mb4)来解决。
因此采用手动编码的方式:写入前将有问题的字符(emoji)转换为 utf8 能够存储的 ASCII 文本,读取时再还原。
工具类
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Converts emoji to and from an ASCII-only token form so that they can be stored in a
 * database column whose character set cannot hold them.
 *
 * <p>{@link #emojiConvert(String)} replaces every matched character with a token of the form
 * {@code [[%F0%9F%98%81]]}: the URL-encoded UTF-8 bytes of the character wrapped in double
 * square brackets. {@link #emojiRecovery(String)} turns such tokens back into characters.
 *
 * <p>Created 2023/6/28.
 *
 * @author chencongcong
 */
public class EmojiConvertUtils {

    /** Matches a single supplementary code point (U+10000..U+10FFFF) or a single surrogate. */
    private static final Pattern EMOJI_PATTERN =
            Pattern.compile("([\\x{10000}-\\x{10ffff}\\ud800-\\udfff])");

    /**
     * Matches exactly the token shape produced by {@link #emojiConvert(String)}: {@code [[ ]]}
     * around one or more {@code %XX} escapes. Anything else in double brackets is ordinary text
     * and must be left alone.
     */
    private static final Pattern TOKEN_PATTERN =
            Pattern.compile("\\[\\[((?:%[0-9A-Fa-f]{2})+)\\]\\]");

    /**
     * Replaces each emoji (supplementary code point) in the input with its ASCII token so the
     * result can be saved in the database.
     *
     * <p>NOTE(review): the pattern also matches unpaired surrogates; those have no UTF-8
     * encoding, so the round trip through {@link #emojiRecovery(String)} is presumably lossy
     * for them - confirm if such input matters.
     *
     * @param str the string to convert; may be {@code null}
     * @return the string with every matched character replaced by a {@code [[...]]} token, or
     *     {@code null} if {@code str} is {@code null}
     */
    public static String emojiConvert(String str) {
        if (str == null) {
            return null;
        }
        Matcher matcher = EMOJI_PATTERN.matcher(str);
        // StringBuffer (not StringBuilder) keeps appendReplacement usable on Java 8.
        StringBuffer sb = new StringBuffer(str.length());
        while (matcher.find()) {
            String token = "[[" + encodeUtf8(matcher.group(1)) + "]]";
            // quoteReplacement: the token is literal text, never a group reference.
            matcher.appendReplacement(sb, Matcher.quoteReplacement(token));
        }
        matcher.appendTail(sb);
        return sb.toString();
    }

    /**
     * Restores the original characters in a string that was produced by
     * {@link #emojiConvert(String)} and read back from the database.
     *
     * <p>Only tokens consisting purely of {@code %XX} escapes are decoded; other bracketed text
     * such as {@code [[note]]} or {@code [[100%]]} is returned unchanged.
     *
     * @param str the stored string; may be {@code null}
     * @return the string with every token replaced by the character(s) it encodes, or
     *     {@code null} if {@code str} is {@code null}
     */
    public static String emojiRecovery(String str) {
        if (str == null) {
            return null;
        }
        Matcher matcher = TOKEN_PATTERN.matcher(str);
        StringBuffer sb = new StringBuffer(str.length());
        while (matcher.find()) {
            String decoded = decodeUtf8(matcher.group(1));
            // The decoded text may contain '$' or '\', which appendReplacement would otherwise
            // interpret as a group reference / escape.
            matcher.appendReplacement(sb, Matcher.quoteReplacement(decoded));
        }
        matcher.appendTail(sb);
        return sb.toString();
    }

    /** URL-encodes {@code text} as UTF-8. */
    private static String encodeUtf8(String text) {
        try {
            return URLEncoder.encode(text, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            // Every JVM is required to support UTF-8, so this is a broken runtime.
            throw new IllegalStateException("UTF-8 is not supported by this JVM", e);
        }
    }

    /** URL-decodes {@code text} as UTF-8. */
    private static String decodeUtf8(String text) {
        try {
            return URLDecoder.decode(text, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            // Every JVM is required to support UTF-8, so this is a broken runtime.
            throw new IllegalStateException("UTF-8 is not supported by this JVM", e);
        }
    }
}
运行示例(以 😁、🚒 两个 emoji 为例):
/**
 * Demo: converts a sample string containing two emoji, prints the converted form, then
 * recovers it and prints the recovered string.
 *
 * @param args unused
 */
public static void main(String[] args) {
    final String sample = "你好\uD83D\uDE01\uD83D\uDE92";
    // Step 1: emoji -> token form.
    final String encoded = EmojiConvertUtils.emojiConvert(sample);
    System.out.println(encoded);
    // Step 2: token form -> emoji.
    System.out.println(EmojiConvertUtils.emojiRecovery(encoded));
}