icu
International Components for Unicode,https://github.com/unicode-org/icu.git
https://icu.unicode.org/
帮助文档:
https://unicode-org.github.io/icu/userguide/icu/howtouseicu.html
- i18n,Internationalization (in/i18n) library
- io,Ustdio/iostream library (icuio)。C++ 默认按 ANSI(本地编码)读取文件,读取其他编码的文件需要做编码转换;使用 ICU 的 u_fopen 可以直接读取 Unicode 文本
编码检测
/**
 * Detect the most likely character encoding of a byte buffer.
 *
 * @param data     Input: the bytes to examine.
 * @param len      Input: number of bytes in data.
 * @param detected Output: on success, receives a strdup()-allocated copy of the
 *                 best match's encoding name; the caller must free() it.
 *                 Set to NULL whenever the function returns false.
 * @return true if an encoding name was stored in *detected; false on any ICU
 *         error, allocation failure, or when no candidate encoding was found.
 */
bool detectTextEncoding(const char *data, int32_t len, char **detected) {
    UCharsetDetector *csd = NULL;
    const UCharsetMatch **csm = NULL;
    const char *bestName = NULL;
    int32_t matchCount = 0;
    UErrorCode status = U_ZERO_ERROR;
    bool ok = false;

    if (detected == NULL) {
        return false;
    }
    /* Give the caller a well-defined value on every failure path. */
    *detected = NULL;

    csd = ucsdet_open(&status);
    if (U_FAILURE(status)) {
        return false;
    }

    ucsdet_setText(csd, data, len, &status);
    if (U_FAILURE(status)) {
        goto done;
    }

    csm = ucsdet_detectAll(csd, &matchCount, &status);
    if (U_FAILURE(status) || csm == NULL || matchCount <= 0) {
        goto done;
    }

#if 0 /* Debug aid: print every candidate encoding that was detected. */
    for (int32_t match = 0; match < matchCount; match += 1) {
        const char *name = ucsdet_getName(csm[match], &status);
        const char *lang = ucsdet_getLanguage(csm[match], &status);
        int32_t confidence = ucsdet_getConfidence(csm[match], &status);
        if (lang == NULL || strlen(lang) == 0)
            lang = "**";
        printf("%s (%s) %d\n", name, lang, confidence);
    }
#endif

    /* csm[0] is the first (best) candidate; check the name before copying it. */
    bestName = ucsdet_getName(csm[0], &status);
    if (U_FAILURE(status) || bestName == NULL) {
        goto done;
    }

    /* Copy the name before the detector is closed; the caller frees the copy. */
    *detected = strdup(bestName);
    if (*detected == NULL) {
        goto done;
    }

    printf("charset = %s\n", *detected);
    ok = true;

done:
    /* Single exit point so the detector is released on every path. */
    ucsdet_close(csd);
    return ok;
}
编码转换
/*
 * Convert a string from one character encoding to another via ucnv_convert.
 *
 * toConverterName   - name of the encoding to convert to
 * fromConverterName - name of the encoding to convert from
 * target            - output buffer that receives the converted string
 * targetCapacity    - capacity of target
 * source            - the string to convert
 * sourceLength      - size of source
 *
 * Returns the UErrorCode left behind by ucnv_convert, as an int
 * (U_ZERO_ERROR when the call reported neither an error nor a warning).
 * The length value returned by ucnv_convert itself is not reported.
 */
int convert(const char *toConverterName, const char *fromConverterName,
            char *target, int32_t targetCapacity, const char *source, int32_t sourceLength) {
    UErrorCode status = U_ZERO_ERROR;

    (void)ucnv_convert(toConverterName, fromConverterName,
                       target, targetCapacity,
                       source, sourceLength,
                       &status);

    return status;
}
/*
 * Example fragment: convert an ISO-8859-3 byte string to UChars with an
 * explicitly opened converter. conv, status, target, targetSize, source,
 * sourceLen and len are declared outside this fragment.
 */
/* Open a converter for the source encoding. */
conv = ucnv_open("iso-8859-3", &status);
/* Convert from ISO-8859-3 to Unicode */
len = ucnv_toUChars(conv, target, targetSize, source, sourceLen, &status);
/* Release the converter once the conversion is done. */
ucnv_close(conv);
i18n国际化
resource tree structure:
c打开resource
/* Example fragment (C API): open the "root" resource bundle and close it again. */
UErrorCode status = U_ZERO_ERROR;
/* NULL is passed as the package name argument. */
UResourceBundle* icuRoot = ures_open(NULL, "root", &status);
if (U_SUCCESS(status)) {
/* Read values from the bundle here, e.g. with ures_getStringByKey. */
ures_close(icuRoot);
}
c++打开resource
// Example fragment (C++ API): open the "de_AT" bundle of package "myPackage".
UErrorCode status = U_ZERO_ERROR;
// we rely on automatic construction of Locale object from a char*
ResourceBundle myResource("myPackage", "de_AT", status);
if (U_SUCCESS(status)) {
// The bundle opened successfully; use myResource here.
}
查询key
/*
 * Example fragment: look up the "zoneStrings" entry of an already opened
 * bundle (icuRoot and status come from the surrounding code).
 */
UResourceBundle *zones = ures_getByKey(icuRoot, "zoneStrings", NULL, &status);
if (U_SUCCESS(status)) {
/* Use zones here, then close the returned bundle. */
ures_close(zones);
}
bundle file
// Sample resource bundle source for the "root" locale.
// "menu" holds an id plus an "items" list; each unnamed { } group is one
// entry with its own id and name, and the "file" entry nests a further
// "items" list with the open / save / exit entries.
root {
menu {
id { "mainmenu" }
items {
{
id { "file" }
name { "&File" }
items {
{
id { "open" }
name { "&Open" }
}
{
id { "save" }
name { "&Save" }
}
{
id { "exit" }
name { "&Exit" }
}
}
}
}
}
}
生成 binary resource bundle 文件
genrb -d dest_dirname root.txt en.txt