diff --git a/include/utils/StringUtils.h b/include/utils/StringUtils.h index 6d3afa15..62dca319 100644 --- a/include/utils/StringUtils.h +++ b/include/utils/StringUtils.h @@ -51,3 +51,5 @@ unsigned char ChartoHex(char ch); std::vector splitStr(const String& str, const String& delimiter); bool strInVector(const String& str, const std::vector& vec); + +String getUtf8CharByIndex(const String& utf8str, int index); diff --git a/src/utils/StringUtils.cpp b/src/utils/StringUtils.cpp index d6c8c3f7..80cfa61a 100644 --- a/src/utils/StringUtils.cpp +++ b/src/utils/StringUtils.cpp @@ -233,4 +233,29 @@ bool strInVector(const String& str, const std::vector& vec) { if (vec[i] == str) return true; } return false; +} + +String getUtf8CharByIndex(const String& utf8str, int index) { + if (index < 0) index = 0; + + int len = utf8str.length(); + int charCount = 0; + int i = 0; + while (i < len) { + int charLen = 1; + unsigned char c = utf8str[i]; + if ((c & 0x80) == 0x00) charLen = 1; // 0xxxxxxx + else if ((c & 0xE0) == 0xC0) charLen = 2; // 110xxxxx + else if ((c & 0xF0) == 0xE0) charLen = 3; // 1110xxxx + else if ((c & 0xF8) == 0xF0) charLen = 4; // 11110xxx + + if (charCount == index) { + return utf8str.substring(i, i + charLen); + } + + if (i + charLen >= len) return utf8str.substring(i, i + charLen); + i += charLen; + charCount++; + } + return ""; } \ No newline at end of file