/**
* Convert the input to a array of tokens. The sequence of ASCII or Unknown characters without
* space will be put into a Token, One Hanzi character which has pinyin will be treated as a
* Token. If these is no China collator, the empty token array is returned.
*/
public ArrayList<Token> get(final String input) {
ArrayList<Token> tokens = new ArrayList<Token>();
if (!mHasChinaCollator || TextUtils.isEmpty(input)) {
// return empty tokens.
return tokens;
}
final int inputLength = input.length();
final StringBuilder sb = new StringBuilder();
int tokenType = Token.LATIN;
// Go through the input, create a new token when
// a. Token type changed
// b. Get the Pinyin of current charater.
// c. current character is space.
for (int i = 0; i < inputLength; i++) {
final char character = input.charAt(i);
if (character == ' ') {
if (sb.length() > 0) {
addToken(sb, tokens, tokenType);
}
} else if (character < 256) {
if (tokenType != Token.LATIN && sb.length() > 0) {
addToken(sb, tokens, tokenType);
}
tokenType = Token.LATIN;
sb.append(character);
} else if (character < FIRST_UNIHAN) {
if (tokenType != Token.UNKNOWN && sb.length() > 0) {
addToken(sb, tokens, tokenType);
}
tokenType = Token.UNKNOWN;
sb.append(character);
} else {
Token t = getToken(character);
if (t.type == Token.PINYIN) {
if (sb.length() > 0) {
addToken(sb, tokens, tokenType);
}
tokens.add(t);
tokenType = Token.PINYIN;
} else {
if (tokenType != t.type && sb.length() > 0) {
addToken(sb, tokens, tokenType);
}
tokenType = t.type;
sb.append(character);
}
}
}
if (sb.length() > 0) {
addToken(sb, tokens, tokenType);
}
return tokens;
}
Android 中汉字转化为拼音(9)
内容版权声明:除非注明,否则皆为本站原创文章。