Android 中汉字转化为拼音(10)

private void addToken(
            final StringBuilder sb, final ArrayList<Token> tokens, final int tokenType) {
        String str = sb.toString();
        tokens.add(new Token(tokenType, str, str));
        sb.setLength(0);
    }
   

//The following lines are provided and maintained by Mediatek Inc.
    private class DialerSearchToken extends Token {
     static final int FIRSTCASE = 0;
     static final int UPPERCASE = 1;
     static final int LOWERCASE = 2;
    }
   
    public String getTokensForDialerSearch(final String input, StringBuilder offsets){
       
        if(offsets == null || input == null || TextUtils.isEmpty(input)){
         // return empty tokens
         return null;
        }
       
     StringBuilder subStrSet = new StringBuilder();
        ArrayList<Token> tokens = new ArrayList<Token>();
        ArrayList<String> shortSubStrOffset = new ArrayList<String>();
        final int inputLength = input.length();
        final StringBuilder subString = new StringBuilder();
        final StringBuilder subStrOffset = new StringBuilder();
        int tokenType = Token.LATIN;
        int caseTypePre = DialerSearchToken.FIRSTCASE;
        int caseTypeCurr = DialerSearchToken.UPPERCASE;
        int mPos = 0;
       
        // Go through the input, create a new token when
        // a. Token type changed
        // b. Get the Pinyin of current charater.
        // c. current character is space.
        // d. Token case changed from lower case to upper case,
        // e. the first character is always a separated one
        // f character == '+' || character == '#' || character == '*' || character == ',' || character == ';'
        for (int i = 0; i < inputLength; i++) {
            final char character = input.charAt(i);
            if (character == '-' || character == ',' ){
             mPos++;
            } else if (character == ' ') {
                if (subString.length() > 0) {
                    addToken(subString, tokens, tokenType);
                    addOffsets(subStrOffset, shortSubStrOffset);
                }
                addSubString(tokens,shortSubStrOffset,subStrSet,offsets);
             mPos++;
                caseTypePre = DialerSearchToken.FIRSTCASE;
            } else if (character < 256) {
                if (tokenType != Token.LATIN && subString.length() > 0) {
                    addToken(subString, tokens, tokenType);
                    addOffsets(subStrOffset, shortSubStrOffset);
                 }
                caseTypeCurr = (character>='A' && character<='Z')?DialerSearchToken.UPPERCASE:DialerSearchToken.LOWERCASE;
                if(caseTypePre == DialerSearchToken.LOWERCASE && caseTypeCurr == DialerSearchToken.UPPERCASE){
                 addToken(subString, tokens, tokenType);
                 addOffsets(subStrOffset, shortSubStrOffset);
                }
                caseTypePre = caseTypeCurr;
                tokenType = Token.LATIN;
                Character c = Character.toUpperCase(character);
                if(c != null){
                 subString.append(c);
                 subStrOffset.append((char)mPos);
                }
                mPos++;
            } else if (character < FIRST_UNIHAN) {
                  //Comment out. Do not cover unknown characters SINCE they can not be input.
//                if (tokenType != Token.UNKNOWN && subString.length() > 0) {
//                    addToken(subString, tokens, tokenType);
//                    addOffsets(subStrOffset, shortSubStrOffset);
//                    caseTypePre = Token.FIRSTCASE;
//                }
//                tokenType = Token.UNKNOWN;
//                Character c = Character.toUpperCase(character);
//                if(c != null){
//                 subString.append(c);
//                 subStrOffset.append((char)(mPos));
//                }
                mPos++;
            } else {
             Token t = getToken(character);
                int tokenSize = t.target.length();
                //Current type is PINYIN
                if (t.type == Token.PINYIN) {
                    if (subString.length() > 0) {
                        addToken(subString, tokens, tokenType);
                        addOffsets(subStrOffset, shortSubStrOffset);
                    }
                    tokens.add(t);
                    for(int j=0; j < tokenSize;j++)
                     subStrOffset.append((char)mPos);
                    addOffsets(subStrOffset,shortSubStrOffset);
                    tokenType = Token.PINYIN;
                    caseTypePre = DialerSearchToken.FIRSTCASE;
                    mPos++;
                } else {
                 //Comment out. Do not cover special characters SINCE they can not be input.
//                    if (tokenType != t.type && subString.length() > 0) {
//                        addToken(subString, tokens, tokenType);
//                        addOffsets(subStrOffset, shortSubStrOffset);
//                        caseTypePre = Token.FIRSTCASE;
//                    }else{
//                     caseTypeCurr = (character>='A' && character<='Z')?Token.UPPERCASE:Token.LOWERCASE;
//                     if(caseTypePre == Token.LOWERCASE && caseTypeCurr == Token.UPPERCASE){
//                      addToken(subString, tokens, tokenType);
//                      addOffsets(subStrOffset, shortSubStrOffset);
//                     }
//                     caseTypePre = caseTypeCurr;
//                    }
//                    tokenType = t.type;
//                    Character c = Character.toUpperCase(character);
//                    if(c != null){
//                     subString.append(c);
//                     subStrOffset.append(mPos);
//                    }
                    mPos++;
                }
            }
            //IF the name string is too long, cut it off to meet the storage request of dialer search.
            if(mPos > 127)
             break;
        }
        if (subString.length() > 0) {
            addToken(subString, tokens, tokenType);
            addOffsets(subStrOffset, shortSubStrOffset);
        }
        addSubString(tokens,shortSubStrOffset,subStrSet,offsets);
        return subStrSet.toString();
    }
   
    private void addOffsets(final StringBuilder sb, final ArrayList<String> shortSubStrOffset){
     String str = sb.toString();
     shortSubStrOffset.add(str);
     sb.setLength(0);
    }
   
    private void addSubString(final ArrayList<Token> tokens, final ArrayList<String> shortSubStrOffset,
          StringBuilder subStrSet, StringBuilder offsets){
     if(tokens == null || tokens.isEmpty())
      return;
    
     int size = tokens.size();
     int len = 0;
     StringBuilder mShortSubStr = new StringBuilder();
     StringBuilder mShortSubStrOffsets = new StringBuilder();
     StringBuilder mShortSubStrSet = new StringBuilder();
     StringBuilder mShortSubStrOffsetsSet = new StringBuilder();
    
     for(int i=size-1; i>=0 ; i--){
      String mTempStr = tokens.get(i).target;
      len += mTempStr.length();
      String mTempOffset = shortSubStrOffset.get(i);
      if(mShortSubStr.length()>0){
       mShortSubStr.deleteCharAt(0);
       mShortSubStrOffsets.deleteCharAt(0);
      }
      mShortSubStr.insert(0, mTempStr);
      mShortSubStr.insert(0,(char)len);
      mShortSubStrOffsets.insert(0,mTempOffset);
      mShortSubStrOffsets.insert(0,(char)len);
      mShortSubStrSet.insert(0,mShortSubStr);
      mShortSubStrOffsetsSet.insert(0, mShortSubStrOffsets);
     }
    
     subStrSet.append(mShortSubStrSet);
     offsets.append(mShortSubStrOffsetsSet);
     tokens.clear();
     shortSubStrOffset.clear();
    }
    //The previous lines are provided and maintained by Mediatek inc.   
}


使用方法

内容版权声明:除非注明,否则皆为本站原创文章。

转载注明出处:http://www.heiqu.com/ppsxj.html