Emoji 特殊字符处理:向使用 utf8 字符集的 MySQL 数据库插入 Emoji 等特殊字符时会报错

  Bob

    用 MySQL 数据库存微信用户名的时候,一定要用 utf8mb4,鬼知道用户名里会有什么表情。原因是 MySQL 的 utf8 字符集每个字符最多只支持 3 个字节,而大多数 Emoji 表情在 UTF-8 编码下需要 4 个字节。

    解决办法:

    1. 将 MySQL 的字符集设置为 utf8mb4

    2. 入库前过滤掉 Emoji 等特殊字符(示例代码如下)

     

    /**
     * Utility for detecting and stripping characters that cannot be stored in a
     * MySQL column using the 3-byte {@code utf8} character set.
     *
     * <p>A UTF-16 code unit is treated as "emoji" (i.e. unsafe) when it is NOT one of:
     * NUL, TAB, LF, CR, U+0020..U+D7FF, or U+E000..U+FFFD. In practice this rejects
     * surrogate code units (which encode supplementary-plane characters such as most
     * emoji and need 4 bytes in UTF-8), the remaining C0 control characters, and the
     * non-characters U+FFFE / U+FFFF. Emoji that live inside the Basic Multilingual
     * Plane (for example U+263A) are kept, because they fit into 3 bytes.
     */
    public class EmojiFilter {

        /**
         * Checks whether the given string contains at least one emoji / unsupported
         * character.
         *
         * @param source the string to inspect; may be {@code null}
         * @return {@code true} if at least one unsupported character is present;
         *         {@code false} for {@code null}, empty, or fully supported input
         */
        public static boolean containsEmoji(String source) {
            if (source == null || source.isEmpty()) {
                return false;
            }
            int len = source.length();
            for (int i = 0; i < len; i++) {
                if (isEmojiCharacter(source.charAt(i))) {
                    return true;
                }
            }
            return false;
        }

        /**
         * Tells whether a single UTF-16 code unit must be filtered out.
         *
         * @param codePoint the UTF-16 code unit to test
         * @return {@code true} if the unit is outside the supported ranges
         */
        private static boolean isEmojiCharacter(char codePoint) {
            return !isSupportedCharacter(codePoint);
        }

        /**
         * Tells whether a single UTF-16 code unit is in the whitelist of characters
         * that are kept.
         *
         * @param c the UTF-16 code unit to test
         * @return {@code true} if the unit is kept by {@link #filterEmoji(String)}
         */
        private static boolean isSupportedCharacter(char c) {
            // A char is at most 0xFFFF, so supplementary code points only ever show up
            // as surrogate pairs (0xD800..0xDFFF), which fall outside these ranges.
            return c == 0x0 || c == 0x9 || c == 0xA || c == 0xD
                    || (c >= 0x20 && c <= 0xD7FF)
                    || (c >= 0xE000 && c <= 0xFFFD);
        }

        /**
         * Removes every emoji / unsupported character from the given string.
         *
         * @param source the string to clean; may be {@code null}
         * @return {@code source} itself (same instance) when nothing had to be removed,
         *         including for {@code null} and empty input; otherwise a new string
         *         with the unsupported characters dropped
         */
        public static String filterEmoji(String source) {
            if (source == null || source.isEmpty()) {
                return source;
            }
            // Allocated lazily so that clean input is returned without any copying.
            StringBuilder buf = null;
            int len = source.length();
            for (int i = 0; i < len; i++) {
                char c = source.charAt(i);
                if (isEmojiCharacter(c)) {
                    if (buf == null) {
                        // First offending character: keep the clean prefix seen so far.
                        buf = new StringBuilder(len);
                        buf.append(source, 0, i);
                    }
                } else if (buf != null) {
                    buf.append(c);
                }
            }
            return buf == null ? source : buf.toString();
        }
    }

    如有疑问或同行交流欢迎加群讨论:铂金信息技术交流群 151258054