java.text.Normalizer.Form的实例源码

项目：solo-spring 文件：URICoder.java

@H_502_8@/** * Encodes a string containing non ASCII ch@R_404_6460@cters using an UTF-8 encoder. * * @p@R_404_6460@m s * The string the encode (assuming ASCII ch@R_404_6460@cters only) * @p@R_404_6460@m e * A ch@R_404_6460@cter that does not require encoding if found in the * string. */ private static String encode_UTF8(String s,char e) { // Todo: normalizer requires Java 6! String n = (normalizer.isnormalized(s,Form.NFKC)) ? s : normalizer.normalize(s,Form.NFKC); // convert String to UTF-8 ByteBuffer bb = UTF8.encode(n); // URI encode StringBuffer sb = new StringBuffer(); while (bb.hasRemaining()) { int b = bb.get() & 0xff; if (isUnreserved(b) || b == e) { sb.append((char) b); } else { appendEscape(sb,(byte) b); } } return sb.toString(); }

项目：solo-spring 文件：URICoder.java

@H_502_8@/** * Encodes a string containing non ASCII ch@R_404_6460@cters using an UTF-8 encoder. * * @p@R_404_6460@m s * The string the encode (assuming ASCII ch@R_404_6460@cters only) */ private static String minimalEncode_UTF8(String s) { // Todo: normalizer requires Java 6! String n = (normalizer.isnormalized(s,Form.NFKC); // convert String to UTF-8 ByteBuffer bb = UTF8.encode(n); // URI encode StringBuffer sb = new StringBuffer(); while (bb.hasRemaining()) { int b = bb.get() & 0xff; if (isLegal(b)) { sb.append((char) b); } else { appendEscape(sb,(byte) b); } } return sb.toString(); }

项目：Openjsharp 文件：CDataTransferer.java

@H_502_8@@Override public Object translateBytes(byte[] bytes,DataFlavor flavor,long format,Transferable transferable) throws IOException { if (format == CF_URL && URL.class.equals(flavor.getRepresentationClass())) { String charset = getDefaultTextCharset(); if (transferable != null && transferable.isDataFlavorSupported(javaTextEncodingFlavor)) { try { charset = new String((byte[])transferable.getTransferData(javaTextEncodingFlavor),"UTF-8"); } catch (UnsupportedFlavorException cannotHappen) { } } return new URL(new String(bytes,charset)); } if (format == CF_STRING) { bytes = normalizer.normalize(new String(bytes,"UTF8"),Form.NFC).getBytes("UTF8"); } return super.translateBytes(bytes,flavor,format,transferable); }

项目：bibliome-java-utils 文件：Strings.java

@H_502_8@/** * Remove diacritics from the specified string. * @p@R_404_6460@m s * @return a copy of the specified string with diacritics removed. */ public static final String removeDiacritics(String s) { String n = normalizer.normalize(s,Form.NFD); StringBuilder sb = null; for (int i = 0; i < n.length(); ++i) { char c = n.ch@R_404_6460@t(i); UnicodeBlock b = UnicodeBlock.of(c); if (UnicodeBlock.COMBINING_DIACRITICAL_MARKS.equals(b) || UnicodeBlock.COMBINING_DIACRITICAL_MARKS_SUPPLEMENT.equals(b)) { if (sb == null) { sb = new StringBuilder(n.length()); sb.append(n.substring(0,i)); } continue; } if (sb != null) sb.append(c); } if (sb == null) return n; return sb.toString(); }

项目：mycore 文件：MCRUtils.java

@H_502_8@private static String getHash(int iterations,byte[] salt,String text,String algorithm) throws NoSuchAlgorithmException { MessageDigest digest; if (--iterations < 0) { iterations = 0; } byte[] data; try { digest = MessageDigest.getInstance(algorithm); text = normalizer.normalize(text,Form.NFC); if (salt != null) { digest.update(salt); } data = digest.digest(text.getBytes("UTF-8")); for (int i = 0; i < iterations; i++) { data = digest.digest(data); } } catch (UnsupportedEncodingException e) { throw new MCRException("Could not get " + algorithm + " checksum",e); } return toHexString(data); }

项目：testarea-pdfBox2 文件：TextSection.java

@H_502_8@String toString(List<List<TextPosition>> words) { StringBuilder stringBuilder = new StringBuilder(); boolean first = true; for (List<TextPosition> word : words) { if (first) first = false; else stringBuilder.append(' '); for (TextPosition textPosition : word) { stringBuilder.append(textPosition.getUnicode()); } } // cf. http://stackoverflow.com/a/7171932/1729265 return normalizer.normalize(stringBuilder,Form.NFKC); }

项目：MyVidCoRe 文件：Hash.java

@H_502_8@private static String getHash(int iterations,String str,String algorithm) throws NoSuchAlgorithmException,UnsupportedEncodingException { MessageDigest digest; int it = iterations; if (--it < 0) { it = 0; } byte[] data; digest = MessageDigest.getInstance(algorithm); String text = normalizer.normalize(str,Form.NFC); if (salt != null) { digest.update(salt); } data = digest.digest(text.getBytes("UTF-8")); for (int i = 0; i < it; i++) { data = digest.digest(data); } return Hash.toHexString(data); }

项目：packagedrone 文件：Users.java

@H_502_8@public static String hashIt ( final String salt,String data ) { data = normalizer.normalize ( data,Form.NFC ); final byte[] strData = data.getBytes ( StandardCharsets.UTF_8 ); final byte[] saltData = salt.getBytes ( StandardCharsets.UTF_8 ); final byte[] first = new byte[saltData.length + strData.length]; System.arraycopy ( saltData,first,saltData.length ); System.arraycopy ( strData,saltData.length,strData.length ); final MessageDigest md = createDigest (); byte[] digest = md.digest ( first ); final byte[] current = new byte[saltData.length + digest.length]; for ( int i = 0; i < 1000; i++ ) { System.arraycopy ( saltData,current,saltData.length ); System.arraycopy ( digest,digest.length ); digest = md.digest ( current ); } return Base64.getEncoder ().encodetoString ( digest ); }

项目：isetools 文件：AccentCharNode.java

@H_502_8@@Override public Fragment expanded() { char[] cs = super.innerText().toch@R_404_6460@rray(); String accent = charMap.get(innerText().substring(0,1)); if (accent == null) { accent = "\uFFFD"; Message m = Message.builder("char.accent.unkNown") .fromNode(this) .addNote("Ch@R_404_6460@cter " + text + " cannot be expanded.") .build(); Log.getInstance().add(m); } String str = "" + cs[1] + accent; str = normalizer.normalize(str,Form.NFC); return wrap("ACCENT",str); }

项目：berlioz 文件：URICoder.java

@H_502_8@/** * Encodes a string containing non ASCII ch@R_404_6460@cters using an UTF-8 encoder. * * @p@R_404_6460@m s The string the encode (assuming ASCII ch@R_404_6460@cters only) * @p@R_404_6460@m e A ch@R_404_6460@cter that does not require encoding if found in the string. */ private static String encodeUTF8(String s,char e) { String n = (normalizer.isnormalized(s,Form.NFKC); // convert String to UTF-8 ByteBuffer bb = StandardCharsets.UTF_8.encode(n); // URI encode StringBuilder sb = new StringBuilder(); while (bb.hasRemaining()) { int b = bb.get() & 0xff; if (isUnreserved(b) || b == e) { sb.append((char) b); } else { appendEscape(sb,(byte) b); } } return sb.toString(); }

项目：berlioz 文件：URICoder.java

@H_502_8@/** * Encodes a string containing non ASCII ch@R_404_6460@cters using an UTF-8 encoder. * * @p@R_404_6460@m s The string the encode (assuming ASCII ch@R_404_6460@cters only) */ private static String minimalEncodeUTF8(String s) { String n = (normalizer.isnormalized(s,Form.NFKC); // convert String to UTF-8 ByteBuffer bb = StandardCharsets.UTF_8.encode(n); // URI encode StringBuilder sb = new StringBuilder(); while (bb.hasRemaining()) { int b = bb.get() & 0xff; if (isLegal(b)) { sb.append((char) b); } else { appendEscape(sb,(byte) b); } } return sb.toString(); }

项目：infobip-open-jdk-8 文件：CDataTransferer.java

// NOTE(review): this snippet is incomplete as captured — the text jumps from the first
// parameter straight to the final statement, so the rest of the parameter list and the
// whole method body are missing. Restore it from the upstream source before use.
@H_502_8@@Override public Object translateBytes(byte[] bytes,transferable); }

项目：jdk8u-dev-jdk 文件：CDataTransferer.java

// NOTE(review): this snippet is incomplete as captured — the text jumps from the first
// parameter straight to the final statement, so the rest of the parameter list and the
// whole method body are missing. Restore it from the upstream source before use.
@H_502_8@@Override public Object translateBytes(byte[] bytes,transferable); }

项目：furi 文件：URICoder.java

@H_502_8@/** * Encodes a string containing non ASCII ch@R_404_6460@cters using an UTF-8 encoder. * * @p@R_404_6460@m s The string the encode (assuming ASCII ch@R_404_6460@cters only) * @p@R_404_6460@m e A ch@R_404_6460@cter that does not require encoding if found in the string. */ private static String encode_UTF8(String s,char e) { // Todo: normalizer requires Java 6! String n = (normalizer.isnormalized(s,Form.NFKC); // convert String to UTF-8 ByteBuffer bb = UTF8.encode(n); // URI encode StringBuffer sb = new StringBuffer(); while (bb.hasRemaining()) { int b = bb.get() & 0xff; if (isUnreserved(b) || b == e) { sb.append((char) b); } else { appendEscape(sb,(byte) b); } } return sb.toString(); }

项目：furi 文件：URICoder.java

@H_502_8@/** * Encodes a string containing non ASCII ch@R_404_6460@cters using an UTF-8 encoder. * * @p@R_404_6460@m s The string the encode (assuming ASCII ch@R_404_6460@cters only) */ private static String minimalEncode_UTF8(String s) { // Todo: normalizer requires Java 6! String n = (normalizer.isnormalized(s,Form.NFKC); // convert String to UTF-8 ByteBuffer bb = UTF8.encode(n); // URI encode StringBuffer sb = new StringBuffer(); while (bb.hasRemaining()) { int b = bb.get() & 0xff; if (isLegal(b)) { sb.append((char) b); } else { appendEscape(sb,(byte) b); } } return sb.toString(); }

项目：voj 文件：SlugifyUtils.java

@H_502_8@/** * 获取字符串的Slug. * @p@R_404_6460@m str - 待获取Slug的字符串 * @return 字符串对应的Slug */ public static String getSlug(String str) { if ( str == null ) { return ""; } // Rid of White Spaces String NowhiteSpace = WHITESPACE.matcher(str.trim()).replaceAll("-"); // Processing Non-ASCII Ch@R_404_6460@cters try { NowhiteSpace = URLEncoder.encode(NowhiteSpace,"UTF-8"); } catch (UnsupportedEncodingException e) { // Never reach here } // Slugify String String normalized = normalizer.normalize(NowhiteSpace,Form.NFD); return normalized.toLowerCase(); }

项目：package-drone 文件：Users.java

// NOTE(review): this snippet is incomplete as captured — the text jumps from the first
// parameter straight into the tail of the hashing loop, so the second parameter and most
// of the body are missing. Restore it from the upstream source before use.
@H_502_8@public static String hashIt ( final String salt,digest.length ); digest = md.digest ( current ); } return Base64.getEncoder ().encodetoString ( digest ); }

项目：srimporter 文件：SheetSerializer.java

@H_502_8@static String normalizeAccents(String regularstring) { if (!g_bnormalize) return regularstring; // leave the accents String normalizedString = regularstring.replace("Ã©","e"); normalizedString = normalizer.normalize(normalizedString,Form.NFD); StringBuilder sb = new StringBuilder(normalizedString); for (int i = 0; i < sb.length(); i++) { if (Ch@R_404 [email protected](sb.ch@R_404_6460@t(i)) == Ch@R_404 [email protected]_SPACING_MARK) { sb.delete(i,1); } } regularstring = sb.toString(); return regularstring; }

项目：wikipedia_indexer 文件：AccentsDefault.java

@H_502_8@public void apply(TokenStream stream) throws TokenizerException { if (stream == null) return; stream.reset(); while (stream.hasNext()) { String token = stream.next(); // String tmp = normalizer.normalize(token,Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}+",""); String tmp = normalizer.normalize(token,Form.NFD); tmp = tmp.replaceAll("[\\p{InCombiningDiacriticalMarks}]",""); // .replaceAll("\\p{InCombiningDiacriticalMarks}+",""); if(!token.equals(tmp)) { stream.prevIoUs(); stream.set(tmp); stream.next(); } } }

项目：devops-cstack 文件：AlphaNumericsCharactersCheckUtils.java

@H_502_8@public static String convertToAlphaNumerics(String value) { logger.debug("Before : " + value); value = normalizer.normalize(value,Form.NFD); value = value.replaceAll("[\\p{InCombiningDiacriticalMarks}]",""); value = value.replaceAll("[^-_a-zA-Z0-9\\s]","").replace(" ",""); logger.debug("After : " + value); return value; }

项目：devops-cstack 文件：AlphaNumericsCharactersCheckUtils.java

@H_502_8@public static String deAccent(String value) { logger.debug("Before : " + value); String nfdnormalizedString = normalizer.normalize(value,Form.NFD); Pattern pattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+"); value = pattern.matcher(nfdnormalizedString).replaceAll(""); logger.debug("After : " + value); return value; }

项目：devops-cstack 文件：AlphaNumericsCharactersCheckUtils.java

@H_502_8@public static String convertToAlphaNumerics(String value,Integer countApp) throws UnsupportedEncodingException { value = new String(value.getBytes("ISO-8859-1"),"UTF-8"); value = normalizer.normalize(value,Form.NFD); value = value.replaceAll("[^\\p{ASCII}]","") .replaceAll("[^a-zA-Z0-9\\s]",""); if (value.equalsIgnoreCase("")) { value = "default" + countApp; } return value; }

项目：sunbird-utils 文件：Slug.java

@H_502_8@public static String makeSlug(String input,boolean transliterate) { String origInput = input; // Validate the input if (input == null) { ProjectLogger.log("Provided input value is null"); return input; } // Remove extra spaces input = input.trim(); // Remove URL encoding input = urlDecode(input); // If transliterate is required if (transliterate) { // Tranlisterate & cleanup String transliterated = transliterate(input); // transliterated = removeDuplicateChars(transliterated); input = transliterated; } // Replace all whitespace with dashes input = WHITESPACE.matcher(input).replaceAll("-"); // Remove all accent chars input = normalizer.normalize(input,Form.NFD); // Remove all non-latin special ch@R_404_6460@cters input = NONLATIN.matcher(input).replaceAll(""); // Remove any consecutive dashes input = normalizeDashes(input); // Validate before returning validateResult(input,origInput); // Slug is always lowercase return input.toLowerCase(Locale.ENGLISH); }

项目：openjdk-jdk10 文件：Pattern.java

@H_502_8@/** * Attempts to compose input by combining the first ch@R_404_6460@cter * with the first combining mark following it. Returns a String * that is the composition of the leading ch@R_404_6460@cter with its first * combining mark followed by the remaining combining marks. Returns * null if the first two ch@R_404_6460@cters cannot be further composed. */ private static String compoSEO nestep(String input) { int len = countChars(input,2); String firstTwoCh@R_404_6460@cters = input.substring(0,len); String result = normalizer.normalize(firstTwoCh@R_404_6460@cters,normalizer.Form.NFC); if (result.equals(firstTwoCh@R_404_6460@cters)) return null; else { String remainder = input.substring(len); return result + remainder; } }

项目：openjdk-jdk10 文件：Pattern.java

@H_502_8@boolean match(Matcher matcher,int i,CharSequence seq) { if (i < matcher.to) { int ch0 = Ch@R_404 [email protected](seq,i); int n = Ch@R_404 [email protected](ch0); int j = i + n; while (j < matcher.to) { int ch1 = Ch@R_404 [email protected](seq,j); if (Grapheme.isBoundary(ch0,ch1)) break; ch0 = ch1; j += Ch@R_404 [email protected](ch1); } if (i + n == j) { // single,assume nfc cp if (predicate.is(ch0)) return next.match(matcher,j,seq); } else { while (i + n < j) { String nfc = normalizer.normalize( seq.toString().substring(i,j),normalizer.Form.NFC); if (nfc.codePointCount(0,nfc.length()) == 1) { if (predicate.is(nfc.codePointAt(0)) && next.match(matcher,seq)) { return true; } } ch0 = Ch@R_404 [email protected](seq,j); j -= Ch@R_404 [email protected](ch0); } } if (j < matcher.to) return false; } matcher.hitEnd = true; return false; }

项目：openjdk-jdk10 文件：CDataTransferer.java

@H_502_8@@Override public Object translateBytes(byte[] bytes,Transferable transferable) throws IOException { if (format == CF_URL && URL.class.equals(flavor.getRepresentationClass())) { String charset = Charset.defaultCharset().name(); if (transferable != null && transferable.isDataFlavorSupported(javaTextEncodingFlavor)) { try { charset = new String((byte[]) transferable.getTransferData(javaTextEncodingFlavor),StandardCharsets.UTF_8); } catch (UnsupportedFlavorException cannotHappen) { } } String xml = new String(bytes,charset); // macosx pasteboard returns a property list that consists of one URL // let's extract it. return new URL(extractURL(xml)); } if(isUriListFlavor(flavor) && format == CF_FILE) { // dragQueryFile works fine with files and url,// it parses and extracts values from property list. // maxosx always returns property list for // CF_URL and CF_FILE String[] strings = dragQueryFile(bytes); if(strings == null) { return null; } bytes = String.join(System.getProperty("line.sep@R_404_6460@tor"),strings).getBytes(); // Now we extracted uri from xml,Now we should treat it as // regular string that allows to translate data to target represantation // class by base method format = CF_STRING; } else if (format == CF_STRING) { bytes = normalizer.normalize(new String(bytes,Form.NFC).getBytes("UTF8"); } return super.translateBytes(bytes,transferable); }

项目：smarti 文件：StringUtils.java

@H_502_8@/** * provides the slug name for the parsed input * @p@R_404_6460@m input * @return */ //from https://stackoverflow.com/questions/1657193/java-code-library-for-generating-slugs-for-use-in-pretty-urls public static String toSlug(String input) { String Nowhitespace = WHITESPACE.matcher(input).replaceAll("-"); String normalized = normalizer.normalize(Nowhitespace,Form.NFD); String slug = NONLATIN.matcher(normalized).replaceAll(""); return slug.toLowerCase(Locale.ROOT); }

项目：openjdk9 文件：Pattern.java

// NOTE(review): this snippet is incomplete as captured — the countChars call runs
// straight into the tail of the normalize call, so the statements in between are
// missing. Restore it from the upstream source before use.
@H_502_8@/** * Attempts to compose input by combining the first character * with the first combining mark following it. Returns a String * that is the composition of the leading character with its first * combining mark followed by the remaining combining marks. Returns * null if the first two characters cannot be further composed. */ private static String compoSEO nestep(String input) { int len = countChars(input,normalizer.Form.NFC); if (result.equals(firstTwoCh@R_404_6460@cters)) return null; else { String remainder = input.substring(len); return result + remainder; } }

项目：openjdk9 文件：Pattern.java

// NOTE(review): this snippet is incomplete as captured — the text jumps from the first
// parameter straight into the tail of the inner loop, so the rest of the parameter list
// and most of the body are missing. Restore it from the upstream source before use.
@H_502_8@boolean match(Matcher matcher,j); j -= Ch@R_404 [email protected](ch0); } } if (j < matcher.to) return false; } matcher.hitEnd = true; return false; }

项目：mycore 文件：MCRTextNormalizer.java

@H_502_8@public static String normalizeText(String text) { text = text.toLowerCase(Locale.getDefault()); text = new MCRHyphennormalizer().normalize(text).replace("-"," "); text = normalizer.normalize(text,Form.NFD).replaceAll("\\p{M}",""); //canonical decomposition,remove accents text = text.replace("ue","u").replace("oe","o").replace("ae","a").replace("ß","s").replace("ss","s"); text = text.replaceAll("[^a-z0-9]\\s]",""); //remove all non-alphabetic ch@R_404_6460@cters // text = text.replaceAll("\\b.{1,3}\\b"," ").trim(); // remove all words with fewer than four ch@R_404_6460@cters text = text.replaceAll("\\p{Punct}"," ").trim(); // remove all punctuation text = text.replaceAll("\\s+"," "); // normalize whitespace return text; }

项目：mycore 文件：MCRNameMerger.java

// NOTE(review): this snippet is incomplete as captured — several statements appear to
// be cut in the middle (for example the accent-removal step named in the comment is not
// present). Restore it from the upstream source before use.
@H_502_8@private String normalize(String nameFragment) { String text = nameFragment.toLowerCase(Locale.getDefault()); text = new MCRHyphennormalizer().normalize(text).replace("-",""); // canonical decomposition,then remove accents text = text.replace("ue",""); //remove all non-alphabetic characters text = text.replaceAll("\\p{Punct}"," "); // normalize whitespace return text.trim(); }

项目：eSDK_EC_SDK_Java 文件：StringUtils.java

@H_502_8@public static boolean isNumber(String str) { if (null == str) { return false; } str = normalizer.normalize(str,Form.NFKC); return str.matches("\\d+"); }

项目：zest-writer 文件：ZdsHttp.java

@H_502_8@/** * Transform any string on slug. Just alphanumeric,dash or underscore ch@R_404_6460@cters. * @p@R_404_6460@m input string to convert on slug * @return slug string */ public static String toSlug(String input) { String Nowhitespace = Constant.WHITESPACE.matcher(input).replaceAll("-"); String normalized = normalizer.normalize(Nowhitespace,Form.NFD); String slug = Constant.NONLATIN.matcher(normalized).replaceAll(""); return slug.toLowerCase(Locale.ENGLISH); }

项目：engerek 文件：DiacriticsFilter.java

@H_502_8@@Override public <T extends Object> PrismPropertyValue<T> apply(PrismPropertyValue<T> propertyValue) { Validate.notNull(propertyValue,"Node must not be null."); String text = getStringValue(propertyValue); if (StringUtils.isEmpty(text)) { return propertyValue; } String newValue = normalizer.normalize(text,Form.NFD).replaceAll( "\\p{InCombiningDiacriticalMarks}+",""); propertyValue.setValue((T) newValue); return propertyValue; }

项目：site 文件：PostEntity.java

@H_502_8@final String generateSlug(final String suggestedSlug,final String newTitle) { String rv = suggestedSlug; if (rv == null || rv.trim().isEmpty()) { rv = normalizer.normalize(newTitle.toLowerCase(),Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}|[^\\w\\s]","").replaceAll("[\\s-]+"," ").trim().replaceAll("\\s","-"); } return rv; }

项目：EventManager-JEE 文件：Event.java

@H_502_8@public static String generateSlug(String input,Date createdAt) { Pattern NONLATIN = Pattern.compile("[^\\w-]"); Pattern WHITESPACE = Pattern.compile("[\\s]"); SecureRandom random = new SecureRandom(createdAt.toString().getBytes()); String Nowhitespace = WHITESPACE.matcher(input).replaceAll("-"); String normalized = normalizer.normalize(Nowhitespace,Form.NFD); String slug = NONLATIN.matcher(normalized).replaceAll(""); String lowerCase = slug.toLowerCase(Locale.ENGLISH); String unique = lowerCase + "-" + new BigInteger(130,random).toString(32).substring(0,6); return unique; }

java.text.Normalizer.Form的实例源码

相关推荐