项目:solo-spring
文件:URICoder.java
/**
 * Encodes a string that may contain non-ASCII characters: the input is normalized
 * to NFKC, converted to UTF-8, and every byte that is neither accepted by
 * {@code isUnreserved} nor equal to {@code e} is written through {@code appendEscape}.
 *
 * @param s
 *            The string to encode
 * @param e
 *            A character that does not require encoding if found in the
 *            string.
 * @return the encoded string
 */
private static String encode_UTF8(String s, char e) {
    // Todo: normalizer requires Java 6!
    // Skip the normalization pass when the input is already in NFKC form.
    String n = Normalizer.isNormalized(s, Form.NFKC) ? s : Normalizer.normalize(s, Form.NFKC);
    // convert String to UTF-8
    ByteBuffer bb = UTF8.encode(n);
    // URI encode
    StringBuffer sb = new StringBuffer();
    while (bb.hasRemaining()) {
        // Mask to an unsigned value so bytes >= 0x80 are not seen as negative.
        int b = bb.get() & 0xff;
        if (isUnreserved(b) || b == e) {
            sb.append((char) b);
        } else {
            appendEscape(sb, (byte) b);
        }
    }
    return sb.toString();
}
项目:solo-spring
文件:URICoder.java
/**
* Encodes a string containing non ASCII characters using an UTF-8 encoder.
*
* @param s
* The string the encode (assuming ASCII characters only)
*/
private static String minimalEncode_UTF8(String s) {
// Todo: normalizer requires Java 6!
String n = (normalizer.isnormalized(s,Form.NFKC);
// convert String to UTF-8
ByteBuffer bb = UTF8.encode(n);
// URI encode
StringBuffer sb = new StringBuffer();
while (bb.hasRemaining()) {
int b = bb.get() & 0xff;
if (isLegal(b)) {
sb.append((char) b);
} else {
appendEscape(sb,(byte) b);
}
}
return sb.toString();
}
/**
* Test if a string is in a given normalization form.
* This is semantically equivalent to source.equals(normalize(source,mode)).
*
* Unlike quickCheck(), this function returns a definitive result,
* never a "maybe".
* For NFD, NFKD, and FCD, both functions work exactly the same.
* For NFC and NFKC where quickCheck may return "maybe", this function will
* perform further tests to arrive at a true/false result.
* @param str the input string to be checked to see if it is normalized
* @param form the normalization form
* @param options the optional features to be enabled.
* @return true when the quick check selected by form yields YES
* @throws IllegalArgumentException if form matches none of the four cases below
*/
public static boolean isnormalized(String str,normalizer.Form form,int options) {
// NOTE(review): the NFC branch passes a different argument list to quickCheck than
// the other three branches, and several identifiers here look case-mangled
// (tochararray, normalizerImpl) - confirm against the upstream source.
switch (form) {
case NFC:
return (NFC.quickCheck(str.tochararray(),str.length(),false,normalizerImpl.getNX(options))==YES);
case NFD:
return (NFD.quickCheck(str.tochararray(),normalizerImpl.getNX(options))==YES);
case NFKC:
return (NFKC.quickCheck(str.tochararray(),normalizerImpl.getNX(options))==YES);
case NFKD:
return (NFKD.quickCheck(str.tochararray(),normalizerImpl.getNX(options))==YES);
}
// Reached only when form matched none of the cases above.
throw new IllegalArgumentException("Unexpected normalization form: " +
form);
}
/**
 * Translates native clipboard bytes into a Java object.
 *
 * <p>For {@code CF_URL} with a {@code URL} representation class the bytes are decoded
 * into a {@link URL}; the charset is {@code getDefaultTextCharset()} unless the
 * transferable carries {@code javaTextEncodingFlavor} data naming another one.
 * For {@code CF_STRING} the bytes are re-encoded after NFC normalization and then
 * handed to the superclass, as is every other format.
 *
 * @throws IOException if decoding or the superclass translation fails
 */
@Override
public Object translateBytes(byte[] bytes, DataFlavor flavor, long format, Transferable transferable) throws IOException {
    if (format == CF_URL && URL.class.equals(flavor.getRepresentationClass())) {
        String charset = getDefaultTextCharset();
        if (transferable != null && transferable.isDataFlavorSupported(javaTextEncodingFlavor)) {
            try {
                charset = new String((byte[]) transferable.getTransferData(javaTextEncodingFlavor), "UTF-8");
            } catch (UnsupportedFlavorException cannotHappen) {
                // Support for the flavor was checked just above; keep the default charset.
            }
        }
        return new URL(new String(bytes, charset));
    }
    if (format == CF_STRING) {
        // Decode as UTF-8, compose to NFC, and encode back before delegating.
        bytes = Normalizer.normalize(new String(bytes, "UTF8"), Form.NFC).getBytes("UTF8");
    }
    return super.translateBytes(bytes, flavor, format, transferable);
}
项目:OperatieBRP
文件:ZoekCriterium.java
/**
 * Stores the search value and derives the search flags from it.
 *
 * <p>A leading backslash marks an exact search and a trailing {@code *} a wildcard
 * search; in both cases the marker is stripped from {@code slimZoekenWaarde}.
 * For non-exact searches the value is additionally checked for the absence of
 * upper-case ASCII letters (case-insensitive search) and for diacritical marks.
 * This method only ever sets the flags to {@code true}; it never clears them.
 *
 * @param waarde the raw search value, may be {@code null}
 */
public void setWaarde(final String waarde) {
    this.waarde = waarde;
    if (waarde == null) {
        this.slimZoekenWaarde = null;
    } else if (waarde.startsWith("\\")) {
        this.exact = true;
        this.slimZoekenWaarde = waarde.substring(1);
    } else if (waarde.endsWith("*")) {
        this.wildcard = true;
        this.slimZoekenWaarde = waarde.substring(0, waarde.length() - 1);
    } else {
        this.slimZoekenWaarde = waarde;
    }
    if (waarde != null && !this.exact) {
        if (!waarde.matches(".*[A-Z].*") && attribuut.isstring()) {
            this.caseInsensitive = true;
        }
        // NFD splits accented letters into base letter + combining mark, so a
        // combining mark in the decomposed text means the value carries a diacritic.
        String normalizedWaarde = Normalizer.normalize(waarde, Normalizer.Form.NFD);
        Pattern pattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
        if (pattern.matcher(normalizedWaarde).find()) {
            this.diakriet = true;
        }
    }
}
项目:OperatieBRP
文件:Utils.java
/**
 * Converts a name to a Java enum constant name.
 *
 * <p>Names starting with the {@code LIteraL} prefix have that prefix removed and
 * are Java-unescaped. All other names are decomposed (NFD), have whitespace and
 * hyphens replaced by underscores, are upper-cased, stripped of everything outside
 * {@code [A-Z0-9_]}, and have runs of underscores collapsed to one.
 *
 * @param javaNameBase the name to convert
 * @return the enum constant name
 */
public static String convertToJavaEnumName(final String javaNameBase) {
    if (javaNameBase.startsWith(LIteraL)) {
        return StringEscapeUtils.unescapeJava(javaNameBase.replaceAll(String.format("^%s", LIteraL), ""));
    } else {
        String result = javaNameBase;
        // Unaccent: decompose so the combining marks are dropped by the filter below
        result = Normalizer.normalize(result, Normalizer.Form.NFD);
        // Replace whitespace with underscore
        result = result.replaceAll("(\\s|-)", "_");
        // Uppercase
        result = result.toUpperCase();
        // Remove unsupported characters
        result = result.replaceAll("[^A-Z0-9_]", "");
        // Remove duplicate separators
        result = result.replaceAll("_{2,}", "_");
        return result;
    }
}
项目:bibliome-java-utils
文件:Strings.java
/**
 * Remove diacritics from the specified string.
 *
 * <p>The string is decomposed to NFD and every character in the Combining
 * Diacritical Marks or Combining Diacritical Marks Supplement block is dropped.
 * When no such character is present the NFD-normalized string is returned as is.
 *
 * @param s the string to process
 * @return a copy of the specified string with diacritics removed.
 */
public static final String removeDiacritics(String s) {
    String n = Normalizer.normalize(s, Form.NFD);
    // Allocated lazily: stays null until the first diacritic is found.
    StringBuilder sb = null;
    for (int i = 0; i < n.length(); ++i) {
        char c = n.charAt(i);
        UnicodeBlock b = UnicodeBlock.of(c);
        if (UnicodeBlock.COMBINING_DIACRITICAL_MARKS.equals(b) || UnicodeBlock.COMBINING_DIACRITICAL_MARKS_SUPPLEMENT.equals(b)) {
            if (sb == null) {
                // First mark seen: copy everything before it, then keep appending.
                sb = new StringBuilder(n.length());
                sb.append(n.substring(0, i));
            }
            continue;
        }
        if (sb != null) {
            sb.append(c);
        }
    }
    if (sb == null) {
        return n;
    }
    return sb.toString();
}
项目:mapr-music
文件:SlugUtil.java
/**
 * Converts the specified string to its slug representation, which can be used to
 * generate readable and SEO-friendly URLs.
 *
 * <p>Steps, in order: transliterate, replace {@code WHITESPACE} matches with
 * {@code -}, decompose to NFD, drop {@code NONLATIN} matches, drop
 * {@code EDGESDHASHES} matches, lower-case with the English locale.
 *
 * @param input string which will be converted.
 * @return slug representation of the string.
 */
public static String toSlug(String input) {
    String transliterated = transliterator.transform(input);
    String Nowhitespace = WHITESPACE.matcher(transliterated).replaceAll("-");
    String normalized = Normalizer.normalize(Nowhitespace, Normalizer.Form.NFD);
    String slug = NONLATIN.matcher(normalized).replaceAll("");
    slug = EDGESDHASHES.matcher(slug).replaceAll("");
    return slug.toLowerCase(Locale.ENGLISH);
}
项目:mapr-music
文件:SlugService.java
/**
 * Converts the specified string to its slug representation, which can be used to
 * generate readable and SEO-friendly URLs.
 *
 * <p>Steps, in order: transliterate, replace {@code WHITESPACE} matches with
 * {@code -}, decompose to NFD, drop {@code NONLATIN} matches, drop
 * {@code EDGESDHASHES} matches, lower-case with the English locale.
 *
 * @param input string which will be converted.
 * @return slug representation of the string.
 */
public String toSlug(String input) {
    String transliterated = transliterator.transform(input);
    String Nowhitespace = WHITESPACE.matcher(transliterated).replaceAll("-");
    String normalized = Normalizer.normalize(Nowhitespace, Normalizer.Form.NFD);
    String slug = NONLATIN.matcher(normalized).replaceAll("");
    slug = EDGESDHASHES.matcher(slug).replaceAll("");
    return slug.toLowerCase(Locale.ENGLISH);
}
项目:TensorFlowDetector-App
文件:MainActivity.java
/**
 * Strips accents and every character other than ASCII letters, digits,
 * {@code -} and {@code _} from the value; spaces are removed as well.
 * The value is logged at debug level before and after.
 *
 * @param value the text to clean
 * @return the cleaned text
 */
public static String convertToAlphaNumerics(String value) {
    logger.debug("Before : " + value);
    // Decompose first so accents become separate combining marks that can be removed.
    value = Normalizer.normalize(value, Form.NFD);
    value = value.replaceAll("[\\p{InCombiningDiacriticalMarks}]", "");
    value = value.replaceAll("[^-_a-zA-Z0-9\\s]", "").replace(" ", "");
    logger.debug("After : " + value);
    return value;
}
/**
 * Removes combining diacritical marks from the value after decomposing it to NFD.
 * The value is logged at debug level before and after.
 *
 * @param value the text to de-accent
 * @return the text without combining diacritical marks
 */
public static String deAccent(String value) {
    logger.debug("Before : " + value);
    String nfdnormalizedString = Normalizer.normalize(value, Form.NFD);
    Pattern pattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
    value = pattern.matcher(nfdnormalizedString).replaceAll("");
    logger.debug("After : " + value);
    return value;
}
项目:devops-cstack
文件:Snapshot.java
项目:devops-cstack
文件:Snapshot.java
/**
 * Stores the tag after reducing it to lower-case ASCII letters, digits and hyphens.
 * Accented letters are reduced to their base letter; a {@code null} tag is stored as is.
 *
 * @param fullTag the raw tag, may be {@code null}
 */
public void setFullTag(String fullTag) {
    if (fullTag != null) {
        // Locale-independent lower-casing: with a Turkish default locale "I" would
        // become a dotless i, which the ASCII whitelist below would then delete.
        fullTag = fullTag.toLowerCase(java.util.Locale.ROOT);
        fullTag = Normalizer.normalize(fullTag, Normalizer.Form.NFD);
        fullTag = fullTag.replaceAll("[\\p{InCombiningDiacriticalMarks}]", "");
        fullTag = fullTag.replaceAll("[^a-z0-9-]", "");
    }
    this.fullTag = fullTag;
}
/**
 * Reduces the value to ASCII letters, digits and whitespace.
 *
 * <p>The value is first re-decoded (its ISO-8859-1 bytes read as UTF-8), then
 * decomposed to NFD, and finally stripped of all non-ASCII and non-alphanumeric,
 * non-whitespace characters. If nothing is left, {@code "default" + countApp} is returned.
 *
 * @param value the text to clean
 * @param countApp suffix used for the fallback name
 * @return the cleaned text, or the fallback name when the result is empty
 * @throws UnsupportedEncodingException if one of the named charsets is unavailable
 */
public static String convertToAlphaNumerics(String value, Integer countApp)
        throws UnsupportedEncodingException {
    value = new String(value.getBytes("ISO-8859-1"), "UTF-8");
    value = Normalizer.normalize(value, Form.NFD);
    value = value.replaceAll("[^\\p{ASCII}]", "")
            .replaceAll("[^a-zA-Z0-9\\s]", "");
    if (value.isEmpty()) {
        value = "default" + countApp;
    }
    return value;
}
项目:directory-ldap-api
文件:PrepareString.java
项目:alfresco-repository
文件:NameBasedUserNameGenerator.java
/**
 * Produces a user-name-safe form of the given name: trimmed, lower-cased,
 * whitespace runs replaced by {@code _}, accents removed, and everything that is
 * not an ASCII letter, digit or underscore stripped.
 *
 * @param name the raw name
 * @return the cleansed name
 */
private String cleanseName(String name)
{
    // Replace whitespace with _
    String result = name.trim().toLowerCase().replaceAll("\\s+", "_");
    // Remove accents from characters and strip out non-alphanumeric chars.
    // The class must be A-Z: the range A-z also admits [ \ ] ^ and the backtick.
    return Normalizer.normalize(result, Normalizer.Form.NFD).replaceAll("[^a-zA-Z0-9_]+", "");
}
项目:chromium-net-for-android
文件:NetStringUtil.java
/**
 * Attempts to convert text in a given character set to a Unicode string,
 * and normalize it to NFC. Returns null on failure.
 * @param text ByteBuffer containing the character array to convert.
 * @param charsetName Character set it is encoded in.
 * @return Unicode string on success, null on failure.
 */
@CalledByNative
private static String convertToUnicodeAndnormalize(
        ByteBuffer text, String charsetName) {
    String unicodeString = convertToUnicode(text, charsetName);
    if (unicodeString == null) return null;
    return Normalizer.normalize(unicodeString, Normalizer.Form.NFC);
}
/**
 * Decomposes the input to NFKD and then applies three pattern replacements in
 * order: non-ASCII matches are removed, separator matches become {@code _} or
 * {@code -} depending on {@code underscoreSeparator}, and trim-dash matches are removed.
 *
 * @param input the text to normalize
 * @return the normalized text
 */
private String normalize(final String input) {
    String text = Normalizer.normalize(input, Normalizer.Form.NFKD);
    text = PATTERN_norMALIZE_NON_ASCII.matcher(text).replaceAll(EMPTY);
    text = PATTERN_norMALIZE_SEParaTOR.matcher(text).replaceAll(underscoreSeparator ? "_" : "-");
    text = PATTERN_norMALIZE_TRIM_DASH.matcher(text).replaceAll(EMPTY);
    return text;
}
项目:armadillo
文件:HkdfMessageDigest.java
/**
 * Derives a hex-encoded value from the provided message using HKDF with HMAC-SHA512
 * and the instance's salt.
 *
 * @param providedMessage the input message, must not be null
 * @param usageName the usage label, must not be null
 * @return the derived bytes, hex encoded
 * @throws NullPointerException if either argument is null
 */
@Override
public String derive(String providedMessage,String usageName) {
Objects.requireNonNull(providedMessage);
Objects.requireNonNull(usageName);
// NOTE(review): providedMessage is converted with a normalization form and .array(),
// while usageName is passed as Bytes.from(usageName,outLength); this looks like a
// truncated argument list - confirm against the extractAndExpand signature.
return Bytes.wrap(HKDF.fromHmacSha512().extractAndExpand(salt,Bytes.from(providedMessage,normalizer.Form.NFKD).array(),Bytes.from(usageName,outLength)).encodeHex();
}
项目:armadillo
文件:DefaultEncryptionProtocol.java
/**
 * Derives a key of {@code keyLengthBit / 8} bytes with HKDF (HMAC-SHA512).
 * The input key material is the fingerprint, the content salt and the
 * NFKD-converted content key, concatenated in that order; the preference salt is
 * the HKDF salt.
 *
 * @param password optional password; when present it is stretched with the content salt
 */
private byte[] keyDerivationFunction(String contentKey,byte[] fingerprint,byte[] contentSalt,byte[] preferenceSalt,@Nullable char[] password) {
Bytes ikm = Bytes.wrap(fingerprint).append(contentSalt).append(Bytes.from(contentKey,normalizer.Form.NFKD));
if (password != null) {
// NOTE(review): the result of append(...) is discarded here. If Bytes instances are
// immutable, the stretched password never reaches ikm - confirm. Changing this
// would change the derived keys for existing data.
ikm.append(keyStretchingFunction.stretch(contentSalt,password,32));
}
// NOTE(review): getBytes() without a charset uses the platform default encoding.
return HKDF.fromHmacSha512().extractAndExpand(preferenceSalt,ikm.array(),"DefaultEncryptionProtocol".getBytes(),keyLengthBit / 8);
}
项目:ARCLib
文件:Utils.java
项目:CommentView
文件:Validator.java
/**
 * Computes the length of a tweet: the code-point count of the NFC-normalized
 * text, with every extracted URL counted as {@code shortUrlLengthHttps} (for
 * {@code https://} URLs) or {@code shortUrlLength} instead of its own length.
 *
 * @param text the tweet text
 * @return the computed length
 */
public int getTweetLength(String text) {
    text = Normalizer.normalize(text, Normalizer.Form.NFC);
    int length = text.codePointCount(0, text.length());
    for (Extractor.Entity urlEntity : extractor.extractURLsWithindices(text)) {
        // start - end is negative: it subtracts the URL's span from the total ...
        length += urlEntity.start - urlEntity.end;
        // ... before the fixed shortened-URL length is added in its place.
        length += urlEntity.value.toLowerCase().startsWith("https://") ? shortUrlLengthHttps : shortUrlLength;
    }
    return length;
}
项目:creacoinj
文件:BIP38PrivateKey.java
/**
 * Decrypts this key with the given passphrase.
 *
 * <p>The passphrase is normalized to NFC before use. After decryption, the first
 * four bytes of the double SHA-256 of the key's address string are compared with
 * the stored {@code addressHash} to verify the passphrase.
 *
 * @param passphrase the passphrase to decrypt with
 * @return the decrypted key
 * @throws BadPassphraseException if the computed address hash does not match
 */
public ECKey decrypt(String passphrase) throws BadPassphraseException {
    String normalizedPassphrase = Normalizer.normalize(passphrase, Normalizer.Form.NFC);
    ECKey key = ecMultiply ? decryptEC(normalizedPassphrase) : decryptNoEC(normalizedPassphrase);
    Sha256Hash hash = Sha256Hash.twiceOf(key.toAddress(params).toString().getBytes(Charsets.US_ASCII));
    // The address hash is the leading 4 bytes of the double hash.
    byte[] actualAddressHash = Arrays.copyOfRange(hash.getBytes(), 0, 4);
    if (!Arrays.equals(actualAddressHash, addressHash))
        throw new BadPassphraseException();
    return key;
}
/**
 * Returns the name in NFC form. The input is returned unchanged (same instance)
 * when it is already normalized; otherwise the normalized string is returned and
 * the change is logged at debug level.
 *
 * @param name the name to normalize
 * @return the NFC-normalized name
 */
public CharSequence normalize(final CharSequence name) {
    if (!Normalizer.isNormalized(name, Normalizer.Form.NFC)) {
        // Canonical decomposition followed by canonical composition (default)
        final String normalized = Normalizer.normalize(name, Normalizer.Form.NFC);
        if (log.isDebugEnabled()) {
            log.debug(String.format("normalized string %s to %s", name, normalized));
        }
        return normalized;
    }
    return name;
}
项目:bytes-java
文件:BytesConstructorTests.java
/**
 * Asserts that {@code Bytes.from} agrees with {@link String#getBytes} for the given
 * charset. For non-UTF-8 charsets the default (UTF-8) factory is checked as well;
 * for UTF-8 the NFKD-normalizing factory is checked instead.
 */
private void checkString(String string, Charset charset) {
    Bytes b = Bytes.from(string, charset);
    assertArrayEquals(string.getBytes(charset), b.array());
    assertEquals(new String(string.getBytes(charset), charset), b.encodeCharset(charset));
    if (charset != StandardCharsets.UTF_8) {
        Bytes bUtf8 = Bytes.from(string);
        assertArrayEquals(string.getBytes(StandardCharsets.UTF_8), bUtf8.array());
        assertEquals(new String(string.getBytes(StandardCharsets.UTF_8), StandardCharsets.UTF_8), bUtf8.encodeUtf8());
    } else {
        Bytes bnormalized = Bytes.from(string, Normalizer.Form.NFKD);
        assertArrayEquals(Normalizer.normalize(string, Normalizer.Form.NFKD).getBytes(charset), bnormalized.array());
    }
}
/**
 * Demo: prints whether a sample string is in NFKD form, and the string itself,
 * before and after normalizing it to NFKD.
 */
public static void main(String[] args) {
    String s = "São Paulo";
    System.out.println(Normalizer.isNormalized(s, Normalizer.Form.NFKD));
    System.out.println(s);
    s = Normalizer.normalize(s, Normalizer.Form.NFKD);
    System.out.println(Normalizer.isNormalized(s, Normalizer.Form.NFKD));
    System.out.println(s);
    // Todo: how can I print the difference?
}
/**
 * Checks whether this file object's name matches the given simple name plus the
 * extension of the given kind.
 *
 * <p>Besides an exact match, two relaxed matches are accepted: on Mac OS, a file
 * name in NFD that equals the expected NFC name after normalization (the stored
 * name is then replaced by the NFC form); and a case-insensitive match that is
 * confirmed against the canonical file name.
 *
 * @throws NullPointerException if {@code cn} is null
 */
@Override
public boolean isNameCompatible(String cn, JavaFileObject.Kind kind) {
    cn.getClass();
    // null check
    if (kind == Kind.OTHER && getKind() != kind) {
        return false;
    }
    String n = cn + kind.extension;
    if (name.equals(n)) {
        return true;
    }
    if (isMacOS && Normalizer.isNormalized(name, Normalizer.Form.NFD)
            && Normalizer.isNormalized(n, Normalizer.Form.NFC)) {
        // On Mac OS X the file name and the class name may be normalized
        // differently - in that case normalize the file name to the
        // Normalization Form Composed (NFC) before comparing.
        String normName = Normalizer.normalize(name, Normalizer.Form.NFC);
        if (normName.equals(n)) {
            this.name = normName;
            return true;
        }
    }
    if (name.equalsIgnoreCase(n)) {
        try {
            // allow for Windows
            return file.getCanonicalFile().getName().equals(n);
        } catch (IOException e) {
            // Canonical name unavailable: fall through and report no match.
        }
    }
    return false;
}
/**
 * The pattern is converted to NFD form and then a pure group
 * is constructed to match canonical equivalences of the characters.
 *
 * <p>Reads {@code pattern}; writes {@code normalizedPattern} and {@code patternLength}.
 */
private void normalize() {
    int lastCodePoint = -1;

    // Convert pattern into NFD form
    normalizedPattern = Normalizer.normalize(pattern, Normalizer.Form.NFD);
    patternLength = normalizedPattern.length();

    // Modify pattern to match canonical equivalences
    StringBuilder newPattern = new StringBuilder(patternLength);
    for (int i = 0; i < patternLength; ) {
        int c = normalizedPattern.codePointAt(i);
        StringBuilder sequenceBuffer;
        if ((Character.getType(c) == Character.NON_SPACING_MARK)
                && (lastCodePoint != -1)) {
            // Collect the preceding code point plus the code points read while
            // scanning the run of non-spacing marks that follows it.
            sequenceBuffer = new StringBuilder();
            sequenceBuffer.appendCodePoint(lastCodePoint);
            sequenceBuffer.appendCodePoint(c);
            while (Character.getType(c) == Character.NON_SPACING_MARK) {
                i += Character.charCount(c);
                if (i >= patternLength)
                    break;
                c = normalizedPattern.codePointAt(i);
                sequenceBuffer.appendCodePoint(c);
            }
            String ea = produceEquivalentAlternation(
                    sequenceBuffer.toString());
            // Drop the base code point already emitted; the alternation replaces it.
            newPattern.setLength(newPattern.length() - Character.charCount(lastCodePoint));
            newPattern.append("(?:").append(ea).append(")");
        } else if (c == '[' && lastCodePoint != '\\') {
            i = normalizeCharClass(newPattern, i);
        } else {
            newPattern.appendCodePoint(c);
        }
        lastCodePoint = c;
        i += Character.charCount(c);
    }
    normalizedPattern = newPattern.toString();
}
/**
 * Attempts to compose input by combining the first character
 * with the first combining mark following it. Returns a String
 * that is the composition of the leading character with its first
 * combining mark followed by the remaining combining marks. Returns
 * null if the first two characters cannot be further composed.
 *
 * @param input the character sequence to compose
 * @return the partially composed string, or null if NFC leaves the prefix unchanged
 */
private String compoSEOnestep(String input) {
    int len = countChars(input, 2);
    String firstTwoCharacters = input.substring(0, len);
    String result = Normalizer.normalize(firstTwoCharacters, Normalizer.Form.NFC);

    if (result.equals(firstTwoCharacters))
        return null;
    else {
        String remainder = input.substring(len);
        return result + remainder;
    }
}
/**
 * Normalizes a <code>String</code> using the given normalization form.
 * Strings consisting only of characters up to 127 are returned unchanged.
 *
 * @param str the input string to be normalized.
 * @param form the normalization form
 * @param options the optional features to be enabled.
 * @return the normalized string
 * @throws IllegalArgumentException if form matches none of the four cases below
 */
public static String normalize(String str, Normalizer.Form form, int options) {
    int len = str.length();
    boolean asciiOnly = true;
    // Short strings are scanned with charAt; longer ones through a char[] copy.
    if (len < 80) {
        for (int i = 0; i < len; i++) {
            if (str.charAt(i) > 127) {
                asciiOnly = false;
                break;
            }
        }
    } else {
        char[] a = str.toCharArray();
        for (int i = 0; i < len; i++) {
            if (a[i] > 127) {
                asciiOnly = false;
                break;
            }
        }
    }

    switch (form) {
    case NFC :
        return asciiOnly ? str : NFC.normalize(str, options);
    case NFD :
        return asciiOnly ? str : NFD.normalize(str, options);
    case NFKC :
        return asciiOnly ? str : NFKC.normalize(str, options);
    case NFKD :
        return asciiOnly ? str : NFKD.normalize(str, options);
    }

    throw new IllegalArgumentException("Unexpected normalization form: " +
                                       form);
}
项目:guereza
文件:SimpleIndexer.java
/**
 * Splits a sentence on {@code REGEX_SPACE} and returns its words lower-cased,
 * NFD-decomposed, stripped of {@code REGEX_ALPHANUM} matches and stemmed; empty
 * results and stop words are dropped.
 *
 * @param sentence the sentence to tokenize
 * @return a stream of processed words
 */
private Stream<String> getWords(final String sentence) {
    return Arrays.stream(sentence.split(REGEX_SPACE))
            .map(String::toLowerCase)
            .map(s -> Normalizer.normalize(s, Normalizer.Form.NFD))
            .map(s -> s.replaceAll(REGEX_ALPHANUM, ""))
            .map(this::stemmed)
            .filter(s -> !s.isEmpty())
            .filter(w -> !StopWords.match(w));
}
项目:sunbird-utils
文件:Slug.java
/**
 * Builds a lower-case slug from the input.
 *
 * <p>Steps, in order: trim, URL-decode, optionally transliterate, replace
 * {@code WHITESPACE} matches with dashes, decompose to NFD, drop {@code NONLATIN}
 * matches, normalize dashes, validate, lower-case with the English locale.
 *
 * @param input the text to convert; {@code null} is logged and returned as is
 * @param transliterate whether to transliterate the input first
 * @return the slug, or {@code null} when input is {@code null}
 */
public static String makeSlug(String input, boolean transliterate) {
    String origInput = input;
    // Validate the input
    if (input == null) {
        ProjectLogger.log("Provided input value is null");
        return input;
    }
    // Remove extra spaces
    input = input.trim();
    // Remove URL encoding
    input = urlDecode(input);
    // If transliterate is required
    if (transliterate) {
        // Transliterate & cleanup
        String transliterated = transliterate(input);
        // transliterated = removeDuplicateChars(transliterated);
        input = transliterated;
    }
    // Replace all whitespace with dashes
    input = WHITESPACE.matcher(input).replaceAll("-");
    // Remove all accent chars
    input = Normalizer.normalize(input, Form.NFD);
    // Remove all non-latin special characters
    input = NONLATIN.matcher(input).replaceAll("");
    // Remove any consecutive dashes
    input = normalizeDashes(input);
    // Validate before returning
    validateResult(input, origInput);
    // Slug is always lowercase
    return input.toLowerCase(Locale.ENGLISH);
}
项目:Java_CTe
文件:XmlUtil.java
/**
 * Removes accents and layout characters from an XML string.
 *
 * <p>Carriage returns, tabs and line feeds are deleted, {@code &} becomes
 * {@code E}, whitespace between a closing {@code >} and the next {@code <} is
 * dropped, and combining diacritical marks are removed after NFKD decomposition.
 *
 * @param str the text to clean, may be {@code null}
 * @return the cleaned text; the empty string when {@code str} is {@code null}
 */
public static String removeAcentos(String str) {
    // Guard first: the replace calls below would throw on null before the
    // original null fallback was ever reached.
    if (str == null) {
        return "";
    }
    str = str.replaceAll("\r", "");
    str = str.replaceAll("\t", "");
    str = str.replaceAll("\n", "");
    str = str.replaceAll("&", "E");
    str = str.replaceAll(">\\s+<", "><");
    return Normalizer.normalize(str, Normalizer.Form.NFKD).replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
}
项目:jdk8u-jdk
文件:Pattern.java
/**
* The pattern is converted to NFD form and then a pure group
* is constructed to match canonical equivalences of the characters.
*/
private void normalize() {
boolean inCharClass = false;
int lastCodePoint = -1;
// Convert pattern into NFD form
// NOTE(review): this snippet appears truncated - the statement below references
// i, c and newPattern, none of which are declared here, and the braces do not
// balance. Restore the missing lines from the upstream source before use.
normalizedPattern = normalizer.normalize(pattern,i);
} else {
newPattern.appendCodePoint(c);
}
lastCodePoint = c;
i += Character.charCount(c);
}
normalizedPattern = newPattern.toString();
}
项目:jdk8u-jdk
文件:Pattern.java
/**
* Attempts to compose input by combining the first character
* with the first combining mark following it. Returns a String
* that is the composition of the leading character with its first
* combining mark followed by the remaining combining marks. Returns
* null if the first two characters cannot be further composed.
*/
private String compoSEOnestep(String input) {
// NOTE(review): this snippet appears truncated - result and firstTwoCharacters
// are used below but never declared, and countChars is given a normalization
// form. Restore the missing lines from the upstream source before use.
int len = countChars(input,normalizer.Form.NFC);
if (result.equals(firstTwoCharacters))
return null;
else {
String remainder = input.substring(len);
return result + remainder;
}
}
/**
* Normalizes a <code>String</code> using the given normalization form.
*
* @param str the input string to be normalized.
* @param form the normalization form
* @param options the optional features to be enabled.
*/
// NOTE(review): this snippet appears truncated - the parameter list runs straight
// into the tail of the method body, and form/options are never declared.
// Restore the missing lines from the upstream source before use.
public static String normalize(String str,options);
}
throw new IllegalArgumentException("Unexpected normalization form: " +
form);
}
项目:BIP39
文件:WordListHashing.java
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 [email protected] 举报,一经查实,本站将立刻删除。