项目:solo-spring
文件:URICoder.java
@H_502_8@/**
* Encodes a string containing non ASCII ch@R_404_6460@cters using an UTF-8 encoder.
*
* @p@R_404_6460@m s
* The string the encode (assuming ASCII ch@R_404_6460@cters only)
* @p@R_404_6460@m e
* A ch@R_404_6460@cter that does not require encoding if found in the
* string.
*/
private static String encode_UTF8(String s,char e) {
// Todo: normalizer requires Java 6!
String n = (normalizer.isnormalized(s,Form.NFKC)) ? s : normalizer.normalize(s,Form.NFKC);
// convert String to UTF-8
ByteBuffer bb = UTF8.encode(n);
// URI encode
StringBuffer sb = new StringBuffer();
while (bb.hasRemaining()) {
int b = bb.get() & 0xff;
if (isUnreserved(b) || b == e) {
sb.append((char) b);
} else {
appendEscape(sb,(byte) b);
}
}
return sb.toString();
}
项目:solo-spring
文件:URICoder.java
@H_502_8@/**
* Encodes a string containing non ASCII ch@R_404_6460@cters using an UTF-8 encoder.
*
* @p@R_404_6460@m s
* The string the encode (assuming ASCII ch@R_404_6460@cters only)
*/
private static String minimalEncode_UTF8(String s) {
// Todo: normalizer requires Java 6!
String n = (normalizer.isnormalized(s,Form.NFKC);
// convert String to UTF-8
ByteBuffer bb = UTF8.encode(n);
// URI encode
StringBuffer sb = new StringBuffer();
while (bb.hasRemaining()) {
int b = bb.get() & 0xff;
if (isLegal(b)) {
sb.append((char) b);
} else {
appendEscape(sb,(byte) b);
}
}
return sb.toString();
}
@H_502_8@@Override
public Object translateBytes(byte[] bytes,DataFlavor flavor,long format,Transferable transferable) throws IOException {
if (format == CF_URL && URL.class.equals(flavor.getRepresentationClass()))
{
String charset = getDefaultTextCharset();
if (transferable != null && transferable.isDataFlavorSupported(javaTextEncodingFlavor)) {
try {
charset = new String((byte[])transferable.getTransferData(javaTextEncodingFlavor),"UTF-8");
} catch (UnsupportedFlavorException cannotHappen) {
}
}
return new URL(new String(bytes,charset));
}
if (format == CF_STRING) {
bytes = normalizer.normalize(new String(bytes,"UTF8"),Form.NFC).getBytes("UTF8");
}
return super.translateBytes(bytes,flavor,format,transferable);
}
项目:bibliome-java-utils
文件:Strings.java
@H_502_8@/**
* Remove diacritics from the specified string.
* @p@R_404_6460@m s
* @return a copy of the specified string with diacritics removed.
*/
public static final String removeDiacritics(String s) {
String n = normalizer.normalize(s,Form.NFD);
StringBuilder sb = null;
for (int i = 0; i < n.length(); ++i) {
char c = n.ch@R_404_6460@t(i);
UnicodeBlock b = UnicodeBlock.of(c);
if (UnicodeBlock.COMBINING_DIACRITICAL_MARKS.equals(b) || UnicodeBlock.COMBINING_DIACRITICAL_MARKS_SUPPLEMENT.equals(b)) {
if (sb == null) {
sb = new StringBuilder(n.length());
sb.append(n.substring(0,i));
}
continue;
}
if (sb != null)
sb.append(c);
}
if (sb == null)
return n;
return sb.toString();
}
项目:mycore
文件:MCRUtils.java
@H_502_8@private static String getHash(int iterations,byte[] salt,String text,String algorithm)
throws NoSuchAlgorithmException {
MessageDigest digest;
if (--iterations < 0) {
iterations = 0;
}
byte[] data;
try {
digest = MessageDigest.getInstance(algorithm);
text = normalizer.normalize(text,Form.NFC);
if (salt != null) {
digest.update(salt);
}
data = digest.digest(text.getBytes("UTF-8"));
for (int i = 0; i < iterations; i++) {
data = digest.digest(data);
}
} catch (UnsupportedEncodingException e) {
throw new MCRException("Could not get " + algorithm + " checksum",e);
}
return toHexString(data);
}
@H_502_8@String toString(List<List<TextPosition>> words)
{
StringBuilder stringBuilder = new StringBuilder();
boolean first = true;
for (List<TextPosition> word : words)
{
if (first)
first = false;
else
stringBuilder.append(' ');
for (TextPosition textPosition : word)
{
stringBuilder.append(textPosition.getUnicode());
}
}
// cf. http://stackoverflow.com/a/7171932/1729265
return normalizer.normalize(stringBuilder,Form.NFKC);
}
项目:MyVidCoRe
文件:Hash.java
@H_502_8@private static String getHash(int iterations,String str,String algorithm)
throws NoSuchAlgorithmException,UnsupportedEncodingException {
MessageDigest digest;
int it = iterations;
if (--it < 0) {
it = 0;
}
byte[] data;
digest = MessageDigest.getInstance(algorithm);
String text = normalizer.normalize(str,Form.NFC);
if (salt != null) {
digest.update(salt);
}
data = digest.digest(text.getBytes("UTF-8"));
for (int i = 0; i < it; i++) {
data = digest.digest(data);
}
return Hash.toHexString(data);
}
项目:packagedrone
文件:Users.java
@H_502_8@public static String hashIt ( final String salt,String data )
{
data = normalizer.normalize ( data,Form.NFC );
final byte[] strData = data.getBytes ( StandardCharsets.UTF_8 );
final byte[] saltData = salt.getBytes ( StandardCharsets.UTF_8 );
final byte[] first = new byte[saltData.length + strData.length];
System.arraycopy ( saltData,first,saltData.length );
System.arraycopy ( strData,saltData.length,strData.length );
final MessageDigest md = createDigest ();
byte[] digest = md.digest ( first );
final byte[] current = new byte[saltData.length + digest.length];
for ( int i = 0; i < 1000; i++ )
{
System.arraycopy ( saltData,current,saltData.length );
System.arraycopy ( digest,digest.length );
digest = md.digest ( current );
}
return Base64.getEncoder ().encodetoString ( digest );
}
@H_502_8@@Override
public Fragment expanded() {
char[] cs = super.innerText().toch@R_404_6460@rray();
String accent = charMap.get(innerText().substring(0,1));
if (accent == null) {
accent = "\uFFFD";
Message m = Message.builder("char.accent.unkNown")
.fromNode(this)
.addNote("Ch@R_404_6460@cter " + text + " cannot be expanded.")
.build();
Log.getInstance().add(m);
}
String str = "" + cs[1] + accent;
str = normalizer.normalize(str,Form.NFC);
return wrap("ACCENT",str);
}
项目:berlioz
文件:URICoder.java
@H_502_8@/**
* Encodes a string containing non ASCII ch@R_404_6460@cters using an UTF-8 encoder.
*
* @p@R_404_6460@m s The string the encode (assuming ASCII ch@R_404_6460@cters only)
* @p@R_404_6460@m e A ch@R_404_6460@cter that does not require encoding if found in the string.
*/
private static String encodeUTF8(String s,char e) {
String n = (normalizer.isnormalized(s,Form.NFKC);
// convert String to UTF-8
ByteBuffer bb = StandardCharsets.UTF_8.encode(n);
// URI encode
StringBuilder sb = new StringBuilder();
while (bb.hasRemaining()) {
int b = bb.get() & 0xff;
if (isUnreserved(b) || b == e) {
sb.append((char) b);
} else {
appendEscape(sb,(byte) b);
}
}
return sb.toString();
}
项目:berlioz
文件:URICoder.java
@H_502_8@/**
* Encodes a string containing non ASCII ch@R_404_6460@cters using an UTF-8 encoder.
*
* @p@R_404_6460@m s The string the encode (assuming ASCII ch@R_404_6460@cters only)
*/
private static String minimalEncodeUTF8(String s) {
String n = (normalizer.isnormalized(s,Form.NFKC);
// convert String to UTF-8
ByteBuffer bb = StandardCharsets.UTF_8.encode(n);
// URI encode
StringBuilder sb = new StringBuilder();
while (bb.hasRemaining()) {
int b = bb.get() & 0xff;
if (isLegal(b)) {
sb.append((char) b);
} else {
appendEscape(sb,(byte) b);
}
}
return sb.toString();
}
项目:infobip-open-jdk-8
文件:CDataTransferer.java
@H_502_8@@Override
public Object translateBytes(byte[] bytes,transferable);
}
项目:jdk8u-dev-jdk
文件:CDataTransferer.java
@H_502_8@@Override
public Object translateBytes(byte[] bytes,transferable);
}
项目:furi
文件:URICoder.java
@H_502_8@/**
* Encodes a string containing non ASCII ch@R_404_6460@cters using an UTF-8 encoder.
*
* @p@R_404_6460@m s The string the encode (assuming ASCII ch@R_404_6460@cters only)
* @p@R_404_6460@m e A ch@R_404_6460@cter that does not require encoding if found in the string.
*/
private static String encode_UTF8(String s,char e) {
// Todo: normalizer requires Java 6!
String n = (normalizer.isnormalized(s,Form.NFKC);
// convert String to UTF-8
ByteBuffer bb = UTF8.encode(n);
// URI encode
StringBuffer sb = new StringBuffer();
while (bb.hasRemaining()) {
int b = bb.get() & 0xff;
if (isUnreserved(b) || b == e) {
sb.append((char) b);
} else {
appendEscape(sb,(byte) b);
}
}
return sb.toString();
}
项目:furi
文件:URICoder.java
@H_502_8@/**
* Encodes a string containing non ASCII ch@R_404_6460@cters using an UTF-8 encoder.
*
* @p@R_404_6460@m s The string the encode (assuming ASCII ch@R_404_6460@cters only)
*/
private static String minimalEncode_UTF8(String s) {
// Todo: normalizer requires Java 6!
String n = (normalizer.isnormalized(s,Form.NFKC);
// convert String to UTF-8
ByteBuffer bb = UTF8.encode(n);
// URI encode
StringBuffer sb = new StringBuffer();
while (bb.hasRemaining()) {
int b = bb.get() & 0xff;
if (isLegal(b)) {
sb.append((char) b);
} else {
appendEscape(sb,(byte) b);
}
}
return sb.toString();
}
项目:voj
文件:SlugifyUtils.java
@H_502_8@/**
* 获取字符串的Slug.
* @p@R_404_6460@m str - 待获取Slug的字符串
* @return 字符串对应的Slug
*/
public static String getSlug(String str) {
if ( str == null ) {
return "";
}
// Rid of White Spaces
String NowhiteSpace = WHITESPACE.matcher(str.trim()).replaceAll("-");
// Processing Non-ASCII Ch@R_404_6460@cters
try {
NowhiteSpace = URLEncoder.encode(NowhiteSpace,"UTF-8");
} catch (UnsupportedEncodingException e) {
// Never reach here
}
// Slugify String
String normalized = normalizer.normalize(NowhiteSpace,Form.NFD);
return normalized.toLowerCase();
}
项目:package-drone
文件:Users.java
项目:srimporter
文件:SheetSerializer.java
@H_502_8@static String normalizeAccents(String regularstring) {
if (!g_bnormalize)
return regularstring; // leave the accents
String normalizedString = regularstring.replace("é","e");
normalizedString = normalizer.normalize(normalizedString,Form.NFD);
StringBuilder sb = new StringBuilder(normalizedString);
for (int i = 0; i < sb.length(); i++) {
if (Ch@R_404[email protected](sb.ch@R_404_6460@t(i)) == Ch@R_404[email protected]_SPACING_MARK) {
sb.delete(i,1);
}
}
regularstring = sb.toString();
return regularstring;
}
项目:wikipedia_indexer
文件:AccentsDefault.java
@H_502_8@public void apply(TokenStream stream) throws TokenizerException {
if (stream == null)
return;
stream.reset();
while (stream.hasNext()) {
String token = stream.next();
// String tmp = normalizer.normalize(token,Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}+","");
String tmp = normalizer.normalize(token,Form.NFD);
tmp = tmp.replaceAll("[\\p{InCombiningDiacriticalMarks}]","");
// .replaceAll("\\p{InCombiningDiacriticalMarks}+","");
if(!token.equals(tmp)) {
stream.prevIoUs();
stream.set(tmp);
stream.next();
}
}
}
@H_502_8@public static String convertToAlphaNumerics(String value) {
logger.debug("Before : " + value);
value = normalizer.normalize(value,Form.NFD);
value = value.replaceAll("[\\p{InCombiningDiacriticalMarks}]","");
value = value.replaceAll("[^-_a-zA-Z0-9\\s]","").replace(" ","");
logger.debug("After : " + value);
return value;
}
@H_502_8@public static String deAccent(String value) {
logger.debug("Before : " + value);
String nfdnormalizedString = normalizer.normalize(value,Form.NFD);
Pattern pattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
value = pattern.matcher(nfdnormalizedString).replaceAll("");
logger.debug("After : " + value);
return value;
}
@H_502_8@public static String convertToAlphaNumerics(String value,Integer countApp)
throws UnsupportedEncodingException {
value = new String(value.getBytes("ISO-8859-1"),"UTF-8");
value = normalizer.normalize(value,Form.NFD);
value = value.replaceAll("[^\\p{ASCII}]","")
.replaceAll("[^a-zA-Z0-9\\s]","");
if (value.equalsIgnoreCase("")) {
value = "default" + countApp;
}
return value;
}
项目:sunbird-utils
文件:Slug.java
@H_502_8@public static String makeSlug(String input,boolean transliterate) {
String origInput = input;
// Validate the input
if (input == null) {
ProjectLogger.log("Provided input value is null");
return input;
}
// Remove extra spaces
input = input.trim();
// Remove URL encoding
input = urlDecode(input);
// If transliterate is required
if (transliterate) {
// Tranlisterate & cleanup
String transliterated = transliterate(input);
// transliterated = removeDuplicateChars(transliterated);
input = transliterated;
}
// Replace all whitespace with dashes
input = WHITESPACE.matcher(input).replaceAll("-");
// Remove all accent chars
input = normalizer.normalize(input,Form.NFD);
// Remove all non-latin special ch@R_404_6460@cters
input = NONLATIN.matcher(input).replaceAll("");
// Remove any consecutive dashes
input = normalizeDashes(input);
// Validate before returning
validateResult(input,origInput);
// Slug is always lowercase
return input.toLowerCase(Locale.ENGLISH);
}
项目:openjdk-jdk10
文件:Pattern.java
@H_502_8@/**
* Attempts to compose input by combining the first ch@R_404_6460@cter
* with the first combining mark following it. Returns a String
* that is the composition of the leading ch@R_404_6460@cter with its first
* combining mark followed by the remaining combining marks. Returns
* null if the first two ch@R_404_6460@cters cannot be further composed.
*/
private static String compoSEOnestep(String input) {
int len = countChars(input,2);
String firstTwoCh@R_404_6460@cters = input.substring(0,len);
String result = normalizer.normalize(firstTwoCh@R_404_6460@cters,normalizer.Form.NFC);
if (result.equals(firstTwoCh@R_404_6460@cters))
return null;
else {
String remainder = input.substring(len);
return result + remainder;
}
}
项目:openjdk-jdk10
文件:Pattern.java
@H_502_8@boolean match(Matcher matcher,int i,CharSequence seq) {
if (i < matcher.to) {
int ch0 = Ch@R_404[email protected](seq,i);
int n = Ch@R_404[email protected](ch0);
int j = i + n;
while (j < matcher.to) {
int ch1 = Ch@R_404[email protected](seq,j);
if (Grapheme.isBoundary(ch0,ch1))
break;
ch0 = ch1;
j += Ch@R_404[email protected](ch1);
}
if (i + n == j) { // single,assume nfc cp
if (predicate.is(ch0))
return next.match(matcher,j,seq);
} else {
while (i + n < j) {
String nfc = normalizer.normalize(
seq.toString().substring(i,j),normalizer.Form.NFC);
if (nfc.codePointCount(0,nfc.length()) == 1) {
if (predicate.is(nfc.codePointAt(0)) &&
next.match(matcher,seq)) {
return true;
}
}
ch0 = Ch@R_404[email protected](seq,j);
j -= Ch@R_404[email protected](ch0);
}
}
if (j < matcher.to)
return false;
}
matcher.hitEnd = true;
return false;
}
项目:openjdk-jdk10
文件:CDataTransferer.java
@H_502_8@@Override
public Object translateBytes(byte[] bytes,Transferable transferable) throws IOException {
if (format == CF_URL && URL.class.equals(flavor.getRepresentationClass())) {
String charset = Charset.defaultCharset().name();
if (transferable != null && transferable.isDataFlavorSupported(javaTextEncodingFlavor)) {
try {
charset = new String((byte[]) transferable.getTransferData(javaTextEncodingFlavor),StandardCharsets.UTF_8);
} catch (UnsupportedFlavorException cannotHappen) {
}
}
String xml = new String(bytes,charset);
// macosx pasteboard returns a property list that consists of one URL
// let's extract it.
return new URL(extractURL(xml));
}
if(isUriListFlavor(flavor) && format == CF_FILE) {
// dragQueryFile works fine with files and url,// it parses and extracts values from property list.
// maxosx always returns property list for
// CF_URL and CF_FILE
String[] strings = dragQueryFile(bytes);
if(strings == null) {
return null;
}
bytes = String.join(System.getProperty("line.sep@R_404_6460@tor"),strings).getBytes();
// Now we extracted uri from xml,Now we should treat it as
// regular string that allows to translate data to target represantation
// class by base method
format = CF_STRING;
} else if (format == CF_STRING) {
bytes = normalizer.normalize(new String(bytes,Form.NFC).getBytes("UTF8");
}
return super.translateBytes(bytes,transferable);
}
项目:smarti
文件:StringUtils.java
@H_502_8@/**
* provides the slug name for the parsed input
* @p@R_404_6460@m input
* @return
*/ //from https://stackoverflow.com/questions/1657193/java-code-library-for-generating-slugs-for-use-in-pretty-urls
public static String toSlug(String input) {
String Nowhitespace = WHITESPACE.matcher(input).replaceAll("-");
String normalized = normalizer.normalize(Nowhitespace,Form.NFD);
String slug = NONLATIN.matcher(normalized).replaceAll("");
return slug.toLowerCase(Locale.ROOT);
}
项目:openjdk9
文件:Pattern.java
@H_502_8@/**
* Attempts to compose input by combining the first ch@R_404_6460@cter
* with the first combining mark following it. Returns a String
* that is the composition of the leading ch@R_404_6460@cter with its first
* combining mark followed by the remaining combining marks. Returns
* null if the first two ch@R_404_6460@cters cannot be further composed.
*/
private static String compoSEOnestep(String input) {
int len = countChars(input,normalizer.Form.NFC);
if (result.equals(firstTwoCh@R_404_6460@cters))
return null;
else {
String remainder = input.substring(len);
return result + remainder;
}
}
项目:openjdk9
文件:Pattern.java
@H_502_8@boolean match(Matcher matcher,j);
j -= Ch@R_404[email protected](ch0);
}
}
if (j < matcher.to)
return false;
}
matcher.hitEnd = true;
return false;
}
@H_502_8@public static String normalizeText(String text) {
text = text.toLowerCase(Locale.getDefault());
text = new MCRHyphennormalizer().normalize(text).replace("-"," ");
text = normalizer.normalize(text,Form.NFD).replaceAll("\\p{M}",""); //canonical decomposition,remove accents
text = text.replace("ue","u").replace("oe","o").replace("ae","a").replace("ß","s").replace("ss","s");
text = text.replaceAll("[^a-z0-9]\\s]",""); //remove all non-alphabetic ch@R_404_6460@cters
// text = text.replaceAll("\\b.{1,3}\\b"," ").trim(); // remove all words with fewer than four ch@R_404_6460@cters
text = text.replaceAll("\\p{Punct}"," ").trim(); // remove all punctuation
text = text.replaceAll("\\s+"," "); // normalize whitespace
return text;
}
项目:mycore
文件:MCRNameMerger.java
@H_502_8@private String normalize(String nameFragment) {
String text = nameFragment.toLowerCase(Locale.getDefault());
text = new MCRHyphennormalizer().normalize(text).replace("-",""); // canonical decomposition,then remove accents
text = text.replace("ue",""); //remove all non-alphabetic ch@R_404_6460@cters
text = text.replaceAll("\\p{Punct}"," "); // normalize whitespace
return text.trim();
}
项目:eSDK_EC_SDK_Java
文件:StringUtils.java
项目:zest-writer
文件:ZdsHttp.java
@H_502_8@/**
* Transform any string on slug. Just alphanumeric,dash or underscore ch@R_404_6460@cters.
* @p@R_404_6460@m input string to convert on slug
* @return slug string
*/
public static String toSlug(String input) {
String Nowhitespace = Constant.WHITESPACE.matcher(input).replaceAll("-");
String normalized = normalizer.normalize(Nowhitespace,Form.NFD);
String slug = Constant.NONLATIN.matcher(normalized).replaceAll("");
return slug.toLowerCase(Locale.ENGLISH);
}
项目:engerek
文件:DiacriticsFilter.java
@H_502_8@@Override
public <T extends Object> PrismPropertyValue<T> apply(PrismPropertyValue<T> propertyValue) {
Validate.notNull(propertyValue,"Node must not be null.");
String text = getStringValue(propertyValue);
if (StringUtils.isEmpty(text)) {
return propertyValue;
}
String newValue = normalizer.normalize(text,Form.NFD).replaceAll(
"\\p{InCombiningDiacriticalMarks}+","");
propertyValue.setValue((T) newValue);
return propertyValue;
}
项目:site
文件:PostEntity.java
@H_502_8@final String generateSlug(final String suggestedSlug,final String newTitle) {
String rv = suggestedSlug;
if (rv == null || rv.trim().isEmpty()) {
rv = normalizer.normalize(newTitle.toLowerCase(),Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}|[^\\w\\s]","").replaceAll("[\\s-]+"," ").trim().replaceAll("\\s","-");
}
return rv;
}
项目:EventManager-JEE
文件:Event.java
@H_502_8@public static String generateSlug(String input,Date createdAt) {
Pattern NONLATIN = Pattern.compile("[^\\w-]");
Pattern WHITESPACE = Pattern.compile("[\\s]");
SecureRandom random = new SecureRandom(createdAt.toString().getBytes());
String Nowhitespace = WHITESPACE.matcher(input).replaceAll("-");
String normalized = normalizer.normalize(Nowhitespace,Form.NFD);
String slug = NONLATIN.matcher(normalized).replaceAll("");
String lowerCase = slug.toLowerCase(Locale.ENGLISH);
String unique = lowerCase + "-" + new BigInteger(130,random).toString(32).substring(0,6);
return unique;
}
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 [email protected] 举报,一经查实,本站将立刻删除。