项目:picocli
文件:CommandLine.java
/**
 * Copies {@code text} into {@code columnValue} word by word, starting at {@code offset}, for as long
 * as the next word still fits within {@code columnValue.maxLength}.
 *
 * @param line iterator used to find the break opportunities in {@code text}
 * @param text the text to copy
 * @param columnValue the destination column buffer
 * @param offset position in {@code columnValue} at which writing starts
 * @return the sum of the values returned by the delegated {@code copy} calls
 *         (presumably the number of characters written - confirm against the 3-argument overload)
 */
private int copy(BreakIterator line, Text text, Text columnValue, int offset) {
    // Deceive the BreakIterator to ensure no line breaks after '-' character
    line.setText(text.plainString().replace("-", "\u00ff"));
    int done = 0;
    for (int start = line.first(), end = line.next(); end != BreakIterator.DONE; start = end, end = line.next()) {
        // The word is sliced from the original text, so the '\u00ff' placeholder never needs mapping back.
        Text word = text.substring(start, end);
        if (columnValue.maxLength >= offset + done + length(word)) {
            done += copy(word, columnValue, offset + done); // TODO localized length
        } else {
            break;
        }
    }
    if (done == 0 && length(text) > columnValue.maxLength) {
        // The value is a single word that is too big to be written to the column. Write as much as we can.
        // The destination column must be passed here, exactly as in the per-word call above.
        done = copy(text, columnValue, offset);
    }
    return done;
}
/**
 * Walks {@code bi} backwards from {@code last()} using {@code previous()} and collects the
 * substrings of {@code text} between consecutive boundaries, in text order.
 * Problems are reported through {@code errln}.
 *
 * @param bi iterator that has already been given {@code text}
 * @param text the text the iterator was set up with
 * @return the pieces of {@code text} delimited by the boundaries seen while walking backwards
 */
private Vector testLastAndPrevIoUs(BreakIterator bi, String text) {
    int p = bi.last();
    int lastP = p;
    Vector<String> result = new Vector<String>();
    if (p != text.length()) {
        errln("last() returned " + p + " instead of " + text.length());
    }
    while (p != BreakIterator.DONE) {
        // BreakIterator's backward step is previous().
        p = bi.previous();
        if (p != BreakIterator.DONE) {
            if (p >= lastP) {
                errln("previous() Failed to move backward: previous() on position "
                        + lastP + " yielded " + p);
            }
            // Insert at the front so the pieces end up in forward order.
            result.insertElementAt(text.substring(p, lastP), 0);
        } else {
            if (lastP != 0) {
                errln("previous() returned DONE prematurely: offset was "
                        + lastP + " instead of 0");
            }
        }
        lastP = p;
    }
    return result;
}
项目:intellij-spring-assistant
文件:Util.java
/**
 * Returns the first sentence of {@code fullSentence} (US sentence rules) with a trailing '.'
 * removed and all newline characters stripped; returns an empty string when nothing is left.
 */
@NotNull
public static String getFirstSentenceWithoutDot(String fullSentence) {
    String sentence = fullSentence;
    if (containsChar(sentence, '.')) {
        // Only text containing a dot is cut down to its first sentence.
        BreakIterator boundaries = getSentenceInstance(Locale.US);
        boundaries.setText(sentence);
        int from = boundaries.first();
        int to = boundaries.next();
        sentence = sentence.substring(from, to).trim();
    }
    if (!isNotEmpty(sentence)) {
        return "";
    }
    String withoutDot = sentence;
    if (endsWithChar(sentence, '.')) {
        withoutDot = sentence.substring(0, sentence.length() - 1);
    }
    return replace(withoutDot, "\n", "");
}
/**
 * Checks that {@code isBoundary()} on a word iterator set up over a sub-range of a string throws
 * {@link IllegalArgumentException} for offsets before the range's begin index and does not throw
 * for offsets at or just after it. Violations are reported through {@code errln}.
 */
public void TestBug4153072() {
    BreakIterator iter = BreakIterator.getWordInstance();
    String str = "...Hello,World!...";
    int begin = 3;
    int end = str.length() - 3;
    // Restrict the iterator to the part of the string between the leading and trailing dots.
    iter.setText(new StringCharacterIterator(str, begin, end, begin));
    for (int index = -1; index < begin + 1; ++index) {
        try {
            // Only the presence or absence of an exception matters; the result is ignored.
            iter.isBoundary(index);
            if (index < begin) {
                errln("Didn't get exception with offset = " + index +
                        " and begin index = " + begin);
            }
        } catch (IllegalArgumentException e) {
            if (index >= begin) {
                errln("Got exception with offset = " + index +
                        " and begin index = " + begin);
            }
        }
    }
}
/**
 * Splits {@code text} at the boundaries reported by the {@code boundary} iterator and returns
 * the trimmed pieces in order.
 */
@Override
public String[] split(String text) {
    boundary.setText(text);
    ArrayList<String> pieces = new ArrayList<>();
    int from = boundary.first();
    int to = boundary.next();
    while (to != BreakIterator.DONE) {
        pieces.add(text.substring(from, to).trim());
        from = to;
        to = boundary.next();
    }
    return pieces.toArray(new String[pieces.size()]);
}
/**
 * Builds style spans for {@code text} in which every word that starts with a letter or digit and
 * whose lower-cased form is not contained in {@code dictionary} carries the "underlined" style.
 *
 * @param text the text to check
 * @return spans covering the whole text; unmarked stretches get an empty style collection
 */
private StyleSpans<Collection<String>> highlightMisspelled(String text) {
    StyleSpansBuilder<Collection<String>> spansBuilder = new StyleSpansBuilder<>();
    BreakIterator wb = BreakIterator.getWordInstance();
    wb.setText(text);
    int lastIndex = wb.first();
    int lastKeywordEnd = 0;
    while (lastIndex != BreakIterator.DONE) {
        int firstIndex = lastIndex;
        lastIndex = wb.next();
        // Segments starting with punctuation or whitespace are not words and are skipped.
        if (lastIndex != BreakIterator.DONE
                && Character.isLetterOrDigit(text.charAt(firstIndex))) {
            String word = text.substring(firstIndex, lastIndex).toLowerCase();
            if (!dictionary.contains(word)) {
                // Unstyled gap since the previous marked word, then the marked word itself.
                spansBuilder.add(Collections.emptyList(), firstIndex - lastKeywordEnd);
                spansBuilder.add(Collections.singleton("underlined"), lastIndex - firstIndex);
                lastKeywordEnd = lastIndex;
            }
        }
    }
    // Unstyled remainder after the last marked word.
    spansBuilder.add(Collections.emptyList(), text.length() - lastKeywordEnd);
    return spansBuilder.create();
}
项目:openjdk-jdk10
文件:Columns.java
/**
 * Breaks {@code line} into pieces by feeding each segment between consecutive line-break
 * opportunities (US rules) to {@code processNextWord}.
 *
 * @param line the line to break up
 * @param width width value forwarded to {@code processNextWord}
 * @return the collected pieces, including a final non-empty partial piece
 */
private List<String> piecesOfEmbeddedLine( String line, int width ) {
    List<String> pieces = new ArrayList<String>();
    BreakIterator words = BreakIterator.getLineInstance( Locale.US );
    words.setText( line );
    StringBuilder nextPiece = new StringBuilder();
    int start = words.first();
    for ( int end = words.next(); end != DONE; start = end, end = words.next() ) {
        // NOTE(review): the segment end was not being passed, so the helper could not know where
        // the word stops; this assumes processNextWord takes (line, piece, start, end, width, pieces)
        // - confirm against its declaration.
        nextPiece = processNextWord( line, nextPiece, start, end, width, pieces );
    }
    if ( nextPiece.length() > 0 ) {
        pieces.add( nextPiece.toString() );
    }
    return pieces;
}
项目:elasticsearch_my
文件:FastVectorHighlighter.java
/**
 * Selects the boundary scanner for {@code field} from its field options: a locale-specific
 * sentence or word scanner when a locale is set, otherwise the matching shared default; for any
 * other scanner type, a custom {@code SimpleBoundaryScanner} only when max-scan or boundary chars
 * are not the defaults.
 */
private static BoundaryScanner getBoundaryScanner(Field field) {
    final FieldOptions options = field.fieldOptions();
    final Locale locale = options.boundaryScannerLocale();
    switch (options.boundaryScannerType()) {
        case SENTENCE:
            return locale == null
                    ? DEFAULT_SENTENCE_BOUNDARY_SCANNER
                    : new BreakIteratorBoundaryScanner(BreakIterator.getSentenceInstance(locale));
        case WORD:
            return locale == null
                    ? DEFAULT_WORD_BOUNDARY_SCANNER
                    : new BreakIteratorBoundaryScanner(BreakIterator.getWordInstance(locale));
        default:
            boolean usesDefaults = options.boundaryMaxScan() == SimpleBoundaryScanner.DEFAULT_MAX_SCAN
                    && options.boundaryChars() == SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS;
            if (usesDefaults) {
                return DEFAULT_SIMPLE_BOUNDARY_SCANNER;
            }
            return new SimpleBoundaryScanner(options.boundaryMaxScan(), options.boundaryChars());
    }
}
项目:openjdk-jdk10
文件:TextComponent.java
/**
 * Needed to unify forward and backward searching.
 * The method assumes that s is the text assigned to words.
 *
 * @param index position from which the search starts
 * @param words word iterator over {@code s}
 * @param direction {@code NEXT} to search forward, anything else to search backward
 * @param s the text assigned to {@code words}
 * @return the boundary at the near side of the first segment containing a letter,
 *         or {@link BreakIterator#DONE} if there is none in that direction
 */
private int findWordLimit(int index, BreakIterator words, boolean direction, String s) {
    // Fix for 4256660 and 4256661.
    // Words iterator is different from character and sentence iterators
    // in that end of one word is not necessarily start of another word.
    // Please see java.text.BreakIterator JavaDoc. The code below is
    // based on nextWordStartAfter example from BreakIterator.java.
    int last = (direction == NEXT) ? words.following(index)
                                   : words.preceding(index);
    int current = (direction == NEXT) ? words.next()
                                      : words.previous();
    while (current != BreakIterator.DONE) {
        // Scan the segment between the two boundaries, whichever direction we are moving in.
        for (int p = Math.min(last, current); p < Math.max(last, current); p++) {
            if (Character.isLetter(s.charAt(p))) {
                return last;
            }
        }
        last = current;
        current = (direction == NEXT) ? words.next()
                                      : words.previous();
    }
    return BreakIterator.DONE;
}
项目:lams
文件:ThaiTokenizer.java
/**
 * Advances {@code wordBreaker} to the next segment that starts with a letter or digit and
 * publishes it through the term and offset attributes.
 *
 * @return {@code true} if a token was produced, {@code false} once the word iterator is exhausted
 */
@Override
protected boolean incrementWord() {
    int start = wordBreaker.current();
    if (start == BreakIterator.DONE) {
        return false; // BreakIterator exhausted
    }
    // find the next set of boundaries, skipping over non-tokens
    int end = wordBreaker.next();
    while (end != BreakIterator.DONE &&
            !Character.isLetterOrDigit(Character.codePointAt(buffer, sentenceStart + start, sentenceEnd))) {
        start = end;
        end = wordBreaker.next();
    }
    if (end == BreakIterator.DONE) {
        return false; // BreakIterator exhausted
    }
    clearAttributes();
    // Word offsets are relative to the sentence, so the copy starts at sentenceStart + start.
    termAtt.copyBuffer(buffer, sentenceStart + start, end - start);
    offsetAtt.setOffset(correctOffset(offset + sentenceStart + start), correctOffset(offset + sentenceStart + end));
    return true;
}
项目:lams
文件:SegmentingTokenizerBase.java
/**
 * Steps through the sentences reported by {@code iterator} until one of them yields a word.
 *
 * @return {@code true} if there is a token from the buffer, or {@code false} if it is exhausted
 */
private boolean incrementSentence() throws IOException {
    if (length == 0) {
        // we must refill the buffer
        return false;
    }
    int sentenceBegin;
    int sentenceLimit;
    do {
        sentenceBegin = iterator.current();
        if (sentenceBegin == BreakIterator.DONE) {
            return false; // BreakIterator exhausted
        }
        // find the next set of boundaries
        sentenceLimit = iterator.next();
        if (sentenceLimit == BreakIterator.DONE) {
            return false; // BreakIterator exhausted
        }
        setNextSentence(sentenceBegin, sentenceLimit);
    } while (!incrementWord());
    return true;
}
项目:powertext
文件:DocumentWordTokenizer.java
/**
 * Creates a new DocumentWordTokenizer to work on a document.
 * Loads the whole document into a segment and positions the tokenizer on the first word.
 *
 * @param document The document to spell check
 */
public DocumentWordTokenizer(Document document) {
    this.document = document;
    // A single text segment spans the entire document.
    text = new Segment();
    sentenceIterator = BreakIterator.getSentenceInstance();
    try {
        document.getText(0, document.getLength(), text);
        sentenceIterator.setText(text);
        // robert: use text.getBeginIndex(), not 0, for segment's first offset
        currentWordPos = getNextWordStart(text, text.getBeginIndex());
        if (currentWordPos == -1) {
            // A word position of -1 means the string was all white space.
            moretokens = false;
        } else {
            currentWordEnd = getNextWordEnd(text, currentWordPos);
            nextWordPos = getNextWordStart(text, currentWordEnd);
        }
    } catch (BadLocationException ex) {
        moretokens = false;
    }
}
项目:openjdk-jdk10
文件:Bug4533872.java
/**
 * For every sample in {@code given}, walks the US word iterator forward and reports through
 * {@code errln} any position returned by {@code next()} that {@code isBoundary()} rejects.
 */
void TestIsBoundary() {
    iter = BreakIterator.getWordInstance(Locale.US);
    for (int i = 0; i < given.length; i++) {
        iter.setText(given[i]);
        start = iter.first();
        for (end = iter.next(); end < given[i].length(); end = iter.next()) {
            if (iter.isBoundary(end)) {
                continue;
            }
            errln("Word break failure: isBoundary() This should be a boundary. Index=" +
                    end + " for " + given[i]);
        }
    }
}
/**
 * Creates a parser for one doc comment. A locale-specific sentence breaker is installed when the
 * "breakIterator" option is set or the effective locale's language is not English.
 */
DocCommentParser(ParserFactory fac, DiagnosticSource diagSource, Comment comment) {
    this.fac = fac;
    this.diagSource = diagSource;
    this.comment = comment;
    names = fac.names;
    m = fac.docTreeMaker;

    // Fall back to the JVM default locale when the factory does not specify one.
    Locale locale = fac.locale;
    if (locale == null) {
        locale = Locale.getDefault();
    }

    Options options = fac.options;
    boolean useBreakIterator = options.isSet("breakIterator");
    if (useBreakIterator || !locale.getLanguage().equals(Locale.ENGLISH.getLanguage())) {
        sentenceBreaker = BreakIterator.getSentenceInstance(locale);
    }
    initTagParsers();
}
/**
* Needed to unify forward and backward searching.
* The method assumes that s is the text assigned to words.
*/
private int findWordLimit(int index,current); p++) {
if (Character.isLetter(s.charat(p))) {
return last;
}
}
last = current;
current = (direction == NEXT) ? words.next()
: words.prevIoUs();
}
return BreakIterator.DONE;
}
项目:openjdk-jdk10
文件:Bug4533872.java
void TestPrintAt_1() {
iter = BreakIterator.getWordInstance(Locale.US);
int[][] index = {
{2,8,10,15,17},{1,12,17,20},{3,13,16,18,{4,6,9,16},};
for (int i = 0; i < given.length; i++) {
iter.setText(given[i]);
for (int j = index[i].length-1; j >= 0; j--) {
end = iter.following(index[i][j]);
start = iter.prevIoUs();
if (!expected[i][j].equals(given[i].substring(start,end))) {
errln("Word break failure: printAt_1() expected:<" +
expected[i][j] + ">,got:<" +
given[i].substring(start,end) +
"> start=" + start + " end=" + end);
}
}
}
}
项目:Yass
文件:DocumentWordTokenizer.java
/**
 * Creates a new DocumentWordTokenizer to work on a document.
 * Loads the whole document into a segment and positions the tokenizer on the first word.
 *
 * @param document The document to spell check
 */
public DocumentWordTokenizer(Document document) {
    this.document = document;
    //Create a text segment over the entire document
    text = new Segment();
    sentenceIterator = BreakIterator.getSentenceInstance();
    try {
        // Document.getText takes (offset, length, segment); the length must be given explicitly.
        document.getText(0, document.getLength(), text);
        sentenceIterator.setText(text);
        currentWordPos = getNextWordStart(text, 0);
        //If the current word pos is -1 then the string was all white space
        if (currentWordPos != -1) {
            currentWordEnd = getNextWordEnd(text, currentWordPos);
            nextWordPos = getNextWordStart(text, currentWordEnd);
        } else {
            moretokens = false;
        }
    } catch (BadLocationException ex) {
        // The document could not be read; there is nothing to tokenize.
        moretokens = false;
    }
}
项目:jdk8u-jdk
文件:TextComponent.java
/**
* Needed to unify forward and backward searching.
* The method assumes that s is the text assigned to words.
*/
private int findWordLimit(int index,current); p++) {
if (Character.isLetter(s.charat(p))) {
return last;
}
}
last = current;
current = (direction == NEXT) ? words.next()
: words.prevIoUs();
}
return BreakIterator.DONE;
}
项目:openjdk-jdk10
文件:Bug8001562.java
/**
 * Runs {@code diffLocale} once for each locale-sensitive service class, handing it that class
 * and the locales the class reports as available.
 */
public static void main(String[] args) {
    diffLocale(BreakIterator.class, Arrays.asList(BreakIterator.getAvailableLocales()));
    diffLocale(Collator.class, Arrays.asList(Collator.getAvailableLocales()));
    diffLocale(DateFormat.class, Arrays.asList(DateFormat.getAvailableLocales()));
    diffLocale(DateFormatSymbols.class, Arrays.asList(DateFormatSymbols.getAvailableLocales()));
    diffLocale(DecimalFormatSymbols.class, Arrays.asList(DecimalFormatSymbols.getAvailableLocales()));
    diffLocale(NumberFormat.class, Arrays.asList(NumberFormat.getAvailableLocales()));
    diffLocale(Locale.class, Arrays.asList(Locale.getAvailableLocales()));
}
项目:incubator-netbeans
文件:LocalizedBundleInfo.java
/**
 * Splits {@code text} into sentences using US sentence rules. The pieces are returned untrimmed,
 * so concatenating them gives back the original text.
 */
private static String[] splitBySentence(String text) {
    // Use Locale.US since the customizer is setting the default (US) locale text only:
    BreakIterator boundaries = BreakIterator.getSentenceInstance(Locale.US);
    boundaries.setText(text);
    List<String> result = new ArrayList<String>();
    int from = boundaries.first();
    for (int to = boundaries.next(); to != BreakIterator.DONE; from = to, to = boundaries.next()) {
        result.add(text.substring(from, to));
    }
    return result.toArray(new String[result.size()]);
}
项目:openjdk-jdk10
文件:LineBreakMeasurer.java
/**
 * Constructs a {@code LineBreakMeasurer} for the specified text.
 *
 * @param text the text for which this {@code LineBreakMeasurer}
 *     produces {@code TextLayout} objects; the text must contain
 *     at least one character; if the text available through
 *     {@code iter} changes, further calls to this
 *     {@code LineBreakMeasurer} instance are undefined (except,
 *     in some cases, when {@code insertChar} or
 *     {@code deleteChar} are invoked afterward - see below)
 * @param breakIter the {@link BreakIterator} which defines line
 *     breaks
 * @param frc contains information about a graphics device which is
 *     needed to measure the text correctly;
 *     text measurements can vary slightly depending on the
 *     device resolution, and attributes such as antialiasing; this
 *     parameter does not specify a translation between the
 *     {@code LineBreakMeasurer} and user space
 * @throws IllegalArgumentException if the text has less than one character
 * @see LineBreakMeasurer#insertChar
 * @see LineBreakMeasurer#deleteChar
 */
public LineBreakMeasurer(AttributedCharacterIterator text, BreakIterator breakIter, FontRenderContext frc) {
    if (text.getEndIndex() - text.getBeginIndex() < 1) {
        throw new IllegalArgumentException("Text must contain at least one character.");
    }
    this.breakIter = breakIter;
    this.measurer = new TextMeasurer(text, frc);
    this.limit = text.getEndIndex();
    this.pos = this.start = text.getBeginIndex();
    // The break iterator works on the measurer's characters, indexed from the text's begin index.
    charIter = new CharArrayIterator(measurer.getChars(), this.start);
    this.breakIter.setText(charIter);
}
项目:incubator-netbeans
文件:BaseUtilities.java
/** Wrap multi-line strings.
 * Every wrapped line, including the last one, is followed by a newline character.
 * @param original the original string to wrap
 * @param width the maximum width of lines
 * @param breakIterator algorithm for breaking lines
 * @param removeNewLines if <code>true</code>, any newlines in the original string are ignored
 * @return the whole string with embedded newlines
 */
public static String wrapString(String original, int width, BreakIterator breakIterator, boolean removeNewLines) {
    // NOTE(review): width was documented but never forwarded; this assumes wrapStringToArray
    // takes (original, width, breakIterator, removeNewLines) - confirm against its declaration.
    String[] sarray = wrapStringToArray(original, width, breakIterator, removeNewLines);
    StringBuilder retBuf = new StringBuilder();
    for (int i = 0; i < sarray.length; i++) {
        retBuf.append(sarray[i]);
        retBuf.append('\n');
    }
    return retBuf.toString();
}
/**
 * Smoke test: reports through {@code errln} when {@code BreakIterator} lists no available locales.
 */
public void TestGetAvailableLocales() {
    final int localeCount = BreakIterator.getAvailableLocales().length;
    // I have no idea how to test this function beyond checking that the list is not empty.
    if (localeCount == 0) {
        errln("getAvailableLocales() returned an empty list!");
    }
}
项目:elasticsearch_my
文件:CustomPostingsHighlighter.java
项目:annoflex
文件:textformatter.java
/**
 * Compares a {@code CustomSeparatorBreakIterator} built with a random separator against the
 * root-locale sentence iterator on short and empty texts.
 */
public void testSingleSentences() throws Exception {
    BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
    BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());
    // Every comparison needs both iterators, as in the first call.
    assertSameBreaks("a", expected, actual);
    assertSameBreaks("ab", expected, actual);
    assertSameBreaks("abc", expected, actual);
    assertSameBreaks("", expected, actual);
}
/**
 * Compares a {@code CustomSeparatorBreakIterator} built with a random separator against the
 * root-locale sentence iterator, using texts that carry a trailing "000" suffix.
 */
// NOTE(review): the assertSameBreaks calls below do not share one argument list (the first passes a
// number, the others do not) and `expected` is created but never passed. Arguments appear to be
// missing - presumably slice offset/length values and `expected`; restore them from the
// assertSameBreaks declaration before relying on this test.
public void testSliceEnd() throws Exception {
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());
assertSameBreaks("a000",1,actual);
assertSameBreaks("ab000",actual);
assertSameBreaks("abc000",actual);
assertSameBreaks("000",actual);
}
/**
 * Compares a {@code CustomSeparatorBreakIterator} built with a random separator against the
 * root-locale sentence iterator, using texts wrapped in "000" on both sides.
 */
// NOTE(review): the assertSameBreaks calls below do not share one argument list (two pass a number,
// two do not) and `expected` is created but never passed. Arguments appear to be missing -
// presumably slice offset/length values and `expected`; restore them from the assertSameBreaks
// declaration before relying on this test.
public void testSliceMiddle() throws Exception {
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());
assertSameBreaks("000a000",3,actual);
assertSameBreaks("000ab000",2,actual);
assertSameBreaks("000abc000",actual);
assertSameBreaks("000000",actual);
}
项目:openjdk-jdk10
文件:DocLocale.java
/**
 * Creates the locale helper: stores the arguments, resolves the locale via {@code getLocale()},
 * makes it the JVM default when it is non-null (otherwise calls {@code docenv.exit()}), and
 * creates the collator and sentence breaker for it.
 */
DocLocale(DocEnv docenv, String localeName, boolean useBreakIterator) {
    this.docenv = docenv;
    this.localeName = localeName;
    this.useBreakIterator = useBreakIterator;

    locale = getLocale();
    if (locale != null) {
        Locale.setDefault(locale); // NOTE: updating global state
    } else {
        docenv.exit();
    }

    collator = Collator.getInstance(locale);
    sentenceBreaker = BreakIterator.getSentenceInstance(locale);
}
项目:lyra2-java
文件:CommandLine.java
项目:mongol-library
文件:MongolEditText.java
/**
 * Selects the word under a double tap: maps the tap position to a text offset, finds the
 * surrounding word boundaries and applies them as the selection.
 */
@Override
public boolean onDoubleTap(MotionEvent e) {
    final int tapX = (int) e.getX();
    final int tapY = (int) e.getY();
    // Text offset at the tap position.
    final int offset = getoffsetForPosition(tapX, tapY);

    BreakIterator words = BreakIterator.getWordInstance();
    words.setText(getText().toString());

    // start and end are the word boundaries around the offset.
    int start = words.isBoundary(offset) ? offset : words.preceding(offset);
    int end = words.following(offset);

    if (end == BreakIterator.DONE) {
        // Tapping at the very beginning or end: select the preceding word if there is one.
        end = start;
        int before = words.preceding(offset);
        start = (before == BreakIterator.DONE) ? end : before;
    }

    setSelection(start, end);
    return super.onDoubleTap(e);
}
项目:Java-data-science-Cookbook
文件:SentenceDetection.java
/**
 * Prints each sentence of {@code source} (US sentence rules) on its own line of standard output.
 */
public void useSentenceIterator(String source) {
    BreakIterator sentences = BreakIterator.getSentenceInstance(Locale.US);
    sentences.setText(source);
    int from = sentences.first();
    int to = sentences.next();
    while (to != BreakIterator.DONE) {
        System.out.println(source.substring(from, to));
        from = to;
        to = sentences.next();
    }
}
项目:Java-data-science-Cookbook
文件:WordDetection.java
/**
 * Prints a "Break Iterator" heading followed by each word-iterator segment of {@code input}
 * on its own line of standard output.
 */
public void useBreakIterator(String input) {
    System.out.println("Break Iterator");
    BreakIterator segments = BreakIterator.getWordInstance();
    segments.setText(input);
    int from = segments.first();
    int to = segments.next();
    while (to != BreakIterator.DONE) {
        System.out.println(input.substring(from, to));
        from = to;
        to = segments.next();
    }
}
项目:myfaces-trinidad
文件:SimpleInputTextRenderer.java
/**
 * Writes {@code textString} line by line: each '\n'-separated line is passed to
 * {@code _writeTextLineWithBreaks}, and a newline is written to the response writer between
 * consecutive lines.
 *
 * @param context the current faces context, source of the response writer
 * @param breaks break iterator forwarded to {@code _writeTextLineWithBreaks}
 * @param textString the text to write
 * @param columns column count forwarded to {@code _writeTextLineWithBreaks}
 * @throws IOException if writing to the response writer fails
 */
private void _writeTextWithBreaks(
    FacesContext context, BreakIterator breaks, String textString, int columns
    ) throws IOException
{
    int start = 0;
    while (true)
    {
        int nextLineBreak = textString.indexOf('\n', start);
        String substring;
        if (nextLineBreak >= 0)
            substring = textString.substring(start, nextLineBreak);
        else
            substring = textString.substring(start);
        _writeTextLineWithBreaks(context, breaks, substring, columns);
        if (nextLineBreak < 0)
            break;
        start = nextLineBreak + 1;
        // Re-emit the newline that was consumed as a separator. (new char['\n'] would allocate
        // a 10-element zero-filled array, so a NUL character was written instead.)
        context.getResponseWriter().write('\n');
    }
}
/**
 * Sets the iterator to refer to the first boundary position following
 * the specified position.
 * @param offset The position from which to begin searching for a break position.
 * @return The position of the first break after the current position.
 */
@Override
public int following(int offset) {
    CharacterIterator text = getText();
    checkOffset(offset, text);
    // Set our internal iteration position (temporarily)
    // to the position passed in. If this is the _beginning_ position,
    // then we can just use next() to get our return value
    text.setIndex(offset);
    if (offset == text.getBeginIndex()) {
        cachedLastKnownBreak = handleNext();
        return cachedLastKnownBreak;
    }
    // otherwise, we have to sync up first. Use handlePrevious() to back
    // us up to a known break position before the specified position (if
    // we can determine that the specified position is a break position,
    // we don't back up at all). This may or may not be the last break
    // position at or before our starting position. Advance forward
    // from here until we've passed the starting position. The position
    // we stop on will be the first break position after the specified one.
    int result = cachedLastKnownBreak;
    if (result >= offset || result <= BreakIterator.DONE) {
        result = handlePrevious();
    } else {
        // it might be better to check if handlePrevious() give us closer
        // safe value but handlePrevious() is slow too
        // So, this has to be done carefully
        text.setIndex(result);
    }
    while (result != BreakIterator.DONE && result <= offset) {
        result = handleNext();
    }
    cachedLastKnownBreak = result;
    return result;
}
项目:DolphinNG
文件:StringUtil.java
/**
 * Truncates the given string after the requested number of words. A word is counted whenever a
 * word-iterator segment starts with a letter.
 *
 * @param n - number of words required
 * @param s - input string
 * @return {@code null} for a {@code null} input; the input itself when it ends before or at the
 *         n-th word's end; otherwise the prefix of the input ending after the n-th word
 */
public static String truncateAfterWords(int n, String s)
{
    if (s == null)
    {
        return null;
    }
    final int length = s.length();
    BreakIterator boundaries = BreakIterator.getWordInstance();
    boundaries.setText(s);
    int pos = 0;
    int wordsSeen = 0;
    while (wordsSeen < n && pos != BreakIterator.DONE && pos < length)
    {
        if (Character.isLetter(s.codePointAt(pos)))
        {
            wordsSeen++;
        }
        pos = boundaries.next();
    }
    boolean consumedEverything = pos == BreakIterator.DONE || pos >= length;
    return consumedEverything ? s : s.substring(0, pos);
}
项目:Elasticsearch
文件:CustomPostingsHighlighter.java
/**
 * Constructor.
 * Stores its arguments, resolves the locale via {@code getLocale()}, makes it the JVM default
 * when it is non-null (otherwise calls {@code docenv.exit()}), and creates the collator and
 * sentence breaker for it.
 */
// NOTE(review): no `localeName` parameter is declared here, so `this.localeName = localeName`
// below can only refer to the field itself (a self-assignment). Presumably a `String localeName`
// parameter was lost from the signature - confirm against the callers before changing it.
DocLocale(DocEnv docenv,boolean useBreakIterator) {
this.docenv = docenv;
this.localeName = localeName;
this.useBreakIterator = useBreakIterator;
locale = getLocale();
if (locale == null) {
docenv.exit();
} else {
Locale.setDefault(locale); // NOTE: updating global state
}
// NOTE(review): reached with a null locale if docenv.exit() returns normally - confirm it never does.
collator = Collator.getInstance(locale);
sentenceBreaker = BreakIterator.getSentenceInstance(locale);
}
/**
 * Constructs a <code>LineBreakMeasurer</code> for the specified text.
 *
 * @param text the text for which this <code>LineBreakMeasurer</code>
 *     produces <code>TextLayout</code> objects; the text must contain
 *     at least one character; if the text available through
 *     <code>iter</code> changes, further calls to this
 *     <code>LineBreakMeasurer</code> instance are undefined (except,
 *     in some cases, when <code>insertChar</code> or
 *     <code>deleteChar</code> are invoked afterward - see below)
 * @param breakIter the {@link BreakIterator} which defines line
 *     breaks
 * @param frc contains information about a graphics device which is
 *     needed to measure the text correctly;
 *     text measurements can vary slightly depending on the
 *     device resolution, and attributes such as antialiasing; this
 *     parameter does not specify a translation between the
 *     <code>LineBreakMeasurer</code> and user space
 * @throws IllegalArgumentException if the text has less than one character
 * @see LineBreakMeasurer#insertChar
 * @see LineBreakMeasurer#deleteChar
 */
public LineBreakMeasurer(AttributedCharacterIterator text, BreakIterator breakIter, FontRenderContext frc) {
    // NOTE(review): the parameter list and most of the body were cut off in this copy; they are
    // restored from the complete copy of the same constructor - confirm.
    if (text.getEndIndex() - text.getBeginIndex() < 1) {
        throw new IllegalArgumentException("Text must contain at least one character.");
    }
    this.breakIter = breakIter;
    this.measurer = new TextMeasurer(text, frc);
    this.limit = text.getEndIndex();
    this.pos = this.start = text.getBeginIndex();
    // The break iterator works on the measurer's characters, indexed from the text's begin index.
    charIter = new CharArrayIterator(measurer.getChars(), this.start);
    this.breakIter.setText(charIter);
}
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 [email protected] 举报,一经查实,本站将立刻删除。