ICU 63.1  63.1
rbbi.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ***************************************************************************
5 * Copyright (C) 1999-2016 International Business Machines Corporation *
6 * and others. All rights reserved. *
7 ***************************************************************************
8 
9 **********************************************************************
10 * Date Name Description
11 * 10/22/99 alan Creation.
12 * 11/11/99 rgillam Complete port from Java.
13 **********************************************************************
14 */
15 
16 #ifndef RBBI_H
17 #define RBBI_H
18 
19 #include "unicode/utypes.h"
20 
26 #if !UCONFIG_NO_BREAK_ITERATION
27 
28 #include "unicode/brkiter.h"
29 #include "unicode/udata.h"
30 #include "unicode/parseerr.h"
31 #include "unicode/schriter.h"
32 
34 
36 class LanguageBreakEngine;
37 struct RBBIDataHeader;
38 class RBBIDataWrapper;
39 class UnhandledEngine;
40 class UStack;
41 
54 
55 private:
60  UText fText;
61 
62 #ifndef U_HIDE_INTERNAL_API
63 public:
64 #endif /* U_HIDE_INTERNAL_API */
65 
// The rule data for this BreakIterator instance.
// Declared in the section that is public only when U_HIDE_INTERNAL_API is not defined.
70  RBBIDataWrapper *fData;
71 private:
72 
77  int32_t fPosition;
78 
82  int32_t fRuleStatusIndex;
83 
87  class BreakCache;
88  BreakCache *fBreakCache;
89 
94  class DictionaryCache;
95  DictionaryCache *fDictionaryCache;
96 
104  UStack *fLanguageBreakEngines;
105 
113  UnhandledEngine *fUnhandledBreakEngine;
114 
120  uint32_t fDictionaryCharCount;
121 
127  CharacterIterator *fCharIter;
128 
134  StringCharacterIterator fSCharIter;
135 
139  UBool fDone;
140 
141  //=======================================================================
142  // constructors
143  //=======================================================================
144 
// Private constructor taking a pointer to an RBBIDataHeader and an error-code reference.
// NOTE(review): no description survives in this listing; presumably intended for the
// friend classes declared immediately below (RBBIRuleBuilder, BreakIterator) - confirm.
155  RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
156 
158  friend class RBBIRuleBuilder;
160  friend class BreakIterator;
161 
162 public:
163 
169 
177 
187  UParseError &parseError,
188  UErrorCode &status);
189 
// Construct a RuleBasedBreakIterator from a set of precompiled binary rules.
//   compiledRules - pointer to the binary rule bytes.
//   ruleLength    - NOTE(review): presumably the size of compiledRules in bytes - confirm.
//   status        - ICU error code, passed by reference.
213  RuleBasedBreakIterator(const uint8_t *compiledRules,
214  uint32_t ruleLength,
215  UErrorCode &status);
216 
230 
236 
245 
// Equality operator: compares this iterator with another BreakIterator.
254  virtual UBool operator==(const BreakIterator& that) const;
255 
// Inequality operator (non-virtual).
// NOTE(review): presumably defined by the inline `return !operator==(that);` body near
// the end of this header; its signature line is missing from this listing - confirm.
263  UBool operator!=(const BreakIterator& that) const;
264 
// Returns a newly-constructed RuleBasedBreakIterator with the same behavior as this one.
// NOTE(review): the description available here is cut off after "iterating over the
// sam..."; presumably the clone also iterates over the same text - confirm.
275  virtual BreakIterator* clone() const;
276 
// Compute a hash code for this BreakIterator.
282  virtual int32_t hashCode(void) const;
283 
// Returns the description (rule source) used to create this iterator.
289  virtual const UnicodeString& getRules(void) const;
290 
291  //=======================================================================
292  // BreakIterator overrides
293  //=======================================================================
294 
// Returns a reference to a CharacterIterator.
// NOTE(review): no description survives in this listing; presumably this is an iterator
// over the text being analyzed - confirm.
320  virtual CharacterIterator& getText(void) const;
321 
322 
// Get a UText for the text being analyzed.
//   fillIn - caller-supplied UText pointer. NOTE(review): presumably may be reused for the result - confirm.
//   status - ICU error code, passed by reference.
337  virtual UText *getUText(UText *fillIn, UErrorCode &status) const;
338 
// Set the iterator to analyze a new piece of text.
// NOTE(review): the "adopt" name suggests ownership of newText passes to the iterator - confirm.
346  virtual void adoptText(CharacterIterator* newText);
347 
// Set the iterator to analyze a new piece of text, supplied as a UnicodeString.
359  virtual void setText(const UnicodeString& newText);
360 
// Reset the break iterator to operate over the text represented by the UText.
//   status - ICU error code, passed by reference.
374  virtual void setText(UText *text, UErrorCode &status);
375 
// Sets the current iteration position to the beginning of the text, position zero.
381  virtual int32_t first(void);
382 
// Sets the current iteration position to the end of the text.
388  virtual int32_t last(void);
389 
// Advances the iterator either forward or backward the specified number of steps.
// NOTE(review): presumably a negative n moves backward - confirm.
400  virtual int32_t next(int32_t n);
401 
// Advances the iterator to the next boundary position.
407  virtual int32_t next(void);
408 
// Moves the iterator backwards, to the last boundary preceding this one.
414  virtual int32_t previous(void);
415 
// Sets the iterator to refer to the first boundary position following the specified position.
423  virtual int32_t following(int32_t offset);
424 
// Sets the iterator to refer to the last boundary position before the specified position.
432  virtual int32_t preceding(int32_t offset);
433 
// Returns true if the specified position is a boundary position.
// Not const. NOTE(review): presumably the call may move the iteration position - confirm.
442  virtual UBool isBoundary(int32_t offset);
443 
// Returns the current iteration position.
452  virtual int32_t current(void) const;
453 
454 
// Return the status tag from the break rule that determined the boundary at the
// current iteration position.
486  virtual int32_t getRuleStatus() const;
487 
// Get the status (tag) values from the break rule(s) that determined the boundary at
// the current iteration position.
//   fillInVec - caller-supplied array that receives the status values.
//   capacity  - NOTE(review): presumably the number of int32_t slots in fillInVec - confirm.
//   status    - ICU error code, passed by reference.
511  virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
512 
// Returns a unique class ID POLYMORPHICALLY (UClassID identifies classes without
// using the compiler's RTTI).
524  virtual UClassID getDynamicClassID(void) const;
525 
// Returns the class ID for this class.
537  static UClassID U_EXPORT2 getStaticClassID(void);
538 
// Deprecated functionality.
// NOTE(review): the parameter contract (stackBuffer / BufferSize in-out semantics) is not
// described in this listing - confirm before relying on it.
565  virtual BreakIterator * createBufferClone(void *stackBuffer,
566  int32_t &BufferSize,
567  UErrorCode &status);
568 
569 
// Return the binary form of compiled break rules, which can then be used to create a
// new break iterator.
//   length - passed by reference. NOTE(review): presumably receives the size in bytes of the returned data - confirm.
587  virtual const uint8_t *getBinaryRules(uint32_t &length);
588 
615 
616 
617 private:
618  //=======================================================================
619  // implementation
620  //=======================================================================
// Private helper.
// NOTE(review): no description survives in this listing; presumably returns the
// iterator's internal state to its initial condition - confirm against the implementation.
626  void reset(void);
627 
// Private helper taking an ICU error code by reference.
// NOTE(review): undocumented in this listing; presumably common initialization shared
// by the constructors - confirm against the implementation.
632  void init(UErrorCode &status);
633 
// Private helper taking a text position and returning an int32_t.
// NOTE(review): undocumented in this listing; presumably backs up from fromPosition to
// a position from which forward iteration can safely start - confirm.
643  int32_t handleSafePrevious(int32_t fromPosition);
644 
// Private helper returning an int32_t.
// NOTE(review): undocumented in this listing; presumably finds the next boundary after
// the current position - confirm.
657  int32_t handleNext();
658 
659 
// Private helper returning a pointer to a const LanguageBreakEngine for the single
// code point c.
// NOTE(review): lookup, caching and ownership rules are not visible in this listing - confirm.
666  const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
667 
668  public:
669 #ifndef U_HIDE_INTERNAL_API
670 
// Debugging function only. Declared only when U_HIDE_INTERNAL_API is not defined.
674  void dumpCache();
675 
// Debugging function only. Declared only when U_HIDE_INTERNAL_API is not defined.
680  void dumpTables();
681 
682 #endif /* U_HIDE_INTERNAL_API */
683 };
684 
685 //------------------------------------------------------------------------------
686 //
687 // Inline Functions Definitions ...
688 //
689 //------------------------------------------------------------------------------
690 
692  return !operator==(that);
693 }
694 
696 
697 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
698 
699 #endif
icu::RuleBasedBreakIterator::adoptText
virtual void adoptText(CharacterIterator *newText)
Set the iterator to analyze a new piece of text.
icu::BreakIterator
The BreakIterator class implements methods for finding the location of boundaries in text.
Definition: brkiter.h:102
parseerr.h
C API: Parse Error Information.
utypes.h
Basic definitions for ICU, for both C and C++ APIs.
UBool
int8_t UBool
The ICU boolean type.
Definition: umachine.h:225
icu::RuleBasedBreakIterator::preceding
virtual int32_t preceding(int32_t offset)
Sets the iterator to refer to the last boundary position before the specified position.
icu::RuleBasedBreakIterator::fData
RBBIDataWrapper * fData
The rule data for this BreakIterator instance.
Definition: rbbi.h:70
icu::RuleBasedBreakIterator::dumpCache
void dumpCache()
Debugging function only.
icu::RuleBasedBreakIterator::RuleBasedBreakIterator
RuleBasedBreakIterator()
Default constructor.
icu::RuleBasedBreakIterator::RuleBasedBreakIterator
RuleBasedBreakIterator(UDataMemory *image, UErrorCode &status)
This constructor uses the udata interface to create a BreakIterator whose internal tables live in a m...
icu::operator==
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
icu::RuleBasedBreakIterator::RuleBasedBreakIterator
RuleBasedBreakIterator(const UnicodeString &rules, UParseError &parseError, UErrorCode &status)
Construct a RuleBasedBreakIterator from a set of rules supplied as a string.
U_COMMON_API
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition: utypes.h:300
icu::RuleBasedBreakIterator::operator=
RuleBasedBreakIterator & operator=(const RuleBasedBreakIterator &that)
Assignment operator.
icu::RuleBasedBreakIterator::~RuleBasedBreakIterator
virtual ~RuleBasedBreakIterator()
Destructor.
brkiter.h
C++ API: Break Iterator.
UParseError
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
icu::RuleBasedBreakIterator::RuleBasedBreakIterator
RuleBasedBreakIterator(const uint8_t *compiledRules, uint32_t ruleLength, UErrorCode &status)
Construct a RuleBasedBreakIterator from a set of precompiled binary rules.
icu::RuleBasedBreakIterator::getRuleStatus
virtual int32_t getRuleStatus() const
Return the status tag from the break rule that determined the boundary at the current iteration position.
icu::UnicodeString
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:290
schriter.h
C++ API: String Character Iterator.
icu::RuleBasedBreakIterator::isBoundary
virtual UBool isBoundary(int32_t offset)
Returns true if the specified position is a boundary position.
icu::RuleBasedBreakIterator::following
virtual int32_t following(int32_t offset)
Sets the iterator to refer to the first boundary position following the specified position.
icu::StringCharacterIterator
A concrete subclass of CharacterIterator that iterates over the characters (code units or code points...
Definition: schriter.h:45
UChar32
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:389
UClassID
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:90
UErrorCode
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition: utypes.h:401
icu::RuleBasedBreakIterator::next
virtual int32_t next(int32_t n)
Advances the iterator either forward or backward the specified number of steps.
icu::RuleBasedBreakIterator
A subclass of BreakIterator whose behavior is specified using a list of rules.
Definition: rbbi.h:53
udata.h
C API: Data loading interface.
icu::RuleBasedBreakIterator::clone
virtual BreakIterator * clone() const
Returns a newly-constructed RuleBasedBreakIterator with the same behavior, and iterating over the sam...
icu::operator!=
UBool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
Definition: stringpiece.h:218
UText
UText struct.
Definition: utext.h:1345
icu::BreakIterator::operator!=
UBool operator!=(const BreakIterator &rhs) const
Returns the complement of the result of operator==.
Definition: brkiter.h:131
icu::RuleBasedBreakIterator::getStaticClassID
static UClassID getStaticClassID(void)
Returns the class ID for this class.
icu::RuleBasedBreakIterator::RuleBasedBreakIterator
RuleBasedBreakIterator(const RuleBasedBreakIterator &that)
Copy constructor.
icu::RuleBasedBreakIterator::current
virtual int32_t current(void) const
Returns the current iteration position.
UDataMemory
struct UDataMemory UDataMemory
Forward declaration of the data memory type.
Definition: udata.h:158
icu::RuleBasedBreakIterator::refreshInputText
virtual RuleBasedBreakIterator & refreshInputText(UText *input, UErrorCode &status)
Set the subject text string upon which the break iterator is operating without changing any other asp...
icu::CharacterIterator
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:358
icu::RuleBasedBreakIterator::setText
virtual void setText(UText *text, UErrorCode &status)
Reset the break iterator to operate over the text represented by the UText.
icu::RuleBasedBreakIterator::next
virtual int32_t next(void)
Advances the iterator to the next boundary position.
icu::RuleBasedBreakIterator::dumpTables
void dumpTables()
Debugging function only.
icu::RuleBasedBreakIterator::getRuleStatusVec
virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status)
Get the status (tag) values from the break rule(s) that determined the boundary at the current iteration position.
icu::RuleBasedBreakIterator::getText
virtual CharacterIterator & getText(void) const
icu::RuleBasedBreakIterator::getDynamicClassID
virtual UClassID getDynamicClassID(void) const
Returns a unique class ID POLYMORPHICALLY.
icu::RuleBasedBreakIterator::hashCode
virtual int32_t hashCode(void) const
Compute a hash code for this BreakIterator.
icu::RuleBasedBreakIterator::last
virtual int32_t last(void)
Sets the current iteration position to the end of the text.
icu::RuleBasedBreakIterator::getUText
virtual UText * getUText(UText *fillIn, UErrorCode &status) const
Get a UText for the text being analyzed.
icu::RuleBasedBreakIterator::setText
virtual void setText(const UnicodeString &newText)
Set the iterator to analyze a new piece of text.
icu::RuleBasedBreakIterator::getRules
virtual const UnicodeString & getRules(void) const
Returns the description used to create this iterator.
icu::RuleBasedBreakIterator::createBufferClone
virtual BreakIterator * createBufferClone(void *stackBuffer, int32_t &BufferSize, UErrorCode &status)
Deprecated functionality.
U_NAMESPACE_END
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:138
U_NAMESPACE_BEGIN
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:137
icu::RuleBasedBreakIterator::previous
virtual int32_t previous(void)
Moves the iterator backwards, to the last boundary preceding this one.
icu::RuleBasedBreakIterator::first
virtual int32_t first(void)
Sets the current iteration position to the beginning of the text, position zero.
icu::RuleBasedBreakIterator::getBinaryRules
virtual const uint8_t * getBinaryRules(uint32_t &length)
Return the binary form of compiled break rules, which can then be used to create a new break iterator...
icu::RuleBasedBreakIterator::operator==
virtual UBool operator==(const BreakIterator &that) const
Equality operator.