ICU 63.1
brkiter.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ********************************************************************************
5 * Copyright (C) 1997-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 ********************************************************************************
8 *
9 * File brkiter.h
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 02/18/97 aliu Added typedef for TextCount. Made DONE const.
15 * 05/07/97 aliu Fixed DLL declaration.
16 * 07/09/97 jfitz Renamed BreakIterator and interface synced with JDK
17 * 08/11/98 helena Sync-up JDK1.2.
18 * 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
19 ********************************************************************************
20 */
21 
22 #ifndef BRKITER_H
23 #define BRKITER_H
24 
25 #include "unicode/utypes.h"
26 
32 #if UCONFIG_NO_BREAK_ITERATION
33 
35 
36 /*
37  * Allow the declaration of APIs with pointers to BreakIterator
38  * even when break iteration is removed from the build.
39  */
40 class BreakIterator;
41 
43 
44 #else
45 
46 #include "unicode/uobject.h"
47 #include "unicode/unistr.h"
48 #include "unicode/chariter.h"
49 #include "unicode/locid.h"
50 #include "unicode/ubrk.h"
51 #include "unicode/strenum.h"
52 #include "unicode/utext.h"
53 #include "unicode/umisc.h"
54 
56 
103 public:
108  virtual ~BreakIterator();
109 
123  virtual UBool operator==(const BreakIterator&) const = 0;
124 
131  UBool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); }
132 
138  virtual BreakIterator* clone(void) const = 0;
139 
145  virtual UClassID getDynamicClassID(void) const = 0;
146 
151  virtual CharacterIterator& getText(void) const = 0;
152 
153 
168  virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0;
169 
181  virtual void setText(const UnicodeString &text) = 0;
182 
201  virtual void setText(UText *text, UErrorCode &status) = 0;
202 
211  virtual void adoptText(CharacterIterator* it) = 0;
212 
213  enum {
219  DONE = (int32_t)-1
220  };
221 
227  virtual int32_t first(void) = 0;
228 
234  virtual int32_t last(void) = 0;
235 
242  virtual int32_t previous(void) = 0;
243 
250  virtual int32_t next(void) = 0;
251 
257  virtual int32_t current(void) const = 0;
258 
267  virtual int32_t following(int32_t offset) = 0;
268 
277  virtual int32_t preceding(int32_t offset) = 0;
278 
287  virtual UBool isBoundary(int32_t offset) = 0;
288 
298  virtual int32_t next(int32_t n) = 0;
299 
313  virtual int32_t getRuleStatus() const;
314 
343  virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
344 
364  static BreakIterator* U_EXPORT2
365  createWordInstance(const Locale& where, UErrorCode& status);
366 
388  static BreakIterator* U_EXPORT2
389  createLineInstance(const Locale& where, UErrorCode& status);
390 
410  static BreakIterator* U_EXPORT2
411  createCharacterInstance(const Locale& where, UErrorCode& status);
412 
431  static BreakIterator* U_EXPORT2
432  createSentenceInstance(const Locale& where, UErrorCode& status);
433 
456  static BreakIterator* U_EXPORT2
457  createTitleInstance(const Locale& where, UErrorCode& status);
458 
468  static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
469 
479  static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
480  const Locale& displayLocale,
481  UnicodeString& name);
482 
491  static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
492  UnicodeString& name);
493 
513  virtual BreakIterator * createBufferClone(void *stackBuffer,
514  int32_t &BufferSize,
515  UErrorCode &status) = 0;
516 
517 #ifndef U_HIDE_DEPRECATED_API
518 
525  inline UBool isBufferClone(void);
526 
527 #endif /* U_HIDE_DEPRECATED_API */
528 
529 #if !UCONFIG_NO_SERVICE
530 
545  static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt,
546  const Locale& locale,
547  UBreakIteratorType kind,
548  UErrorCode& status);
549 
562  static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
563 
570  static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
571 #endif
572 
579 
580 #ifndef U_HIDE_INTERNAL_API
581 
587  const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
588 #endif /* U_HIDE_INTERNAL_API */
589 
615  virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0;
616 
617  private:
618  static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status);
619  static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status);
620  static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status);
621 
622  friend class ICUBreakIteratorFactory;
623  friend class ICUBreakIteratorService;
624 
625 protected:
626  // Do not enclose protected default/copy constructors with #ifndef U_HIDE_INTERNAL_API
627  // or else the compiler will create public ones.
631  BreakIterator (const BreakIterator &other);
632 #ifndef U_HIDE_INTERNAL_API
633 
634  BreakIterator (const Locale& valid, const Locale &actual);
636  BreakIterator &operator = (const BreakIterator &other);
637 #endif /* U_HIDE_INTERNAL_API */
638 
639 private:
640 
642  char actualLocale[ULOC_FULLNAME_CAPACITY];
643  char validLocale[ULOC_FULLNAME_CAPACITY];
644 };
645 
646 #ifndef U_HIDE_DEPRECATED_API
647 
648 inline UBool BreakIterator::isBufferClone() // deprecated API: only compiled when U_HIDE_DEPRECATED_API is not defined
649 {
650  return FALSE; // unconditionally FALSE; no object state is consulted
651 }
652 
653 #endif /* U_HIDE_DEPRECATED_API */
654 
656 
657 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
658 
659 #endif // BRKITER_H
660 //eof
FALSE
#define FALSE
The FALSE value of a UBool.
Definition: umachine.h:233
icu::BreakIterator
The BreakIterator class implements methods for finding the location of boundaries in text.
Definition: brkiter.h:102
icu::BreakIterator::getLocale
Locale getLocale(ULocDataLocaleType type, UErrorCode &status) const
Returns the locale for this break iterator.
utypes.h
Basic definitions for ICU, for both C and C++ APIs.
icu::BreakIterator::current
virtual int32_t current(void) const =0
Return character index of the current iterator position within the text.
UBool
int8_t UBool
The ICU boolean type.
Definition: umachine.h:225
ubrk.h
C API: BreakIterator.
icu::BreakIterator::getRuleStatus
virtual int32_t getRuleStatus() const
For RuleBasedBreakIterators, return the status tag from the break rule that determined the boundary a...
ULocDataLocaleType
ULocDataLocaleType
Constants for *_getLocale(). Allow user to select whether she wants information on requested,...
Definition: uloc.h:338
icu::BreakIterator::~BreakIterator
virtual ~BreakIterator()
destructor
icu::BreakIterator::unregister
static UBool unregister(URegistryKey key, UErrorCode &status)
Unregister a previously-registered BreakIterator using the key returned from the register call.
icu::operator==
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
utext.h
C API: Abstract Unicode Text API.
U_COMMON_API
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition: utypes.h:300
icu::BreakIterator::createBufferClone
virtual BreakIterator * createBufferClone(void *stackBuffer, int32_t &BufferSize, UErrorCode &status)=0
Deprecated functionality.
icu::BreakIterator::setText
virtual void setText(const UnicodeString &text)=0
Change the text over which this operates.
icu::BreakIterator::isBoundary
virtual UBool isBoundary(int32_t offset)=0
Return true if the specified position is a boundary position.
umisc.h
C API: misc definitions.
icu::BreakIterator::getDynamicClassID
virtual UClassID getDynamicClassID(void) const =0
Return a polymorphic class ID for this object.
icu::BreakIterator::preceding
virtual int32_t preceding(int32_t offset)=0
Set the iterator position to the first boundary preceding the specified offset.
icu::UnicodeString
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:290
URegistryKey
const void * URegistryKey
Opaque type returned by registerInstance, registerFactory and unregister for service registration.
Definition: umisc.h:57
icu::UObject
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:220
UClassID
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:90
icu::BreakIterator::refreshInputText
virtual BreakIterator & refreshInputText(UText *input, UErrorCode &status)=0
Set the subject text string upon which the break iterator is operating without changing any other asp...
UErrorCode
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition: utypes.h:401
icu::StringEnumeration
Base class for 'pure' C++ implementations of uenum api.
Definition: strenum.h:57
icu::BreakIterator::getDisplayName
static UnicodeString & getDisplayName(const Locale &objectLocale, const Locale &displayLocale, UnicodeString &name)
Get name of the object for the desired Locale, in the desired language.
icu::BreakIterator::createLineInstance
static BreakIterator * createLineInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for line-breaks using specified locale.
icu::BreakIterator::getDisplayName
static UnicodeString & getDisplayName(const Locale &objectLocale, UnicodeString &name)
Get name of the object for the desired Locale, in the language of the default locale.
icu::BreakIterator::following
virtual int32_t following(int32_t offset)=0
Advance the iterator to the first boundary following the specified offset.
UBreakIteratorType
UBreakIteratorType
The possible types of text boundaries.
Definition: ubrk.h:99
icu::BreakIterator::registerInstance
static URegistryKey registerInstance(BreakIterator *toAdopt, const Locale &locale, UBreakIteratorType kind, UErrorCode &status)
Register a new break iterator of the indicated kind, to use in the given locale.
UText
UText struct.
Definition: utext.h:1345
icu::BreakIterator::adoptText
virtual void adoptText(CharacterIterator *it)=0
Change the text over which this operates.
icu::BreakIterator::operator!=
UBool operator!=(const BreakIterator &rhs) const
Returns the complement of the result of operator==.
Definition: brkiter.h:131
icu::BreakIterator::BreakIterator
BreakIterator(const BreakIterator &other)
icu::BreakIterator::last
virtual int32_t last(void)=0
Set the iterator position to the index immediately BEYOND the last character in the text being scanne...
icu::BreakIterator::getRuleStatusVec
virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status)
For RuleBasedBreakIterators, get the status (tag) values from the break rule(s) that determined the b...
icu::CharacterIterator
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:358
ULOC_FULLNAME_CAPACITY
#define ULOC_FULLNAME_CAPACITY
Useful constant for the maximum size of the whole locale ID (including the terminating NULL and all k...
Definition: uloc.h:264
icu::BreakIterator::createCharacterInstance
static BreakIterator * createCharacterInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for character-breaks using specified locale. Returns an instance of a BreakIterat...
icu::BreakIterator::clone
virtual BreakIterator * clone(void) const =0
Return a polymorphic copy of this object.
icu::BreakIterator::operator==
virtual UBool operator==(const BreakIterator &) const =0
Return true if another object is semantically equal to this one.
icu::BreakIterator::first
virtual int32_t first(void)=0
Sets the current iteration position to the beginning of the text, position zero.
icu::BreakIterator::createWordInstance
static BreakIterator * createWordInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for word-breaks using the given locale.
icu::BreakIterator::getUText
virtual UText * getUText(UText *fillIn, UErrorCode &status) const =0
Get a UText for the text being analyzed.
strenum.h
C++ API: String Enumeration.
chariter.h
C++ API: Character Iterator.
icu::BreakIterator::next
virtual int32_t next(void)=0
Advance the iterator to the boundary following the current boundary.
icu::BreakIterator::previous
virtual int32_t previous(void)=0
Set the iterator position to the boundary preceding the current boundary.
icu::BreakIterator::getAvailableLocales
static StringEnumeration * getAvailableLocales(void)
Return a StringEnumeration over the locales available at the time of the call, including registered l...
icu::BreakIterator::createSentenceInstance
static BreakIterator * createSentenceInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for sentence-breaks using specified locale. Returns an instance of a BreakIterato...
locid.h
C++ API: Locale ID object.
icu::BreakIterator::BreakIterator
BreakIterator(const Locale &valid, const Locale &actual)
icu::BreakIterator::getText
virtual CharacterIterator & getText(void) const =0
Return a CharacterIterator over the text being analyzed.
uobject.h
C++ API: Common ICU base class UObject.
icu::BreakIterator::createTitleInstance
static BreakIterator * createTitleInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for title-casing breaks using the specified locale. Returns an instance of a Brea...
U_NAMESPACE_END
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:138
icu::BreakIterator::next
virtual int32_t next(int32_t n)=0
Set the iterator position to the nth boundary from the current boundary.
U_NAMESPACE_BEGIN
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:137
icu::Locale
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:192
icu::BreakIterator::getAvailableLocales
static const Locale * getAvailableLocales(int32_t &count)
Get the set of Locales for which TextBoundaries are installed.
unistr.h
C++ API: Unicode String.
icu::BreakIterator::BreakIterator
BreakIterator()
icu::BreakIterator::getLocaleID
const char * getLocaleID(ULocDataLocaleType type, UErrorCode &status) const
Get the locale for this break iterator object.
icu::BreakIterator::setText
virtual void setText(UText *text, UErrorCode &status)=0
Reset the break iterator to operate over the text represented by the UText.