00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #ifndef UNICODESET_H
00012 #define UNICODESET_H
00013
00014 #include "unicode/unifilt.h"
00015 #include "unicode/unistr.h"
00016 #include "unicode/uset.h"
00017
00023 U_NAMESPACE_BEGIN
00024
// Forward declarations. In this header each of these types is only used
// through a pointer, a reference, or a friend declaration, so an incomplete
// type is sufficient here.
00025 class BMPSet;
00026 class ParsePosition;
00027 class RBBIRuleScanner;
00028 class SymbolTable;
00029 class UnicodeSetStringSpan;
00030 class UVector;
00031 class RuleCharacterIterator;
00032
/**
 * UnicodeSet: a UnicodeFilter subclass whose interface works on code points
 * (UChar32), code point ranges and strings (UnicodeString).
 *
 * NOTE(review): only declarations are visible in this header, apart from the
 * inline functions defined after the class. Descriptions below that say
 * "presumably" are inferred from names and signatures and should be confirmed
 * against the implementation.
 */
00273 class U_COMMON_API UnicodeSet : public UnicodeFilter {
00274
      // Data members. No access specifier precedes them, so they are private
      // (the default for `class`).
      // NOTE(review): presumably `list` holds the code point data with `len`
      // used entries out of `capacity`, and `buffer`/`bufferCapacity` are a
      // scratch array (see swapBuffers()) — confirm against the .cpp.
00275 int32_t len;
00276 int32_t capacity;
00277 UChar32* list;
      // Non-NULL bmpSet (or stringSpan, below) makes isFrozen() return true.
00278 BMPSet *bmpSet;
00279 UChar32* buffer;
00280 int32_t bufferCapacity;
      // NOTE(review): presumably the length of `pat` — confirm.
00281 int32_t patLen;
00282
      // NOTE(review): presumably a stored pattern string (see setPattern() /
      // releasePattern()) and the set's multi-character strings — confirm.
00292 UChar *pat;
00293 UVector* strings;
00294 UnicodeSetStringSpan *stringSpan;
00295
00296 private:
      // Bit masks for fFlags. isBogus() tests the kIsBogus bit.
00297 enum {
00298 kIsBogus = 1
00299 };
00300 uint8_t fFlags;
00301 public:
      /**
       * Returns whether the kIsBogus bit is set in fFlags (see the inline
       * definition after the class).
       */
00311 inline UBool isBogus(void) const;
00312
      /** Presumably puts the set into the bogus state that isBogus() reports — TODO confirm. */
00329 void setToBogus();
00330
00331 public:
00332
      // Code point bounds: 0 and 0x10ffff (the highest Unicode code point).
00333 enum {
00338 MIN_VALUE = 0,
00339
00344 MAX_VALUE = 0x10ffff
00345 };
00346
00347
00348
00349
00350
00351 public:
00352
      // ---- Construction, destruction, copying ----

      /** Default constructor. Presumably creates an empty set — TODO confirm. */
00357 UnicodeSet();
00358
      /** Presumably creates a set holding the range [start, end] — TODO confirm inclusiveness. */
00367 UnicodeSet(UChar32 start, UChar32 end);
00368
      /**
       * Constructs a set from a pattern string.
       * @param pattern the pattern text
       * @param status  ICU error code, in/out
       */
00377 UnicodeSet(const UnicodeString& pattern,
00378 UErrorCode& status);
00379
      // This overload is compiled out when U_HIDE_INTERNAL_API is defined.
00380 #ifndef U_HIDE_INTERNAL_API
00381
      /**
       * Pattern constructor that also takes option bits and a symbol table.
       * NOTE(review): the meaning of `options` and whether `symbols` may be
       * NULL are not visible here.
       */
00393 UnicodeSet(const UnicodeString& pattern,
00394 uint32_t options,
00395 const SymbolTable* symbols,
00396 UErrorCode& status);
00397 #endif
00398
      /**
       * Pattern constructor that takes a ParsePosition by non-const
       * reference; presumably parsing starts at `pos` and `pos` is updated —
       * TODO confirm.
       */
00412 UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
00413 uint32_t options,
00414 const SymbolTable* symbols,
00415 UErrorCode& status);
00416
      /** Copy constructor. */
00421 UnicodeSet(const UnicodeSet& o);
00422
      /** Virtual destructor. */
00427 virtual ~UnicodeSet();
00428
      /** Copy assignment. */
00434 UnicodeSet& operator=(const UnicodeSet& o);
00435
      /** Equality comparison; operator!= is defined inline as its negation. */
00447 virtual UBool operator==(const UnicodeSet& o) const;
00448
      /** Returns !operator==(o) (see the inline definition after the class). */
00454 UBool operator!=(const UnicodeSet& o) const;
00455
      /** Returns a UnicodeFunctor*; presumably a heap-allocated copy owned by the caller — TODO confirm. */
00465 virtual UnicodeFunctor* clone() const;
00466
      /** Returns a hash code for this object. */
00474 virtual int32_t hashCode(void) const;
00475
      // ---- Conversion to and from the C type USet ----
      // All four are defined inline after the class as plain
      // reinterpret_casts of the pointer.

00484 inline static UnicodeSet *fromUSet(USet *uset);
00485
00494 inline static const UnicodeSet *fromUSet(const USet *uset);
00495
00503 inline USet *toUSet();
00504
00505
00513 inline const USet * toUSet() const;
00514
00515
00516
00517
00518
00519
      // ---- Freezing ----

      /** Returns true when bmpSet or stringSpan is non-NULL (see the inline definition after the class). */
00528 inline UBool isFrozen() const;
00529
      /** Presumably makes this set immutable and returns it — TODO confirm. */
00543 UnicodeFunctor *freeze();
00544
      /** Presumably returns a mutable (not frozen) copy — TODO confirm ownership. */
00553 UnicodeFunctor *cloneAsThawed() const;
00554
00555
00556
00557
00558
      // ---- Assignment from ranges, patterns and properties ----

      /** Presumably replaces the contents with the range [start, end]; returns UnicodeSet& — TODO confirm. */
00569 UnicodeSet& set(UChar32 start, UChar32 end);
00570
      /** Presumably reports whether the text at `pos` in `pattern` looks like the start of a set pattern — TODO confirm. */
00576 static UBool resemblesPattern(const UnicodeString& pattern,
00577 int32_t pos);
00578
      /** Applies a pattern string to this set; errors are reported through `status`. */
00591 UnicodeSet& applyPattern(const UnicodeString& pattern,
00592 UErrorCode& status);
00593
      // This overload is compiled out when U_HIDE_INTERNAL_API is defined.
00594 #ifndef U_HIDE_INTERNAL_API
00595
      /** applyPattern variant with option bits and a symbol table. */
00611 UnicodeSet& applyPattern(const UnicodeString& pattern,
00612 uint32_t options,
00613 const SymbolTable* symbols,
00614 UErrorCode& status);
00615 #endif
00616
      /** applyPattern variant that additionally takes a ParsePosition by non-const reference. */
00648 UnicodeSet& applyPattern(const UnicodeString& pattern,
00649 ParsePosition& pos,
00650 uint32_t options,
00651 const SymbolTable* symbols,
00652 UErrorCode& status);
00653
      /**
       * Writes a pattern into `result` and returns a UnicodeString&
       * (presumably `result`).
       * @param escapeUnprintable defaults to FALSE
       */
00667 virtual UnicodeString& toPattern(UnicodeString& result,
00668 UBool escapeUnprintable = FALSE) const;
00669
      /** Presumably sets this set to the code points whose property `prop` has the given integer value — TODO confirm. */
00692 UnicodeSet& applyIntPropertyValue(UProperty prop,
00693 int32_t value,
00694 UErrorCode& ec);
00695
      /** Like applyIntPropertyValue, but the property and value are given as strings (presumably names/aliases — TODO confirm). */
00725 UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
00726 const UnicodeString& value,
00727 UErrorCode& ec);
00728
      // ---- Queries ----

      /** Presumably the number of elements in the set — TODO confirm whether strings are counted. */
00737 virtual int32_t size(void) const;
00738
00745 virtual UBool isEmpty(void) const;
00746
      // contains(): membership tests for a code point, a range, and a string.
00754 virtual UBool contains(UChar32 c) const;
00755
00764 virtual UBool contains(UChar32 start, UChar32 end) const;
00765
00773 UBool contains(const UnicodeString& s) const;
00774
      // containsAll() / containsNone(): NOTE(review) presumably "every" and
      // "no" element of the argument is in this set, respectively — confirm.
00782 virtual UBool containsAll(const UnicodeSet& c) const;
00783
00791 UBool containsAll(const UnicodeString& s) const;
00792
00801 UBool containsNone(UChar32 start, UChar32 end) const;
00802
00810 UBool containsNone(const UnicodeSet& c) const;
00811
00819 UBool containsNone(const UnicodeString& s) const;
00820
      // containsSome(): each overload is defined inline after the class as the
      // negation of the containsNone() overload with the same parameters.
00829 inline UBool containsSome(UChar32 start, UChar32 end) const;
00830
00838 inline UBool containsSome(const UnicodeSet& s) const;
00839
00847 inline UBool containsSome(const UnicodeString& s) const;
00848
      // ---- Span operations ----
      // NOTE(review): presumably these return how far a run of text satisfying
      // `spanCondition` extends (forward for span*, backward for spanBack*) —
      // confirm against the implementation.

00867 int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
00868
      /**
       * UnicodeString overload, defined inline after the class: clamps `start`
       * to [0, s.length()], calls the UChar* overload on the remainder of the
       * buffer, and returns start plus that result.
       */
00881 inline int32_t span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const;
00882
00900 int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
00901
      /**
       * UnicodeString overload, defined inline after the class: clamps `limit`
       * to [0, s.length()] and calls the UChar* overload with the buffer and
       * the clamped limit.
       */
00915 inline int32_t spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const;
00916
      // UTF-8 variants taking a `const char *` buffer and a length.
00935 int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
00936
00954 int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
00955
      /**
       * Matching entry point. Note that it is non-const and takes `offset` by
       * non-const reference.
       * NOTE(review): presumably overrides a base-class virtual — confirm.
       */
00960 virtual UMatchDegree matches(const Replaceable& text,
00961 int32_t& offset,
00962 int32_t limit,
00963 UBool incremental);
00964
00965 private:
      /** Static helper for matching; NOTE(review): the meaning of the return value is not visible here. */
00988 static int32_t matchRest(const Replaceable& text,
00989 int32_t start, int32_t limit,
00990 const UnicodeString& s);
00991
      /** NOTE(review): presumably locates `c` within `list` and returns an index — confirm. */
01001 int32_t findCodePoint(UChar32 c) const;
01002
01003 public:
01004
01012 virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
01013
      // indexOf()/charAt(): NOTE(review) presumably convert between a code
      // point and its ordinal position in the set — confirm.
01022 int32_t indexOf(UChar32 c) const;
01023
01033 UChar32 charAt(int32_t index) const;
01034
      // ---- Mutators ----
      // Each returns UnicodeSet& (presumably *this, to allow chaining — TODO
      // confirm).

01049 virtual UnicodeSet& add(UChar32 start, UChar32 end);
01050
01058 UnicodeSet& add(UChar32 c);
01059
01071 UnicodeSet& add(const UnicodeString& s);
01072
01073 private:
      /** NOTE(review): presumably returns the code point when `s` is exactly one code point, else a sentinel — confirm. */
01079 static int32_t getSingleCP(const UnicodeString& s);
01080
01081 void _add(const UnicodeString& s);
01082
01083 public:
      // The *All(const UnicodeString&) overloads take a string; NOTE(review)
      // presumably they operate on each code point of `s` — confirm.
01092 UnicodeSet& addAll(const UnicodeString& s);
01093
01102 UnicodeSet& retainAll(const UnicodeString& s);
01103
01112 UnicodeSet& complementAll(const UnicodeString& s);
01113
01122 UnicodeSet& removeAll(const UnicodeString& s);
01123
      // Static factories returning UnicodeSet*; NOTE(review) presumably the
      // caller owns the result — confirm.
01132 static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s);
01133
01134
01142 static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s);
01143
01157 virtual UnicodeSet& retain(UChar32 start, UChar32 end);
01158
01159
01165 UnicodeSet& retain(UChar32 c);
01166
01180 virtual UnicodeSet& remove(UChar32 start, UChar32 end);
01181
01189 UnicodeSet& remove(UChar32 c);
01190
01200 UnicodeSet& remove(const UnicodeString& s);
01201
01209 virtual UnicodeSet& complement(void);
01210
01225 virtual UnicodeSet& complement(UChar32 start, UChar32 end);
01226
01234 UnicodeSet& complement(UChar32 c);
01235
01246 UnicodeSet& complement(const UnicodeString& s);
01247
      // Set-with-set operations.
01260 virtual UnicodeSet& addAll(const UnicodeSet& c);
01261
01273 virtual UnicodeSet& retainAll(const UnicodeSet& c);
01274
01286 virtual UnicodeSet& removeAll(const UnicodeSet& c);
01287
01298 virtual UnicodeSet& complementAll(const UnicodeSet& c);
01299
01306 virtual UnicodeSet& clear(void);
01307
      /** NOTE(review): `attribute` is presumably a bit mask of closure options — confirm. */
01333 UnicodeSet& closeOver(int32_t attribute);
01334
01341 virtual UnicodeSet &removeAllStrings();
01342
      // ---- Range access ----
01350 virtual int32_t getRangeCount(void) const;
01351
01359 virtual UChar32 getRangeStart(int32_t index) const;
01360
01368 virtual UChar32 getRangeEnd(int32_t index) const;
01369
      /**
       * Serializes into a caller-supplied uint16_t array.
       * @param dest         output array
       * @param destCapacity capacity of `dest`, presumably in uint16_t units — TODO confirm
       * @param ec           ICU error code, in/out
       * @return NOTE(review): presumably the serialized length — confirm
       */
01418 int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
01419
      /** NOTE(review): presumably releases excess storage — confirm. */
01426 virtual UnicodeSet& compact();
01427
      // ICU class ID accessors (static and dynamic).
01439 static UClassID U_EXPORT2 getStaticClassID(void);
01440
01449 virtual UClassID getDynamicClassID(void) const;
01450
01451 private:
01452
01453
01454
      // USetAccess is granted access to the private members, including the
      // two string accessors below.
01455 friend class USetAccess;
01456
01457 int32_t getStringCount() const;
01458
01459 const UnicodeString* getString(int32_t index) const;
01460
01461
01462
01463
01464
01465 private:
01466
01472 virtual UBool matchesIndexValue(uint8_t v) const;
01473
01474 private:
01475 friend class RBBIRuleScanner;
01476
01477
01478
01479
01480
      // Private constructor distinguished from the copy constructor by an
      // extra, unnamed UBool parameter.
      // NOTE(review): the parameter is presumably a tag selecting a variant
      // copy — confirm.
01481 UnicodeSet(const UnicodeSet& o, UBool );
01482
01483
01484
01485
01486
      // ---- Pattern parsing helpers ----

01487 void applyPatternIgnoreSpace(const UnicodeString& pattern,
01488 ParsePosition& pos,
01489 const SymbolTable* symbols,
01490 UErrorCode& status);
01491
      // `caseClosure` is a pointer to a member function with the same
      // signature as closeOver(int32_t).
01492 void applyPattern(RuleCharacterIterator& chars,
01493 const SymbolTable* symbols,
01494 UnicodeString& rebuiltPat,
01495 uint32_t options,
01496 UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
01497 UErrorCode& ec);
01498
01499
01500
01501
01502
      // ---- Storage management ----
      // NOTE(review): presumably these grow `list` / `buffer` to hold at least
      // newLen entries and report failure through `ec` — confirm.

01503 void ensureCapacity(int32_t newLen, UErrorCode& ec);
01504
01505 void ensureBufferCapacity(int32_t newLen, UErrorCode& ec);
01506
01507 void swapBuffers(void);
01508
01509 UBool allocateStrings(UErrorCode &status);
01510
      // ---- Pattern generation helpers ----

01511 UnicodeString& _toPattern(UnicodeString& result,
01512 UBool escapeUnprintable) const;
01513
01514 UnicodeString& _generatePattern(UnicodeString& result,
01515 UBool escapeUnprintable) const;
01516
01517 static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
01518
01519 static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
01520
01521
01522
01523
01524
      // ---- Low-level operations on raw UChar32 arrays ----
      // NOTE(review): the meaning of `polarity` is not visible in this header.

01525 void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
01526
01527 void add(const UChar32* other, int32_t otherLen, int8_t polarity);
01528
01529 void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
01530
      // ---- Property pattern support ----

01536 static UBool resemblesPropertyPattern(const UnicodeString& pattern,
01537 int32_t pos);
01538
01539 static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
01540 int32_t iterOpts);
01541
01581 UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
01582 ParsePosition& ppos,
01583 UErrorCode &ec);
01584
01585 void applyPropertyPattern(RuleCharacterIterator& chars,
01586 UnicodeString& rebuiltPat,
01587 UErrorCode& ec);
01588
01589 static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status);
01590
      /**
       * Callback type: a function taking a code point and an opaque context
       * pointer and returning UBool.
       */
01595 typedef UBool (*Filter)(UChar32 codePoint, void* context);
01596
      /**
       * Takes a Filter callback and the context pointer to go with it.
       * NOTE(review): `src` is presumably the same kind of value that
       * getInclusions() takes — confirm.
       */
01606 void applyFilter(Filter filter,
01607 void* context,
01608 int32_t src,
01609 UErrorCode &status);
01610
      // NOTE(review): presumably store and free the `pat` member — confirm.
01614 void setPattern(const UnicodeString& newPat);
01618 void releasePattern();
01619
01620 friend class UnicodeSetIterator;
01621 };
01622
01623
01624
/** Inequality: the logical negation of operator==. */
01625 inline UBool UnicodeSet::operator!=(const UnicodeSet &o) const {
01626     return !(this->operator==(o));
01627 }
01628
/**
 * Reports whether either of the bmpSet / stringSpan pointers is set.
 * Returns false only when both are NULL.
 */
01629 inline UBool UnicodeSet::isFrozen() const {
01630     return static_cast<UBool>(!(bmpSet == NULL && stringSpan == NULL));
01631 }
01632
/** Range overload: the negation of containsNone(start, end). */
01633 inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
01634     return static_cast<UBool>(!this->containsNone(start, end));
01635 }
01636
/** Set overload: the negation of containsNone(s). */
01637 inline UBool UnicodeSet::containsSome(const UnicodeSet &s) const {
01638     return static_cast<UBool>(!this->containsNone(s));
01639 }
01640
/** String overload: the negation of containsNone(s). */
01641 inline UBool UnicodeSet::containsSome(const UnicodeString &s) const {
01642     return static_cast<UBool>(!this->containsNone(s));
01643 }
01644
/** Returns the kIsBogus bit of fFlags, converted to UBool. */
01645 inline UBool UnicodeSet::isBogus() const {
01646     return static_cast<UBool>(fFlags & kIsBogus);
01647 }
01648
/**
 * Views a USet pointer as a UnicodeSet pointer.
 * This is a pure pointer reinterpretation; no object is created or copied.
 */
01649 inline UnicodeSet* UnicodeSet::fromUSet(USet* uset) {
01650     return reinterpret_cast<UnicodeSet*>(uset);
01651 }
01652
/**
 * Const overload: views a const USet pointer as a const UnicodeSet pointer.
 * This is a pure pointer reinterpretation; no object is created or copied.
 */
01653 inline const UnicodeSet* UnicodeSet::fromUSet(const USet* uset) {
01654     return reinterpret_cast<const UnicodeSet*>(uset);
01655 }
01656
/**
 * Views this object as a USet pointer.
 * This is a pure reinterpretation of `this`; nothing is copied.
 */
01657 inline USet* UnicodeSet::toUSet() {
01658     return reinterpret_cast<USet*>(this);
01659 }
01660
/**
 * Const overload: views this object as a const USet pointer.
 * This is a pure reinterpretation of `this`; nothing is copied.
 */
01661 inline const USet* UnicodeSet::toUSet() const {
01662     return reinterpret_cast<const USet*>(this);
01663 }
01664
/**
 * UnicodeString convenience overload of span().
 *
 * Clamps `start` to [0, s.length()], runs the UChar* overload over the rest
 * of the string's buffer, and returns the clamped start plus that result.
 *
 * @param s             text to examine
 * @param start         index to begin at; out-of-range values are clamped
 * @param spanCondition passed through unchanged to the UChar* overload
 * @return clamped start + result of the UChar* overload
 */
01665 inline int32_t UnicodeSet::span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const {
01666     const int32_t textLength = s.length();
      // Pin the starting index into [0, textLength].
01667     const int32_t from = (start < 0) ? 0 : ((start > textLength) ? textLength : start);
01668     const UChar *tail = s.getBuffer() + from;
01669     const int32_t advanced = span(tail, textLength - from, spanCondition);
01670     return from + advanced;
01673 }
01674
/**
 * UnicodeString convenience overload of spanBack().
 *
 * Clamps `limit` to [0, s.length()] and forwards the string's buffer and the
 * clamped limit to the UChar* overload.
 *
 * @param s             text to examine
 * @param limit         index to stop at; out-of-range values are clamped
 * @param spanCondition passed through unchanged to the UChar* overload
 * @return whatever the UChar* overload returns
 */
01675 inline int32_t UnicodeSet::spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const {
01676     const int32_t textLength = s.length();
      // Pin the limit into [0, textLength].
01677     const int32_t upTo = (limit < 0) ? 0 : ((limit > textLength) ? textLength : limit);
01678     return spanBack(s.getBuffer(), upTo, spanCondition);
01683 }
01684
01685 U_NAMESPACE_END
01686
01687 #endif