00001
00002
00003
00004
00005 #ifndef BITVECTOR_H
00006 #define BITVECTOR_H
00007
00008
00009
00010 #include "array_t.h"
00011
00012 #if defined(_MSC_VER) && defined(_WIN32)
00013
00014 #pragma warning (disable : 4231)
00015 #endif
00016 #if defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)) && defined(CXX_USE_DLL) && !defined(DLL_EXPORT)
00017 extern template class FASTBIT_CXX_DLLSPEC ibis::array_t<uint32_t>;
00018 #endif
00019
00020
00062 class FASTBIT_CXX_DLLSPEC ibis::bitvector {
00063 public:
00064 typedef uint32_t word_t;
00065
00067 ~bitvector() {clear();};
00068
00069 bitvector();
00070 bitvector(const bitvector& bv);
00071 explicit bitvector(const array_t<word_t>& arr);
00072 explicit bitvector(const char* file);
00073
00074 inline const bitvector& operator=(const bitvector& bv);
00075 inline bitvector& copy(const bitvector& bv);
00076 inline bitvector& swap(bitvector& bv);
00077
00078
00079
00080
00081 void setBit(const word_t i, int val);
00082 inline void turnOnRawBit(const word_t i);
00084 void erase(word_t i, word_t j);
00085
00086 void set(int val, word_t n);
00087 void clear();
00088
00089 bitvector& operator+=(const bitvector& bv);
00090 inline bitvector& operator+=(int b);
00091 void appendWord(word_t w);
00092 inline void appendFill(int val, word_t n);
00093
00095 int operator==(const bitvector& rhs) const;
00096
00098 void flip();
00100 void operator&=(const bitvector& rhs);
00103 bitvector* operator&(const bitvector&) const;
00105 void operator|=(const bitvector& rhs);
00107 bitvector* operator|(const bitvector&) const;
00109 void operator^=(const bitvector& rhs);
00111 bitvector* operator^(const bitvector&) const;
00113 void operator-=(const bitvector& rhs);
00116 bitvector* operator-(const bitvector&) const;
00117
00118 void subset(const bitvector& mask, bitvector& res) const;
00119 word_t count(const bitvector& mask) const;
00120
00121
00124 void read(const char *fn);
00126 void write(const char *fn) const;
00128 void write(int fdes) const;
00132 void write(array_t<word_t>& arr) const;
00133
00134 void compress();
00135 void decompress();
00136 word_t compressible() const;
00139 bool isCompressed() const {return (m_vec.size()*MAXBITS < nbits);}
00140
00141 inline word_t size() const throw();
00142 inline void sloppySize(word_t n) const;
00143 inline word_t cnt() const;
00144 inline word_t count() const {return cnt();}
00145 inline word_t sloppyCount() const;
00146 inline word_t numFillWords() const;
00148 uint32_t bytes() const throw() {
00149 return (m_vec.size()*sizeof(word_t) + sizeof(bitvector));
00150 };
00154 uint32_t getSerialSize() const throw() {
00155 return (m_vec.size() + 1 + (active.nbits>0)) * sizeof(word_t);
00156 };
00158 static word_t bitsPerLiteral() {return MAXBITS;}
00159 inline static double randomSize(word_t nb, word_t nc);
00160 inline static double markovSize(word_t nb, word_t nc, double f);
00161 static double clusteringFactor(word_t nb, word_t nc, word_t sz);
00162
00163 void adjustSize(word_t nv, word_t nt);
00164 void reserve(unsigned nb, unsigned nc, double cf=0.0);
00165
00168 bool empty() const {return all0s() && active.val == 0;}
00169
00170 std::ostream& print(std::ostream &) const;
00171
00173 class iterator;
00174 inline iterator end();
00175 inline iterator begin();
00176
00178 class const_iterator;
00179 inline const_iterator end() const;
00180 inline const_iterator begin() const;
00181
00183 class indexSet;
00184 inline indexSet firstIndexSet() const;
00185
00186
00187 friend class indexSet;
00188 friend class iterator;
00189 friend class const_iterator;
00190
00191 protected:
00192 inline bool all0s() const;
00193 inline bool all1s() const;
00194
00195 private:
00196
00197 static const unsigned MAXBITS;
00198 static const unsigned SECONDBIT;
00199 static const word_t FILLBIT;
00200 static const word_t HEADER0;
00201 static const word_t HEADER1;
00202 static const word_t ALLONES;
00203 static const word_t MAXCNT;
00204
00211 struct active_word {
00212 word_t val;
00213 word_t nbits;
00214
00215 active_word() : val(0), nbits(0) {};
00216 void reset() {val = 0; nbits = 0;};
00217 int is_full() const {return (nbits >= MAXBITS);};
00218 void append(int b) {
00219 val <<= 1; nbits ++; val += b;
00220 };
00221 };
00222
00225 struct run {
00226 int isFill;
00227 int fillBit;
00228 word_t nWords;
00229 array_t<word_t>::const_iterator it;
00230 run() : isFill(0), fillBit(0), nWords(0), it(0) {};
00231 void decode() {
00232 fillBit = (*it > HEADER1);
00233 if (*it > ALLONES) {
00234 nWords = (*it & MAXCNT);
00235 isFill = 1;
00236 }
00237 else {
00238 nWords = 1;
00239 isFill = 0;
00240 }
00241 };
00244 void operator--() {
00245 if (nWords > 1) {--nWords;}
00246 else {++ it; nWords = 0;}
00247 };
00250 void operator-=(word_t len) {
00251 while (len > 0) {
00252 if (nWords == 0) decode();
00253 if (isFill != 0) {
00254 if (nWords > len) {nWords -= len; len = 0;}
00255 else if (nWords == len) {nWords = 0; len = 0; ++ it;}
00256 else {len -= nWords; ++ it; nWords = 0;}
00257 }
00258 else {-- len; ++ it; nWords = 0;}
00259 }
00260 };
00261 };
00262 friend struct run;
00263 friend struct active_word;
00264
00265
00266 mutable word_t nbits;
00267 mutable word_t nset;
00268 active_word active;
00269 array_t<word_t> m_vec;
00270
00271
00272 word_t count_c1(const bitvector& mask) const;
00273 word_t count_c2(const bitvector& mask) const;
00274
00275
00276
00277
00278
00279
00280
00281
00282
00283 void or_c2(const bitvector& rhs, bitvector& res) const;
00284 void or_c1(const bitvector& rhs, bitvector& res) const;
00285 void or_c0(const bitvector& rhs);
00286 void or_d1(const bitvector& rhs);
00287 void or_d2(const bitvector& rhs, bitvector& res) const;
00288 void and_c2(const bitvector& rhs, bitvector& res) const;
00289 void and_c1(const bitvector& rhs, bitvector& res) const;
00290 void and_c0(const bitvector& rhs);
00291 void and_d1(const bitvector& rhs);
00292 void and_d2(const bitvector& rhs, bitvector& res) const;
00293 void xor_c2(const bitvector& rhs, bitvector& res) const;
00294 void xor_c1(const bitvector& rhs, bitvector& res) const;
00295 void xor_c0(const bitvector& rhs);
00296 void xor_d1(const bitvector& rhs);
00297 void xor_d2(const bitvector& rhs, bitvector& res) const;
00298 void minus_c2(const bitvector& rhs, bitvector& res) const;
00299 void minus_c1(const bitvector& rhs, bitvector& res) const;
00300 void minus_c1x(const bitvector& rhs, bitvector& res) const;
00301 void minus_c0(const bitvector& rhs);
00302 void minus_d1(const bitvector& rhs);
00303 void minus_d2(const bitvector& rhs, bitvector& res) const;
00304 inline void copy_runs(run& it, word_t& nw);
00305 inline void copy_runsn(run& it, word_t& nw);
00306 inline void copy_fill(array_t<word_t>::iterator& jt, run& it);
00307 inline void copy_runs(array_t<word_t>::iterator& jt, run& it,
00308 word_t& nw);
00309 inline void copy_runsn(array_t<word_t>::iterator& jt, run& it,
00310 word_t& nw);
00311 void decompress(array_t<word_t>& tmp) const;
00312 void copy_comp(array_t<word_t>& tmp) const;
00313 inline void append_active();
00314 inline void append_counter(int val, word_t cnt);
00315 inline word_t cnt_ones(word_t) const;
00316 inline word_t cnt_bits(word_t) const;
00317 word_t do_cnt() const throw ();
00318 };
00319
00321 class ibis::bitvector::const_iterator {
00322 public:
00323 const_iterator() : compressed(0), ind(0), nbits(0), literalvalue(0),
00324 fillbit(0), active(0) {}
00325
00326 const_iterator(const const_iterator& r)
00327 : compressed(r.compressed), ind(r.ind), nbits(r.nbits),
00328 literalvalue(r.literalvalue), fillbit(r.fillbit), active(r.active),
00329 end(r.end), begin(r.begin), it(r.it) {};
00330 const_iterator& operator=(const const_iterator& r) {
00331 compressed = r.compressed; ind = r.ind; nbits = r.nbits;
00332 literalvalue = r.literalvalue; fillbit = r.fillbit; active = r.active;
00333 end = r.end; begin = r.begin; it = r.it;
00334 return *this;}
00335
00336 inline bool operator*() const;
00337 inline int operator!=(const const_iterator& rhs) const throw ();
00338 inline int operator==(const const_iterator& rhs) const throw ();
00339
00340 inline const_iterator& operator++();
00341 inline const_iterator& operator--();
00342 const_iterator& operator+=(int incr);
00343
00344 private:
00345 ibis::bitvector::word_t compressed;
00346 ibis::bitvector::word_t ind;
00347 ibis::bitvector::word_t nbits;
00348 ibis::bitvector::word_t literalvalue;
00349 int fillbit;
00350 const active_word* active;
00351 array_t<word_t>::const_iterator end;
00352 array_t<word_t>::const_iterator begin;
00353 array_t<word_t>::const_iterator it;
00354
00355 void decodeWord();
00356
00357
00358 friend void ibis::bitvector::erase(word_t i, word_t j);
00359 friend const_iterator ibis::bitvector::begin() const;
00360 friend const_iterator ibis::bitvector::end() const;
00361 friend class ibis::bitvector::iterator;
00362 };
00363
00371 class ibis::bitvector::iterator {
00372 public:
00373 iterator() : compressed(0), ind(0), nbits(0), literalvalue(0), fillbit(0),
00374 bitv(0), active(0), vec(0) {}
00375 iterator(const iterator& r)
00376 : compressed(r.compressed), ind(r.ind), nbits(r.nbits),
00377 literalvalue(r.literalvalue), fillbit(r.fillbit), bitv(r.bitv),
00378 active(r.active), vec(r.vec), it(r.it) {};
00379 const iterator& operator=(const iterator& r) {
00380 compressed = r.compressed; ind = r.ind; nbits = r.nbits;
00381 literalvalue = r.literalvalue; fillbit = r.fillbit; bitv = r.bitv;
00382 active = r.active; vec = r.vec; it = r.it; return *this;}
00383
00384 inline bool operator*() const;
00385 inline int operator!=(const const_iterator& rhs) const throw ();
00386 inline int operator==(const const_iterator& rhs) const throw ();
00387 inline int operator!=(const iterator& rhs) const throw ();
00388 inline int operator==(const iterator& rhs) const throw ();
00389
00390 inline iterator& operator++();
00391 inline iterator& operator--();
00392 iterator& operator+=(int incr);
00393 const iterator& operator=(int val);
00394
00395 private:
00396 ibis::bitvector::word_t compressed;
00397 ibis::bitvector::word_t ind;
00398 ibis::bitvector::word_t nbits;
00399 ibis::bitvector::word_t literalvalue;
00400 int fillbit;
00401 ibis::bitvector* bitv;
00402 active_word* active;
00403 array_t<word_t>* vec;
00404 array_t<word_t>::iterator it;
00405
00406 void decodeWord();
00407
00408 friend iterator ibis::bitvector::begin();
00409 friend iterator ibis::bitvector::end();
00410 };
00411
00421 class FASTBIT_CXX_DLLSPEC ibis::bitvector::indexSet {
00422 public:
00424 indexSet() : it(0), end(0), active(0), nind(0) {}
00426 indexSet(const indexSet& rhs)
00427 : it(rhs.it), end(rhs.end), active(rhs.active), nind(rhs.nind) {
00428 ind[0] = rhs.ind[0];
00429 ind[1] = rhs.ind[1];
00430 ind[2] = rhs.ind[2];
00431 ind[3] = rhs.ind[3];
00432 ind[4] = rhs.ind[4];
00433 ind[5] = rhs.ind[5];
00434 ind[6] = rhs.ind[6];
00435 ind[7] = rhs.ind[7];
00436 ind[8] = rhs.ind[8];
00437 ind[9] = rhs.ind[9];
00438 ind[10] = rhs.ind[10];
00439 ind[11] = rhs.ind[11];
00440 ind[12] = rhs.ind[12];
00441 ind[13] = rhs.ind[13];
00442 ind[14] = rhs.ind[14];
00443 ind[15] = rhs.ind[15];
00444 ind[16] = rhs.ind[16];
00445 ind[17] = rhs.ind[17];
00446 ind[18] = rhs.ind[18];
00447 ind[19] = rhs.ind[19];
00448 ind[20] = rhs.ind[20];
00449 ind[21] = rhs.ind[21];
00450 ind[22] = rhs.ind[22];
00451 ind[23] = rhs.ind[23];
00452 ind[24] = rhs.ind[24];
00453 ind[25] = rhs.ind[25];
00454 ind[26] = rhs.ind[26];
00455 ind[27] = rhs.ind[27];
00456 ind[28] = rhs.ind[28];
00457 ind[29] = rhs.ind[29];
00458 ind[30] = rhs.ind[30];
00459 ind[31] = rhs.ind[31];
00460 }
00462 indexSet& operator=(const indexSet& rhs) {
00463 it = rhs.it;
00464 end = rhs.end;
00465 active = rhs.active;
00466 nind = rhs.nind;
00467 ind[0] = rhs.ind[0];
00468 ind[1] = rhs.ind[1];
00469 ind[2] = rhs.ind[2];
00470 ind[3] = rhs.ind[3];
00471 ind[4] = rhs.ind[4];
00472 ind[5] = rhs.ind[5];
00473 ind[6] = rhs.ind[6];
00474 ind[7] = rhs.ind[7];
00475 ind[8] = rhs.ind[8];
00476 ind[9] = rhs.ind[9];
00477 ind[10] = rhs.ind[10];
00478 ind[11] = rhs.ind[11];
00479 ind[12] = rhs.ind[12];
00480 ind[13] = rhs.ind[13];
00481 ind[14] = rhs.ind[14];
00482 ind[15] = rhs.ind[15];
00483 ind[16] = rhs.ind[16];
00484 ind[17] = rhs.ind[17];
00485 ind[18] = rhs.ind[18];
00486 ind[19] = rhs.ind[19];
00487 ind[20] = rhs.ind[20];
00488 ind[21] = rhs.ind[21];
00489 ind[22] = rhs.ind[22];
00490 ind[23] = rhs.ind[23];
00491 ind[24] = rhs.ind[24];
00492 ind[25] = rhs.ind[25];
00493 ind[26] = rhs.ind[26];
00494 ind[27] = rhs.ind[27];
00495 ind[28] = rhs.ind[28];
00496 ind[29] = rhs.ind[29];
00497 ind[30] = rhs.ind[30];
00498 ind[31] = rhs.ind[31];
00499 return *this;
00500 }
00501
00502
00504 bool isRange() const {return (nind>=ibis::bitvector::MAXBITS);}
00506 const word_t* indices() const {return ind;};
00508 word_t nIndices() const {return nind;}
00510 const word_t& currentWord() const {return *it;}
00511 indexSet& operator++();
00512
00513
00514 friend indexSet ibis::bitvector::firstIndexSet() const;
00515
00516 private:
00517 array_t<word_t>::const_iterator it;
00518 array_t<word_t>::const_iterator end;
00519 const active_word* active;
00520 word_t nind;
00521 word_t ind[32];
00522 };
00523
00530 inline void ibis::bitvector::sloppySize(word_t n) const {
00531 nbits = n-active.nbits;
00532 #if defined(WAH_CHECK_SIZE)
00533 word_t nb = do_cnt();
00534 if (nb != nbits) {
00535 const_cast<ibis::bitvector*>(this)->adjustSize(0, nbits);
00536 LOGGER(ibis::gVerbose >= 0)
00537 << "bitvector::sloppySize -- adjust the number of bits to "
00538 << n;
00539 }
00540 #endif
00541 }
00542
00552 inline ibis::bitvector::word_t ibis::bitvector::sloppyCount() const {
00553 if (nset > 0) {
00554 return nset;
00555 }
00556 else if (active.nbits == 0 || active.val == 0) {
00557 if (m_vec.empty() ||
00558 (m_vec.size() == 1 &&
00559 (m_vec[0] == 0 ||
00560 (m_vec[0]>=HEADER0 && m_vec[0]<HEADER1))))
00561 return 0;
00562 else
00563 return 2;
00564 }
00565 else {
00566 return 1;
00567 }
00568 }
00569
00572 inline bool ibis::bitvector::all0s() const {
00573 if (m_vec.empty()) {
00574 return true;
00575 }
00576 else if (m_vec.size() == 1) {
00577 return (m_vec[0] == 0 || (m_vec[0]>=HEADER0 && m_vec[0]<HEADER1));
00578 }
00579 else {
00580 return false;
00581 }
00582 }
00583
00586 inline bool ibis::bitvector::all1s() const {
00587 if (m_vec.size() == 1) {
00588 return (m_vec[0] == ALLONES || (m_vec[0] > HEADER1));
00589 }
00590 else {
00591 return false;
00592 }
00593 }
00594
00596 inline ibis::bitvector::word_t
00597 ibis::bitvector::cnt_bits(ibis::bitvector::word_t val) const {
00598 return ((val>ALLONES) ? ((val&MAXCNT)*MAXBITS) : MAXBITS);
00599 }
00600
00602 inline ibis::bitvector::word_t
00603 ibis::bitvector::cnt_ones(ibis::bitvector::word_t val) const {
00604
00605 static const word_t table[256] = {
00606 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
00607 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00608 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00609 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00610 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00611 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00612 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00613 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00614 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00615 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00616 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00617 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00618 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00619 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00620 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00621 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
00622 return table[val&0xFFU] + table[(val>>8)&0xFFU] +
00623 table[(val>>16)&0xFFU] + table[(val>>24)&0xFFU];
00624 }
00625
00627 inline ibis::bitvector::word_t ibis::bitvector::size() const throw() {
00628 return ((nbits?nbits:(nbits=do_cnt()))+active.nbits);
00629 }
00630
00632 inline ibis::bitvector::word_t ibis::bitvector::cnt() const {
00633 if (nset==0 && !m_vec.empty())
00634 nbits = do_cnt();
00635 return (nset+cnt_ones(active.val));
00636 }
00637
00639 inline ibis::bitvector::word_t ibis::bitvector::numFillWords() const {
00640 word_t cnt=0;
00641 array_t<ibis::bitvector::word_t>::const_iterator it = m_vec.begin();
00642 while (it != m_vec.end()) {
00643 cnt += (*it >> ibis::bitvector::MAXBITS);
00644 it++;
00645 }
00646 return cnt;
00647 }
00648
00652 inline const ibis::bitvector&
00653 ibis::bitvector::operator=(const ibis::bitvector& bv) {
00654 nbits = bv.nbits; nset = bv.nset; active = bv.active;
00655 m_vec.deepCopy(bv.m_vec);
00656 return *this;
00657 }
00658
00660 inline ibis::bitvector& ibis::bitvector::copy(const ibis::bitvector& bv) {
00661 nbits = bv.nbits; nset = bv.nset; active = bv.active;
00662 m_vec.deepCopy(bv.m_vec);
00663 return *this;
00664 }
00665
00666 inline ibis::bitvector& ibis::bitvector::swap(bitvector& bv) {
00667 word_t tmp;
00668 tmp = bv.nbits; bv.nbits = nbits; nbits = tmp;
00669 tmp = bv.nset; bv.nset = nset; nset = tmp;
00670 active_word atmp = bv.active;
00671 bv.active = active; active = atmp;
00672 m_vec.swap(bv.m_vec);
00673 return *this;
00674 }
00675
00679 inline void ibis::bitvector::append_active() {
00680
00681
00682
00683
00684 if (m_vec.empty()) {
00685 m_vec.push_back(active.val);
00686 }
00687 else if (active.val == 0) {
00688 if (m_vec.back() == 0) {
00689 m_vec.back() = (HEADER0 + 2);
00690 }
00691 else if (m_vec.back() >= HEADER0 && m_vec.back() < HEADER1) {
00692 ++ m_vec.back();
00693 }
00694 else {
00695 m_vec.push_back(active.val);
00696 }
00697 }
00698 else if (active.val == ALLONES) {
00699 if (m_vec.back() == ALLONES) {
00700 m_vec.back() = (HEADER1 | 2);
00701 }
00702 else if (m_vec.back() >= HEADER1) {
00703 ++m_vec.back();
00704 }
00705 else {
00706 m_vec.push_back(active.val);
00707 }
00708 }
00709 else {
00710 m_vec.push_back(active.val);
00711 }
00712 nbits += MAXBITS;
00713 active.reset();
00714 nset = 0;
00715
00716
00717 }
00718
00722 inline void ibis::bitvector::append_counter(int val, word_t cnt) {
00723 word_t head = 2 + val;
00724 word_t w = (head << SECONDBIT) + cnt;
00725 nbits += cnt*MAXBITS;
00726 if (m_vec.empty()) {
00727 m_vec.push_back(w);
00728 }
00729 else if ((m_vec.back()>>SECONDBIT) == head) {
00730 m_vec.back() += cnt;
00731 }
00732 else if ((m_vec.back()==ALLONES) && head==3) {
00733 m_vec.back() = w + 1;
00734 }
00735 else if ((m_vec.back() == 0) && head==2) {
00736 m_vec.back() = w + 1;
00737 }
00738 else {
00739 m_vec.push_back(w);
00740 }
00741 }
00742
00743
00744 inline ibis::bitvector& ibis::bitvector::operator+=(int b) {
00745 active.append(b);
00746 if (active.is_full())
00747 append_active();
00748 return *this;
00749 }
00750
00755 inline void ibis::bitvector::appendFill(int val, word_t n) {
00756 if (n == 0) return;
00757 if (active.nbits > 0) {
00758 word_t tmp = (MAXBITS - active.nbits);
00759 if (tmp > n) tmp = n;
00760 active.nbits += tmp;
00761 active.val <<= tmp;
00762 n -= tmp;
00763 if (val != 0)
00764 active.val |= (1U<<tmp) - 1;
00765 if (active.nbits >= MAXBITS)
00766 append_active();
00767 }
00768 if (n >= MAXBITS) {
00769 word_t cnt = n / MAXBITS;
00770 if (cnt > 1)
00771 append_counter(val, cnt);
00772 else if (val != 0) {
00773 active.val = ALLONES;
00774 append_active();
00775 }
00776 else {
00777 active.val = 0;
00778 append_active();
00779 }
00780 n -= cnt * MAXBITS;
00781 }
00782 if (n > 0) {
00783 active.nbits = n;
00784 active.val = val*((1U<<n)-1);
00785 }
00786 }
00787
00793 inline void ibis::bitvector::copy_runs(run& it, word_t& nw) {
00794
00795 if (it.isFill != 0) {
00796 append_counter(it.fillBit, it.nWords);
00797 nw -= it.nWords;
00798 }
00799 else {
00800 active.val = *(it.it);
00801 append_active();
00802 -- nw;
00803 }
00804 ++ it.it;
00805 it.nWords = 0;
00806 nset = 0;
00807 nbits += MAXBITS * nw;
00808
00809 while (nw > 0) {
00810 it.decode();
00811 if (nw >= it.nWords) {
00812 m_vec.push_back(*(it.it));
00813 nw -= it.nWords;
00814 it.nWords = 0;
00815 ++ it.it;
00816 }
00817 else {
00818 break;
00819 }
00820 }
00821 nbits -= MAXBITS * nw;
00822 }
00823
00826 inline void ibis::bitvector::copy_runsn(run& it, word_t& nw) {
00827
00828 if (it.isFill != 0) {
00829 append_counter(!it.fillBit, it.nWords);
00830 nw -= it.nWords;
00831 }
00832 else {
00833 active.val = ALLONES ^ *(it.it);
00834 append_active();
00835 -- nw;
00836 }
00837 ++ it.it;
00838 it.nWords = 0;
00839 nset = 0;
00840 nbits += MAXBITS * nw;
00841
00842 while (nw > 0) {
00843 it.decode();
00844 if (nw >= it.nWords) {
00845 m_vec.push_back((it.isFill?FILLBIT:ALLONES) ^ *(it.it));
00846 nw -= it.nWords;
00847 it.nWords = 0;
00848 ++ it.it;
00849 }
00850 else {
00851 break;
00852 }
00853 }
00854 nbits -= MAXBITS * nw;
00855 }
00856
00860 inline void ibis::bitvector::copy_fill
00861 (array_t<ibis::bitvector::word_t>::iterator& jt, run& it) {
00862 if (it.fillBit == 0) {
00863 while (it.nWords > 3) {
00864 *jt = 0;
00865 jt[1] = 0;
00866 jt[2] = 0;
00867 jt[3] = 0;
00868 jt += 4;
00869 it.nWords -= 4;
00870 }
00871 if (it.nWords == 1) {
00872 *jt = 0; ++jt;
00873 }
00874 else if (it.nWords == 2) {
00875 *jt = 0; jt[1] = 0; jt += 2;
00876 }
00877 else if (it.nWords == 3) {
00878 *jt = 0; jt[1] = 0; jt[2] = 0; jt += 3;
00879 }
00880 }
00881 else {
00882 while (it.nWords > 3) {
00883 *jt = ALLONES;
00884 jt[1] = ALLONES;
00885 jt[2] = ALLONES;
00886 jt[3] = ALLONES;
00887 jt += 4;
00888 it.nWords -= 4;
00889 }
00890 if (it.nWords == 1) {
00891 *jt = ALLONES; ++jt;
00892 }
00893 else if (it.nWords == 2) {
00894 *jt = ALLONES; jt[1] = ALLONES; jt += 2;
00895 }
00896 else if (it.nWords == 3) {
00897 *jt = ALLONES; jt[1] = ALLONES; jt[2] = ALLONES; jt += 3;
00898 }
00899 }
00900 it.nWords = 0;
00901 ++ it.it;
00902 }
00903
00908 inline void ibis::bitvector::copy_runs
00909 (array_t<ibis::bitvector::word_t>::iterator& jt, run& it, word_t& nw) {
00910 while (nw >= it.nWords && nw > 0) {
00911 if (it.isFill != 0) {
00912 const array_t<word_t>::iterator iend = jt + it.nWords;
00913 if (it.fillBit == 0) {
00914 while (jt < iend) {
00915 *jt = 0;
00916 ++ jt;
00917 }
00918 }
00919 else {
00920 while (jt < iend) {
00921 *jt = ALLONES;
00922 ++ jt;
00923 }
00924 }
00925 nw -= it.nWords;
00926 }
00927 else {
00928 *jt = *(it.it);
00929 ++ jt;
00930 -- nw;
00931 }
00932 ++ it.it;
00933 if (nw > 0) {
00934 it.decode();
00935 }
00936 else {
00937 it.nWords = 0;
00938 return;
00939 }
00940 }
00941 }
00942
00945 inline void ibis::bitvector::copy_runsn
00946 (array_t<ibis::bitvector::word_t>::iterator& jt, run& it, word_t& nw) {
00947 while (nw >= it.nWords) {
00948 if (it.isFill != 0) {
00949 const array_t<word_t>::iterator iend = jt + it.nWords;
00950 if (it.fillBit == 0) {
00951 while (jt < iend) {
00952 *jt = ALLONES;
00953 ++ jt;
00954 }
00955 }
00956 else {
00957 while (jt < iend) {
00958 *jt = 0;
00959 ++ jt;
00960 }
00961 }
00962 nw -= it.nWords;
00963 }
00964 else {
00965 *jt = *(it.it) ^ ALLONES;
00966 ++ jt;
00967 -- nw;
00968 }
00969 ++ it.it;
00970 if (nw > 0) {
00971 it.decode();
00972 }
00973 else {
00974 it.nWords = 0;
00975 return;
00976 }
00977 }
00978 }
00979
00980 inline ibis::bitvector::iterator ibis::bitvector::begin() {
00981 iterator it;
00982 it.it = m_vec.begin();
00983 it.vec = &m_vec;
00984 it.bitv = this;
00985 it.active = &active;
00986 it.decodeWord();
00987 return it;
00988 }
00989
00990 inline ibis::bitvector::iterator ibis::bitvector::end() {
00991 iterator it;
00992 it.ind = 0;
00993 it.compressed = 0;
00994 it.nbits = 0;
00995 it.literalvalue = 0;
00996 it.fillbit = 0;
00997 it.it = m_vec.end() + 1;
00998 it.vec = &m_vec;
00999 it.bitv = this;
01000 it.active = &active;
01001 return it;
01002 }
01003
01004 inline ibis::bitvector::const_iterator ibis::bitvector::begin() const {
01005 const_iterator it;
01006 it.it = m_vec.begin();
01007 it.begin = m_vec.begin();
01008 it.end = m_vec.end();
01009 it.active = &active;
01010 it.decodeWord();
01011 return it;
01012 }
01013
01014
01015 inline bool ibis::bitvector::iterator::operator*() const {
01016 #if defined(DEBUG) && DEBUG + 0 > 1
01017 if (vec==0 || it<vec->begin() || it>vec->end())
01018 throw "bitvector::const_iterator not initialized correctly.";
01019 #endif
01020 if (compressed != 0)
01021 return (fillbit != 0);
01022 else
01023 return (1 & (literalvalue >> (bitvector::SECONDBIT - ind)));
01024 }
01025
01026
01027 inline int ibis::bitvector::iterator::operator!=
01028 (const ibis::bitvector::const_iterator& rhs) const throw () {
01029 return (it != rhs.it);
01030 }
01031 inline int ibis::bitvector::iterator::operator==
01032 (const ibis::bitvector::const_iterator& rhs) const throw () {
01033 return (it == rhs.it);
01034 }
01035 inline int ibis::bitvector::iterator::operator!=
01036 (const ibis::bitvector::iterator& rhs) const throw () {
01037 return (it != rhs.it);
01038 }
01039 inline int ibis::bitvector::iterator::operator==
01040 (const ibis::bitvector::iterator& rhs) const throw () {
01041 return (it == rhs.it);
01042 }
01043
01044
01045 inline ibis::bitvector::iterator& ibis::bitvector::iterator::operator++() {
01046 #if defined(DEBUG) && DEBUG + 0 > 1
01047 if (vec==0 || it<vec->begin() || it>vec->end())
01048 throw "bitvector::iterator not formed correctly.";
01049 #endif
01050 if (ind+1<nbits) {++ind;}
01051 else {++ it; decodeWord();}
01052 return *this;
01053 }
01054
01055
01056 inline ibis::bitvector::iterator& ibis::bitvector::iterator::operator--() {
01057 #if defined(DEBUG) && DEBUG + 0 > 1
01058 if (vec==0 || it<vec->begin() || it>vec->end()+1)
01059 throw "bitvector::iterator not formed correctly.";
01060 #endif
01061 if (ind) -- ind;
01062 else {
01063 if (it <= vec->end()) -- it;
01064 else if (active->nbits) it = vec->end();
01065 else it = vec->end() - 1;
01066 decodeWord();
01067 if (nbits) ind = nbits - 1;
01068 }
01069 return *this;
01070 }
01071
01072 inline ibis::bitvector::const_iterator ibis::bitvector::end() const {
01073 const_iterator it;
01074 it.ind = 0;
01075 it.compressed = 0;
01076 it.nbits = 0;
01077 it.literalvalue = 0;
01078 it.fillbit = 0;
01079 it.it = m_vec.end() + 1;
01080 it.begin = m_vec.begin();
01081 it.end = m_vec.end();
01082 it.active = &active;
01083 return it;
01084 }
01085
01086
01087 inline bool ibis::bitvector::const_iterator::operator*() const {
01088 #if defined(DEBUG) && DEBUG + 0 > 1
01089 if (it==0 || end==0 || it>end || nbits<=ind)
01090 throw "bitvector::const_iterator not initialized correctly.";
01091 #endif
01092 if (compressed != 0)
01093 return (fillbit != 0);
01094 else
01095 return (1 & (literalvalue >> (bitvector::SECONDBIT - ind)));
01096 }
01097
01098
01099 inline int ibis::bitvector::const_iterator::operator!=
01100 (const ibis::bitvector::const_iterator& rhs) const throw (){
01101 return (it != rhs.it);
01102 }
01103 inline int ibis::bitvector::const_iterator::operator==
01104 (const ibis::bitvector::const_iterator& rhs) const throw () {
01105 return (it == rhs.it);
01106 }
01107
01108
01109 inline ibis::bitvector::const_iterator&
01110 ibis::bitvector::const_iterator::operator++() {
01111 #if defined(DEBUG) && DEBUG + 0 > 1
01112 if (it==0 || end==0 || it>end)
01113 throw "bitvector::const_iterator not formed correctly.";
01114 #endif
01115 if (ind+1<nbits) {++ind;}
01116 else {++ it; decodeWord();}
01117 return *this;
01118 }
01119
01120
01121 inline ibis::bitvector::const_iterator&
01122 ibis::bitvector::const_iterator::operator--() {
01123 #if defined(DEBUG) && DEBUG + 0 > 1
01124 if (it==0 || end==0 || it>end)
01125 throw "bitvector::const_iterator not formed correctly.";
01126 #endif
01127 if (ind) -- ind;
01128 else {
01129 if (it <= end) -- it;
01130 else if (active->nbits) it = end;
01131 else it = end - 1;
01132 decodeWord();
01133 if (nbits) ind = nbits - 1;
01134 }
01135 return *this;
01136 }
01137
01138 inline ibis::bitvector::indexSet ibis::bitvector::firstIndexSet() const {
01139 indexSet is;
01140 if (m_vec.end() > m_vec.begin()) {
01141 is.it = m_vec.begin() - 1;
01142 is.end = m_vec.end();
01143 }
01144 else {
01145 is.it = 0;
01146 is.end = 0;
01147 }
01148 is.active = &active;
01149 is.ind[0] = static_cast<word_t>(-1);
01150 is.nind = 0;
01151 ++is;
01152 return is;
01153 }
01154
01158 inline double ibis::bitvector::randomSize(word_t nb, word_t nc) {
01159 double sz = 0.0;
01160 if (nb > 0 && nb >= nc && nb > MAXBITS) {
01161 const double den = static_cast<double>(nc) /
01162 static_cast<double>(nb);
01163 const word_t nw = nb / MAXBITS;
01164 sz = 3.0 + nw - nw * (pow(1.0-den, static_cast<int>(2*MAXBITS))
01165 + pow(den, static_cast<int>(2*MAXBITS)));
01166 }
01167 return sz*sizeof(word_t);
01168 }
01169
01174 inline double ibis::bitvector::markovSize(word_t nb, word_t nc, double f) {
01175 double sz = 0.0;
01176 if (nb > 0 && nb >= nc && nb > MAXBITS) {
01177 const double den = static_cast<double>(nc) /
01178 static_cast<double>(nb);
01179 const word_t nw = nb / MAXBITS;
01180 if ((den <= 0.5 && f > 1.00001) || (den > 0.5 && (1.0-den)*f > den))
01181 sz = ((1.0-den) * pow(1.0-den/((1.0-den)*f),
01182 static_cast<int>(2*MAXBITS-3)) +
01183 den * pow(1.0-1.0/f, static_cast<int>(2*MAXBITS-3)));
01184 else
01185 sz = (pow(1.0-den, static_cast<int>(2*MAXBITS)) +
01186 pow(den, static_cast<int>(2*MAXBITS)));
01187 sz = 3.0 + nw * (1.0 - sz);
01188 }
01189 return sz*sizeof(word_t);
01190 }
01191
01193 inline void ibis::bitvector::turnOnRawBit(const word_t ind) {
01194 if (ind < nbits) {
01195 m_vec[ind / MAXBITS] |= (1 << (SECONDBIT - (ind % MAXBITS)));
01196 nset = 0;
01197 }
01198 else {
01199 active.val |= (1 << (active.nbits - (ind - nbits) - 1));
01200 #if defined(DEBUG)
01201 if (ind >= nbits + active.nbits ||
01202 active.val >= (1U << active.nbits)) {
01203 LOGGER(ibis::gVerbose >= 0)
01204 << "bitvector::turnOnRawBit(" << ind
01205 << ") found a bad active word";
01206 }
01207 #endif
01208 }
01209 }
01210
01211 std::ostream& operator<<(std::ostream&, const ibis::bitvector&);
01212 #endif // __BITVECTOR_H