00001
00002
00003
00004 #ifndef IBIS_COLUMN_H
00005 #define IBIS_COLUMN_H
00006
00007
00008
00009
00010
00011
00012
00013 #include "table.h"
00014 #include "qExpr.h"
00015 #include "bitvector.h"
00016 #include <string>
00017
// Forward declarations of classes in namespace ibis that are referenced
// by name before their full definitions are available.
namespace ibis {
    // Names suggest string/binary-valued column kinds -- their definitions
    // are not part of this header.
    class category;
    class text;
    class blob;

    // One class per element type; the names suggest in-memory columns of
    // the corresponding element type (definitions live elsewhere).
    class colBytes;
    class colUBytes;
    class colShorts;
    class colUShorts;
    class colInts;
    class colUInts;
    class colLongs;
    class colULongs;
    class colFloats;
    class colDoubles;
    class colStrings;
} // namespace ibis
00038
00049 class FASTBIT_CXX_DLLSPEC ibis::column {
00050 public:
00051
00052 virtual ~column();
00054 column(const part* tbl, FILE* file);
00056 column(const part* tbl, ibis::TYPE_T t, const char* name,
00057 const char* desc="", double low=DBL_MAX, double high=-DBL_MAX);
00058 column(const column& rhs);
00059
00062 ibis::TYPE_T type() const {return m_type;}
00064 const char* name() const {return m_name.c_str();}
00066 void name(const char* nm) {m_name = nm;}
00068 const char* description() const {return m_desc.c_str();}
00070 const double& lowerBound() const {return lower;}
00072 const double& upperBound() const {return upper;}
00073
00074 int elementSize() const;
00075 bool isFloat() const;
00076 bool isInteger() const;
00077 bool isSignedInteger() const;
00078 bool isUnsignedInteger() const;
00079 bool isNumeric() const;
00080 bool isSorted() const {return m_sorted;}
00081 void description(const char* d) {m_desc = d;}
00082 void lowerBound(double d) {lower = d;}
00083 void upperBound(double d) {upper = d;}
00084 const part* partition() const {return thePart;}
00085 void isSorted(bool);
00086
00087
00088 const char* indexSpec() const;
00089 uint32_t numBins() const;
00090
00091 void indexSpec(const char* spec) {m_bins=spec;}
00093 void preferredBounds(std::vector<double>&) const;
00095 void binWeights(std::vector<uint32_t>&) const;
00096
00097 virtual void computeMinMax();
00098 virtual void computeMinMax(const char *dir);
00099 virtual void computeMinMax(const char *dir,
00100 double& min, double &max) const;
00101
00102 virtual void loadIndex(const char* iopt=0, int ropt=0) const throw ();
00103 virtual void unloadIndex() const;
00104 virtual long indexSize() const;
00105
00106 uint32_t indexedRows() const;
00107 void indexSpeedTest() const;
00108 void purgeIndexFile(const char *dir=0) const;
00109
00110 const char* dataFileName(std::string& fname, const char *dir=0) const;
00111 const char* nullMaskName(std::string& fname) const;
00112 void getNullMask(bitvector& mask) const;
00113 int setNullMask(const bitvector&);
00114
00117 virtual void getString(uint32_t, std::string &) const {};
00121 virtual const char* findString(const char*) const
00122 {return static_cast<const char*>(0);}
00123
00124 array_t<int32_t>* getIntArray() const;
00125 array_t<float>* getFloatArray() const;
00126 array_t<double>* getDoubleArray() const;
00127 virtual int getValuesArray(void* vals) const;
00128 virtual ibis::fileManager::storage* getRawData() const;
00129
00130 virtual array_t<signed char>* selectBytes(const bitvector& mask) const;
00131 virtual array_t<unsigned char>* selectUBytes(const bitvector& mask) const;
00132 virtual array_t<int16_t>* selectShorts(const bitvector& mask) const;
00133 virtual array_t<uint16_t>* selectUShorts(const bitvector& mask) const;
00134 virtual array_t<int32_t>* selectInts(const bitvector& mask) const;
00135 virtual array_t<uint32_t>* selectUInts(const bitvector& mask) const;
00136 virtual array_t<int64_t>* selectLongs(const bitvector& mask) const;
00137 virtual array_t<uint64_t>* selectULongs(const bitvector& mask) const;
00138 virtual array_t<float>* selectFloats(const bitvector& mask) const;
00139 virtual array_t<double>* selectDoubles(const bitvector& mask) const;
00140 virtual std::vector<std::string>*
00141 selectStrings(const bitvector& mask) const;
00142
00143 long selectValues(const bitvector&, void*) const;
00144 long selectValues(const bitvector&, void*, array_t<uint32_t>&) const;
00145 long selectValues(const ibis::qContinuousRange&, void*) const;
00146
00148 virtual void write(FILE* file) const;
00150 virtual void print(std::ostream& out) const;
00152 void logMessage(const char* event, const char* fmt, ...) const;
00154 void logWarning(const char* event, const char* fmt, ...) const;
00155
00158 int expandRange(ibis::qContinuousRange& rng) const;
00161 int contractRange(ibis::qContinuousRange& rng) const;
00162
00163 virtual long evaluateRange(const ibis::qContinuousRange& cmp,
00164 const ibis::bitvector& mask,
00165 ibis::bitvector& res) const;
00167 virtual long evaluateRange(const ibis::qDiscreteRange& cmp,
00168 const ibis::bitvector& mask,
00169 ibis::bitvector& res) const;
00171 virtual long evaluateRange(const ibis::qIntHod& cmp,
00172 const ibis::bitvector& mask,
00173 ibis::bitvector& res) const;
00175 virtual long evaluateRange(const ibis::qUIntHod& cmp,
00176 const ibis::bitvector& mask,
00177 ibis::bitvector& res) const;
00178
00179 virtual long stringSearch(const char*, ibis::bitvector&) const;
00180 virtual long stringSearch(const std::vector<std::string>&,
00181 ibis::bitvector&) const;
00182 virtual long stringSearch(const char*) const;
00183 virtual long stringSearch(const std::vector<std::string>&) const;
00184 virtual long keywordSearch(const char*, ibis::bitvector&) const;
00185 virtual long keywordSearch(const char*) const;
00186 virtual long patternSearch(const char*) const;
00187 virtual long patternSearch(const char*, ibis::bitvector &) const;
00188
00189 virtual long evaluateAndSelect(const ibis::qContinuousRange&,
00190 const ibis::bitvector&, void*,
00191 ibis::bitvector&) const;
00192
00202 virtual long estimateRange(const ibis::qContinuousRange& cmp,
00203 ibis::bitvector& low,
00204 ibis::bitvector& high) const;
00206 virtual long estimateRange(const ibis::qDiscreteRange& cmp,
00207 ibis::bitvector& low,
00208 ibis::bitvector& high) const;
00210 virtual long estimateRange(const ibis::qIntHod& cmp,
00211 ibis::bitvector& low,
00212 ibis::bitvector& high) const;
00214 virtual long estimateRange(const ibis::qUIntHod& cmp,
00215 ibis::bitvector& low,
00216 ibis::bitvector& high) const;
00217
00218 virtual long estimateRange(const ibis::qContinuousRange& cmp) const;
00219 virtual long estimateRange(const ibis::qDiscreteRange& cmp) const;
00221 virtual long estimateRange(const ibis::qIntHod& cmp) const;
00223 virtual long estimateRange(const ibis::qUIntHod& cmp) const;
00224
00226 virtual double estimateCost(const ibis::qContinuousRange& cmp) const;
00228 virtual double estimateCost(const ibis::qDiscreteRange& cmp) const;
00230 virtual double estimateCost(const ibis::qIntHod& cmp) const;
00232 virtual double estimateCost(const ibis::qUIntHod& cmp) const;
00234 virtual double estimateCost(const ibis::qString&) const {
00235 return 0;}
00237 virtual double estimateCost(const ibis::qMultiString&) const {
00238 return 0;}
00239
00240 virtual float getUndecidable(const ibis::qContinuousRange& cmp,
00241 ibis::bitvector& iffy) const;
00243 virtual float getUndecidable(const ibis::qDiscreteRange& cmp,
00244 ibis::bitvector& iffy) const;
00246 virtual float getUndecidable(const ibis::qIntHod& cmp,
00247 ibis::bitvector& iffy) const;
00249 virtual float getUndecidable(const ibis::qUIntHod& cmp,
00250 ibis::bitvector& iffy) const;
00251
00253 virtual long append(const char* dt, const char* df, const uint32_t nold,
00254 const uint32_t nnew, uint32_t nbuf, char* buf);
00255
00256 virtual long append(const void* vals, const ibis::bitvector& msk);
00257 virtual long writeData(const char* dir, uint32_t nold, uint32_t nnew,
00258 ibis::bitvector& mask, const void *va1,
00259 void *va2=0);
00260 template <typename T>
00261 long castAndWrite(const array_t<double>& vals, ibis::bitvector& mask,
00262 const T special);
00263 virtual long saveSelected(const ibis::bitvector& sel, const char *dest,
00264 char *buf, uint32_t nbuf);
00265 virtual long truncateData(const char* dir, uint32_t nent,
00266 ibis::bitvector& mask) const;
00267
00273 virtual double getActualMin() const;
00276 virtual double getActualMax() const;
00278 virtual double getSum() const;
00285 long getCumulativeDistribution(std::vector<double>& bounds,
00286 std::vector<uint32_t>& counts) const;
00298 long getDistribution(std::vector<double>& bbs,
00299 std::vector<uint32_t>& counts) const;
00301 class info;
00302 class indexLock;
00303 class mutexLock;
00304
00305 protected:
00306
00307 const part* thePart;
00308 ibis::bitvector mask_;
00309 ibis::TYPE_T m_type;
00310 std::string m_name;
00311 std::string m_desc;
00312 std::string m_bins;
00313 bool m_sorted;
00314 double lower;
00315 double upper;
00316
00317 mutable ibis::index* idx;
00319 mutable ibis::util::sharedInt32 idxcnt;
00320
00322 void logError(const char* event, const char* fmt, ...) const;
00325 long string2int(int fptr, dictionary& dic, uint32_t nbuf, char* buf,
00326 array_t<uint32_t>& out) const;
00328 double computeMin() const;
00330 double computeMax() const;
00332 double computeSum() const;
00335 void actualMinMax(const char *fname, const ibis::bitvector& mask,
00336 double &min, double &max) const;
00338 template <typename T>
00339 void actualMinMax(const array_t<T>& vals, const ibis::bitvector& mask,
00340 double& min, double& max) const;
00342 template <typename T>
00343 T computeMin(const array_t<T>& vals,
00344 const ibis::bitvector& mask) const;
00346 template <typename T>
00347 T computeMax(const array_t<T>& vals,
00348 const ibis::bitvector& mask) const;
00350 template <typename T>
00351 double computeSum(const array_t<T>& vals,
00352 const ibis::bitvector& mask) const;
00353
00355 virtual int searchSorted(const ibis::qContinuousRange&,
00356 ibis::bitvector&) const;
00358 virtual int searchSorted(const ibis::qDiscreteRange&,
00359 ibis::bitvector&) const;
00361 virtual int searchSorted(const ibis::qIntHod&,
00362 ibis::bitvector&) const;
00364 virtual int searchSorted(const ibis::qUIntHod&,
00365 ibis::bitvector&) const;
00367 template <typename T> int
00368 searchSortedICC(const array_t<T>& vals,
00369 const ibis::qContinuousRange& rng,
00370 ibis::bitvector& hits) const;
00372 template <typename T> int
00373 searchSortedICD(const array_t<T>& vals,
00374 const ibis::qDiscreteRange& rng,
00375 ibis::bitvector& hits) const;
00377 template <typename T> int
00378 searchSortedICD(const array_t<T>& vals,
00379 const ibis::qIntHod& rng,
00380 ibis::bitvector& hits) const;
00382 template <typename T> int
00383 searchSortedICD(const array_t<T>& vals,
00384 const ibis::qUIntHod& rng,
00385 ibis::bitvector& hits) const;
00387 template <typename T> int
00388 searchSortedOOCC(const char* fname,
00389 const ibis::qContinuousRange& rng,
00390 ibis::bitvector& hits) const;
00392 template <typename T> int
00393 searchSortedOOCD(const char* fname,
00394 const ibis::qDiscreteRange& rng,
00395 ibis::bitvector& hits) const;
00397 template <typename T> int
00398 searchSortedOOCD(const char* fname,
00399 const ibis::qIntHod& rng,
00400 ibis::bitvector& hits) const;
00402 template <typename T> int
00403 searchSortedOOCD(const char* fname,
00404 const ibis::qUIntHod& rng,
00405 ibis::bitvector& hits) const;
00406
00408 template <typename T> uint32_t
00409 findLower(int fdes, const uint32_t nr, const T tgt) const;
00411 template <typename T> uint32_t
00412 findUpper(int fdes, const uint32_t nr, const T tgt) const;
00413
00414 template <typename T>
00415 long selectValuesT(const char*, const bitvector&, array_t<T>&) const;
00416 template <typename T>
00417 long selectValuesT(const char*, const bitvector& mask,
00418 array_t<T>& vals, array_t<uint32_t>& inds) const;
00419 template <typename T>
00420 long selectToStrings(const char*, const bitvector&,
00421 std::vector<std::string>&) const;
00422
00424 template <typename T>
00425 long appendValues(const array_t<T>&, const ibis::bitvector&);
00427 long appendStrings(const std::vector<std::string>&, const ibis::bitvector&);
00428
00429 class readLock;
00430 class writeLock;
00431 class softWriteLock;
00432 friend class readLock;
00433 friend class writeLock;
00434 friend class indexLock;
00435 friend class mutexLock;
00436 friend class softWriteLock;
00437
00438 private:
00441 mutable pthread_rwlock_t rwlock;
00443 mutable pthread_mutex_t mutex;
00444
00445 column& operator=(const column&);
00446 };
00447
00450 class FASTBIT_CXX_DLLSPEC ibis::column::info {
00451 public:
00452 const char* name;
00453 const char* description;
00454 const double expectedMin;
00455 const double expectedMax;
00456 const ibis::TYPE_T type;
00457 info(const ibis::column& col);
00458 info(const info& rhs)
00459 : name(rhs.name), description(rhs.description),
00460 expectedMin(rhs.expectedMin),
00461 expectedMax(rhs.expectedMax),
00462 type(rhs.type) {};
00463
00464 private:
00465 info();
00466 info& operator=(const info&);
00467 };
00468
00472 class ibis::column::indexLock {
00473 public:
00474 ~indexLock();
00475 indexLock(const ibis::column* col, const char* m);
00476 const ibis::index* getIndex() const {return theColumn->idx;};
00477
00478 private:
00479 const ibis::column* theColumn;
00480 const char* mesg;
00481
00482 indexLock();
00483 indexLock(const indexLock&);
00484 indexLock& operator=(const indexLock&);
00485 };
00486
00488 class ibis::column::mutexLock {
00489 public:
00490 mutexLock(const ibis::column* col, const char* m)
00491 : theColumn(col), mesg(m) {
00492 if (ibis::gVerbose > 9)
00493 col->logMessage("gainExclusiveAccess",
00494 "pthread_mutex_lock for %s", m);
00495 int ierr = pthread_mutex_lock(&(col->mutex));
00496 if (0 != ierr)
00497 col->logWarning("gainExclusiveAccess", "pthread_mutex_lock for %s "
00498 "returned %d (%s)", m, ierr, strerror(ierr));
00499 }
00500 ~mutexLock() {
00501 if (ibis::gVerbose > 9)
00502 theColumn->logMessage("releaseExclusiveAccess",
00503 "pthread_mutex_unlock for %s", mesg);
00504 int ierr = pthread_mutex_unlock(&(theColumn->mutex));
00505 if (0 != ierr)
00506 theColumn->logWarning("releaseExclusiveAccess",
00507 "pthread_mutex_unlock for %s returned %d "
00508 "(%s)", mesg, ierr, strerror(ierr));
00509 }
00510
00511 private:
00512 const ibis::column* theColumn;
00513 const char* mesg;
00514
00515 mutexLock() {};
00516 mutexLock(const mutexLock&) {};
00517 mutexLock& operator=(const mutexLock&);
00518 };
00519
00521 class ibis::column::writeLock {
00522 public:
00523 writeLock(const ibis::column* col, const char* m);
00524 ~writeLock();
00525
00526 private:
00527 const ibis::column* theColumn;
00528 const char* mesg;
00529
00530 writeLock();
00531 writeLock(const writeLock&);
00532 writeLock& operator=(const writeLock&);
00533 };
00534
00536 class ibis::column::softWriteLock {
00537 public:
00538 softWriteLock(const ibis::column* col, const char* m);
00539 ~softWriteLock();
00540 bool isLocked() const {return(locked==0);}
00541
00542 private:
00543 const ibis::column* theColumn;
00544 const char* mesg;
00545 const int locked;
00546
00547 softWriteLock();
00548 softWriteLock(const softWriteLock&);
00549 softWriteLock& operator=(const softWriteLock&);
00550 };
00551
00553 class ibis::column::readLock {
00554 public:
00555 readLock(const ibis::column* col, const char* m);
00556 ~readLock();
00557
00558 private:
00559 const ibis::column* theColumn;
00560 const char* mesg;
00561
00562 readLock();
00563 readLock(const readLock&);
00564 readLock& operator=(const readLock&);
00565 };
00566
00568 inline int ibis::column::elementSize() const {
00569 int sz = -1;
00570 switch (m_type) {
00571 case ibis::OID: sz = sizeof(rid_t); break;
00572 case ibis::INT: sz = sizeof(int32_t); break;
00573 case ibis::UINT: sz = sizeof(uint32_t); break;
00574 case ibis::LONG: sz = sizeof(int64_t); break;
00575 case ibis::ULONG: sz = sizeof(uint64_t); break;
00576 case ibis::FLOAT: sz = sizeof(float); break;
00577 case ibis::DOUBLE: sz = sizeof(double); break;
00578 case ibis::BYTE: sz = sizeof(char); break;
00579 case ibis::UBYTE: sz = sizeof(unsigned char); break;
00580 case ibis::SHORT: sz = sizeof(int16_t); break;
00581 case ibis::USHORT: sz = sizeof(uint16_t); break;
00582 case ibis::CATEGORY: sz = 0; break;
00583 case ibis::TEXT: sz = 0; break;
00584 default: sz = -1; break;
00585 }
00586 return sz;
00587 }
00588
00590 inline bool ibis::column::isFloat() const {
00591 return(m_type == ibis::FLOAT || m_type == ibis::DOUBLE);
00592 }
00593
00595 inline bool ibis::column::isInteger() const {
00596 return(m_type == ibis::BYTE || m_type == ibis::UBYTE ||
00597 m_type == ibis::SHORT || m_type == ibis::USHORT ||
00598 m_type == ibis::INT || m_type == ibis::UINT ||
00599 m_type == ibis::LONG || m_type == ibis::ULONG);
00600 }
00601
00603 inline bool ibis::column::isSignedInteger() const {
00604 return(m_type == ibis::BYTE || m_type == ibis::SHORT ||
00605 m_type == ibis::INT || m_type == ibis::LONG);
00606 }
00607
00609 inline bool ibis::column::isUnsignedInteger() const {
00610 return(m_type == ibis::UBYTE || m_type == ibis::USHORT ||
00611 m_type == ibis::UINT || m_type == ibis::ULONG);
00612 }
00613
00615 inline bool ibis::column::isNumeric() const {
00616 return(m_type == ibis::BYTE || m_type == ibis::UBYTE ||
00617 m_type == ibis::SHORT || m_type == ibis::USHORT ||
00618 m_type == ibis::INT || m_type == ibis::UINT ||
00619 m_type == ibis::LONG || m_type == ibis::ULONG ||
00620 m_type == ibis::FLOAT || m_type == ibis::DOUBLE);
00621 }
00622
00623
00624 inline std::ostream& operator<<(std::ostream& out, const ibis::column& prop) {
00625 prop.print(out);
00626 return out;
00627 }
00628
00629 namespace ibis {
00630 template <> long column::selectToStrings<signed char>
00631 (const char*, const bitvector&, std::vector<std::string>&) const;
00632 template <> long column::selectToStrings<unsigned char>
00633 (const char*, const bitvector&, std::vector<std::string>&) const;
00634 }
00635 #endif // IBIS_COLUMN_H