00001
00002
00003
00004 #ifndef IBIS_TABLE_H
00005 #define IBIS_TABLE_H
00006
00016 #include <iostream>
00017 #include <vector>
00018 #include <map>
00019 #include <string>
00020 #include "const.h"
00021
00022 namespace ibis {
00023
00025 enum TYPE_T {
00027 UNKNOWN_TYPE=0,
00029 OID,
00030 BYTE,
00031 UBYTE,
00032 SHORT,
00033 USHORT,
00034 INT,
00035 UINT,
00036 LONG,
00037 ULONG,
00038 FLOAT,
00039 DOUBLE,
00040
00041
00042 CATEGORY,
00046 TEXT,
00051 BLOB,
00053 UDT
00054 };
00056 FASTBIT_CXX_DLLSPEC extern const char** TYPESTRING;
00058 FASTBIT_CXX_DLLSPEC extern const char* TYPECODE;
00059
00060 class table;
00061 class tablex;
00062 class tableList;
00063 }
00064
00074 class FASTBIT_CXX_DLLSPEC ibis::table {
00075 public:
00079 static ibis::table* create(ibis::part&);
00083 static ibis::table* create(const ibis::partList&);
00087 static ibis::table* create(const char* dir);
00093 static ibis::table* create(const char* dir1, const char* dir2);
00094
00096 virtual ~table() {};
00097
00100 virtual const char* name() const {return name_.c_str();}
00102 virtual const char* description() const {return desc_.c_str();}
00104 virtual uint64_t nRows() const =0;
00106 virtual uint32_t nColumns() const =0;
00107
00111 typedef ibis::array_t<const char*> stringList;
00113 typedef ibis::array_t<ibis::TYPE_T> typeList;
00117 typedef ibis::array_t<void *> bufferList;
00119 typedef std::map<const char*, ibis::TYPE_T, ibis::lessi> namesTypes;
00120
00121 virtual stringList columnNames() const =0;
00122 virtual typeList columnTypes() const =0;
00123
00125 virtual void describe(std::ostream&) const =0;
00127 virtual void dumpNames(std::ostream& out, const char* del=", ") const =0;
00131 virtual int dump(std::ostream& out, const char* del=", ") const =0;
00133 virtual int dump(std::ostream& out, uint64_t nr,
00134 const char* del=", ") const =0;
00137 virtual int dump(std::ostream& out, uint64_t offset, uint64_t nr,
00138 const char* del=", ") const =0;
00142 virtual int backup(const char* dir, const char* tname=0,
00143 const char* tdesc=0) const =0;
00144
00147 virtual void estimate(const char* cond,
00148 uint64_t& nmin, uint64_t& nmax) const =0;
00151 virtual void estimate(const ibis::qExpr* cond,
00152 uint64_t& nmin, uint64_t& nmax) const =0;
00155 virtual table* select(const char* sel, const char* cond) const =0;
00158 virtual table* select(const char* sel, const ibis::qExpr* cond) const;
00159
00161 static table* select(const ibis::constPartList& parts,
00162 const char* sel, const char* cond);
00164 static table* select(const ibis::constPartList& parts,
00165 const char* sel, const ibis::qExpr* cond);
00167 static int64_t computeHits(const ibis::constPartList& parts,
00168 const char* cond);
00170 static int64_t computeHits(const ibis::constPartList& parts,
00171 const ibis::qExpr* cond);
00172
00179 virtual table* groupby(const stringList&) const =0;
00182 virtual table* groupby(const char*) const;
00190 virtual void orderby(const stringList&)=0;
00191 virtual void orderby(const stringList&, const std::vector<bool>&)=0;
00193 virtual void orderby(const char*);
00195 virtual void reverseRows()=0;
00196
00209 virtual int addPartition(const char*) {return -1;}
00211 virtual int getPartitions(ibis::constPartList&) const {
00212 return -1;}
00213
00228 virtual int buildIndex(const char* colname, const char* option=0) =0;
00233 virtual int buildIndexes(const char* options=0) =0;
00236 virtual const char* indexSpec(const char* colname=0) const =0;
00239 virtual void indexSpec(const char* opt, const char* colname=0) =0;
00251 virtual int combineCategories(const stringList&) {return 0;}
00253
00271 virtual int64_t
00272 getColumnAsBytes(const char* cname, char* vals,
00273 uint64_t begin=0, uint64_t end=0) const =0;
00274 virtual int64_t
00275 getColumnAsUBytes(const char* cname, unsigned char* vals,
00276 uint64_t begin=0, uint64_t end=0) const =0;
00277 virtual int64_t
00278 getColumnAsShorts(const char* cname, int16_t* vals,
00279 uint64_t begin=0, uint64_t end=0) const =0;
00280 virtual int64_t
00281 getColumnAsUShorts(const char* cname, uint16_t* vals,
00282 uint64_t begin=0, uint64_t end=0) const =0;
00283 virtual int64_t
00284 getColumnAsInts(const char* cname, int32_t* vals,
00285 uint64_t begin=0, uint64_t end=0) const =0;
00286 virtual int64_t
00287 getColumnAsUInts(const char* cname, uint32_t* vals,
00288 uint64_t begin=0, uint64_t end=0) const =0;
00289 virtual int64_t
00290 getColumnAsLongs(const char* cname, int64_t* vals,
00291 uint64_t begin=0, uint64_t end=0) const =0;
00292 virtual int64_t
00293 getColumnAsULongs(const char* cname, uint64_t* vals,
00294 uint64_t begin=0, uint64_t end=0) const =0;
00295 virtual int64_t
00296 getColumnAsFloats(const char* cname, float* vals,
00297 uint64_t begin=0, uint64_t end=0) const =0;
00298 virtual int64_t
00299 getColumnAsDoubles(const char* cname, double* vals,
00300 uint64_t begin=0, uint64_t end=0) const =0;
00301 virtual int64_t
00302 getColumnAsDoubles(const char* cname, std::vector<double>& vals,
00303 uint64_t begin=0, uint64_t end=0) const =0;
00307 virtual int64_t
00308 getColumnAsStrings(const char* cname, std::vector<std::string>& vals,
00309 uint64_t begin=0, uint64_t end=0) const =0;
00310
00316 virtual double getColumnMin(const char* cname) const =0;
00322 virtual double getColumnMax(const char* cname) const =0;
00324
00338 virtual long getHistogram(const char* constraints,
00339 const char* cname,
00340 double begin, double end, double stride,
00341 std::vector<uint32_t>& counts) const =0;
00348 virtual long getHistogram2D(const char* constraints,
00349 const char* cname1,
00350 double begin1, double end1, double stride1,
00351 const char* cname2,
00352 double begin2, double end2, double stride2,
00353 std::vector<uint32_t>& counts) const =0;
00360 virtual long getHistogram3D(const char* constraints,
00361 const char* cname1,
00362 double begin1, double end1, double stride1,
00363 const char* cname2,
00364 double begin2, double end2, double stride2,
00365 const char* cname3,
00366 double begin3, double end3, double stride3,
00367 std::vector<uint32_t>& counts) const =0;
00369
00371 struct row {
00372 std::vector<std::string> bytesnames;
00373 std::vector<signed char> bytesvalues;
00374 std::vector<std::string> ubytesnames;
00375 std::vector<unsigned char> ubytesvalues;
00376 std::vector<std::string> shortsnames;
00377 std::vector<int16_t> shortsvalues;
00378 std::vector<std::string> ushortsnames;
00379 std::vector<uint16_t> ushortsvalues;
00380 std::vector<std::string> intsnames;
00381 std::vector<int32_t> intsvalues;
00382 std::vector<std::string> uintsnames;
00383 std::vector<uint32_t> uintsvalues;
00384 std::vector<std::string> longsnames;
00385 std::vector<int64_t> longsvalues;
00386 std::vector<std::string> ulongsnames;
00387 std::vector<uint64_t> ulongsvalues;
00388 std::vector<std::string> floatsnames;
00389 std::vector<float> floatsvalues;
00390 std::vector<std::string> doublesnames;
00391 std::vector<double> doublesvalues;
00392 std::vector<std::string> catsnames;
00393 std::vector<std::string> catsvalues;
00394 std::vector<std::string> textsnames;
00395 std::vector<std::string> textsvalues;
00396 std::vector<std::string> blobsnames;
00397 std::vector<std::string> blobsvalues;
00398
00400 void clear();
00402 void clearValues();
00404 uint32_t nColumns() const {
00405 return bytesvalues.size() + ubytesvalues.size() +
00406 shortsvalues.size() + ushortsvalues.size() +
00407 intsvalues.size() + uintsvalues.size() +
00408 longsvalues.size() + ulongsvalues.size() +
00409 floatsvalues.size() + doublesvalues.size() +
00410 catsvalues.size() + textsvalues.size() + blobsvalues.size();}
00411 };
00412
00413
00414 class cursor;
00416 virtual cursor* createCursor() const =0;
00417
00418 static void parseNames(char* in, stringList& out);
00419 static void parseOrderby(char* in, stringList& out,
00420 std::vector<bool>& direc);
00421
00422 static void* allocateBuffer(ibis::TYPE_T, size_t);
00423 static void freeBuffer(void* buffer, ibis::TYPE_T type);
00424 static void freeBuffers(bufferList&, typeList&);
00425
00426 protected:
00427
00428 std::string name_;
00429 std::string desc_;
00430
00432 table() {};
00434 table(const char* na, const char* de)
00435 : name_(na?na:""), desc_(de?de:na?na:"") {};
00436
00437 private:
00438
00439 table(const table&);
00440 table& operator=(const table&);
00441 };
00442
00455 class FASTBIT_CXX_DLLSPEC ibis::tablex {
00456 public:
00458 static ibis::tablex* create();
00459
00460
00461
00462 virtual ~tablex() {};
00463
00465 virtual int addColumn(const char* cname, ibis::TYPE_T ctype,
00466 const char* cdesc=0, const char* idx=0) =0;
00467
00488 virtual int append(const char* cname, uint64_t begin, uint64_t end,
00489 void* values) =0;
00490
00520 virtual int appendRow(const ibis::table::row&) =0;
00526 virtual int appendRow(const char* line, const char* delimiters=0) = 0;
00534 virtual int appendRows(const std::vector<ibis::table::row>&) =0;
00535
00558 virtual int readCSV(const char* inputfile, int maxrows=0,
00559 const char* outputdir=0, const char* delimiters=0) =0;
00573 virtual int readSQLDump(const char* inputfile, std::string& tname,
00574 int maxrows=0, const char* outputdir=0) =0;
00575
00577 virtual int readNamesAndTypes(const char* filename);
00579 virtual int parseNamesAndTypes(const char* txt);
00580
00618 virtual int write(const char* dir, const char* tname=0,
00619 const char* tdesc=0, const char* idx=0,
00620 const char* nvpairs=0) const =0;
00634 virtual int writeMetaData(const char* dir, const char* tname=0,
00635 const char* tdesc=0, const char* idx=0,
00636 const char* nvpairs=0) const =0;
00637
00641 virtual void clearData() =0;
00653 virtual int32_t reserveSpace(uint32_t) {return 0;}
00663 virtual uint32_t capacity() const {return 0;}
00664
00666 virtual uint32_t mRows() const =0;
00668 virtual uint32_t mColumns() const =0;
00670 virtual void describe(std::ostream&) const =0;
00671
00677 virtual table* toTable(const char* nm=0, const char* de=0) =0;
00678
00679 protected:
00680 tablex() {};
00681
00682 private:
00683 tablex(const tablex&);
00684 tablex& operator=(const tablex&);
00685 };
00686
00690 class FASTBIT_CXX_DLLSPEC ibis::tableList {
00691 public:
00692 typedef std::map< const char*, ibis::table*, ibis::lessi > tableSet;
00693 typedef tableSet::const_iterator iterator;
00694
00697 bool empty() const {return tables.empty();}
00699 uint32_t size() const {return tables.size();}
00701 iterator begin() const {return tables.begin();}
00705 iterator end() const {return tables.end();}
00706
00709 const ibis::table* operator[](const char* tname) const {
00710 tableSet::const_iterator it = tables.find(tname);
00711 if (it != tables.end())
00712 return (*it).second;
00713 else
00714 return 0;
00715 }
00716
00722 void add(ibis::table*& tb) {
00723 tableSet::iterator it = tables.find(tb->name());
00724 if (it == tables.end()) {
00725 tables[tb->name()] = tb;
00726 tb=0;
00727 }
00728 else {
00729 ibis::table* tmp = (*it).second;
00730 tables[tb->name()] = tb;
00731 tb = tmp;
00732 }
00733 }
00734
00738 void remove(const char* tname) {
00739 tableSet::iterator it = tables.find(tname);
00740 if (it != tables.end()) {
00741 ibis::table* tmp = (*it).second;
00742 tables.erase(it);
00743 delete tmp;
00744 }
00745 }
00746
00748 tableList() {};
00749
00751 ~tableList() {
00752 while (! tables.empty()) {
00753 tableSet::iterator it = tables.begin();
00754 ibis::table* tmp = (*it).second;
00755 tables.erase(it);
00756 delete tmp;
00757 }
00758 }
00759
00760 private:
00762 tableSet tables;
00763
00764
00765 tableList(const tableList&);
00766 tableList& operator=(const tableList&);
00767 };
00768
00774 class FASTBIT_CXX_DLLSPEC ibis::table::cursor {
00775 public:
00776 virtual ~cursor() {};
00777 virtual uint64_t nRows() const =0;
00778 virtual uint32_t nColumns() const =0;
00779 virtual ibis::table::typeList columnTypes() const =0;
00780 virtual ibis::table::stringList columnNames() const =0;
00783 virtual int fetch() =0;
00787 virtual int fetch(uint64_t rownum) =0;
00792 virtual uint64_t getCurrentRowNumber() const =0;
00793
00796 virtual int fetch(ibis::table::row&) =0;
00799 virtual int fetch(uint64_t rownum, ibis::table::row&) =0;
00800
00802 virtual int dump(std::ostream& out, const char* del=", ") const =0;
00803
00807 virtual int getColumnAsByte(const char* cname, char&) const =0;
00808 virtual int getColumnAsUByte(const char* cname, unsigned char&) const =0;
00809 virtual int getColumnAsShort(const char* cname, int16_t&) const =0;
00810 virtual int getColumnAsUShort(const char* cname, uint16_t&) const =0;
00811 virtual int getColumnAsInt(const char* cname, int32_t&) const =0;
00812 virtual int getColumnAsUInt(const char* cname, uint32_t&) const =0;
00813 virtual int getColumnAsLong(const char* cname, int64_t&) const =0;
00814 virtual int getColumnAsULong(const char* cname, uint64_t&) const =0;
00815 virtual int getColumnAsFloat(const char* cname, float&) const =0;
00816 virtual int getColumnAsDouble(const char* cname, double&) const =0;
00817 virtual int getColumnAsString(const char* cname, std::string&) const =0;
00818
00824 virtual int getColumnAsByte(uint32_t cnum, char& val) const =0;
00825 virtual int getColumnAsUByte(uint32_t cnum, unsigned char& val) const =0;
00826 virtual int getColumnAsShort(uint32_t cnum, int16_t& val) const =0;
00827 virtual int getColumnAsUShort(uint32_t cnum, uint16_t& val) const =0;
00828 virtual int getColumnAsInt(uint32_t cnum, int32_t& val) const =0;
00829 virtual int getColumnAsUInt(uint32_t cnum, uint32_t& val) const =0;
00830 virtual int getColumnAsLong(uint32_t cnum, int64_t& val) const =0;
00831 virtual int getColumnAsULong(uint32_t cnum, uint64_t& val) const =0;
00832 virtual int getColumnAsFloat(uint32_t cnum, float& val) const =0;
00833 virtual int getColumnAsDouble(uint32_t cnum, double& val) const =0;
00834 virtual int getColumnAsString(uint32_t cnum, std::string& val) const =0;
00835
00836 protected:
00837 cursor() {};
00838 cursor(const cursor&);
00839 cursor& operator=(const cursor&) ;
00840 };
00841
00842 inline void ibis::table::row::clear() {
00843 bytesnames.clear();
00844 bytesvalues.clear();
00845 ubytesnames.clear();
00846 ubytesvalues.clear();
00847 shortsnames.clear();
00848 shortsvalues.clear();
00849 ushortsnames.clear();
00850 ushortsvalues.clear();
00851 intsnames.clear();
00852 intsvalues.clear();
00853 uintsnames.clear();
00854 uintsvalues.clear();
00855 longsnames.clear();
00856 longsvalues.clear();
00857 ulongsnames.clear();
00858 ulongsvalues.clear();
00859 floatsnames.clear();
00860 floatsvalues.clear();
00861 doublesnames.clear();
00862 doublesvalues.clear();
00863 catsnames.clear();
00864 catsvalues.clear();
00865 textsnames.clear();
00866 textsvalues.clear();
00867 blobsnames.clear();
00868 blobsvalues.clear();
00869 }
00870
00871 inline void ibis::table::row::clearValues() {
00872 bytesvalues.clear();
00873 ubytesvalues.clear();
00874 shortsvalues.clear();
00875 ushortsvalues.clear();
00876 intsvalues.clear();
00877 uintsvalues.clear();
00878 longsvalues.clear();
00879 ulongsvalues.clear();
00880 floatsvalues.clear();
00881 doublesvalues.clear();
00882 catsvalues.clear();
00883 textsvalues.clear();
00884 blobsvalues.clear();
00885 }
00886 #endif // IBIS_TABLE_H