00001
00002
00003
00004 #ifndef IBIS_CATEGORY_H
00005 #define IBIS_CATEGORY_H
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #include "column.h"
00016 #include "dictionary.h"
00017 #include "idirekte.h"
00018
/// Column class ibis::text, publicly derived from ibis::column.
///
/// This declaration provides the search entry points keywordSearch,
/// stringSearch and patternSearch, estimateCost overloads for ibis::qString
/// and ibis::qMultiString, loadIndex, two append overloads, saveSelected,
/// the select* accessors that take a bit vector mask, and a nested
/// tokenizer interface.  Except for the few inline bodies below, the
/// implementations are not visible in this header.
00024 class ibis::text : public ibis::column {
00025 public:
/// Destructor; the inline body calls unloadIndex().
00026 virtual ~text() {unloadIndex();};
/// Constructors.  In the (part, name) form the type argument defaults to
/// ibis::TEXT.
/// NOTE(review): the FILE* form presumably reads the metadata that
/// write(FILE*) emits -- confirm against the implementation.
00027 text(const part* tbl, FILE* file);
00028 text(const part* tbl, const char* name, ibis::TYPE_T t=ibis::TEXT);
00029 text(const ibis::column& col);
00030
/// Search functions.  Each name comes in two flavours: one that takes an
/// output ibis::bitvector and one that does not.
/// NOTE(review): the meaning of the long return value (count, estimate or
/// error code) is not visible in this header -- confirm in the
/// implementation.
00031 virtual long keywordSearch(const char* str, ibis::bitvector& hits) const;
00032 virtual long keywordSearch(const char* str) const;
00033
00034
00035
00036
00037 virtual long stringSearch(const char* str, ibis::bitvector& hits) const;
00038 virtual long stringSearch(const std::vector<std::string>& strs,
00039 ibis::bitvector& hits) const;
00040 virtual long stringSearch(const char* str) const;
00041 virtual long stringSearch(const std::vector<std::string>& strs) const;
00042 virtual long patternSearch(const char*, ibis::bitvector&) const;
00043 virtual long patternSearch(const char*) const;
00044
// The using-declaration keeps the base-class estimateCost overloads
// visible; without it the two overloads declared below would hide them.
00045 using ibis::column::estimateCost;
00046 virtual double estimateCost(const ibis::qString& cmp) const;
00047 virtual double estimateCost(const ibis::qMultiString& cmp) const;
00048
/// Both arguments default to 0.  The empty throw () specification promises
/// that no exception propagates out of this function.
00049 virtual void loadIndex(const char* iopt=0, int ropt=0) const throw ();
00050 virtual long append(const char* dt, const char* df, const uint32_t nold,
00051 const uint32_t nnew, uint32_t nbuf, char* buf);
/// Inline overload: ignores both arguments and always returns -1.
00052 virtual long append(const void*, const ibis::bitvector&) {return -1;}
00053 virtual long saveSelected(const ibis::bitvector& sel, const char *dest,
00054 char *buf, uint32_t nbuf);
/// Selection functions taking a bit vector mask.  Each returns a pointer.
/// NOTE(review): ownership of the returned object is not visible here --
/// presumably the caller must free it; confirm.
00056 virtual array_t<uint32_t>* selectUInts(const bitvector& mask) const;
00058 virtual array_t<int64_t>* selectLongs(const bitvector& mask) const;
00059 virtual
00060 std::vector<std::string>* selectStrings(const bitvector& mask) const;
00061 virtual const char* findString(const char* str) const;
/// Inline: forwards to the protected two-argument readString(i, val).
00062 virtual void getString(uint32_t i, std::string &val) const {
00063 readString(i, val);}
00064
00065 virtual void write(FILE* file) const;
00066 virtual void print(std::ostream& out) const;
00067
00068 const column* IDColumnForKeywordIndex() const;
00069 void TDListForKeywordIndex(std::string&) const;
00070 void delimitersForKeywordIndex(std::string&) const;
00071
/// Abstract function-object interface.  operator() is pure virtual, so a
/// concrete tokenizer must derive from this struct; the virtual destructor
/// allows deletion through a tokenizer pointer.
/// NOTE(review): presumably buf holds the text to split and tkns receives
/// pointers to the resulting tokens -- confirm with an implementation.
00074 struct tokenizer {
00081 virtual int operator()(std::vector<const char*>& tkns, char *buf) = 0;
00083 virtual ~tokenizer() {}
00084 };
00085
00086 protected:
00088 void startPositions(const char *dir, char *buf, uint32_t nbuf) const;
/// Two-argument form used by the inline getString() above.
00090 void readString(uint32_t i, std::string &val) const;
00092 int readString(std::string&, int, long, long, char*, uint32_t,
00093 uint32_t&, off_t&) const;
00094 int writeStrings(const char *to, const char *from,
00095 const char *spto, const char *spfrom,
00096 ibis::bitvector &msk, const ibis::bitvector &sel,
00097 char *buf, uint32_t nbuf) const;
00098
00099 private:
// Copy assignment is declared private.  No definition is visible in this
// header -- presumably it is left undefined to forbid assignment; confirm.
00100 text& operator=(const text&);
00101 };
00102
/// Column class ibis::category, publicly derived from ibis::text.
///
/// It holds an ibis::dictionary member (dic) and re-declares the
/// stringSearch, patternSearch, estimateCost, loadIndex, append,
/// selectUInts, selectStrings, getString, write and print virtual
/// functions of its base.  It also declares key accessors (getNumKeys,
/// getKey, isKey) and fillIndex, which returns an ibis::direkte pointer.
/// Only getDictionary() and one append overload have bodies in this header.
00109 class ibis::category : public ibis::text {
00110 public:
00111 virtual ~category();
00112 category(const part* tbl, FILE* file);
00113 category(const part* tbl, const char* name);
00114 category(const ibis::column& col);
00115
/// Constructor taking a value string; dir defaults to a null pointer and
/// nevt to 0.
/// NOTE(review): presumably builds a column in which every row has the
/// given value -- confirm against the implementation.
00116 category(const part* tbl, const char* name, const char* value,
00117 const char* dir=0, uint32_t nevt=0);
00118
/// Search functions re-declared from ibis::text.  As in the base class,
/// each comes with and without an output ibis::bitvector.
00120 virtual long stringSearch(const char* str, ibis::bitvector& hits) const;
00122 virtual long stringSearch(const std::vector<std::string>& vals,
00123 ibis::bitvector& hits) const;
00125 virtual long stringSearch(const char* str) const;
00127 virtual long stringSearch(const std::vector<std::string>& vals) const;
00128
00129 virtual long patternSearch(const char* pat) const;
00130 virtual long patternSearch(const char* pat, ibis::bitvector &hits) const;
// The using-declaration keeps the estimateCost overloads of ibis::text
// visible; the three overloads declared below would otherwise hide them.
00131 using ibis::text::estimateCost;
00132 virtual double estimateCost(const ibis::qLike& cmp) const;
00133 virtual double estimateCost(const ibis::qString& cmp) const;
00134 virtual double estimateCost(const ibis::qMultiString& cmp) const;
00135
/// Both (unnamed) arguments default to 0; the empty throw () specification
/// promises that no exception propagates out of this function.
00136 virtual void loadIndex(const char* =0, int =0) const throw ();
00138 virtual long append(const char* dt, const char* df, const uint32_t nold,
00139 const uint32_t nnew, uint32_t nbuf, char* buf);
/// Inline overload: ignores both arguments and always returns -1.
00140 virtual long append(const void*, const ibis::bitvector&) {return -1;}
00142 virtual array_t<uint32_t>* selectUInts(const bitvector& mask) const;
00143 virtual std::vector<std::string>*
00144 selectStrings(const bitvector& mask) const;
00145 virtual void getString(uint32_t i, std::string &val) const;
00146
/// Key accessors.
/// NOTE(review): presumably these expose the entries of the dictionary
/// member dic -- confirm against the implementation.
00147 virtual uint32_t getNumKeys() const;
00148 virtual const char* getKey(uint32_t i) const;
00149 virtual const char* isKey(const char* str) const;
00150
00151 virtual void write(FILE* file) const;
00152 virtual void print(std::ostream& out) const;
00153
/// Returns a pointer to an ibis::direkte; dir defaults to a null pointer.
/// NOTE(review): ownership of the returned object is not visible here.
00154 ibis::direkte* fillIndex(const char *dir=0) const;
/// Returns the address of the dictionary member; the pointer stays valid
/// only as long as this object exists.
00156 const ibis::dictionary* getDictionary() const {return &dic;}
00157 int setDictionary(const dictionary&);
00158
00159 private:
00160
00161
00162
00163
// Declared mutable, which lets const member functions change it.
// NOTE(review): presumably needed by the const helpers prepareMembers()
// and readDictionary() declared below -- confirm.
00164 mutable ibis::dictionary dic;
00165
00166
00167 void prepareMembers() const;
00168 void readDictionary(const char *dir=0) const;
00169
// Copy assignment is declared private.  No definition is visible in this
// header -- presumably it is left undefined to forbid assignment; confirm.
00170 category& operator=(const category&);
00171 };
00172
/// Column class ibis::blob, publicly derived from ibis::column.
///
/// Many of the virtual functions below have trivial inline bodies that
/// ignore their arguments and return a fixed value (-1, a null pointer, or
/// nothing at all).
/// NOTE(review): this looks like a deliberate opt-out of string search,
/// min/max computation, indexing and typed value selection for this column
/// type -- confirm against the base-class contract.
/// The functions with real work (append with file names, writeData, write,
/// print, countRawBytes, selectRawBytes, getBlob and the protected
/// extract*/readBlob helpers) are only declared here.
00177 class ibis::blob : public ibis::column {
00178 public:
/// Destructor with an empty inline body.
00179 virtual ~blob() {};
00180 blob(const part*, FILE*);
00181 blob(const part*, const char*);
00182 blob(const ibis::column&);
00183
/// All four stringSearch overloads are inline and always return -1.
00184 virtual long stringSearch(const char*, ibis::bitvector&) const {return -1;}
00185 virtual long stringSearch(const std::vector<std::string>&,
00186 ibis::bitvector&) const {return -1;}
00187 virtual long stringSearch(const char*) const {return -1;}
00188 virtual long stringSearch(const std::vector<std::string>&) const {
00189 return -1;}
00190
/// The computeMinMax overloads and loadIndex have empty inline bodies; the
/// double& arguments of the three-argument computeMinMax are left
/// untouched.  indexSize and getValuesArray always return -1.
00191 virtual void computeMinMax() {}
00192 virtual void computeMinMax(const char*) {}
00193 virtual void computeMinMax(const char*, double&, double&) const {}
00194 virtual void loadIndex(const char*, int) const throw () {}
00195 virtual long indexSize() const {return -1;}
00196 virtual int getValuesArray(void*) const {return -1;}
00197
/// Every typed select* function is inline and returns a null pointer (0).
00198 virtual array_t<signed char>* selectBytes(const bitvector&) const {return 0;}
00199 virtual array_t<unsigned char>* selectUBytes(const bitvector&) const {return 0;}
00200 virtual array_t<int16_t>* selectShorts(const bitvector&) const {return 0;}
00201 virtual array_t<uint16_t>* selectUShorts(const bitvector&) const {return 0;}
00202 virtual array_t<int32_t>* selectInts(const bitvector&) const {return 0;}
00203 virtual array_t<uint32_t>* selectUInts(const bitvector&) const {return 0;}
00204 virtual array_t<int64_t>* selectLongs(const bitvector&) const {return 0;}
00205 virtual array_t<uint64_t>* selectULongs(const bitvector&) const {return 0;}
00206 virtual array_t<float>* selectFloats(const bitvector&) const {return 0;}
00207 virtual array_t<double>* selectDoubles(const bitvector&) const {return 0;}
00208 virtual std::vector<std::string>* selectStrings(const bitvector&) const {return 0;}
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233
/// getActualMin returns DBL_MAX and getActualMax returns -DBL_MAX, so the
/// reported minimum is larger than the reported maximum; getSum returns 0.
/// NOTE(review): presumably this inverted range marks "no numeric range"
/// -- confirm how callers of ibis::column interpret it.
/// NOTE(review): DBL_MAX is defined in <cfloat>/<float.h>, which this
/// header does not include directly; it must arrive through one of the
/// project headers included at the top of the file.
00234 virtual double getActualMin() const {return DBL_MAX;}
00235 virtual double getActualMax() const {return -DBL_MAX;}
00236 virtual double getSum() const {return 0;}
00237
/// Inline overload: ignores both arguments and always returns -1.
00238 virtual long append(const void*, const ibis::bitvector&) {return -1;}
00239 virtual long append(const char* dt, const char* df, const uint32_t nold,
00240 const uint32_t nnew, uint32_t nbuf, char* buf);
00241 virtual long writeData(const char* dir, uint32_t nold, uint32_t nnew,
00242 ibis::bitvector& mask, const void *va1,
00243 void *va2);
00244
00245 virtual void write(FILE*) const;
00246 virtual void print(std::ostream&) const;
00247
/// Raw-byte access.  selectRawBytes fills two caller-supplied arrays (one
/// of unsigned char, one of uint32_t); getBlob returns its result through
/// the buf and size reference arguments.
/// NOTE(review): who allocates and frees buf in getBlob is not visible
/// here -- confirm in the implementation before relying on it.
00248 long countRawBytes(const bitvector&) const;
00249 int selectRawBytes(const bitvector&,
00250 array_t<unsigned char>&, array_t<uint32_t>&) const;
00251 int getBlob(uint32_t ind, unsigned char *&buf, uint32_t &size) const;
00252
00253 protected:
/// Helper overloads.  They differ only in how the source data is supplied:
/// as array_t<unsigned char> plus array_t<int64_t>, as a const char* plus
/// array_t<int64_t>, or as two const char* arguments.
/// NOTE(review): judging by the readBlob parameter names (starts, spfile,
/// datafile), the const char* arguments are presumably file names and the
/// int64_t arrays starting positions -- confirm.
00254 int extractAll(const bitvector&,
00255 array_t<unsigned char>&, array_t<uint32_t>&,
00256 const array_t<unsigned char>&,
00257 const array_t<int64_t>&) const;
00258 int extractSome(const bitvector&,
00259 array_t<unsigned char>&, array_t<uint32_t>&,
00260 const array_t<unsigned char>&, const array_t<int64_t>&,
00261 const uint32_t) const;
00262 int extractAll(const bitvector&,
00263 array_t<unsigned char>&, array_t<uint32_t>&,
00264 const char*, const array_t<int64_t>&) const;
00265 int extractSome(const bitvector&,
00266 array_t<unsigned char>&, array_t<uint32_t>&,
00267 const char*, const array_t<int64_t>&, const uint32_t) const;
00268 int extractSome(const bitvector&,
00269 array_t<unsigned char>&, array_t<uint32_t>&,
00270 const char*, const char*, const uint32_t) const;
00271 int readBlob(uint32_t ind, unsigned char *&buf, uint32_t &size,
00272 const array_t<int64_t> &starts, const char *datafile) const;
00273 int readBlob(uint32_t ind, unsigned char *&buf, uint32_t &size,
00274 const char *spfile, const char *datafile) const;
00275 };
00276 #endif // IBIS_CATEGORY_H