00001
00002
00003
00004
00005 #ifndef IBIS_INDEX_H
00006 #define IBIS_INDEX_H
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #if defined(_WIN32) && defined(_MSC_VER)
00025 #pragma warning(disable:4786) // some identifier longer than 256 characters
00026 #endif
00027 #include "qExpr.h"
00028 #include "bitvector.h"
00029
00030 #include <string>
00031 #include <limits>
00032
00033 namespace ibis {
00034 class bin;
00035 class range;
00036 class mesa;
00037 class ambit;
00038 class pale;
00039 class pack;
00040 class zone;
00041 class relic;
00042 class slice;
00043 class fade;
00044 class sbiad;
00045 class sapid;
00046
00047 class egale;
00048
00049 class moins;
00050 class entre;
00051
00052 class bak;
00053
00054
00055 class bak2;
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065 class keywords;
00066 class mesh;
00067 class band;
00068 class direkte;
00069 class bylt;
00070 class zona;
00071 class fuzz;
00072 class fuge;
00073 }
00074
00081 class ibis::index {
00082 public:
00086 enum INDEX_TYPE {
00087 BINNING=0,
00088 RANGE,
00089 MESA,
00090 AMBIT,
00091 PALE,
00092 PACK,
00093 ZONE,
00094 RELIC,
00095 ROSTER,
00096 SLICE,
00097 FADE,
00098 SBIAD,
00099 SAPID,
00100 EGALE,
00101 MOINS,
00102 ENTRE,
00103 BAK,
00104 BAK2,
00105
00106
00107 KEYWORDS,
00108 MESH,
00109 BAND,
00110 DIREKTE,
00111 GENERIC,
00112 BYLT,
00113 FUZZ,
00114 ZONA,
00115 FUGE,
00116 EXTERN
00117 };
00118
00119 static index* create(const column* c, const char* name=0,
00120 const char* spec=0, int inEntirety=0);
00121 static bool isIndex(const char* f, INDEX_TYPE t);
00123 virtual ~index () {clear();};
00124
00126 virtual INDEX_TYPE type() const = 0;
00129 virtual const char* name() const = 0;
00130
00133 virtual long evaluate(const ibis::qContinuousRange& expr,
00134 ibis::bitvector& hits) const = 0;
00136 virtual long select(const ibis::qContinuousRange&, void*) const =0;
00138 virtual long select(const ibis::qContinuousRange&, void*,
00139 ibis::bitvector&) const =0;
00142 virtual long evaluate(const ibis::qDiscreteRange&,
00143 ibis::bitvector&) const {
00144 return -1;}
00145
00156 virtual void estimate(const ibis::qContinuousRange&,
00157 ibis::bitvector& lower,
00158 ibis::bitvector& upper) const {
00159 lower.set(0, nrows); upper.set(1, nrows);}
00161 virtual uint32_t estimate(const ibis::qContinuousRange&) const {
00162 return nrows;}
00170 virtual float undecidable(const ibis::qContinuousRange&,
00171 ibis::bitvector&) const {return 0.5;}
00172
00175 virtual void estimate(const ibis::qDiscreteRange& expr,
00176 ibis::bitvector& lower,
00177 ibis::bitvector& upper) const;
00178 virtual uint32_t estimate(const ibis::qDiscreteRange& expr) const;
00179 virtual float undecidable(const ibis::qDiscreteRange& expr,
00180 ibis::bitvector& iffy) const;
00181
00183 virtual void estimate(const ibis::index& idx2,
00184 const ibis::deprecatedJoin& expr,
00185 ibis::bitvector64& lower,
00186 ibis::bitvector64& upper) const;
00189 virtual void estimate(const ibis::index& idx2,
00190 const ibis::deprecatedJoin& expr,
00191 const ibis::bitvector& mask,
00192 ibis::bitvector64& lower,
00193 ibis::bitvector64& upper) const;
00194 virtual void estimate(const ibis::index& idx2,
00195 const ibis::deprecatedJoin& expr,
00196 const ibis::bitvector& mask,
00197 const ibis::qRange* const range1,
00198 const ibis::qRange* const range2,
00199 ibis::bitvector64& lower,
00200 ibis::bitvector64& upper) const;
00202 virtual int64_t estimate(const ibis::index& idx2,
00203 const ibis::deprecatedJoin& expr) const;
00206 virtual int64_t estimate(const ibis::index& idx2,
00207 const ibis::deprecatedJoin& expr,
00208 const ibis::bitvector& mask) const;
00209 virtual int64_t estimate(const ibis::index& idx2,
00210 const ibis::deprecatedJoin& expr,
00211 const ibis::bitvector& mask,
00212 const ibis::qRange* const range1,
00213 const ibis::qRange* const range2) const;
00214
00216 virtual void estimate(const ibis::deprecatedJoin& expr,
00217 const ibis::bitvector& mask,
00218 const ibis::qRange* const range1,
00219 const ibis::qRange* const range2,
00220 ibis::bitvector64& lower,
00221 ibis::bitvector64& upper) const;
00222 virtual int64_t estimate(const ibis::deprecatedJoin& expr,
00223 const ibis::bitvector& mask,
00224 const ibis::qRange* const range1,
00225 const ibis::qRange* const range2) const;
00226
00228 virtual double estimateCost(const ibis::qContinuousRange&) const {
00229 return (offset32.empty() ? (nrows<<3) : offset32.back());}
00231 virtual double estimateCost(const ibis::qDiscreteRange&) const {
00232 return (offset32.empty() ? (nrows<<3) : offset32.back());}
00233
00236 virtual void print(std::ostream& out) const = 0;
00240 virtual int write(const char* name) const = 0;
00244 virtual int read(const char* name) = 0;
00247 virtual int read(ibis::fileManager::storage* st) = 0;
00248
00250 virtual long append(const char*, const char*, uint32_t) {return -1;}
00251
00252 inline float sizeInBytes() const;
00254 virtual void speedTest(std::ostream&) const {};
00256 virtual uint32_t numBitvectors() const {return bits.size();}
00258 virtual const ibis::bitvector* getBitvector(uint32_t i) const {
00259 if (i < bits.size()) {
00260 if (bits[i] == 0)
00261 activate(i);
00262 return bits[i];
00263 }
00264 else {
00265 return 0;
00266 }
00267 }
00268
00271 virtual void binBoundaries(std::vector<double>&) const {return;}
00272 virtual void binWeights(std::vector<uint32_t>&) const {return;}
00273
00275 virtual long getCumulativeDistribution
00276 (std::vector<double>& bds, std::vector<uint32_t>& cts) const;
00278 virtual long getDistribution
00279 (std::vector<double>& bbs, std::vector<uint32_t>& cts) const;
00281 virtual double getMin() const {
00282 return std::numeric_limits<double>::quiet_NaN();}
00284 virtual double getMax() const {
00285 return std::numeric_limits<double>::quiet_NaN();}
00289 virtual double getSum() const {
00290 return std::numeric_limits<double>::quiet_NaN();}
00292 uint32_t getNRows() const {return nrows;}
00293
00294 void addBins(uint32_t ib, uint32_t ie, ibis::bitvector& res) const;
00295 void addBins(uint32_t ib, uint32_t ie, ibis::bitvector& res,
00296 const ibis::bitvector& tot) const;
00297 void sumBins(uint32_t ib, uint32_t ie, ibis::bitvector& res) const;
00298 void sumBins(uint32_t ib, uint32_t ie, ibis::bitvector& res,
00299 uint32_t ib0, uint32_t ie0) const;
00300 void sumBins(const ibis::array_t<uint32_t> &, ibis::bitvector&) const;
00301
00307 virtual int expandRange(ibis::qContinuousRange&) const {return 0;}
00308 virtual int contractRange(ibis::qContinuousRange&) const {return 0;}
00309
00310 typedef std::map< double, ibis::bitvector* > VMap;
00311 typedef std::map< double, uint32_t > histogram;
00312 template <typename E>
00313 static void mapValues(const array_t<E>& val, VMap& bmap);
00314 template <typename E>
00315 static void mapValues(const array_t<E>& val, histogram& hist,
00316 uint32_t count=0);
00317 template <typename E>
00318 static void mapValues(const array_t<E>& val, array_t<E>& bounds,
00319 std::vector<uint32_t>& cnts);
00320 template <typename E1, typename E2>
00321 static void mapValues(const array_t<E1>& val1, const array_t<E2>& val2,
00322 array_t<E1>& bnd1, array_t<E2>& bnd2,
00323 std::vector<uint32_t>& cnts);
00326 static void divideCounts(array_t<uint32_t>& bounds,
00327 const array_t<uint32_t>& cnt);
00328
00329
00330 static void addBits(const array_t<bitvector*>& bits,
00331 uint32_t ib, uint32_t ie, ibis::bitvector& res);
00332 static void sumBits(const array_t<bitvector*>& bits,
00333 uint32_t ib, uint32_t ie, ibis::bitvector& res);
00334 static void sumBits(const array_t<bitvector*>& bits,
00335 const ibis::bitvector& tot, uint32_t ib, uint32_t ie,
00336 ibis::bitvector& res);
00337
00338 static void setBases(array_t<uint32_t>& bases, uint32_t card,
00339 uint32_t nbase = 2);
00340
00341 protected:
00342
00344 const ibis::column* col;
00347 mutable ibis::fileManager::storage* str;
00349 mutable const char* fname;
00351 mutable array_t<int32_t> offset32;
00355 mutable array_t<int64_t> offset64;
00357 mutable array_t<ibis::bitvector*> bits;
00361 uint32_t nrows;
00362
00366 index(const ibis::column* c=0) : col(c), str(0), fname(0), nrows(0) {}
00367 index(const ibis::column* c, ibis::fileManager::storage* s);
00368
00369 void dataFileName(std::string& name, const char* f=0) const;
00370 void indexFileName(std::string& name, const char* f=0) const;
00371 static void indexFileName(std::string& name, const ibis::column* col1,
00372 const ibis::column* col2, const char* f=0);
00373
00375 virtual void activate() const;
00377 virtual void activate(uint32_t i) const;
00380 virtual void activate(uint32_t i, uint32_t j) const;
00382 virtual void clear();
00383
00385
00386
00389 void mapValues(const char* f, VMap& bmap) const;
00391 void mapValues(const char* f, histogram& hist, uint32_t count=0) const;
00392
00393 void computeMinMax(const char* f, double& min, double& max) const;
00395 void optionalUnpack(array_t<ibis::bitvector*>& bits,
00396 const char* opt);
00397
00398 int initOffsets(int fdes, const char offsize, size_t start,
00399 uint32_t nobs);
00400 int initOffsets(ibis::fileManager::storage* st, size_t start,
00401 uint32_t nobs);
00402 void initBitmaps(int fdes);
00403 void initBitmaps(ibis::fileManager::storage* st);
00404
00405 class barrel;
00406
00407 private:
00408 index(const index&);
00409 index& operator=(const index&);
00410 };
00411
00412
00415 class ibis::index::barrel : public ibis::math::barrel {
00416 public:
00417 barrel(const ibis::math::term* t) : ibis::math::barrel(t) {}
00418
00419 void setValue(uint32_t i, double v) {varvalues[i] = v;}
00420 };
00421
00422
00425 inline float ibis::index::sizeInBytes() const {
00426 if (offset64.size() > bits.size()) {
00427 return (float)offset64[bits.size()];
00428 }
00429 else if (offset32.size() > bits.size()) {
00430 return (float)offset32[bits.size()];
00431 }
00432 else if (str != 0) {
00433 return (float)str->size();
00434 }
00435 else if (*fname != 0) {
00436 return (float)ibis::util::getFileSize(fname);
00437 }
00438 else {
00439 return FLT_MAX;
00440 }
00441 }
00442
00443 #endif // IBIS_INDEX_H