Go to the documentation of this file.00001
00002
00003
00004 #ifndef IBIS_KEYWORDS_H
00005 #define IBIS_KEYWORDS_H
00006
00007
00008
00009
00010
00011 #include "index.h"
00012 #include "category.h"
00013
/// An index class derived from ibis::index that identifies itself as
/// type KEYWORDS with the name "keywords".  It holds an ibis::dictionary
/// (member terms) and offers keyword search functions in addition to the
/// overrides of the ibis::index interface declared below.  Most member
/// functions are only declared here; their definitions live elsewhere.
00078 class ibis::keywords : public ibis::index {
00079 public:
/// Destructor.  Calls clear() before the members are destroyed.
00080 virtual ~keywords() {clear();}
/// Construct from a column; f defaults to a null pointer.
/// NOTE(review): f presumably names an index file or directory -- confirm
/// against the definition.
00081 explicit keywords(const ibis::column* c, const char* f=0);
/// Construct from a column and a caller-supplied tokenizer; f defaults to
/// a null pointer.
00082 keywords(const ibis::column* c, ibis::text::tokenizer& tkn,
00083 const char* f=0);
/// Construct from a column and a storage object.
00084 keywords(const ibis::column* c, ibis::fileManager::storage* st);
00085
/// Report the index type: always KEYWORDS.
00086 virtual INDEX_TYPE type() const {return KEYWORDS;}
/// Report the index name: always the literal "keywords".
00087 virtual const char* name() const {return "keywords";}
/// This index reports no bin boundaries; the output vector is emptied.
00088 virtual void binBoundaries(std::vector<double>& b) const {b.clear();}
/// Declared here, defined elsewhere.
00089 virtual void binWeights(std::vector<uint32_t>& b) const;
/// Always returns DBL_MAX.
00090 virtual double getMin() const {return DBL_MAX;}
/// Always returns -DBL_MAX.
00091 virtual double getMax() const {return -DBL_MAX;}
/// Always returns -DBL_MAX.
00092 virtual double getSum() const {return -DBL_MAX;}
/// Search for the keyword kw, with results delivered through hits.
/// NOTE(review): the meaning of the return value is not visible in this
/// header -- confirm against the definition.
00094 long search(const char* kw, ibis::bitvector& hits) const;
/// Search for the keyword kw without producing a bitvector.
/// NOTE(review): presumably returns a count or an estimate -- confirm.
00096 long search(const char* kw) const;
00097
// I/O and maintenance overrides; all are defined elsewhere.
00098 virtual void print(std::ostream& out) const;
00099 virtual int write(const char* dt) const;
00100 virtual int read(const char* idxfile);
00101 virtual int read(ibis::fileManager::storage* st);
00102 virtual long append(const char* dt, const char* df, uint32_t nnew);
00103
// The overloads declared below would otherwise hide the base class
// functions of the same names; these using-declarations keep every
// ibis::index overload of evaluate, estimate and undecidable visible.
00104 using ibis::index::evaluate;
00105 using ibis::index::estimate;
00106 using ibis::index::undecidable;
// Overloads taking a qContinuousRange; defined elsewhere.
00107 virtual long evaluate(const ibis::qContinuousRange& expr,
00108 ibis::bitvector& hits) const;
00109 virtual void estimate(const ibis::qContinuousRange& expr,
00110 ibis::bitvector& lower,
00111 ibis::bitvector& upper) const;
00112 virtual uint32_t estimate(const ibis::qContinuousRange& expr) const;
/// Empties iffy and returns 0.0 regardless of the range expression.
00115 virtual float undecidable(const ibis::qContinuousRange&,
00116 ibis::bitvector& iffy) const {
00117 iffy.clear();
00118 return 0.0;
00119 }
// Cost estimates for continuous and discrete ranges; defined elsewhere.
00120 virtual double estimateCost(const ibis::qContinuousRange& expr) const;
00121 virtual double estimateCost(const ibis::qDiscreteRange& expr) const;
00122
/// Both select overloads ignore their arguments and return -1.
/// NOTE(review): -1 presumably signals that the operation is not
/// supported by this index -- confirm against ibis::index.
00123 virtual long select(const ibis::qContinuousRange&, void*) const {
00124 return -1;}
00125 virtual long select(const ibis::qContinuousRange&, void*,
00126 ibis::bitvector&) const {
00127 return -1;}
00128
/// Forward declaration of the nested tokenizer class; the full definition
/// follows later in this header.
00129 class tokenizer;
00130
00131 protected:
// Helper functions.  readTerm and readUInt are defined inline later in
// this header; the others are defined elsewhere.
00132 virtual size_t getSerialSize() const throw();
00133 int readTermDocFile(const ibis::column* idcol, const char* f);
00134 inline char readTerm(const char*& buf, std::string &key) const;
00135 inline uint32_t readUInt(const char*& buf) const;
00136 int readTDLine(std::istream& in, std::string& key,
00137 std::vector<uint32_t>& idlist,
00138 char* buf, uint32_t nbuf) const;
00139 void setBits(std::vector<uint32_t>& pos, ibis::bitvector& bvec) const;
00140 int parseTextFile(ibis::text::tokenizer &tkn, const char *f);
00141
/// Invoked by the destructor; defined elsewhere.
00143 void clear();
00144
00145 private:
/// Dictionary object owned by this index.
/// NOTE(review): presumably holds the indexed terms -- confirm.
00146 ibis::dictionary terms;
00147 };
00148
00154 inline char ibis::keywords::readTerm(const char*& buf,
00155 std::string &keyword) const {
00156 while (isspace(*buf))
00157 ++ buf;
00158 while (isprint(*buf)) {
00159 if (*buf == ':') {
00160 return *buf;
00161 }
00162 else if (isspace(*buf)) {
00163 for (++ buf; isspace(*buf); ++ buf);
00164 if (*buf == ':') {
00165 return *buf;
00166 }
00167 else {
00168 keyword += ' ';
00169 keyword += *buf;
00170 ++ buf;
00171 }
00172 }
00173 else {
00174 keyword += *buf;
00175 ++ buf;
00176 }
00177 }
00178 return *buf;
00179 }
00180
00182 inline uint32_t ibis::keywords::readUInt(const char*& buf) const {
00183 uint32_t res = 0;
00184 while (*buf && ! isdigit(*buf))
00185 ++ buf;
00186
00187 while (isdigit(*buf)) {
00188 res = res * 10 + (*buf - '0');
00189 ++ buf;
00190 }
00191 return res;
00192 }
00193
/// A tokenizer nested in ibis::keywords.  It derives from
/// ibis::text::tokenizer and keeps its own copy of a delimiter string.
00195 class ibis::keywords::tokenizer : public ibis::text::tokenizer {
00196 public:
/// Constructor.  Copies the delimiter characters in d into delim_; when no
/// argument is given, ibis::util::delimiters is used.
/// NOTE(review): d is passed straight to the std::string constructor, so
/// callers must not pass a null pointer -- confirm no caller does.
00202 tokenizer(const char *d=ibis::util::delimiters) : delim_(d) {}
/// Destructor.  Nothing to release explicitly.
00204 virtual ~tokenizer() {}
00205
/// Function-call operator; declared here and defined elsewhere.
/// NOTE(review): presumably splits buf into tokens and records them in
/// tkns -- confirm the return value and whether buf is modified.
00206 virtual int operator()(std::vector<const char*>& tkns, char *buf);
00207
00208 private:
/// The delimiter characters given to the constructor.
00209 std::string delim_;
00210 };
00211 #endif