00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00019
00020
00021 #include <stxxl/vector>
00022 #include <stxxl/map>
00023 #include <stxxl/timer>
00024 #include <stxxl/stream>
00025
00026
00028 #include <db_cxx.h>
00029
00031 #include "app_config.h"
00032 #include <portability.h>
00033 #include <ami_btree.h>
00035
00036 #define BDB_BULK_SCAN
00037
00038 #define KEY_SIZE 8
00039 #define DATA_SIZE 32
00040
00041 #define NODE_BLOCK_SIZE (32 * 1024)
00042 #define LEAF_BLOCK_SIZE (32 * 1024)
00043
00044
00045 #define LEAF_BLOCK_SIZE (32 * 1024)
00046
00047 #define TOTAL_CACHE_SIZE (750 * 1024 * 1024)
00048
00049
00050
00051
00052
00053 #define NODE_CACHE_SIZE (64 * 1024 * 1024)
00054 #define LEAF_CACHE_SIZE (TOTAL_CACHE_SIZE - NODE_CACHE_SIZE)
00055
00056 #define SORTER_MEM (TOTAL_CACHE_SIZE - 1024 * 1024 * 2 * 4)
00057
00058
00059 #define SCAN_LIMIT(x) (x)
00060
00061
00062 #define BDB_FILE "/var/tmp/bdb_file"
00063
00064
00065
00066 u_int32_t pagesize = LEAF_BLOCK_SIZE;
00067 u_int bulkbufsize = 4 * 1024 * 1024;
00068 u_int logbufsize = 8 * 1024 * 1024;
00069 u_int cachesize = (TOTAL_CACHE_SIZE < 500 * 1024 * 1024) ? (4 * (TOTAL_CACHE_SIZE / 5)) : (TOTAL_CACHE_SIZE - 100 * 1024 * 1024);
00070 u_int datasize = DATA_SIZE;
00071 u_int keysize = KEY_SIZE;
00072 u_int numitems = 0;
00073
00074 const char * letters = "abcdefghijklmnopqrstuvwxuz";
00075
00076 struct my_key
00077 {
00078 char keybuf[KEY_SIZE];
00079 };
00080
00081
00082 std::ostream & operator << (std::ostream & o, const my_key & obj)
00083 {
00084 for (int i = 0; i < KEY_SIZE; ++i)
00085 o << obj.keybuf[i];
00086
00087 return o;
00088 }
00089
00090 bool operator == (const my_key & a, const my_key & b)
00091 {
00092 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) == 0;
00093 }
00094
00095 bool operator != (const my_key & a, const my_key & b)
00096 {
00097 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) != 0;
00098 }
00099
00100 bool operator < (const my_key & a, const my_key & b)
00101 {
00102 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) < 0;
00103 }
00104
00105 bool operator > (const my_key & a, const my_key & b)
00106 {
00107 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) > 0;
00108 }
00109
00110 bool operator <= (const my_key & a, const my_key & b)
00111 {
00112 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) <= 0;
00113 }
00114
00115 bool operator >= (const my_key & a, const my_key & b)
00116 {
00117 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) >= 0;
00118 }
00119
00120
00121 struct my_data
00122 {
00123 char databuf[DATA_SIZE];
00124 };
00125
00126 std::ostream & operator << (std::ostream & o, const my_data & obj)
00127 {
00128 o << "DATA(size=" << sizeof(obj) << ") ";
00129
00130 return o;
00131 }
00132
00133 my_key min_key, max_key;
00134
00135 struct comp_type : std::binary_function<my_key, my_key, bool>
00136 {
00137 bool operator () (const my_key & a, const my_key & b) const
00138 {
00139 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) < 0;
00140 }
00141 static my_key max_value()
00142 {
00143 return max_key;
00144 }
00145 static my_key min_value()
00146 {
00147 return min_key;
00148 }
00149 };
00150
00151
00153
00154 typedef my_key bkey_t;
00155
00156
00157 struct el_t {
00158 bkey_t key_;
00159 my_data data_;
00160 el_t(bkey_t k) : key_(k) { }
00161 el_t() { }
00162 };
00163 struct key_from_el {
00164 bkey_t operator () (const el_t & v) const
00165 {
00166 return v.key_;
00167 }
00168 };
00169
00170
00171
00172
00173 #ifdef _WIN32
00174 typedef AMI_btree<bkey_t, el_t, less<bkey_t>, key_from_el, BTE_COLLECTION_UFS> u_btree_t;
00175 #else
00176 typedef AMI_btree<bkey_t, el_t, less<bkey_t>, key_from_el> u_btree_t;
00177 #endif
00178
00179
00180 void init()
00181 {
00182 memset(max_key.keybuf, (std::numeric_limits<char>::max)(), KEY_SIZE);
00183 memset(min_key.keybuf, (std::numeric_limits<char>::min)(), KEY_SIZE);
00184 }
00185
00186 typedef stxxl::map<my_key, my_data, comp_type, NODE_BLOCK_SIZE, LEAF_BLOCK_SIZE> map_type;
00187
00188 #define REAL_NODE_BLOCK_SIZE map_type::node_block_type::raw_size
00189 #define REAL_LEAF_BLOCK_SIZE map_type::leaf_block_type::raw_size
00190 #define REAL_NODE_MELEMENTS map_type::node_block_type::size
00191 #define REAL_LEAF_MELEMENTS map_type::leaf_block_type::size
00192
00193 typedef stxxl::VECTOR_GENERATOR<std::pair<my_key, my_data>, 1, 1>::result vector_type;
00194
00195
00196
00197
00198
00199
00200
00201 #if 0
00202 unsigned ran32State = 0xdeadbeef;
00203 inline unsigned myrand()
00204 {
00205 return (ran32State = 1664525 * ran32State + 1013904223);
00206 }
00207 inline void rand_key(stxxl::int64 pos, my_key & Key)
00208 {
00209 for (int i = 0; i < KEY_SIZE; ++i)
00210 Key.keybuf[i] = letters[myrand() % 26];
00211 }
00212 #else // a longer pseudo random sequence
00213 long long unsigned ran32State = 0xdeadbeef;
00214 inline long long unsigned myrand()
00215 {
00216 return (ran32State = (ran32State * 0x5DEECE66DULL + 0xBULL) & 0xFFFFFFFFFFFFULL);
00217 }
00218 inline void rand_key(stxxl::int64 , my_key & Key)
00219 {
00220 long long unsigned r = myrand();
00221 for (int i = 0; i < KEY_SIZE; ++i)
00222 {
00223 Key.keybuf[i] = letters[r % 26];
00224 r >>= 5;
00225 }
00226 }
00227 #endif
00228
00229 void run_bdb_btree(stxxl::int64 ops)
00230 {
00231 const char * filename = BDB_FILE;
00232
00233 my_key key1_storage;
00234 my_data data1_storage;
00235
00236 unlink(filename);
00237
00238 memset(key1_storage.keybuf, 'a', KEY_SIZE);
00239 memset(data1_storage.databuf, 'b', DATA_SIZE);
00240
00241
00242 Db db(NULL, 0);
00243
00244 try {
00245 db.set_errfile(stderr);
00246 db.set_pagesize(pagesize);
00247 db.set_cachesize(0, cachesize, 1);
00248
00249
00250 db.open(NULL,
00251 filename,
00252 NULL,
00253 DB_BTREE,
00254 DB_CREATE,
00255 0);
00256
00257
00258
00259 Dbt key1(key1_storage.keybuf, KEY_SIZE);
00260 Dbt data1(data1_storage.databuf, DATA_SIZE);
00261
00262 stxxl::timer Timer;
00263 stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
00264 stxxl::int64 i;
00265
00266
00267 ran32State = 0xdeadbeef;
00268
00269
00270 DB_BTREE_STAT * dbstat;
00271
00272 db.stat(NULL, &dbstat, 0);
00273 STXXL_MSG("Records in map: " << dbstat->bt_ndata);
00274
00275 db.get_env()->memp_stat(NULL, NULL, DB_STAT_CLEAR);
00276
00277 Timer.start();
00278
00279 for (i = 0; i < n_inserts; ++i)
00280 {
00281
00282 rand_key(i, key1_storage);
00283 db.put(NULL, &key1, &data1, DB_NOOVERWRITE);
00284 }
00285
00286 Timer.stop();
00287 db.stat(NULL, &dbstat, 0);
00288 STXXL_MSG("Records in map: " << dbstat->bt_ndata);
00289 STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
00290 " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00291
00292 db.get_env()->memp_stat_print(DB_STAT_CLEAR);
00293
00295 Timer.reset();
00296 Timer.start();
00297
00298
00299 Dbc * cursorp;
00300 db.cursor(NULL, &cursorp, 0);
00301
00302 for (i = 0; i < n_locates; ++i)
00303 {
00304
00305 rand_key(i, key1_storage);
00306 Dbt keyx(key1_storage.keybuf, KEY_SIZE);
00307 Dbt datax(data1_storage.databuf, DATA_SIZE);
00308
00309 cursorp->get(&keyx, &datax, DB_SET_RANGE);
00310 }
00311
00312 Timer.stop();
00313 STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
00314 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00315
00316 db.get_env()->memp_stat_print(DB_STAT_CLEAR);
00317
00319 Timer.reset();
00320
00321 Timer.start();
00322
00323 stxxl::int64 n_scanned = 0;
00324
00325 for (i = 0; i < n_range_queries; ++i)
00326 {
00327
00328 rand_key(i, key1_storage);
00329 my_key last_key = key1_storage;
00330
00331 rand_key(i, key1_storage);
00332 if (last_key < key1_storage)
00333 std::swap(last_key, key1_storage);
00334
00335
00336 Dbt keyx(key1_storage.keybuf, KEY_SIZE);
00337 Dbt datax(data1_storage.databuf, DATA_SIZE);
00338
00339 if (cursorp->get(&keyx, &datax, DB_SET_RANGE) == DB_NOTFOUND)
00340 continue;
00341
00342
00343 while (*((my_key *)keyx.get_data()) <= last_key)
00344 {
00345 ++n_scanned;
00346 if (cursorp->get(&keyx, &datax, DB_NEXT) == DB_NOTFOUND)
00347 break;
00348 }
00349
00350 if (n_scanned >= 10 * n_range_queries)
00351 {
00352 ++i;
00353 break;
00354 }
00355 }
00356
00357 n_range_queries = i;
00358
00359 Timer.stop();
00360 if (cursorp != NULL)
00361 cursorp->close();
00362
00363
00364 STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
00365 " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
00366 " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
00367
00368 db.get_env()->memp_stat_print(DB_STAT_CLEAR);
00369
00371
00372 ran32State = 0xdeadbeef;
00373 memset(key1_storage.keybuf, 'a', KEY_SIZE);
00374
00375 Timer.reset();
00376 Timer.start();
00377
00378 for (i = 0; i < n_deletes; ++i)
00379 {
00380
00381 rand_key(i, key1_storage);
00382 Dbt keyx(key1_storage.keybuf, KEY_SIZE);
00383 db.del(NULL, &keyx, 0);
00384 }
00385
00386 Timer.stop();
00387 db.stat(NULL, &dbstat, 0);
00388 STXXL_MSG("Records in map: " << dbstat->bt_ndata);
00389 STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
00390 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00391
00392 db.get_env()->memp_stat_print(DB_STAT_CLEAR);
00393
00394 db.close(0);
00395 }
00396 catch (DbException & e)
00397 {
00398 STXXL_ERRMSG("DbException happened");
00399 }
00400 catch (std::exception & e)
00401 {
00402 STXXL_ERRMSG("std::exception happened");
00403 }
00404
00405 unlink(filename);
00406 }
00407
00408 void run_stxxl_map(stxxl::int64 ops)
00409 {
00410 map_type Map(NODE_CACHE_SIZE, LEAF_CACHE_SIZE);
00411 Map.enable_prefetching();
00412 stxxl::stats * Stats = stxxl::stats::get_instance();
00413
00414 std::pair<my_key, my_data> element;
00415
00416 memset(element.first.keybuf, 'a', KEY_SIZE);
00417 memset(element.second.databuf, 'b', DATA_SIZE);
00418
00419
00420 stxxl::timer Timer;
00421 stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
00422 stxxl::int64 i;
00423
00424
00425 ran32State = 0xdeadbeef;
00426
00427
00428
00429 STXXL_MSG("Records in map: " << Map.size());
00430
00431 Stats->reset();
00432
00433 Timer.start();
00434
00435 for (i = 0; i < n_inserts; ++i)
00436 {
00437
00438 rand_key(i, element.first);
00439 Map.insert(element);
00440 }
00441
00442 Timer.stop();
00443
00444 STXXL_MSG("Records in map: " << Map.size());
00445 STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
00446 " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00447
00448 std::cout << *Stats;
00449 Stats->reset();
00450
00452 Timer.reset();
00453
00454 const map_type & CMap(Map);
00455
00456 Timer.start();
00457
00458 for (i = 0; i < n_locates; ++i)
00459 {
00460
00461 rand_key(i, element.first);
00462 CMap.lower_bound(element.first);
00463 }
00464
00465 Timer.stop();
00466 STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
00467 " seconds : " << (double(n_locates) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00468
00469 std::cout << *Stats;
00470 Stats->reset();
00471
00473 Timer.reset();
00474
00475 Timer.start();
00476
00477 stxxl::int64 n_scanned = 0;
00478
00479 for (i = 0; i < n_range_queries; ++i)
00480 {
00481
00482 rand_key(i, element.first);
00483 my_key begin_key = element.first;
00484 map_type::const_iterator begin = CMap.lower_bound(element.first);
00485
00486 rand_key(i, element.first);
00487 map_type::const_iterator beyond = CMap.lower_bound(element.first);
00488 if (element.first < begin_key)
00489 std::swap(begin, beyond);
00490
00491 while (begin != beyond)
00492 {
00493 my_data tmp = begin->second;
00494 ++n_scanned;
00495 ++begin;
00496 }
00497 if (n_scanned >= 10 * n_range_queries)
00498 {
00499 ++i;
00500 break;
00501 }
00502 }
00503
00504 n_range_queries = i;
00505
00506 Timer.stop();
00507 STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
00508 " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
00509 " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
00510
00511 std::cout << *Stats;
00512 Stats->reset();
00513
00515 ran32State = 0xdeadbeef;
00516 memset(element.first.keybuf, 'a', KEY_SIZE);
00517 memset(element.second.databuf, 'b', DATA_SIZE);
00518
00519 Timer.reset();
00520 Timer.start();
00521
00522 for (i = 0; i < n_deletes; ++i)
00523 {
00524
00525 rand_key(i, element.first);
00526 Map.erase(element.first);
00527 }
00528
00529 Timer.stop();
00530 STXXL_MSG("Records in map: " << Map.size());
00531 STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
00532 " seconds : " << (double(n_deletes) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00533
00534 std::cout << *Stats;
00535 Stats->reset();
00536 }
00537
00538 class rand_key_gen
00539 {
00540 stxxl::int64 counter;
00541 my_key & current;
00542 stxxl::random_number32 myrand;
00543 rand_key_gen();
00544
00545 public:
00546 typedef my_key value_type;
00547
00548 rand_key_gen(stxxl::int64 el, my_key & cur) :
00549 counter(el), current(cur)
00550 {
00551
00552
00553 rand_key(counter, current);
00554 }
00555 const my_key & operator * () { return current; }
00556 const my_key * operator -> () { return ¤t; }
00557
00558 rand_key_gen & operator ++ ()
00559 {
00560 --counter;
00561
00562
00563 rand_key(counter, current);
00564 return *this;
00565 }
00566 bool empty() const { return counter == 0; }
00567 };
00568
00569 template <class InputType>
00570 class key2pair
00571 {
00572 InputType & in;
00573 std::pair<my_key, my_data> current;
00574 key2pair();
00575
00576 public:
00577 typedef std::pair<my_key, my_data> value_type;
00578
00579 key2pair(InputType & in_) : in(in_)
00580 {
00581 if (!in.empty())
00582 current.first = *in;
00583 }
00584 const value_type & operator * () { return current; }
00585 const value_type * operator -> () { return ¤t; }
00586
00587 key2pair & operator ++ ()
00588 {
00589 ++in;
00590 if (!empty())
00591 current.first = *in;
00592
00593
00594 return *this;
00595 }
00596 bool empty() const { return in.empty(); }
00597 };
00598
00599 void run_stxxl_map_big(stxxl::int64 n, unsigned ops)
00600 {
00601 stxxl::stats * Stats = stxxl::stats::get_instance();
00602
00603 std::pair<my_key, my_data> element;
00604
00605 memset(element.first.keybuf, 'a', KEY_SIZE);
00606 memset(element.second.databuf, 'b', DATA_SIZE);
00607
00608 stxxl::timer Timer;
00609 stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
00610 stxxl::int64 i;
00611
00612
00613 ran32State = 0xdeadbeef;
00614
00615
00616
00617 Timer.start();
00618
00619 vector_type SortedSeq(n);
00620 const vector_type & CSortedSeq(SortedSeq);
00621 {
00622 rand_key_gen Gen(n, element.first);
00623 typedef stxxl::stream::sort<rand_key_gen, comp_type> sorter_type;
00624 sorter_type Sorter(Gen, comp_type(), SORTER_MEM);
00625 typedef key2pair<sorter_type> key2pair_type;
00626 key2pair_type Key2Pair(Sorter);
00627 stxxl::stream::materialize(Key2Pair, SortedSeq.begin());
00628 }
00629
00630
00631 Timer.stop();
00632 Stats->reset();
00633
00634 STXXL_MSG("Finished sorting input. Elapsed time: " <<
00635 (Timer.mseconds() / 1000.) << " seconds.");
00636
00637 Timer.reset();
00638 Timer.start();
00639
00640
00641 map_type Map(CSortedSeq.begin(),
00642 CSortedSeq.end(),
00643 NODE_CACHE_SIZE, LEAF_CACHE_SIZE, true);
00644
00645 Timer.stop();
00646
00647
00648 STXXL_MSG("Records in map: " << Map.size());
00649 STXXL_MSG("Construction elapsed time: " << (Timer.mseconds() / 1000.) <<
00650 " seconds : " << (double(n) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00651
00652 using std::cout;
00653 Map.print_statistics(cout);
00654 Map.reset_statistics();
00655 std::cout << *Stats;
00656 Stats->reset();
00658 Timer.reset();
00659
00660
00661 Map.disable_prefetching();
00662
00663 Timer.start();
00664
00665 for (i = 0; i < n_inserts; ++i)
00666 {
00667
00668 rand_key(i, element.first);
00669 Map.insert(element);
00670 }
00671
00672 Timer.stop();
00673
00674 STXXL_MSG("Records in map: " << Map.size());
00675 STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
00676 " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00677
00678 Map.print_statistics(cout);
00679 Map.reset_statistics();
00680
00681 std::cout << *Stats;
00682 Stats->reset();
00683
00685
00686
00688 Timer.reset();
00689
00690
00691 const map_type & CMap(Map);
00692
00693 Timer.start();
00694
00695 for (i = 0; i < n_locates; ++i)
00696 {
00697
00698 rand_key(i, element.first);
00699 CMap.lower_bound(element.first);
00700 }
00701
00702 Timer.stop();
00703 STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
00704 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00705
00706 Map.print_statistics(cout);
00707 Map.reset_statistics();
00708 std::cout << *Stats;
00709 Stats->reset();
00710
00712 Timer.reset();
00713
00714 Map.enable_prefetching();
00715
00716 Timer.start();
00717
00718 stxxl::int64 n_scanned = 0;
00719
00720 for (i = 0; i < n_range_queries; ++i)
00721 {
00722
00723 rand_key(i, element.first);
00724 my_key begin_key = element.first;
00725 map_type::const_iterator begin = CMap.lower_bound(element.first);
00726
00727 rand_key(i, element.first);
00728 map_type::const_iterator beyond = CMap.lower_bound(element.first);
00729 if (element.first < begin_key)
00730 std::swap(begin, beyond);
00731
00732
00733
00734
00735
00736
00737
00738
00739
00740
00741
00742
00743
00744 while (begin != beyond)
00745 {
00746 ++n_scanned;
00747 ++begin;
00748 }
00749
00750 if (n_scanned >= SCAN_LIMIT(n))
00751 {
00752 ++i;
00753 break;
00754 }
00755 }
00756
00757 n_range_queries = i;
00758
00759 Timer.stop();
00760 STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
00761 " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
00762 " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
00763
00764 Map.print_statistics(cout);
00765 Map.reset_statistics();
00766
00767 std::cout << *Stats;
00768 Stats->reset();
00769
00771 ran32State = 0xdeadbeef;
00772 memset(element.first.keybuf, 'a', KEY_SIZE);
00773 memset(element.second.databuf, 'b', DATA_SIZE);
00774
00775 Timer.reset();
00776 Map.disable_prefetching();
00777
00778 Timer.start();
00779
00780 for (i = n_deletes; i > 0; --i)
00781 {
00782
00783 rand_key(i, element.first);
00784 Map.erase(element.first);
00785 }
00786
00787 Timer.stop();
00788 STXXL_MSG("Records in map: " << Map.size());
00789 STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
00790 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00791
00792 Map.print_statistics(cout);
00793 Map.reset_statistics();
00794 std::cout << *Stats;
00795 Stats->reset();
00796 }
00797
00799
00800 typedef AMI_STREAM<el_t> stream_t;
00801
00802 char dummy;
00803
00804 class MyFilter {
00805 public:
00806 bool operator () (const el_t & v) const
00807 {
00808 dummy += v.key_.keybuf[0];
00809 return true;
00810 }
00811 };
00812
00813
00814 void run_tpie_btree_big(stxxl::int64 n, unsigned ops)
00815 {
00816 el_t element;
00817
00818 memset(element.key_.keybuf, 'a', KEY_SIZE);
00819 memset(element.data_.databuf, 'b', DATA_SIZE);
00820
00821
00822 tpie_log_init(TPIE_LOG_APP_DEBUG);
00823 MM_manager.set_memory_limit(TOTAL_CACHE_SIZE);
00824 MM_manager.enforce_memory_limit();
00825
00826 stream_t * is = new stream_t;
00827
00828 stxxl::timer Timer;
00829 stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
00830 stxxl::int64 i;
00831
00832
00833 ran32State = 0xdeadbeef;
00834
00835
00836
00837 Timer.start();
00838
00839
00840 {
00841 rand_key_gen Gen(n, element.key_);
00842 typedef stxxl::stream::sort<rand_key_gen, comp_type> sorter_type;
00843 sorter_type Sorter(Gen, comp_type(), SORTER_MEM);
00844
00845
00846 while (!Sorter.empty())
00847 {
00848 is->write_item(*Sorter);
00849 ++Sorter;
00850 }
00851 }
00852
00853 Timer.stop();
00854
00855
00856 STXXL_MSG("Finished sorting input. Elapsed time: " <<
00857 (Timer.mseconds() / 1000.) << " seconds.");
00858
00859
00860 Timer.reset();
00861 Timer.start();
00862
00863
00864 u_btree_t * u_btree;
00865 AMI_btree_params params;
00866 params.node_block_factor = NODE_BLOCK_SIZE / 4096;
00867 params.leaf_block_factor = LEAF_BLOCK_SIZE / 4096;
00868 params.leaf_cache_size = LEAF_CACHE_SIZE / LEAF_BLOCK_SIZE;
00869 params.node_cache_size = NODE_CACHE_SIZE / NODE_BLOCK_SIZE;
00870
00871 u_btree = new u_btree_t(params);
00872
00873 using std::cout;
00874 using std::cerr;
00875
00876 if (!u_btree->is_valid()) {
00877 cerr << "Error initializing btree. Aborting.\n";
00878 delete u_btree;
00879 exit(1);
00880 }
00881
00882 if (u_btree->load_sorted(is, 1.0, 1.0) != AMI_ERROR_NO_ERROR)
00883 cerr << "Error during bulk loading.\n";
00884
00885
00886 Timer.stop();
00887
00888 STXXL_MSG("Records in map: " << u_btree->size());
00889 STXXL_MSG("Construction elapsed time: " << (Timer.mseconds() / 1000.) <<
00890 " seconds : " << (double(n) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00891
00892
00894 Timer.reset();
00895
00896
00897 Timer.start();
00898
00899 for (i = 0; i < n_inserts; ++i)
00900 {
00901
00902 rand_key(i, element.key_);
00903 u_btree->insert(element);
00904 }
00905
00906 Timer.stop();
00907
00908 STXXL_MSG("Records in map: " << u_btree->size());
00909 STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
00910 " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00911
00912
00914 Timer.reset();
00915
00916
00917 Timer.start();
00918
00919
00920 el_t result;
00921 for (i = 0; i < n_locates; ++i)
00922 {
00923
00924 rand_key(i, element.key_);
00925 u_btree->succ(element.key_, result);
00926 }
00927
00928 Timer.stop();
00929 STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
00930 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00931
00932
00934 Timer.reset();
00935
00936
00937 Timer.start();
00938
00939 stxxl::int64 n_scanned = 0;
00940 MyFilter filter;
00941
00942 for (i = 0; i < n_range_queries; ++i)
00943 {
00944 rand_key(i, element.key_);
00945 my_key begin_key = element.key_;
00946 rand_key(i, element.key_);
00947 if (element.key_ < begin_key)
00948 n_scanned += u_btree->range_query(element.key_, begin_key, NULL, filter);
00949
00950 else
00951 n_scanned += u_btree->range_query(begin_key, element.key_, NULL, filter);
00952
00953
00954 if (n_scanned >= SCAN_LIMIT(n))
00955 {
00956 ++i;
00957 break;
00958 }
00959 }
00960
00961 n_range_queries = i;
00962
00963 Timer.stop();
00964 STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
00965 " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
00966 " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
00967
00968
00970 ran32State = 0xdeadbeef;
00971 memset(element.key_.keybuf, 'a', KEY_SIZE);
00972 memset(element.data_.databuf, 'b', DATA_SIZE);
00973
00974 Timer.reset();
00975
00976 Timer.start();
00977
00978 for (i = n_deletes; i > 0; --i)
00979 {
00980
00981 rand_key(i, element.key_);
00982 u_btree->erase(element.key_);
00983 }
00984
00985 Timer.stop();
00986 STXXL_MSG("Records in map: " << u_btree->size());
00987 STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
00988 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00989 }
00990
00991 void run_bdb_btree_big(stxxl::int64 n, unsigned ops)
00992 {
00993 const char * filename = BDB_FILE;
00994
00995 my_key key1_storage;
00996 my_data data1_storage;
00997
00998 #ifdef BDB_BULK_SCAN
00999 int * bulk_buffer = new int[logbufsize / sizeof(int)];
01000 #endif
01001
01002 unlink(filename);
01003
01004 memset(key1_storage.keybuf, 'a', KEY_SIZE);
01005 memset(data1_storage.databuf, 'b', DATA_SIZE);
01006
01007
01008 Db db(NULL, 0);
01009
01010 try {
01011
01012 Dbt key1(key1_storage.keybuf, KEY_SIZE);
01013 Dbt data1(data1_storage.databuf, DATA_SIZE);
01014
01015 stxxl::timer Timer;
01016 stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
01017 stxxl::int64 i;
01018
01019
01020 ran32State = 0xdeadbeef;
01021
01022 Timer.start();
01023
01024 vector_type SortedSeq(n);
01025
01026 {
01027 rand_key_gen Gen(n, key1_storage);
01028 typedef stxxl::stream::sort<rand_key_gen, comp_type> sorter_type;
01029 sorter_type Sorter(Gen, comp_type(), SORTER_MEM);
01030 typedef key2pair<sorter_type> key2pair_type;
01031 key2pair_type Key2Pair(Sorter);
01032 stxxl::stream::materialize(Key2Pair, SortedSeq.begin());
01033 }
01034
01035 Timer.stop();
01036
01037 STXXL_MSG("Finished sorting input. Elapsed time: " << (Timer.mseconds() / 1000.) << " seconds.");
01038
01039 db.set_errfile(stderr);
01040 db.set_pagesize(pagesize);
01041 db.set_cachesize(0, cachesize, 10);
01042
01043 STXXL_MSG("BDB cache size set.");
01044
01045
01046 db.open(NULL,
01047 filename,
01048 NULL,
01049 DB_BTREE,
01050 DB_CREATE,
01051 0);
01052
01053 db.get_env()->memp_stat(NULL, NULL, DB_STAT_CLEAR);
01054
01055 Timer.reset();
01056 Timer.start();
01057
01058
01059
01060
01061 vector_type::const_iterator cit = SortedSeq.begin();
01062 for (i = 0; i < n; ++i, ++cit)
01063 {
01064 key1_storage = cit->first;
01065 db.put(NULL, &key1, &data1, DB_NOOVERWRITE);
01066 }
01067
01068 Timer.stop();
01069
01070 DB_BTREE_STAT * dbstat;
01071 db.stat(NULL, &dbstat, 0);
01072 STXXL_MSG("Records in map: " << dbstat->bt_ndata);
01073 STXXL_MSG("Construction elapsed time: " << (Timer.mseconds() / 1000.) <<
01074 " seconds : " << (double(n) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
01075
01076 db.stat_print(0);
01077 db.get_env()->memp_stat_print(DB_STAT_CLEAR);
01079
01080
01081 Timer.reset();
01082 Timer.start();
01083
01084 for (i = 0; i < n_inserts; ++i)
01085 {
01086
01087 rand_key(i, key1_storage);
01088 db.put(NULL, &key1, &data1, DB_NOOVERWRITE);
01089 }
01090
01091 Timer.stop();
01092 db.stat(NULL, &dbstat, 0);
01093 STXXL_MSG("Records in map: " << dbstat->bt_ndata);
01094 STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
01095 " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
01096
01097 db.stat_print(0);
01098 db.get_env()->memp_stat_print(DB_STAT_CLEAR);
01099
01101 Timer.reset();
01102 Timer.start();
01103
01104
01105 Dbc * cursorp;
01106 db.cursor(NULL, &cursorp, 0);
01107
01108 for (i = 0; i < n_locates; ++i)
01109 {
01110
01111 rand_key(i, key1_storage);
01112 Dbt keyx(key1_storage.keybuf, KEY_SIZE);
01113 Dbt datax(data1_storage.databuf, DATA_SIZE);
01114
01115 cursorp->get(&keyx, &datax, DB_SET_RANGE);
01116 }
01117
01118 Timer.stop();
01119 STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
01120 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
01121
01122 db.stat_print(0);
01123 db.get_env()->memp_stat_print(DB_STAT_CLEAR);
01124
01126 Timer.reset();
01127
01128 Timer.start();
01129
01130 stxxl::int64 n_scanned = 0;
01131
01132 for (i = 0; i < n_range_queries; ++i)
01133 {
01134
01135 rand_key(i, key1_storage);
01136 my_key last_key = key1_storage;
01137
01138 rand_key(i, key1_storage);
01139 if (last_key < key1_storage)
01140 std::swap(last_key, key1_storage);
01141
01142
01143
01144
01145
01146 Dbt keyx(key1_storage.keybuf, KEY_SIZE);
01147 #ifdef BDB_BULK_SCAN
01148 Dbt datax(bulk_buffer, DATA_SIZE);
01149 datax.set_ulen(logbufsize);
01150 datax.set_flags(DB_DBT_USERMEM);
01151 #else
01152 Dbt datax(data1_storage.databuf, DATA_SIZE);
01153 #endif
01154
01155
01156 #ifdef BDB_BULK_SCAN
01157 if (cursorp->get(&keyx, &datax, DB_SET_RANGE | DB_MULTIPLE_KEY) == DB_NOTFOUND)
01158 continue;
01159
01160
01161 do
01162 {
01163 DbMultipleKeyDataIterator BulkIterator(datax);
01164 Dbt key1, data1;
01165 while (BulkIterator.next(key1, data1) &&
01166 *((my_key *)key1.get_data()) <= last_key)
01167 {
01168 ++n_scanned;
01169
01170 }
01171 if (cursorp->get(&keyx, &datax, DB_NEXT | DB_MULTIPLE_KEY) == DB_NOTFOUND)
01172 break;
01173
01174
01175 if (*((my_key *)keyx.get_data()) > last_key)
01176 {
01177 break;
01178 }
01179 } while (1);
01180
01181 #else
01182 if (cursorp->get(&keyx, &datax, DB_SET_RANGE) == DB_NOTFOUND)
01183 continue;
01184
01185 while (*((my_key *)keyx.get_data()) <= last_key)
01186 {
01187 ++n_scanned;
01188 if (cursorp->get(&keyx, &datax, DB_NEXT) == DB_NOTFOUND)
01189 break;
01190 }
01191 #endif
01192
01193
01194 if (n_scanned >= SCAN_LIMIT(n))
01195 {
01196 ++i;
01197 break;
01198 }
01199 }
01200
01201 n_range_queries = i;
01202
01203 Timer.stop();
01204 if (cursorp != NULL)
01205 cursorp->close();
01206
01207
01208 STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
01209 " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
01210 " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
01211
01212 db.stat_print(0);
01213 db.get_env()->memp_stat_print(DB_STAT_CLEAR);
01214
01216
01217 ran32State = 0xdeadbeef;
01218 memset(key1_storage.keybuf, 'a', KEY_SIZE);
01219
01220 Timer.reset();
01221 Timer.start();
01222
01223 for (i = 0; i < n_deletes; ++i)
01224 {
01225
01226 rand_key(i, key1_storage);
01227 Dbt keyx(key1_storage.keybuf, KEY_SIZE);
01228 db.del(NULL, &keyx, 0);
01229 }
01230
01231 Timer.stop();
01232 db.stat(NULL, &dbstat, 0);
01233 STXXL_MSG("Records in map: " << dbstat->bt_ndata);
01234 STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
01235 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
01236
01237 db.stat_print(0);
01238 db.get_env()->memp_stat_print(DB_STAT_CLEAR);
01239
01240 db.close(0);
01241 }
01242 catch (DbException & e)
01243 {
01244 STXXL_ERRMSG("DbException happened");
01245 }
01246 catch (std::exception & e)
01247 {
01248 STXXL_ERRMSG("std::exception happened");
01249 }
01250
01251 unlink(filename);
01252
01253 #ifdef BDB_BULK_SCAN
01254 delete[] bulk_buffer;
01255 #endif
01256 }
01257
01258
01259 int main(int argc, char * argv[])
01260 {
01261 STXXL_MSG("stxxl::map Real Node block size: " << REAL_NODE_BLOCK_SIZE << " bytes");
01262 STXXL_MSG("stxxl::map Real Leaf block size: " << REAL_LEAF_BLOCK_SIZE << " bytes");
01263 STXXL_MSG("stxxl::map Node max elements : " << REAL_NODE_MELEMENTS);
01264 STXXL_MSG("stxxl::map Leaf max elements : " << REAL_LEAF_MELEMENTS);
01265 #ifdef STXXL_DIRECT_IO_OFF
01266 STXXL_MSG("STXXL_DIRECT_IO_OFF is defined");
01267 #else
01268 STXXL_MSG("STXXL_DIRECT_IO_OFF is NOT defined");
01269 #endif
01270
01271 if (argc < 3)
01272 {
01273 STXXL_MSG("Usage: " << argv[0] << " version #ops");
01274 STXXL_MSG("\t version = 1: test stxxl map");
01275 STXXL_MSG("\t version = 2: test Berkeley DB btree");
01276 STXXL_MSG("\t version = 3: big test stxxl map");
01277 STXXL_MSG("\t version = 4: big test Berkeley DB btree");
01278 STXXL_MSG("\t version = 5: big test TPIE btree");
01279 return -1;
01280 }
01281
01282 init();
01283
01284 int version = atoi(argv[1]);
01285 stxxl::int64 ops = stxxl::atoint64(argv[2]);
01286
01287 STXXL_MSG("Running version : " << version);
01288 STXXL_MSG("Operations to perform: " << ops);
01289 STXXL_MSG("Btree cache size : " << TOTAL_CACHE_SIZE << " bytes");
01290 STXXL_MSG("Leaf block size : " << LEAF_BLOCK_SIZE << " bytes");
01291
01292
01293 switch (version)
01294 {
01295 case 1:
01296 run_stxxl_map(ops);
01297 break;
01298 case 2:
01299 run_bdb_btree(ops);
01300 break;
01301 case 3:
01302 run_stxxl_map_big(ops, 100000);
01303 break;
01304 case 4:
01305 run_bdb_btree_big(ops, 100000);
01306 break;
01307 case 5:
01308 run_tpie_btree_big(ops, 100000);
01309 break;
01310 default:
01311 STXXL_MSG("Unsupported version " << version);
01312 }
01313 }