Grafalgo
Library of useful data structures and algorithms
/Users/jst/src/grafalgo/cpp/dataStructures/hash/perf/perfHashMap.cpp
00001 #include "stdinc.h"
00002 #include "HashMap.h"
00003 #include "Util.h"
00004 #include <vector>
00005 
00006 using namespace grafalgo;
00007 
00008 int64_t cycCnt(void)
00009 {
00010  uint32_t hi, lo;
00011  uint64_t v64;
00012 
00013  /* rdtsc: Loads the time-stamp counter value into %edx:%eax */
00014  asm volatile ("cpuid; rdtsc; movl %%edx, %0; movl %%eax, %1" /* Read the counter */
00015      : "=r" (hi), "=r" (lo)                  /* output */
00016      :                                       /* input (none) */
00017      : "%edx", "%eax");                      /* Let gcc know whch registers are used */
00018 
00019  v64 = ((uint64_t)hi << 32) + (uint64_t)lo;
00020 
00021  return (int64_t)v64;
00022 }
00023 
00024 int calibrate() {
00025         int result;
00026         for (int i = 1; i <= 5; i++) {
00027                 int64_t cyc0 = cycCnt(); uint32_t t0 = Util::getTime();
00028                 usleep(20000);
00029                 int64_t cyc1 = cycCnt(); uint32_t t1 = Util::getTime();
00030                 cout << (cyc1-cyc0) << " cycles, " << (t1-t0) << " us,"
00031                      << (cyc1-cyc0)/(t1-t0) << " cycles/us\n";
00032                 result = (int) ((cyc1-cyc0)/(t1-t0));
00033         }
00034         return result;
00035 }
00036 
00037 void perfTest(int n, int ticksPerUs) {
00038         int repCnt = 10;
00039         HashMap map(n);
00040         uint64_t keys[n+1];
00041 
00042         cout << "perfTest " << n << endl;
00043 
00044         // fill table with pairs having random keys
00045         int miss = 0;
00046         for (int i = 1; i <= n; ) {
00047                 keys[i] = random(); keys[i] *= random();
00048                 if (map.put(keys[i],1)) i++;
00049                 else miss++;
00050         }
00051         if (miss > 0)
00052                 cout << "put failed " << miss << " times during initial "
00053                         "insert operations\n";
00054 
00055         vector<int> loadDist(20);
00056         int b = map.loadStats(loadDist);
00057         cout << "initial load distribution: ";
00058         for (int i = 0; i < b; i++) cout << loadDist[i] << " ";
00059         cout << endl;
00060                 
00061         // next do random searches on full table, starting with
00062         // keys that are in table, then arbitrary keys (mostly
00063         // not in table)
00064         int cnt = 0; int badCnt = 0;
00065         double minTime = BIGINT; double maxTime = 0; double avgTime = 0;
00066         for (int i = 1; i <= n; i++) {
00067                 int *samples = new int[repCnt+1];
00068                 for (int j = 1; j <= repCnt; j++) {
00069                         samples[j] = randint(1,n);
00070                 }
00071                 int64_t t0 = cycCnt();
00072                 for (int j = 1; j <= repCnt; j++) {
00073                         int k = samples[j];
00074                         map.get(keys[k]);
00075                 }
00076                 int64_t t1 = cycCnt();
00077                 int64_t diff = t1 - t0; 
00078                 if (diff > 2*repCnt*ticksPerUs) { badCnt++; continue; }
00079                 cnt++;
00080                 double insertTime = ((double) diff)/repCnt;
00081                 minTime = min(minTime,insertTime);
00082                 maxTime = max(maxTime,insertTime);
00083                 avgTime += insertTime;
00084                 delete [] samples;
00085         }
00086         avgTime /= cnt;
00087         avgTime *= (1000.0/ticksPerUs);
00088         minTime *= (1000.0/ticksPerUs);
00089         maxTime *= (1000.0/ticksPerUs);
00090         cout << "time for get operations on random keys in table: "
00091              << minTime << " " << avgTime << " " << maxTime << endl;
00092         if (badCnt > 0) cout << badCnt << " rejected samples\n";
00093 
00094         cnt = 0; badCnt = 0; minTime = BIGINT; maxTime = 0; avgTime = 0;
00095         for (int i = 1; i <= n; i++) {
00096                 uint64_t *newKeys = new uint64_t[repCnt+1];
00097                 for (int j = 1; j <= repCnt; j++) {
00098                         newKeys[j] = random(); newKeys[j] *= random();
00099                 }
00100                 int64_t t0 = cycCnt();
00101                 for (int j = 1; j <= repCnt; j++) {
00102                         map.get(newKeys[j]);
00103                 }
00104                 int64_t t1 = cycCnt();
00105                 int64_t diff = t1 - t0;
00106                 if (diff > 2*repCnt*ticksPerUs) { badCnt++; continue; }
00107                 cnt++;
00108                 double insertTime = ((double) diff)/repCnt;
00109                 minTime = min(minTime,insertTime);
00110                 maxTime = max(maxTime,insertTime);
00111                 avgTime += insertTime;
00112                 delete [] newKeys;
00113         }
00114         avgTime /= cnt;
00115         avgTime *= (1000.0/ticksPerUs);
00116         minTime *= (1000.0/ticksPerUs);
00117         maxTime *= (1000.0/ticksPerUs);
00118         cout << "time for get operations on random keys : "
00119              << minTime << " " << avgTime << " " << maxTime << endl;
00120         if (badCnt > 0) cout << badCnt << " rejected samples\n";
00121 
00122         // now, do remove/insert operation pairs
00123         cnt = 0; badCnt = 0; minTime = BIGINT; maxTime = 0; avgTime = 0;
00124         for (int i = 1; i <= n; i++) {
00125                 int *samples = new int[repCnt+1];
00126                 uint64_t *newKeys = new uint64_t[repCnt+1];
00127                 for (int j = 1; j <= repCnt; j++) {
00128                         samples[j] = randint(1,n);
00129                         newKeys[j] = random(); newKeys[j] *= random();
00130                 }
00131                 int64_t t0 = cycCnt();
00132                 for (int j = 1; j <= repCnt; j++) {
00133                         int k = samples[j];
00134                         map.remove(keys[k]);
00135                         map.put(newKeys[j],1);
00136                         keys[k] = newKeys[j];
00137                 }
00138                 int64_t t1= cycCnt();
00139                 int64_t diff = t1 - t0;
00140                 if (diff > 4*repCnt*ticksPerUs) { badCnt++; continue; } 
00141                 cnt++;
00142                 double insertTime = ((double) diff)/repCnt;
00143                 minTime = min(minTime,insertTime);
00144                 maxTime = max(maxTime,insertTime);
00145                 avgTime += insertTime;
00146                 delete [] samples;
00147                 delete [] newKeys;
00148         }
00149         b = map.loadStats(loadDist);
00150         cout << "final load distribution: ";
00151         for (int i = 0; i < b; i++) cout << loadDist[i] << " ";
00152         cout << endl;
00153 
00154         avgTime /= cnt;
00155         avgTime *= (1000.0/ticksPerUs);
00156         minTime *= (1000.0/ticksPerUs);
00157         maxTime *= (1000.0/ticksPerUs);
00158         cout << "time for random remove/put operations: "
00159              << minTime << " " << avgTime << " " << maxTime << endl;
00160         if (badCnt > 0) cout << badCnt << " rejected samples\n";
00161 }
00162 
00163 int main() {
00164         int ticksPerUs = calibrate();
00165         perfTest(1 << 8,ticksPerUs);
00166         perfTest(1 << 9,ticksPerUs);
00167         perfTest(1 << 10,ticksPerUs);
00168         perfTest(1 << 11,ticksPerUs);
00169         perfTest(1 << 12,ticksPerUs);
00170         perfTest(1 << 13,ticksPerUs);
00171         perfTest(1 << 14,ticksPerUs);
00172         perfTest(1 << 15,ticksPerUs);
00173         perfTest(1 << 16,ticksPerUs);
00174         perfTest(1 << 17,ticksPerUs);
00175         perfTest(1 << 18,ticksPerUs);
00176         perfTest(1 << 19,ticksPerUs);
00177         perfTest((1 << 20)-1,ticksPerUs);
00178 }
 All Classes Files Functions Variables Typedefs Friends