/* * Given a subsequence of n 64-bit random numbers, compute the number * of bits set in each term, reduce that to low, medium or high number * of bits, and concatenate a bunch of those 3-item buckets. * Do this for len overlapping n-value sequences. And report the chi-square * measure of the results compared to the ideal distribution. */ #include #include #include #include #include #include #include #define MAXBITS 64 #define LOGBUCKETS 2 #define BUCKETS (1ULL<>(8*sizeof(d)-(lrot)))) class Random { public: Random(uint64_t seed1=0, uint64_t seed2=0) { m_a = seed1; m_b = seed2; m_count = -10ULL; for (int i=0; i<10; i++) (void)Next(); } uint64_t Next() { uint64_t temp = m_a + m_count++; m_a = m_b + ROTL(temp, 12); return m_b = 0x581af43eb71d8b3 * temp ^ ROTL(m_a, 28); } uint64_t Prev() { uint64_t temp = 0x6cc3621b095c967b * (m_b ^ ROTL(m_a, 28)); m_b = m_a - ROTL(temp, 12); m_a = temp - --m_count; return m_b; } uint64_t m_a, m_b, m_count, m_c; }; /* count how many bits are set in a 64-bit integer, returns 0..64 */ static uint64_t count(uint64_t x) { uint64_t c = x; if (GRAY_CODE) c = c^(c>>1); c = (c & 0x5555555555555555) + ((c>>1 ) & 0x5555555555555555); c = (c & 0x3333333333333333) + ((c>>2 ) & 0x3333333333333333); c = (c & 0x0f0f0f0f0f0f0f0f) + ((c>>4 ) & 0x0f0f0f0f0f0f0f0f); c = (c & 0x00ff00ff00ff00ff) + ((c>>8 ) & 0x00ff00ff00ff00ff); c = (c & 0x0000ffff0000ffff) + ((c>>16) & 0x0000ffff0000ffff); c = (c & 0x00000000ffffffff) + ((c>>32) & 0x00000000ffffffff); return c; } /* somehow covert [0,8] into 0..BUCKETS-1 */ static uint64_t ftab[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, }; static void datainit( uint64_t *data, uint64_t terms) { for (uint64_t i=0; i<(1ULL<<(LOGBUCKETS*terms)); ++i) { data[i] = 0; } } /* gather statistics on len overlapping subsequences of "terms" values each */ static void gather( Random *r, uint64_t *data, uint64_t len, uint64_t terms) { uint64_t val = 0; uint64_t mask = (1ULL<<(LOGBUCKETS*terms))-1; for (uint64_t i=0; iNext())]; } for (uint64_t i=0; iNext())]; ++data[val]; } } /* figure out the probability of 0..BUCKETS-1=ftab[count(uint64_t)] */ static void probinit( double *pc) { for (uint64_t i=0; i>= LOGBUCKETS; } /* calculate the variance for this bucket */ if (expect < 5.0) { // normal expectother += expect; countother += data[i]; } else { // too big ++buckets; double temp = (double)data[i] - expect; temp = temp*temp/expect; if (temp > 30.0) { k = i; for (uint64_t j=0; j>= LOGBUCKETS; } printf("%14.4f %14.4f %14.4f\n", (float)temp,(float)expect,(float)data[i]); } var += temp; } } /* lump all the others into one bucket */ if (expectother > 5.0) { ++buckets; double temp = (double)countother - expectother; temp = temp*temp/expectother; if (temp > 30.0) { printf("other %14.4f %14.4f %14.4f\n", (float)temp,(float)expectother,(float)countother); } var += temp; } --buckets; /* calculate the total variance and chi-square measure */ printf("expected variance: %11.4f got: %11.4f chi-square: %6.4f\n", (float)buckets, (float)var, (float)((var-buckets)/sqrt((float)buckets))); } int main( int argc, const char **argv) { uint64_t len; uint64_t terms; if (argc == 3) { uint64_t loglen; sscanf(argv[1], "%ld", &loglen); printf("sequence length: 2^^%lu\n", loglen); len = 1ULL< BUCKETS) { fprintf(stderr, "ftab[%d]=%d needs a bigger LOGBUCKETS\n", i, ftab[i]); return 1; } } datainit(data, terms); uint64_t a = GetTickCount(); Random r(0); gather(&r, data, len, terms); uint64_t z = GetTickCount(); printf("milliseconds: %lu\n", (z-a)); chi(data, len, terms); free(data); return 0; }