2017-03-25 20:12:56 +00:00
|
|
|
/// Taken from SMHasher.
|
2014-05-21 02:19:44 +00:00
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
// Flipping a single bit of a key should cause an "avalanche" of changes in
// the hash function's output. Ideally, each output bit should flip 50% of
// the time - if the probability of an output bit flipping is not 50%, that bit
// is "biased". Too much bias means that patterns applied to the input will
// cause "echoes" of the patterns in the output, which in turn can cause the
// hash function to fail to create an even, random distribution of hash values.
|
|
|
|
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include "Random.h"
|
|
|
|
|
|
|
|
#include <vector>
|
|
|
|
#include <math.h>
|
2018-05-07 02:06:00 +00:00
|
|
|
#include <stdio.h>
|
2014-05-21 02:19:44 +00:00
|
|
|
|
|
|
|
// Avalanche fails if a bit is biased by more than 1%
|
|
|
|
|
|
|
|
#define AVALANCHE_FAIL 0.01
|
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
/// Declared here, defined elsewhere. AvalancheTest passes it the flip-count table
/// filled by calcBias together with the repetition count and compares the returned
/// value against AVALANCHE_FAIL.
/// NOTE(review): presumably returns the largest deviation from 50% as a fraction - confirm against the definition.
double maxBias(std::vector<int> & counts, int reps);
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
/// Pointer to a hash function under test. The tests in this file call it with a key
/// buffer (`blob`, `len` bytes), a seed of 0, and the address of a hash-typed object as `out`.
/// NOTE(review): `out` presumably must be at least as large as the hash's output - confirm with callers.
typedef void (*pfHash)(const void * blob, const int len, const uint32_t seed, void * out);
|
2014-05-21 02:19:44 +00:00
|
|
|
|
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
/// Reads a single bit from a raw byte buffer.
///
/// @param block  Start of the buffer to read from.
/// @param len    Size of the buffer in bytes.
/// @param bit    Zero-based bit index; bit 0 is the least significant bit of byte 0.
/// @return The value (0 or 1) of the addressed bit, or 0 if the bit lies at or beyond `len` bytes.
inline uint32_t getbit(const void * block, int len, uint32_t bit)
{
    // static_cast is the proper cast from void*, and reading through a pointer-to-const
    // means constness never has to be cast away.
    const uint8_t * bytes = static_cast<const uint8_t *>(block);

    const int byte = static_cast<int>(bit >> 3);
    const uint32_t shift = bit & 0x7;

    if (byte < len)
        return (bytes[byte] >> shift) & 1;

    return 0;
}
|
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
template <typename T>
|
|
|
|
inline uint32_t getbit(T & blob, uint32_t bit)
|
2014-05-21 02:19:44 +00:00
|
|
|
{
|
2018-05-07 02:06:00 +00:00
|
|
|
return getbit(&blob, sizeof(blob), bit);
|
2014-05-21 02:19:44 +00:00
|
|
|
}
|
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
/// Inverts a single bit inside a raw byte buffer.
///
/// @param block  Start of the buffer to modify.
/// @param len    Size of the buffer in bytes.
/// @param bit    Zero-based bit index; bit 0 is the least significant bit of byte 0.
///               Indices at or beyond `len` bytes are ignored and leave the buffer untouched.
inline void flipbit(void * block, int len, uint32_t bit)
{
    // static_cast is the proper cast from void* to an object pointer.
    uint8_t * bytes = static_cast<uint8_t *>(block);

    const int byte = static_cast<int>(bit >> 3);
    const uint32_t shift = bit & 0x7;

    if (byte < len)
        bytes[byte] ^= static_cast<uint8_t>(1u << shift);  // explicit narrowing of the one-bit mask
}
|
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
template <typename T>
|
|
|
|
inline void flipbit(T & blob, uint32_t bit)
|
2014-05-21 02:19:44 +00:00
|
|
|
{
|
2018-05-07 02:06:00 +00:00
|
|
|
flipbit(&blob, sizeof(blob), bit);
|
2014-05-21 02:19:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
template <typename keytype, typename hashtype>
|
|
|
|
void calcBias(pfHash hash, std::vector<int> & counts, int reps, Rand & r)
|
2014-05-21 02:19:44 +00:00
|
|
|
{
|
2018-05-07 02:06:00 +00:00
|
|
|
const int keybytes = sizeof(keytype);
|
|
|
|
const int hashbytes = sizeof(hashtype);
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
const int keybits = keybytes * 8;
|
|
|
|
const int hashbits = hashbytes * 8;
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
keytype K;
|
|
|
|
hashtype A, B;
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
for (int irep = 0; irep < reps; irep++)
|
|
|
|
{
|
|
|
|
if (irep % (reps / 10) == 0)
|
|
|
|
printf(".");
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
r.rand_p(&K, keybytes);
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
hash(&K, keybytes, 0, &A);
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-09-02 03:00:04 +00:00
|
|
|
int * cursor = counts.data();
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
for (int iBit = 0; iBit < keybits; iBit++)
|
|
|
|
{
|
|
|
|
flipbit(&K, keybytes, iBit);
|
|
|
|
hash(&K, keybytes, 0, &B);
|
|
|
|
flipbit(&K, keybytes, iBit);
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
for (int iOut = 0; iOut < hashbits; iOut++)
|
|
|
|
{
|
|
|
|
int bitA = getbit(&A, hashbytes, iOut);
|
|
|
|
int bitB = getbit(&B, hashbytes, iOut);
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
(*cursor++) += (bitA ^ bitB);
|
|
|
|
}
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2014-05-21 02:19:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
template <typename keytype, typename hashtype>
|
|
|
|
bool AvalancheTest(pfHash hash, const int reps)
|
2014-05-21 02:19:44 +00:00
|
|
|
{
|
2018-05-07 02:06:00 +00:00
|
|
|
Rand r(48273);
|
2017-03-31 16:00:30 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
const int keybytes = sizeof(keytype);
|
|
|
|
const int hashbytes = sizeof(hashtype);
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
const int keybits = keybytes * 8;
|
|
|
|
const int hashbits = hashbytes * 8;
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
printf("Testing %3d-bit keys -> %3d-bit hashes, %8d reps", keybits, hashbits, reps);
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
//----------
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
std::vector<int> bins(keybits * hashbits, 0);
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
calcBias<keytype, hashtype>(hash, bins, reps, r);
|
2017-03-31 16:00:30 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
//----------
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
bool result = true;
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
double b = maxBias(bins, reps);
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
printf(" worst bias is %f%%", b * 100.0);
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
if (b > AVALANCHE_FAIL)
|
|
|
|
{
|
|
|
|
printf(" !!!!! ");
|
|
|
|
result = false;
|
|
|
|
}
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
printf("\n");
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
return result;
|
2014-05-21 02:19:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
// BIC test variant - store all intermediate data in a table, draw diagram
|
|
|
|
// afterwards (much faster)
|
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
template <typename keytype, typename hashtype>
|
|
|
|
void BicTest3(pfHash hash, const int reps, bool verbose = true)
|
2014-05-21 02:19:44 +00:00
|
|
|
{
|
2018-05-07 02:06:00 +00:00
|
|
|
const int keybytes = sizeof(keytype);
|
|
|
|
const int keybits = keybytes * 8;
|
|
|
|
const int hashbytes = sizeof(hashtype);
|
|
|
|
const int hashbits = hashbytes * 8;
|
|
|
|
const int pagesize = hashbits * hashbits * 4;
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
Rand r(11938);
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
double maxBias = 0;
|
|
|
|
int maxK = 0;
|
|
|
|
int maxA = 0;
|
|
|
|
int maxB = 0;
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
keytype key;
|
|
|
|
hashtype h1, h2;
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
std::vector<int> bins(keybits * pagesize, 0);
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
for (int keybit = 0; keybit < keybits; keybit++)
|
|
|
|
{
|
|
|
|
if (keybit % (keybits / 10) == 0)
|
|
|
|
printf(".");
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
int * page = &bins[keybit * pagesize];
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
for (int irep = 0; irep < reps; irep++)
|
|
|
|
{
|
|
|
|
r.rand_p(&key, keybytes);
|
|
|
|
hash(&key, keybytes, 0, &h1);
|
|
|
|
flipbit(key, keybit);
|
|
|
|
hash(&key, keybytes, 0, &h2);
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
hashtype d = h1 ^ h2;
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
for (int out1 = 0; out1 < hashbits - 1; out1++)
|
|
|
|
for (int out2 = out1 + 1; out2 < hashbits; out2++)
|
|
|
|
{
|
|
|
|
int * b = &page[(out1 * hashbits + out2) * 4];
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
uint32_t x = getbit(d, out1) | (getbit(d, out2) << 1);
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
b[x]++;
|
|
|
|
}
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
printf("\n");
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
for (int out1 = 0; out1 < hashbits - 1; out1++)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2018-05-07 02:06:00 +00:00
|
|
|
for (int out2 = out1 + 1; out2 < hashbits; out2++)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2018-05-07 02:06:00 +00:00
|
|
|
if (verbose)
|
|
|
|
printf("(%3d,%3d) - ", out1, out2);
|
|
|
|
|
|
|
|
for (int keybit = 0; keybit < keybits; keybit++)
|
|
|
|
{
|
|
|
|
int * page = &bins[keybit * pagesize];
|
2018-08-26 02:17:18 +00:00
|
|
|
int * bins_in_page = &page[(out1 * hashbits + out2) * 4];
|
2018-05-07 02:06:00 +00:00
|
|
|
|
|
|
|
double bias = 0;
|
|
|
|
|
|
|
|
for (int b = 0; b < 4; b++)
|
|
|
|
{
|
2018-08-26 02:17:18 +00:00
|
|
|
double b2 = static_cast<double>(bins_in_page[b]) / static_cast<double>(reps / 2);
|
2018-05-07 02:06:00 +00:00
|
|
|
b2 = fabs(b2 * 2 - 1);
|
|
|
|
|
|
|
|
if (b2 > bias)
|
|
|
|
bias = b2;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bias > maxBias)
|
|
|
|
{
|
|
|
|
maxBias = bias;
|
|
|
|
maxK = keybit;
|
|
|
|
maxA = out1;
|
|
|
|
maxB = out2;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (verbose)
|
|
|
|
{
|
|
|
|
if (bias < 0.01)
|
|
|
|
printf(".");
|
|
|
|
else if (bias < 0.05)
|
|
|
|
printf("o");
|
|
|
|
else if (bias < 0.33)
|
|
|
|
printf("O");
|
|
|
|
else
|
|
|
|
printf("X");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Finished keybit
|
|
|
|
|
|
|
|
if (verbose)
|
|
|
|
printf("\n");
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
if (verbose)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2018-05-07 02:06:00 +00:00
|
|
|
for (int i = 0; i < keybits + 12; i++)
|
|
|
|
printf("-");
|
|
|
|
printf("\n");
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
}
|
2014-05-21 02:19:44 +00:00
|
|
|
|
2018-05-07 02:06:00 +00:00
|
|
|
printf("Max bias %f - (%3d : %3d,%3d)\n", maxBias, maxK, maxA, maxB);
|
2014-05-21 02:19:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|