/* ----------------------------------------------------------------- Software implementation of a 1024-bit hash. A possible candidate for the NIST SHA-3 cryptographic hash competition. Bits are arranged in an 8x8 grid of 16-bit chunks. A round consists of applying a 16-bit permutation to each chunk, then rotating each of the 16 bit positions by different amounts through the 64 chunks. The rotates are chosen so 2 bits each are shifted up by 0..7 and left by 0..7. The nonlinear mix of 16-bit chunks is actually an 8-bit permutation applied to bits 0..7 and again to 8..15, with the 16 bits shuffled at the end. This is 5 NAND-gates deep. 6 of the 16 bits implement a^b^c^d, and can be inverted and still be implemented 5 NAND-gates deep. Symmetry is broken by inverting different subsets of these 6 bits in each of the 64 chunks. Symmetry breaking is done by XORing constants. The 64 16-bit permutations are actually 128 8-bit permutations. All 128 8-bit permutations could be done at once with SSE instructions, but I'm only using 64-bit registers here. This isn't optimized much. It takes 8 rounds forward or 7 rounds backwards for all bits to be affected by all one-bit deltas for nearly-zero states. (7 forward or 6 backwards for random states.) I'll take a leap of faith and say that twice forward + backward avalanche, 30 rounds that is, is enough to thwart cryptanalysis, providing there are no self-referential differentials. So there have to be 30 rounds in between any two things the user can control. I plan to do that by combining a block every 3 rounds, but combining every block 10 times, 6 rounds apart. Every combine will be the XOR of 10 blocks (shifted by 0,1,3,6,10,15,5,12,4,13) plus a key. Back of the envelope says it'd be slightly faster if I went to using each block 15 times with 2 rounds between combines, but it was close, and those blocks aren't in registers, so I stuck with 10. (How come (i choose 2)%16 is a permutation?) (by Bob Jenkins, July 25 2008, ho ho ho, public domain) ----------------------------------------------------------------- */ #include #include #include typedef unsigned long u4; typedef unsigned long long u8; #define BLOCKS 20 /* blocks kept in memory at once */ #define LEN 16 /* number of 8-byte values per block */ #define ROWS 8 #define COLS 8 #define ROUNDS 20000 /* do one 8-bit permutation */ static void eight( u8 *x, u8 *y) { u8 a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12,a13,a14,a15,a16; a1 = x[1]; a7 = x[7]; a15 = a1 ^ a7; a0 = x[0]; a3 = x[3]; a5 = x[5]; y[4] = ((a15 & ((a0 & ~a5) | (a3 & ~a0))) | ~(a15 | (a0 ^ a5))); a12 = a3 ^ a5; a13 = a12 ^ a0; y[1] = a13 ^ a1; a2 = x[2]; y[7] = a13 ^ a2; a4 = x[4]; a16 = a2 ^ a4; y[3] = a12 ^ a16; a6 = x[6]; a11 = a1 & a6; a8 = a1 ^ a6; y[6] = ((a8 & ((a2 & ~a4) | (a7 & ~(a1 & a4)))) | ~(a8 | ((a2 | a7) & ~(a2 & a4)))); a14 = a0 ^ a4; y[2] = ((a14 & a8) | ~(a14 | ((a1 | a7) & ~a11))); a9 = ~a7; a10 = a1 & a7; y[5] = ((a16 & a15) | (~a16 & (a10 | (a6 & a9)))); y[0] = ((a8 & ((a2 & a9) | ~(a10 | a4))) | ((a11 | ~(a1 | (a4 & a6))) & ((a2 & a7) | (a4 & ~a2)))); } static int shift[LEN] = {56,43,22,41,15,52,28,18,38,1,48,35,13,2,31,61}; static int map[LEN] = {2,3,4,7,9,12,14,15,0,5,1,8,6,13,10,11}; /* one permutation of the internal state */ static void perm(u8 x[LEN]) { int i; u8 y[LEN]; /* do two identical 8-bit permutations to all 64 blocks */ eight(x, y); eight(&x[LEN/2], &y[LEN/2]); /* break symmetry among the 64 16-bit permutations */ y[1] ^= 0x18f8aa72369b75c2LL; y[3] ^= 0x337b824aab77201fLL; y[7] ^= 0x60bd51315e37b49cLL; y[9] ^= 0x82ed31eb138e02efLL; y[11] ^= 0x5fe101ed66fc3130LL; y[15] ^= 0x1019906dca58dffbLL; /* shuffle and rotate the output bits */ x[ 0] = (y[8] << 56) | (y[8] >> 8); x[ 1] = (y[10]<< 43) | (y[10]>> 21); x[ 2] = (y[0] << 22) | (y[0] >> 42); x[ 3] = (y[1] << 41) | (y[1] >> 23); x[ 4] = (y[2] << 15) | (y[2] >> 49); x[ 5] = (y[9] << 52) | (y[9] >> 12); x[ 6] = (y[12]<< 28) | (y[12]>> 36); x[ 7] = (y[3] << 18) | (y[3] >> 46); x[ 8] = (y[11]<< 38) | (y[11]>> 26); x[ 9] = (y[4] << 1) | (y[4] >> 63); x[10] = (y[14]<< 48) | (y[14]>> 16); x[11] = (y[15]<< 35) | (y[15]>> 29); x[12] = (y[5] << 13) | (y[5] >> 51); x[13] = (y[13]<< 2) | (y[13]>> 62); x[14] = (y[6] << 31) | (y[6] >> 33); x[15] = (y[7] << 61) | (y[7] >> 3); } static int unperm[256]; /* the inverse of eight(x,y) */ static void uneight(u8 *y, u8 *x) { int i, j; for (i=0; i<8; ++i) { x[i] = 0; } for (i=0; i<64; ++i) { int z = 0; for (j=0; j<8; ++j) { if (y[j] & (((u8)1)<> shift[i]) ^ (x[i] << (64-shift[i])); } /* unshuffle the output bits */ for (i=0; i>(32-k))) u4 ranval( ranctx *x ) { u4 e = x->a - rot(x->b, 27); x->a = x->b ^ rot(x->c, 17); x->b = x->c + x->d; x->c = x->d + e; x->d = e + x->a; return e; } void show(u8 x[LEN]) { int i,j,k; for (i=0; i=4; ) { printf(" %s", (x[k] & (((u8)1)<<(COLS*i+j))) ? "1" : "0"); } } printf("\n"); for (j=0; j=12; ) { printf(" %s", (x[k] & (((u8)1)<<(COLS*i+j))) ? "1" : "0"); } } printf("\n"); } printf("\n"); } void showx(float x, float *worst, int verbose) { x = x/ROUNDS; if (x < *worst) *worst = x; if (verbose) printf("%3d", (int)(100*x + 0.5)); if (1.0-x < *worst) *worst = 1.0-x; } float showc(float c[ROWS][COLS][LEN], int verbose) { int i,j,k; float worst = 1.0; for (i=0; i=4; ) { showx(c[i][j][k], &worst, verbose); } if (verbose) printf(" "); } if (verbose) printf("\n"); for (j=0; j=12; ) { showx(c[i][j][k], &worst, verbose); } if (verbose) printf(" "); } if (verbose) printf("\n\n"); } /* printf(" worst = %f\n", worst); */ return worst; } void test(ranctx *rctx) { u8 x[LEN], y[LEN]; u4 m,n,o,p,count,i,j,k; float val, worst = 1.0; float zcount = 0.0; /* check that perm() is reversible and iperm() is its reverse */ for (i=0; i>32), (u4)val[i]); } printf("\n"); for (i=0; i>32), (u4)val[i]); } }