diff --git a/bmw_large.c b/bmw_large.c index 454c81a..102f2ec 100644 --- a/bmw_large.c +++ b/bmw_large.c @@ -164,13 +164,25 @@ uint64_t bmw_large_r7(uint64_t x){ r = ROTR64(x, 64-53); return r; } - -#define K 0x0555555555555555LL +/* +#define K 0x0555555555555555LL +#define MASK 0xFFFFFFFFFFFFFFFFLL static uint64_t k_lut[] PROGMEM = { - 16LL*K, 17LL*K, 18LL*K, 19LL*K, 20LL*K, 21LL*K, 22LL*K, 23LL*K, - 24LL*K, 25LL*K, 26LL*K, 27LL*K, 28LL*K, 29LL*K, 30LL*K, 31LL*K }; - + 16LL*K, 17LL*K, 18LL*K, 19LL*K, + 20LL*K, 21LL*K, 22LL*K, 23LL*K, + 24LL*K, 25LL*K, 26LL*K, 27LL*K, + 28LL*K, 29LL*K, 30LL*K, 31LL*K }; +*/ +/* the same as above but precomputed to avoid compiler warnings */ +static +uint64_t k_lut[] PROGMEM = { + 0x5555555555555550LL, 0x5aaaaaaaaaaaaaa5LL, 0x5ffffffffffffffaLL, + 0x655555555555554fLL, 0x6aaaaaaaaaaaaaa4LL, 0x6ffffffffffffff9LL, + 0x755555555555554eLL, 0x7aaaaaaaaaaaaaa3LL, 0x7ffffffffffffff8LL, + 0x855555555555554dLL, 0x8aaaaaaaaaaaaaa2LL, 0x8ffffffffffffff7LL, + 0x955555555555554cLL, 0x9aaaaaaaaaaaaaa1LL, 0x9ffffffffffffff6LL, + 0xa55555555555554bLL }; uint64_t bmw_large_expand1(uint8_t j, const uint64_t* q, const void* m){ uint64_t(*s[])(uint64_t) = {bmw_large_s1, bmw_large_s2, bmw_large_s3, bmw_large_s0}; @@ -239,27 +251,29 @@ uint8_t f0_lut[] PROGMEM ={ }; void bmw_large_f0(uint64_t* q, uint64_t* h, const void* m){ - uint8_t i,j=0,v,sign,l=4; + uint8_t i,j=-1,v,sign,l=0; uint64_t(*s[])(uint64_t)={ bmw_large_s0, bmw_large_s1, bmw_large_s2, bmw_large_s3, bmw_large_s4 }; for(i=0; i<16; ++i){ h[i] ^= ((uint64_t*)m)[i]; } dump_x(h, 16, 'T'); - memset(q, 0, 4*16); +// memset(q, 0, 4*16); for(i=0; i<5*16; ++i){ v = pgm_read_byte(f0_lut+i); sign = v&1; v >>=1; + if(i==l){ + j++; + l+=5; + q[j] = h[v]; + continue; + } if(sign){ q[j] -= h[v]; }else{ q[j] += h[v]; } - if(i==l){ - j++; - l+=5; - } } dump_x(q, 16, 'W'); for(i=0; i<16; ++i){ diff --git a/bmw_small.c b/bmw_small.c index 006f1e0..0806f61 100644 --- a/bmw_small.c +++ b/bmw_small.c @@ -166,13 +166,25 @@ uint32_t bmw_small_r7(uint32_t x){ r = ROTR32(x, 5); return r; } - +/* #define K 0x05555555L static uint32_t k_lut[] PROGMEM = { 16L*K, 17L*K, 18L*K, 19L*K, 20L*K, 21L*K, 22L*K, 23L*K, 24L*K, 25L*K, 26L*K, 27L*K, 28L*K, 29L*K, 30L*K, 31L*K }; +*/ +/* same as above but precomputed to avoid compiler warnings */ + +static +uint32_t k_lut[] PROGMEM = { + 0x55555550L, 0x5aaaaaa5L, 0x5ffffffaL, + 0x6555554fL, 0x6aaaaaa4L, 0x6ffffff9L, + 0x7555554eL, 0x7aaaaaa3L, 0x7ffffff8L, + 0x8555554dL, 0x8aaaaaa2L, 0x8ffffff7L, + 0x9555554cL, 0x9aaaaaa1L, 0x9ffffff6L, + 0xa555554bL }; + uint32_t bmw_small_expand1(uint8_t j, const uint32_t* q, const void* m){ uint32_t(*s[])(uint32_t) = {bmw_small_s1, bmw_small_s2, bmw_small_s3, bmw_small_s0}; @@ -232,27 +244,29 @@ uint8_t f0_lut[] PROGMEM = { }; void bmw_small_f0(uint32_t* q, uint32_t* h, const void* m){ - uint8_t i,j=0,v,sign,l=4; + uint8_t i,j=-1,v,sign,l=0; uint32_t(*s[])(uint32_t)={ bmw_small_s0, bmw_small_s1, bmw_small_s2, bmw_small_s3, bmw_small_s4 }; for(i=0; i<16; ++i){ h[i] ^= ((uint32_t*)m)[i]; } dump_x(h, 16, 'T'); - memset(q, 0, 4*16); + // memset(q, 0, 4*16); for(i=0; i<5*16; ++i){ v = pgm_read_byte(f0_lut+i); sign = v&1; v >>=1; + if(i==l){ + j++; + l+=5; + q[j] = h[v]; + continue; + } if(sign){ q[j] -= h[v]; }else{ q[j] += h[v]; } - if(i==l){ - j++; - l+=5; - } } dump_x(q, 16, 'W'); for(i=0; i<16; ++i){