small optimizations to sha2 / sha256

bg nerilex 2014-12-30 14:07:01 +01:00
parent a49bff0095
commit 8d9f80e5ae
5 changed files with 145 additions and 123 deletions

.bzrignore Normal file

@@ -0,0 +1 @@
.svn

sha2_large_common.c

@@ -53,8 +53,8 @@ uint64_t change_endian64(uint64_t x){
uint8_t i=8;
do{
r <<= 8;
r |= 0xff&x;
x >>=8;
r |= (uint8_t)x;
x >>= 8;
}while(--i);
return r;
}
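
As a side note, the byte swap above can equivalently be written as a byte-wise big-endian load, which also avoids any alignment assumptions about the input pointer. A minimal host-side sketch (hypothetical helper, not code from this commit):

#include <stdint.h>

/* Hypothetical helper: assemble a 64-bit value from 8 bytes stored
 * most-significant-byte first. Yields the same result as applying
 * change_endian64() to a native little-endian load of those bytes. */
static uint64_t load_be64(const void *p)
{
    const uint8_t *b = (const uint8_t *)p;
    uint64_t r = 0;
    uint8_t i;
    for (i = 0; i < 8; ++i) {
        r = (r << 8) | b[i];    /* shift accumulated bytes up, append the next one */
    }
    return r;
}
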
@@ -63,12 +63,12 @@ uint64_t change_endian64(uint64_t x){
static const
uint64_t rotr64(uint64_t x, uint8_t n){
return (x>>n)|(x<<(64-n));
return (x >> n) | (x << (64 - n));
}
static const
uint64_t rotl64(uint64_t x, uint8_t n){
return (x<<n)|(x>>(64-n));
return (x << n) | (x >> (64 - n));
}
static const
@@ -86,8 +86,8 @@ uint64_t pgm_read_uint64_t_P(const uint64_t * p){
#define MAJ(x,y,z) (((x)&(y))^((x)&(z))^((y)&(z)))
#define SIGMA_0(x) (rotr64((x), 28) ^ rotl64((x), 30) ^ rotl64((x), 25))
#define SIGMA_1(x) (rotr64((x), 14) ^ rotr64((x), 18) ^ rotl64((x), 23))
#define SIGMA_a(x) (rotr64((x), 1) ^ rotr64((x), 8) ^ ((x)>>7))
#define SIGMA_b(x) (rotr64((x), 19) ^ rotl64((x), 3) ^ ((x)>>6))
#define SIGMA_a(x) (rotr64((x), 1) ^ rotr64((x), 8) ^ ((x) >> 7))
#define SIGMA_b(x) (rotr64((x), 19) ^ rotl64((x), 3) ^ ((x) >> 6))
void sha2_large_common_nextBlock(sha2_large_common_ctx_t *ctx, const void *block){
uint64_t w[16], wx;
@@ -95,23 +95,23 @@ void sha2_large_common_nextBlock(sha2_large_common_ctx_t *ctx, const void *block
uint64_t t1, t2;
const uint64_t *k=sha2_large_common_const;
uint8_t i;
i=16;
do{
w[16-i] = change_endian64(*((const uint64_t*)block));
i = 16;
do {
w[16 - i] = change_endian64(*((const uint64_t*)block));
block = (uint8_t*)block + 8;
}while(--i);
memcpy(a, ctx->h, 8*8);
for(i=0; i<80; ++i){
if(i<16){
wx=w[i];
} while(--i);
memcpy(a, ctx->h, 8 * 8);
for(i = 0; i < 80; ++i){
if(i < 16){
wx = w[i];
}else{
wx = SIGMA_b(w[14]) + w[9] + SIGMA_a(w[1]) + w[0];
memmove(&(w[0]), &(w[1]), 15*8);
memmove(&(w[0]), &(w[1]), 15 * 8);
w[15] = wx;
}
t1 = a[7] + SIGMA_1(a[4]) + CH(a[4], a[5], a[6]) + pgm_read_uint64_t_P(k++) + wx;
t2 = SIGMA_0(a[0]) + MAJ(a[0], a[1], a[2]);
memmove(&(a[1]), &(a[0]), 7*8);
memmove(&(a[1]), &(a[0]), 7 * 8);
a[0] = t1 + t2;
a[4] += t1;
}
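
The SIGMA macro changes above rest on the identity rotr64(x, n) == rotl64(x, 64 - n): the large SHA-512 rotate distances 34, 39, 41 and 61 are rewritten as the shorter left rotates 30, 25, 23 and 3, which are cheaper to shuffle on an 8-bit target. A host-side self-check of that identity (sketch only, assuming the standard SHA-512 rotation constants):

#include <assert.h>
#include <stdint.h>

static uint64_t ref_rotr64(uint64_t x, uint8_t n) { return (x >> n) | (x << (64 - n)); }
static uint64_t ref_rotl64(uint64_t x, uint8_t n) { return (x << n) | (x >> (64 - n)); }

int main(void)
{
    uint64_t x = 0x0123456789abcdefULL;
    assert(ref_rotr64(x, 34) == ref_rotl64(x, 30));  /* SIGMA_0 */
    assert(ref_rotr64(x, 39) == ref_rotl64(x, 25));  /* SIGMA_0 */
    assert(ref_rotr64(x, 41) == ref_rotl64(x, 23));  /* SIGMA_1 */
    assert(ref_rotr64(x, 61) == ref_rotl64(x,  3));  /* SIGMA_b */
    return 0;
}
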

sha2_small_common.c

@@ -22,6 +22,8 @@
#include <avr/pgmspace.h>
#include "sha2_small_common.h"
#include <stdio.h>
#include <inttypes.h>
#define LITTLE_ENDIAN
@@ -29,34 +31,36 @@
* rotate x right by n positions
*/
static
uint32_t rotr32( uint32_t x, uint8_t n){
return ((x>>n) | (x<<(32-n)));
uint32_t rotr32(uint32_t x, uint8_t n){
return ((x >> n) | (x << (32 - n)));
}
static
uint32_t rotl32( uint32_t x, uint8_t n){
return ((x<<n) | (x>>(32-n)));
uint32_t rotl32(uint32_t x, uint8_t n){
return ((x << n) | (x >> (32 - n)));
}
/*************************************************************************/
// #define CHANGE_ENDIAN32(x) (((x)<<24) | ((x)>>24) | (((x)& 0x0000ff00)<<8) | (((x)& 0x00ff0000)>>8))
static
uint32_t change_endian32(uint32_t x){
return (((x)<<24) | ((x)>>24) | (((x)& 0x0000ff00)<<8) | (((x)& 0x00ff0000)>>8));
return (((x) << 24) |
((x) >> 24) |
(((x) & 0x0000ff00) << 8) |
(((x) & 0x00ff0000) >> 8));
}
/* sha256 functions as macros for speed and size, cause they are called only once */
#define CH(x,y,z) (((x)&(y)) ^ ((~(x))&(z)))
#define MAJ(x,y,z) (((x)&(y)) ^ ((x)&(z)) ^ ((y)&(z)))
#define CH(x,y,z) (((x) & (y)) ^ ((~(x)) & (z)))
#define MAJ(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
#define SIGMA_0(x) (rotr32((x), 2) ^ rotr32((x),13) ^ rotl32((x),10))
#define SIGMA_1(x) (rotr32((x), 6) ^ rotr32((x),11) ^ rotl32((x),7))
#define SIGMA_a(x) (rotr32((x), 7) ^ rotl32((x),14) ^ ((x)>>3))
#define SIGMA_b(x) (rotl32((x),15) ^ rotl32((x),13) ^ ((x)>>10))
#define SIGMA_0(x) (rotr32((x), 2) ^ rotr32((x), 13) ^ rotl32((x), 10))
#define SIGMA_1(x) (rotr32((x), 6) ^ rotr32((x), 11) ^ rotl32((x), 7))
#define SIGMA_a(x) (rotr32((x), 7) ^ rotl32((x), 14) ^ ((x) >> 3))
#define SIGMA_b(x) (rotl32((x), 15) ^ rotl32((x), 13) ^ ((x) >> 10))
const
uint32_t k[] PROGMEM = {
@@ -88,32 +92,35 @@ void sha2_small_common_nextBlock (sha2_small_common_ctx_t *state, const void *bl
#elif defined BIG_ENDIAN
memcpy((void*)w, block, 64);
#endif
/*
for (i=16; i<64; ++i){
w[i] = SIGMA_b(w[i-2]) + w[i-7] + SIGMA_a(w[i-15]) + w[i-16];
}
*/
/* init working variables */
memcpy((void*)a,(void*)(state->h), 8*4);
memcpy(&a[0], &state->h[0], sizeof(a));
/* do the, fun stuff, */
for (i=0; i<64; ++i){
if(i<16){
for (i = 0; i < 64; ++i){
if(i < 16){
wx = w[i];
}else{
wx = SIGMA_b(w[14]) + w[9] + SIGMA_a(w[1]) + w[0];
memmove(&(w[0]), &(w[1]), 15*4);
w[15] = wx;
wx = SIGMA_b(w[14])
+ w[9]
+ SIGMA_a(w[1])
+ w[0];
memmove(&w[0], &w[1], sizeof(w) - sizeof(w[0]));
w[15] = wx;
}
t1 = a[7] + SIGMA_1(a[4]) + CH(a[4],a[5],a[6]) + pgm_read_dword(&k[i]) + wx;
t2 = SIGMA_0(a[0]) + MAJ(a[0],a[1],a[2]);
memmove(&(a[1]), &(a[0]), 7*4); /* a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; */
t1 = a[7]
+ SIGMA_1(a[4])
+ CH(a[4], a[5], a[6])
+ pgm_read_dword(&k[i]) + wx;
t2 = SIGMA_0(a[0])
+ MAJ(a[0], a[1], a[2]);
t2 += t1;
memmove(&a[1], &a[0], sizeof(a) - sizeof(a[0]));
a[4] += t1;
a[0] = t1 + t2;
a[0] = t2;
}
/* update, the, state, */
for (i=0; i<8; ++i){
for (i = 0; i < 8; ++i){
state->h[i] += a[i];
}
state->length += 1;
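
For readers comparing against the specification: the memmove() over a[] plus the two additions in the loop above implement the usual SHA-256 shuffle of the eight working variables (and with the rewritten t2 += t1, the final a[0] = t2 stores the same value as a[0] = t1 + t2). Spelled out with the textbook names, as an illustrative sketch only (t1 and t2 assumed to be computed exactly as in the loop body):

#include <stdint.h>

/* Illustration of one round's working-variable update with explicit
 * names instead of the array-plus-memmove form used above. */
typedef struct { uint32_t a, b, c, d, e, f, g, h; } work_vars_t;

static void round_shuffle(work_vars_t *v, uint32_t t1, uint32_t t2)
{
    v->h = v->g;        /* a[7] = a[6] */
    v->g = v->f;        /* a[6] = a[5] */
    v->f = v->e;        /* a[5] = a[4] */
    v->e = v->d + t1;   /* a[4] = a[3] + t1 */
    v->d = v->c;        /* a[3] = a[2] */
    v->c = v->b;        /* a[2] = a[1] */
    v->b = v->a;        /* a[1] = a[0] */
    v->a = t1 + t2;     /* a[0] = t1 + t2 */
}
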
@@ -123,19 +130,19 @@ void sha2_small_common_nextBlock (sha2_small_common_ctx_t *state, const void *bl
void sha2_small_common_lastBlock(sha2_small_common_ctx_t *state, const void *block, uint16_t length_b){
uint8_t lb[512/8]; /* local block */
uint64_t len;
while(length_b>=512){
while(length_b >= 512){
sha2_small_common_nextBlock(state, block);
length_b -= 512;
block = (uint8_t*)block+64;
block = (uint8_t*)block + 64;
}
len = state->length*512 + length_b;
len = state->length * 512 + length_b;
memset(lb, 0, 64);
memcpy(lb, block, (length_b+7)/8);
memcpy(lb, block, (length_b + 7) / 8);
/* set the final one bit */
lb[length_b/8] |= 0x80>>(length_b & 0x7);
lb[length_b / 8] |= 0x80 >> (length_b & 0x7);
/* pad with zeros */
if (length_b>=512-64){ /* not enouth space for 64bit length value */
if (length_b >= 512 - 64){ /* not enough space for 64bit length value */
sha2_small_common_nextBlock(state, lb);
memset(lb, 0, 64);
}
@@ -143,10 +150,10 @@ void sha2_small_common_lastBlock(sha2_small_common_ctx_t *state, const void *blo
#if defined LITTLE_ENDIAN
/* this is now rolled up */
uint8_t i;
i=7;
do{
lb[63-i] = ((uint8_t*)&len)[i];
}while(i--);
i = 7;
do {
lb[63 - i] = ((uint8_t*)&len)[i];
} while(i--);
#elif defined BIG_ENDIAN
*((uint64_t)&(lb[56])) = len;
#endif
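
The round-constant table k[] in this file already lives in flash (PROGMEM) and is read with pgm_read_dword(); the commit applies the same pattern to sha256.c below, where k[] and the init vector move into PROGMEM and are fetched via pgm_read_dword() and memcpy_P(). A minimal standalone sketch of that access pattern (hypothetical table name, illustrative only):

#include <stdint.h>
#include <avr/pgmspace.h>

/* Hypothetical table placed in flash; a plain table_P[i] access would
 * read from the corresponding RAM address, so every load has to go
 * through a pgm_read_* accessor. */
static const uint32_t table_P[4] PROGMEM = {
    0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL
};

uint32_t table_sum(void)
{
    uint32_t s = 0;
    uint8_t i;
    for (i = 0; i < 4; ++i) {
        s += pgm_read_dword(&table_P[i]);   /* 32-bit load from flash */
    }
    return s;
}
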

sha256.c

@@ -31,6 +31,7 @@
#include <stdint.h>
#include <string.h> /* for memcpy, memmove, memset */
#include <avr/pgmspace.h>
#include "sha256.h"
#define LITTLE_ENDIAN
@@ -44,9 +45,9 @@
/*************************************************************************/
uint32_t sha256_init_vector[]={
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 };
static const uint32_t sha256_init_vector[] PROGMEM = {
0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL };
/*************************************************************************/
@@ -58,8 +59,8 @@ uint32_t sha256_init_vector[]={
* @return none
*/
void sha256_init(sha256_ctx_t *state){
state->length=0;
memcpy(state->h, sha256_init_vector, 8*4);
state->length = 0;
memcpy_P(state->h, sha256_init_vector, 8 * 4);
}
/*************************************************************************/
@@ -68,7 +69,7 @@ void sha256_init(sha256_ctx_t *state){
* rotate x right by n positions
*/
uint32_t rotr32( uint32_t x, uint8_t n){
return ((x>>n) | (x<<(32-n)));
return ((x >> n) | (x << (32 - n)));
}
@@ -77,7 +78,10 @@ uint32_t rotr32( uint32_t x, uint8_t n){
// #define CHANGE_ENDIAN32(x) (((x)<<24) | ((x)>>24) | (((x)& 0x0000ff00)<<8) | (((x)& 0x00ff0000)>>8))
uint32_t change_endian32(uint32_t x){
return (((x)<<24) | ((x)>>24) | (((x)& 0x0000ff00)<<8) | (((x)& 0x00ff0000)>>8));
return ( ((x) << 24)
| ((x) >> 24)
| (((x) & 0x0000ff00UL) << 8)
| (((x) & 0x00ff0000UL) >> 8) );
}
@@ -85,66 +89,73 @@ uint32_t change_endian32(uint32_t x){
/* sha256 functions as macros for speed and size, cause they are called only once */
#define CH(x,y,z) (((x)&(y)) ^ ((~(x))&(z)))
#define MAJ(x,y,z) (((x)&(y)) ^ ((x)&(z)) ^ ((y)&(z)))
#define CH(x,y,z) (((x) & (y)) ^ ((~(x)) & (z)))
#define MAJ(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
#define SIGMA0(x) (rotr32((x),2) ^ rotr32((x),13) ^ rotr32((x),22))
#define SIGMA1(x) (rotr32((x),6) ^ rotr32((x),11) ^ rotr32((x),25))
#define SIGMA_a(x) (rotr32((x),7) ^ rotr32((x),18) ^ ((x)>>3))
#define SIGMA_b(x) (rotr32((x),17) ^ rotr32((x),19) ^ ((x)>>10))
#define SIGMA0(x) (rotr32((x), 2) ^ rotr32((x), 13) ^ rotr32((x), 22))
#define SIGMA1(x) (rotr32((x), 6) ^ rotr32((x), 11) ^ rotr32((x), 25))
#define SIGMA_a(x) (rotr32((x), 7) ^ rotr32((x), 18) ^ ((x) >> 3))
#define SIGMA_b(x) (rotr32((x), 17) ^ rotr32((x), 19) ^ ((x) >> 10))
uint32_t k[]={
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
static const uint32_t k[] PROGMEM = {
0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, 0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL,
0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL, 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL,
0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL, 0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL,
0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, 0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL,
0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL, 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL,
0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, 0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL,
0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL, 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL,
0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL, 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL
};
/*************************************************************************/
/**
* block must be, 512, Bit = 64, Byte, long !!!
* block must be 512 Bit = 64 Byte long !!!
*/
void sha256_nextBlock (sha256_ctx_t *state, const void *block){
uint32_t w[64]; /* this is 256, byte, large, */
uint32_t w[16]; /* this is 64 Byte large, */
uint8_t i;
uint32_t a[8],t1,t2;
uint32_t a[8], t1, t2;
/* init w */
/* init working variables */
memcpy((void*)a,(void*)(state->h), 8 * 4);
/* init w */
#if defined LITTLE_ENDIAN
for (i=0; i<16; ++i){
w[i]= change_endian32(((uint32_t*)block)[i]);
}
for (i = 0; i < 16; ++i) {
w[i] = change_endian32(((uint32_t*)block)[i]);
}
#elif defined BIG_ENDIAN
memcpy((void*)w, block, 64);
#endif
for (i=16; i<64; ++i){
w[i] = SIGMA_b(w[i-2]) + w[i-7] + SIGMA_a(w[i-15]) + w[i-16];
}
/* init working variables */
memcpy((void*)a,(void*)(state->h), 8*4);
/*
for (i = 16; i < 64; ++i) {
w[i] = SIGMA_b(w[i - 2]) + w[i - 7] + SIGMA_a(w[i - 15]) + w[i - 16];
}
*/
/* do the, fun stuff, */
for (i=0; i<64; ++i){
t1 = a[7] + SIGMA1(a[4]) + CH(a[4],a[5],a[6]) + k[i] + w[i];
t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]);
memmove(&(a[1]), &(a[0]), 7*4); /* a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; */
a[4] += t1;
a[0] = t1 + t2;
}
for (i=0; i<64; ++i) {
if (i > 15) {
w[i % 16] = SIGMA_b(w[(i + 14) % 16])
+ w[(i + 9) % 16]
+ SIGMA_a(w[(i + 1) % 16])
+ w[i % 16];
}
t1 = a[7] + SIGMA1(a[4]) + CH(a[4], a[5], a[6]) + pgm_read_dword(&k[i]) + w[i % 16];
t2 = SIGMA0(a[0]) + MAJ(a[0], a[1], a[2]);
memmove(&(a[1]), &(a[0]), 7 * 4); /* a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; */
a[4] += t1;
a[0] = t1 + t2;
}
/* update, the, state, */
for (i=0; i<8; ++i){
state->h[i] += a[i];
}
state->length += 512;
for (i = 0; i < 8; ++i){
state->h[i] += a[i];
}
state->length += 1;
}
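
The main RAM saving in sha256_nextBlock() is the message schedule shrinking from uint32_t w[64] (256 bytes) to a 16-word ring buffer (64 bytes): for i >= 16, w[i mod 16] is recomputed in place, with the offsets -2, -7, -15, -16 turning into +14, +9, +1, +0 modulo 16. A host-side sketch checking that the ring buffer reproduces the full schedule (standard SHA-256 sigma functions assumed; not code from this commit):

#include <assert.h>
#include <stdint.h>

static uint32_t rr32(uint32_t x, unsigned n) { return (x >> n) | (x << (32 - n)); }
#define SIG_A(x) (rr32((x), 7) ^ rr32((x), 18) ^ ((x) >> 3))
#define SIG_B(x) (rr32((x), 17) ^ rr32((x), 19) ^ ((x) >> 10))

int main(void)
{
    uint32_t full[64], ring[16];
    uint8_t i;
    for (i = 0; i < 16; ++i) {
        full[i] = ring[i] = (uint32_t)(i + 1) * 0x01010101UL;   /* arbitrary test block */
    }
    for (i = 16; i < 64; ++i) {
        full[i] = SIG_B(full[i - 2]) + full[i - 7] + SIG_A(full[i - 15]) + full[i - 16];
        ring[i % 16] = SIG_B(ring[(i + 14) % 16]) + ring[(i + 9) % 16]
                     + SIG_A(ring[(i + 1) % 16]) + ring[i % 16];
        assert(full[i] == ring[i % 16]);
    }
    return 0;
}
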
@@ -159,39 +170,42 @@ void sha256_nextBlock (sha256_ctx_t *state, const void *block){
* if you have a message with bits at the end, the byte must be padded with zeros
*/
void sha256_lastBlock(sha256_ctx_t *state, const void *block, uint16_t length){
uint8_t lb[SHA256_BLOCK_BITS/8]; /* local block */
uint8_t lb[SHA256_BLOCK_BITS / 8]; /* local block */
uint64_t msg_len;
while(length>=SHA256_BLOCK_BITS){
sha256_nextBlock(state, block);
length -= SHA256_BLOCK_BITS;
block = (uint8_t*)block+SHA256_BLOCK_BYTES;
block = (uint8_t*)block + SHA256_BLOCK_BYTES;
}
state->length += length;
memcpy (&(lb[0]), block, length/8);
msg_len = state->length;
msg_len *= 512;
msg_len += length;
memcpy (&(lb[0]), block, length / 8);
/* set the final one bit */
if (length & 0x7){ // if we have single bits at the end
lb[length/8] = ((uint8_t*)(block))[length/8];
if (length & 7){ // if we have single bits at the end
lb[length / 8] = ((uint8_t*)(block))[length / 8];
} else {
lb[length/8] = 0;
lb[length / 8] = 0;
}
lb[length/8] |= 0x80>>(length & 0x7);
length =(length >> 3) + 1; /* from now on length contains the number of BYTES in lb*/
lb[length / 8] |= 0x80 >> (length & 7);
length = (length / 8) + 1; /* from now on length contains the number of BYTES in lb*/
/* pad with zeros */
if (length>64-8){ /* not enouth space for 64bit length value */
memset((void*)(&(lb[length])), 0, 64-length);
if (length > 64 - 8){ /* not enouth space for 64bit length value */
memset((void*)(&(lb[length])), 0, 64 - length);
sha256_nextBlock(state, lb);
state->length -= 512;
length = 0;
}
memset((void*)(&(lb[length])), 0, 56-length);
memset((void*)(&(lb[length])), 0, 56 - length);
/* store the 64bit length value */
#if defined LITTLE_ENDIAN
/* this is now rolled up */
uint8_t i;
for (i=1; i<=8; ++i){
lb[55+i] = (uint8_t)(state->length>>(64- 8*i));
}
/* this is now rolled up */
uint8_t i = 7;
do {
lb[56 + i] = msg_len & 0xff;
msg_len >>= 8;
} while (i--);
#elif defined BIG_ENDIAN
*((uint64_t)&(lb[56])) = state->length;
#endif
@@ -223,7 +237,7 @@ void sha256(sha256_hash_t *dest, const void *msg, uint32_t length){ /* length co
void sha256_ctx2hash(sha256_hash_t *dest, const sha256_ctx_t *state){
#if defined LITTLE_ENDIAN
uint8_t i;
for(i=0; i<8; ++i){
for(i = 0; i < 8; ++i){
((uint32_t*)dest)[i] = change_endian32(state->h[i]);
}
#elif BIG_ENDIAN

sha256.h

@@ -49,9 +49,9 @@
*/
#define SHA256_HASH_BITS 256
#define SHA256_HASH_BYTES (SHA256_HASH_BITS/8)
#define SHA256_HASH_BYTES (SHA256_HASH_BITS / 8)
#define SHA256_BLOCK_BITS 512
#define SHA256_BLOCK_BYTES (SHA256_BLOCK_BITS/8)
#define SHA256_BLOCK_BYTES (SHA256_BLOCK_BITS / 8)
/** \typedef sha256_ctx_t
* \brief SHA-256 context type
@@ -60,7 +60,7 @@
*/
typedef struct {
uint32_t h[8];
uint64_t length;
uint32_t length;
} sha256_ctx_t;
/** \typedef sha256_hash_t
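
Finally, the context's length field shrinks from a 64-bit bit counter to a 32-bit counter of processed 512-bit blocks (the sha256.h hunk above), and sha256_lastBlock() rebuilds the 64-bit bit length for the padding as blocks * 512 + trailing bits, widening before the multiply. A standalone sketch of that arithmetic (hypothetical helper mirroring the msg_len computation in the diff):

#include <stdint.h>

/* full_blocks counts complete 512-bit blocks, trailing_bits the bits of
 * the final partial block (0..511). Widen to 64 bit first so the
 * multiplication cannot overflow. */
static uint64_t message_bits(uint32_t full_blocks, uint16_t trailing_bits)
{
    uint64_t len = full_blocks;
    len *= 512;
    len += trailing_bits;
    return len;
}

/* e.g. 3 full blocks plus 8 trailing bits -> 3 * 512 + 8 = 1544 bits */
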