From a397db40b94b01a3ed72f9367961cf68d82593d1 Mon Sep 17 00:00:00 2001 From: bg Date: Fri, 5 Dec 2008 12:53:15 +0000 Subject: [PATCH] md5_lastBlock() now in ASM --- avr-asm-macros.S | 27 ++++++ md5-asm.S | 198 +++++++++++++++++++++++++++++++++++---- md5-stub.c | 119 +++-------------------- md5.c | 23 ++++- md5.h | 15 ++- test_src/main-md5-test.c | 7 +- 6 files changed, 257 insertions(+), 132 deletions(-) diff --git a/avr-asm-macros.S b/avr-asm-macros.S index f878be8..4991cee 100644 --- a/avr-asm-macros.S +++ b/avr-asm-macros.S @@ -82,6 +82,33 @@ out _SFR_IO_ADDR(SPL), \reg1 .endm + +.macro stack_alloc_large size:req, reg1=r30, reg2=r31 + in r0, _SFR_IO_ADDR(SREG) /* stack_alloc_large: SP -= size; clobbers r0, reg1, reg2 */ + in \reg1, _SFR_IO_ADDR(SPL) + in \reg2, _SFR_IO_ADDR(SPH) + subi \reg1, lo8(\size) + sbci \reg2, hi8(\size) + cli + out _SFR_IO_ADDR(SPH), \reg2 + out _SFR_IO_ADDR(SREG), r0 + out _SFR_IO_ADDR(SPL), \reg1 +.endm + +.macro stack_free_large size:req, reg1=r30, reg2=r31 + in r0, _SFR_IO_ADDR(SREG) /* stack_free_large: SP += size; clobbers r0, reg1, reg2; SREG is saved here and written back below */ + in \reg1, _SFR_IO_ADDR(SPL) + in \reg2, _SFR_IO_ADDR(SPH) + subi \reg1, lo8(-(\size)) /* add size by subtracting its negation: valid for any 16-bit size, whereas ADIW only accepts 0..63 */ + sbci \reg2, hi8(-(\size)) /* propagate into the high byte through reg2; needs r16..r31, same as stack_alloc_large */ + cli + out _SFR_IO_ADDR(SPH), \reg2 + out _SFR_IO_ADDR(SREG), r0 + out _SFR_IO_ADDR(SPL), \reg1 +.endm + + + /******************************************************************************* * END of MACRO SECTION * *******************************************************************************/ diff --git a/md5-asm.S b/md5-asm.S index 4932bb5..2928fe3 100644 --- a/md5-asm.S +++ b/md5-asm.S @@ -301,9 +301,9 @@ ARG_Z3 = 17 md5_core_asm: - push r28 - push r29 - push_range 4, 17 + push r16 + push r17 + push_range 4, 8 ldi r30, lo8(T_table) ldi r31, hi8(T_table) lsl P_I @@ -445,9 +445,9 @@ fixrotl: st X+, r24 st X , r25 md5_core_exit: - pop_range 4, 17 - pop r29 - pop r28 + pop_range 4, 8 + pop r17 + pop r16 ret ;################################################################### @@ -531,9 +531,9 @@ I_REG = 8 .global md5_nextBlock md5_nextBlock: stack_alloc 16 - push_range 2, 8 - push r16 - push r17 + push_range 2, 17 
+ push r28 + push r29 push r24 push r25 adiw r30, 1 /* Z now points to the beginning of the allocated memory */ @@ -735,17 +735,181 @@ md5_nextBlock: st X+, r0 2: - pop r17 - pop r16 - pop_range 2, 8 + pop r29 + pop r28 + pop_range 2, 17 stack_free 16 ret +;############################################################################### +/* +void md5_lastBlock(md5_ctx_t *state, const void* block, uint16_t length_b){ + uint16_t l; + uint8_t b[64]; + while (length_b >= 512){ + md5_nextBlock(state, block); + length_b -= 512; + block = ((uint8_t*)block) + 512/8; + } + memset(b, 0, 64); + memcpy(b, block, length_b/8); + / * insert padding one * / + l=length_b/8; + if(length_b%8){ + uint8_t t; + t = ((uint8_t*)block)[l]; + t |= (0x80>>(length_b%8)); + b[l]=t; + }else{ + b[l]=0x80; + } + / * insert length value * / + if(l+sizeof(uint64_t) >= 512/8){ + md5_nextBlock(state, b); + state->counter--; + memset(b, 0, 64-8); + } + *((uint64_t*)&b[64-sizeof(uint64_t)]) = (state->counter * 512) + length_b; + md5_nextBlock(state, b); +} +*/ +; state_ptr : r24,r25 +; block_ptr : r22,r23 +; length_b : r20,r21 +.global md5_lastBlock +md5_lastBlock: + stack_alloc_large 64 /* 64-byte block buffer b[] on the stack; Z is left holding the new SP */ + push_range 12, 17 + push r30 /* keep Z across the md5_nextBlock calls below */ + push r31 + movw r16, r20 /* length_b */ + movw r14, r22 /* block_ptr */ + movw r12, r24 /* state_ptr */ + +1: /* while (length_b >= 512): tested at the loop top with an unsigned compare */ + cpi r17, 2 /* hi8(512) */ + brlo 2f + movw r24, r12 + movw r22, r14 + rcall md5_nextBlock + ldi r18, 64 + add r14, r18 /* block_ptr += 64 (r1 is used as the zero register) */ + adc r15, r1 + subi r17, 2 /* length_b -= 512 */ + rjmp 1b /* re-test the NEW value; the former brge tested the old one, signed */ +2: + pop r31 + pop r30 + + adiw r30, 1 /* Z -> first byte of the buffer */ + movw r26, r14 + movw r24, r16 + adiw r24, 7 + lsr r25 /* (length_b+7) can reach 518 = 10 bits, so the first two shifts must be 16 bit wide */ + ror r24 + lsr r25 + ror r24 + lsr r24 /* r24 now holds how many bytes are to copy */ + ldi r18, 64 + sub r18, r24 /* r18 = bytes still free in the buffer */ + tst r24 +4: + breq 5f + ld r0, X+ + st Z+, r0 + dec r24 + rjmp 4b +5: /* append 1-bit */ + mov r20, r16 + andi r20, 0x07 + brne bit_fucking + ldi r19, 0x80 + st Z+, r19 + dec r18 + rjmp after_bit_fucking +bit_fucking: + ldi r19, 0x80 +1: + lsr r19 + dec r20 + brne 1b + or r0, r19 /* r0 still holds the last (partial) byte copied above */ + st -Z, r0 + adiw r30, 1 
+after_bit_fucking: + clt /* T flag: set below when the 8-byte length field still fits into this block */ + cpi r18, 8 /* r18 = bytes still free in the buffer */ + brmi 2f + set /* store in t if the counter will also fit in this block */ +2: + tst r18 + breq 2f +1: + st Z+, r1 /* zero-fill the rest of the buffer; r1 is presumably the avr-gcc zero register - TODO confirm */ + dec r18 + brne 1b +2: + sbiw r30, 63 /* rewind Z by 64 in two steps, SBIW only takes 0..63 */ + sbiw r30, 1 + movw r14, r30 /* r14:r15 = start of the buffer from here on */ + brts load_counter /* T set: padding and length share one block */ + movw r24, r12 + movw r22, r14 + rcall md5_nextBlock /* NOTE(review): the brts further down assumes md5_nextBlock leaves the T flag untouched - verify */ + movw r30, r14 + ldi r20, 64-8 /* clear the 56 data bytes of the extra block */ +3: + st Z+, r1 + dec r20 + brne 3b + +load_counter: + movw r26, r12 + adiw r26, 16 /* X -> byte offset 16 of the state, presumably state->counter - confirm against md5_ctx_t */ + ld r19, X+ + ld r20, X+ + ld r21, X+ + ld r22, X+ + brts post_counter_decrement + subi r19, 1 /* extra block was hashed: decrement the local copy only, state->counter itself is not written back */ + sbci r20, 0 + sbci r21, 0 + sbci r22, 0 +post_counter_decrement: + clr r18 /* r23:r18 = counter * 512: counter sits one byte up (r18 = 0) and is shifted left by one bit */ + clr r23 + lsl r19 + rol r20 + rol r21 + rol r22 + rol r23 + add r18, r16 /* + length_b, the bits remaining in the last block */ + adc r19, r17 + adc r20, r1 + adc r21, r1 + adc r22, r1 + adc r23, r1 + movw r30, r14 + adiw r30, 64-8 /* Z -> last 8 bytes of the buffer: bit length, low byte first */ + st Z+, r18 + st Z+, r19 + st Z+, r20 + st Z+, r21 + st Z+, r22 + st Z+, r23 + st Z+, r1 + st Z, r1 + + sbiw r30, 63 /* the last store had no post-increment, so -63 is the buffer start */ +; sbiw r30, 1 + movw r24, r12 + movw r22, r30 + rcall md5_nextBlock +md5_lastBlock_exit: + pop_range 12, 17 + stack_free_large 64 /* release the 64-byte block buffer */ + ret + - - - - - diff --git a/md5-stub.c b/md5-stub.c index fedbe8b..2631c37 100644 --- a/md5-stub.c +++ b/md5-stub.c @@ -24,113 +24,20 @@ #undef DEBUG -void md5_core(uint32_t* a, void* block, uint8_t as, uint8_t s, uint8_t i, uint8_t fi); -/* -#define ROTL32(x,n) (((x)<<(n)) | ((x)>>(32-(n)))) - -static -void md5_core(uint32_t* a, void* block, uint8_t as, uint8_t s, uint8_t i, uint8_t fi){ - uint32_t t; - md5_func_t* funcs[]={md5_F, md5_G, md5_H, md5_I}; - as &= 0x3; - // * a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). 
* / -#ifdef DEBUG - char funcc[]={'*', '-', '+', '~'}; - uart_putstr("\r\n DBG: md5_core ["); - uart_putc(funcc[fi]); - uart_hexdump(&as, 1); uart_putc(' '); - uart_hexdump(&k, 1); uart_putc(' '); - uart_hexdump(&s, 1); uart_putc(' '); - uart_hexdump(&i, 1); uart_putc(']'); -#endif - t = a[as] + funcs[fi](a[(as+1)&3], a[(as+2)&3], a[(as+3)&3]) + *((uint32_t*)block) + md5_T[i] ; - a[as]=a[(as+1)&3] + ROTL32(t, s); -} -*/ - -#if 0 -void md5_nextBlock(md5_ctx_t *state, void* block){ - uint32_t a[4]; - uint8_t m,n,i=0; - /* this requires other mixed sboxes */ -#ifdef DEBUG - uart_putstr("\r\n DBG: md5_nextBlock: block:\r\n"); - uart_hexdump(block, 16); uart_putstr("\r\n"); - uart_hexdump(block+16, 16); uart_putstr("\r\n"); - uart_hexdump(block+32, 16); uart_putstr("\r\n"); - uart_hexdump(block+48, 16); uart_putstr("\r\n"); -#endif - - a[0]=state->a[0]; - a[1]=state->a[1]; - a[2]=state->a[2]; - a[3]=state->a[3]; - - /* round 1 */ - uint8_t s1t[]={7,12,17,22}; // 1,-1 1,4 2,-1 3,-2 - for(m=0;m<4;++m){ - for(n=0;n<4;++n){ - md5_core(a, &(((uint32_t*)block)[m*4+n]), 4-n, s1t[n],i++,0); - } - } - /* round 2 */ - uint8_t s2t[]={5,9,14,20}; // 1,-3 1,1 2,-2 2,4 - for(m=0;m<4;++m){ - for(n=0;n<4;++n){ - md5_core(a, &(((uint32_t*)block)[(1+m*4+n*5)&0xf]), 4-n, s2t[n],i++,1); - } - } - /* round 3 */ - uint8_t s3t[]={4,11,16,23}; // 0,4 1,3 2,0 3,-1 - for(m=0;m<4;++m){ - for(n=0;n<4;++n){ - md5_core(a, &(((uint32_t*)block)[(5-m*4+n*3)&0xf]), 4-n, s3t[n],i++,2); - } - } - /* round 4 */ - uint8_t s4t[]={6,10,15,21}; // 1,-2 1,2 2,-1 3,-3 - for(m=0;m<4;++m){ - for(n=0;n<4;++n){ - md5_core(a, &(((uint32_t*)block)[(0-m*4+n*7)&0xf]), 4-n, s4t[n],i++,3); - } - } - state->a[0] += a[0]; - state->a[1] += a[1]; - state->a[2] += a[2]; - state->a[3] += a[3]; - state->counter++; -} -#endif - -void md5_lastBlock(md5_ctx_t *state, void* block, uint16_t length_b){ - uint16_t l; - uint8_t b[64]; - while (length_b >= 512){ - md5_nextBlock(state, block); - length_b -= 512; - block = 
((uint8_t*)block) + 512/8; - } - memset(b, 0, 64); - memcpy(b, block, length_b/8); - /* insert padding one */ - l=length_b/8; - if(length_b%8){ - uint8_t t; - t = ((uint8_t*)block)[l]; - t |= (0x80>>(length_b%8)); - b[l]=t; - }else{ - b[l]=0x80; - } - /* insert length value */ - if(l+sizeof(uint64_t) >= 512/8){ - md5_nextBlock(state, b); - state->counter--; - memset(b, 0, 64); - } - *((uint64_t*)&b[64-sizeof(uint64_t)]) = (state->counter * 512) + length_b; - md5_nextBlock(state, b); +void md5_ctx2hash(md5_hash_t* dest, const md5_ctx_t* state){ /* copy the MD5_HASH_BYTES bytes of state->a to dest */ + memcpy(dest, state->a, MD5_HASH_BYTES); } +void md5(md5_hash_t* dest, const void* msg, uint32_t length_b){ /* one-shot MD5 of msg (length_b is in bits); result is written to dest */ + md5_ctx_t ctx; + md5_init(&ctx); + while(length_b>=MD5_BLOCK_BITS){ + md5_nextBlock(&ctx, msg); + msg = (const uint8_t*)msg + MD5_BLOCK_BYTES; /* advance one block without casting away const */ + length_b -= MD5_BLOCK_BITS; + } + md5_lastBlock(&ctx, msg, length_b); /* length_b < MD5_BLOCK_BITS here, so it fits the uint16_t parameter */ + md5_ctx2hash(dest, &ctx); +} diff --git a/md5.c b/md5.c index c995b2a..ac07042 100644 --- a/md5.c +++ b/md5.c @@ -84,7 +84,7 @@ void md5_core(uint32_t* a, void* block, uint8_t as, uint8_t s, uint8_t i, uint8_ a[as]=a[(as+1)&3] + ROTL32(t, s); } -void md5_nextBlock(md5_ctx_t *state, void* block){ +void md5_nextBlock(md5_ctx_t *state, const void* block){ uint32_t a[4]; uint8_t m,n,i=0; /* this requires other mixed sboxes */ @@ -136,7 +136,7 @@ void md5_nextBlock(md5_ctx_t *state, void* block){ state->counter++; } -void md5_lastBlock(md5_ctx_t *state, void* block, uint16_t length_b){ +void md5_lastBlock(md5_ctx_t *state, const void* block, uint16_t length_b){ uint16_t l; uint8_t b[64]; while (length_b >= 512){ @@ -160,8 +160,25 @@ void md5_lastBlock(md5_ctx_t *state, void* block, uint16_t length_b){ if(l+sizeof(uint64_t) >= 512/8){ md5_nextBlock(state, b); state->counter--; - memset(b, 0, 64); + memset(b, 0, 64-8); /* the last 8 bytes are overwritten with the bit length just below */ } *((uint64_t*)&b[64-sizeof(uint64_t)]) = (state->counter * 512) + length_b; md5_nextBlock(state, b); } + +void md5_ctx2hash(md5_hash_t* dest, const md5_ctx_t* state){ + memcpy(dest, state->a, 
MD5_HASH_BYTES); +} + +void md5(md5_hash_t* dest, const void* msg, uint32_t length_b){ /* one-shot MD5 of msg (length_b is in bits); result is written to dest */ + md5_ctx_t ctx; + md5_init(&ctx); + while(length_b>=MD5_BLOCK_BITS){ + md5_nextBlock(&ctx, msg); + msg = (const uint8_t*)msg + MD5_BLOCK_BYTES; /* advance one block without casting away const */ + length_b -= MD5_BLOCK_BITS; + } + md5_lastBlock(&ctx, msg, length_b); /* length_b < MD5_BLOCK_BITS here, so it fits the uint16_t parameter */ + md5_ctx2hash(dest, &ctx); +} + diff --git a/md5.h b/md5.h index f7e51f2..446575f 100644 --- a/md5.h +++ b/md5.h @@ -31,14 +31,25 @@ #include <stdint.h> + +#define MD5_HASH_BITS 128 +#define MD5_HASH_BYTES (MD5_HASH_BITS/8) +#define MD5_BLOCK_BITS 512 +#define MD5_BLOCK_BYTES (MD5_BLOCK_BITS/8) + + typedef struct md5_ctx_st { uint32_t a[4]; uint32_t counter; } md5_ctx_t; +typedef uint8_t md5_hash_t[MD5_HASH_BYTES]; + void md5_init(md5_ctx_t *s); -void md5_nextBlock(md5_ctx_t *state, void* block); -void md5_lastBlock(md5_ctx_t *state, void* block, uint16_t length); +void md5_nextBlock(md5_ctx_t *state, const void* block); +void md5_lastBlock(md5_ctx_t *state, const void* block, uint16_t length); +void md5_ctx2hash(md5_hash_t* dest, const md5_ctx_t* state); /* copies the digest out of a finished context */ +void md5(md5_hash_t* dest, const void* msg, uint32_t length_b); /* one-shot hash; length_b is the message length in bits */ #endif /*MD5_H_*/ diff --git a/test_src/main-md5-test.c b/test_src/main-md5-test.c index c3e40da..1bc69fa 100644 --- a/test_src/main-md5-test.c +++ b/test_src/main-md5-test.c @@ -84,7 +84,7 @@ void testrun_nessie_md5(void){ */ void testrun_md5(void){ - md5_ctx_t s; + md5_hash_t hash; char* testv[]={ "", "a", @@ -100,9 +100,8 @@ void testrun_md5(void){ uart_putstr("\r\n MD5 (\""); uart_putstr(testv[i]); uart_putstr("\") = \r\n\t"); - md5_init(&s); - md5_lastBlock(&s, testv[i], strlen(testv[i])*8); - uart_hexdump(&(s.a[0]), 16); + md5(&hash, testv[i], strlen(testv[i])*8); + uart_hexdump(hash, 16); } }