md5_lastBlock() now in ASM

This commit is contained in:
bg 2008-12-05 12:53:15 +00:00
parent e9d9457ed0
commit a397db40b9
6 changed files with 257 additions and 132 deletions

View File

@ -82,6 +82,33 @@
out _SFR_IO_ADDR(SPL), \reg1
.endm
/*
 * stack_alloc_large size [, reg1 [, reg2]]
 *
 * Reserves \size bytes on the stack by subtracting \size from the 16-bit
 * stack pointer (SPH:SPL).
 *
 *   size : number of bytes to reserve (16-bit constant, split via lo8/hi8)
 *   reg1 : receives the low byte of the new stack pointer  (default r30)
 *   reg2 : receives the high byte of the new stack pointer (default r31)
 *
 * On exit \reg2:\reg1 hold the new stack pointer value and r0 is
 * overwritten with the SREG value that was read on entry.
 * NOTE(review): subi/sbci take an immediate, which presumably limits
 * reg1/reg2 to r16..r31 -- confirm against the AVR instruction set manual.
 */
.macro stack_alloc_large size:req, reg1=r30, reg2=r31
in r0, _SFR_IO_ADDR(SREG) /* remember SREG (incl. interrupt flag) before the arithmetic changes the flags */
in \reg1, _SFR_IO_ADDR(SPL)
in \reg2, _SFR_IO_ADDR(SPH)
subi \reg1, lo8(\size) /* 16-bit subtract: reg2:reg1 -= size */
sbci \reg2, hi8(\size)
cli /* interrupts off while SPH/SPL are being updated */
out _SFR_IO_ADDR(SPH), \reg2
out _SFR_IO_ADDR(SREG), r0 /* restore saved SREG; NOTE(review): placed before the SPL write, presumably relying on the one-instruction delay before interrupts are re-enabled -- confirm */
out _SFR_IO_ADDR(SPL), \reg1
.endm
/*
 * stack_free_large size [, reg1 [, reg2]]
 *
 * Releases \size bytes previously reserved with stack_alloc_large by adding
 * \size to the 16-bit stack pointer (SPH:SPL).
 *
 *   size : number of bytes to release (any 16-bit constant)
 *   reg1 : scratch register for the low byte of SP  (default r30)
 *   reg2 : scratch register for the high byte of SP (default r31)
 *
 * On exit \reg2:\reg1 hold the new stack pointer value and r0 is
 * overwritten with the SREG value that was read on entry. Because SREG is
 * restored from r0, the arithmetic flags seen by the caller are unchanged.
 *
 * The addition is done by subtracting the negated constant (subi/sbci),
 * mirroring stack_alloc_large. The earlier "adiw 63 / adiw size-63" form
 * only assembled for 63 <= size <= 126 because adiw accepts immediates in
 * the range 0..63 only, and it also required reg2:reg1 to be an adiw
 * register pair.
 */
.macro stack_free_large size:req, reg1=r30, reg2=r31
    in r0, _SFR_IO_ADDR(SREG)       /* save SREG before the flags change */
    in \reg1, _SFR_IO_ADDR(SPL)
    in \reg2, _SFR_IO_ADDR(SPH)
    subi \reg1, lo8(-(\size))       /* reg2:reg1 += size */
    sbci \reg2, hi8(-(\size))
    cli                             /* interrupts off while SPH/SPL are updated */
    out _SFR_IO_ADDR(SPH), \reg2
    out _SFR_IO_ADDR(SREG), r0      /* restore SREG (same ordering as stack_alloc_large) */
    out _SFR_IO_ADDR(SPL), \reg1
.endm
/*******************************************************************************
* END of MACRO SECTION *
*******************************************************************************/

198
md5-asm.S
View File

@ -301,9 +301,9 @@ ARG_Z3 = 17
md5_core_asm:
push r28
push r29
push_range 4, 17
push r16
push r17
push_range 4, 8
ldi r30, lo8(T_table)
ldi r31, hi8(T_table)
lsl P_I
@ -445,9 +445,9 @@ fixrotl:
st X+, r24
st X , r25
md5_core_exit:
pop_range 4, 17
pop r29
pop r28
pop_range 4, 8
pop r17
pop r16
ret
;###################################################################
@ -531,9 +531,9 @@ I_REG = 8
.global md5_nextBlock
md5_nextBlock:
stack_alloc 16
push_range 2, 8
push r16
push r17
push_range 2, 17
push r28
push r29
push r24
push r25
adiw r30, 1 /* Z now points to the beginning of the allocated memory */
@ -735,17 +735,181 @@ md5_nextBlock:
st X+, r0
2:
pop r17
pop r16
pop_range 2, 8
pop r29
pop r28
pop_range 2, 17
stack_free 16
ret
;###############################################################################
/*
void md5_lastBlock(md5_ctx_t *state, const void* block, uint16_t length_b){
uint16_t l;
uint8_t b[64];
while (length_b >= 512){
md5_nextBlock(state, block);
length_b -= 512;
block = ((uint8_t*)block) + 512/8;
}
memset(b, 0, 64);
memcpy(b, block, length_b/8);
/ * insert padding one * /
l=length_b/8;
if(length_b%8){
uint8_t t;
t = ((uint8_t*)block)[l];
t |= (0x80>>(length_b%8));
b[l]=t;
}else{
b[l]=0x80;
}
/ * insert length value * /
if(l+sizeof(uint64_t) >= 512/8){
md5_nextBlock(state, b);
state->counter--;
memset(b, 0, 64-8);
}
*((uint64_t*)&b[64-sizeof(uint64_t)]) = (state->counter * 512) + length_b;
md5_nextBlock(state, b);
}
*/
;-------------------------------------------------------------------------------
; void md5_lastBlock(md5_ctx_t *state, const void* block, uint16_t length_b)
;
; Processes the final (possibly partial) part of a message: hashes any whole
; 512-bit blocks, then builds the padded block(s) in a 64-byte stack buffer
; (message tail, a single 1-bit, zero fill, 64-bit bit-length) and hashes them
; with md5_nextBlock.
;
; In:
;   state_ptr : r24,r25
;   block_ptr : r22,r23
;   length_b  : r20,r21   (length in bits)
;
; Register roles inside the function (r12..r17 are saved/restored here):
;   r12:r13 = state_ptr
;   r14:r15 = block_ptr, later the address of the 64-byte stack buffer
;   r16:r17 = length_b (reduced to < 512 by the whole-block loop)
;   r18     = number of still unused bytes in the buffer
;   T flag  = set when the 8-byte length field fits into the current block
;
; r1 is used as a constant zero throughout (assumed to be 0 on entry and
; after every md5_nextBlock call -- avr-gcc convention, confirm for callee).
;-------------------------------------------------------------------------------
.global md5_lastBlock
md5_lastBlock:
    stack_alloc_large 64        /* leaves the new SP in r31:r30 */
    push_range 12, 17
    push r30                    /* keep the buffer base across md5_nextBlock calls */
    push r31
    movw r16, r20 /* length_b */
    movw r14, r22 /* block_ptr */
    movw r12, r24 /* state_ptr */
    /* while(length_b >= 512): hash whole blocks.
     * The test is an unsigned compare that is re-evaluated every iteration;
     * branching on the signed result of "subi r17, 2" ran one block too many
     * whenever the high byte was exactly 2. */
1:
    cpi r17, 2 /* hi8(512) */
    brlo 2f
    movw r24, r12
    movw r22, r14
    rcall md5_nextBlock
    ldi r18, 64
    add r14, r18                /* block_ptr += 64 */
    adc r15, r1
    subi r17, 2                 /* length_b -= 512 */
    rjmp 1b
2:
    pop r31
    pop r30
    adiw r30, 1                 /* Z = first byte of the 64-byte buffer */
    movw r26, r14               /* X = remaining message bytes */
    /* r24 = (length_b + 7) / 8 = bytes to copy, including a partial byte.
     * length_b + 7 can reach 518, so the first two shifts must be 16 bit
     * wide; after them the value fits into r24 alone. */
    movw r24, r16
    adiw r24, 7
    lsr r25
    ror r24
    lsr r25
    ror r24
    lsr r24 /* r24 now holds how many bytes are to copy */
    ldi r18, 64
    sub r18, r24                /* r18 = free bytes left after the copy */
    tst r24
4:
    breq 5f
    ld r0, X+                   /* r0 keeps the last copied byte for the partial-byte case */
    st Z+, r0
    dec r24
    rjmp 4b
5: /* append 1-bit */
    mov r20, r16
    andi r20, 0x07              /* r20 = length_b % 8 */
    brne md5_lastBlock_partial_byte
    ldi r19, 0x80               /* byte aligned: padding bit gets its own byte */
    st Z+, r19
    dec r18
    rjmp md5_lastBlock_bit_done
md5_lastBlock_partial_byte:
    ldi r19, 0x80
1:
    lsr r19                     /* r19 = 0x80 >> (length_b % 8) */
    dec r20
    brne 1b
    or r0, r19                  /* merge the padding bit into the last copied byte */
    st -Z, r0
    adiw r30, 1
md5_lastBlock_bit_done:
    clt
    cpi r18, 8
    brmi 2f
    set /* store in t if the counter will also fit in this block */
2:
    tst r18
    breq 2f
1:
    st Z+, r1                   /* zero the rest of the buffer */
    dec r18
    brne 1b
2:
    sbiw r30, 63                /* Z is at buffer+64 here: step back to the start */
    sbiw r30, 1
    movw r14, r30               /* r14:r15 = buffer base from now on */
    brts load_counter
    /* length field does not fit: hash this block, continue with an empty one */
    movw r24, r12
    movw r22, r14
    rcall md5_nextBlock
    movw r30, r14
    ldi r20, 64-8
3:
    st Z+, r1
    dec r20
    brne 3b
    clt                         /* T is 0 on this path by construction; re-establish it
                                 * in case md5_nextBlock modified the flag */
load_counter:
    movw r26, r12
    adiw r26, 16                /* X = &state->counter (follows the four 32-bit words) */
    ld r19, X+
    ld r20, X+
    ld r21, X+
    ld r22, X+
    brts post_counter_decrement
    subi r19, 1                 /* do not count the extra padding block hashed above */
    sbci r20, 0
    sbci r21, 0
    sbci r22, 0
post_counter_decrement:
    /* r23:r18 = counter * 512 + length_b
     * (counter << 1 placed one byte up gives counter << 9) */
    clr r18
    clr r23
    lsl r19
    rol r20
    rol r21
    rol r22
    rol r23
    add r18, r16
    adc r19, r17
    adc r20, r1
    adc r21, r1
    adc r22, r1
    adc r23, r1
    movw r30, r14
    adiw r30, 64-8              /* Z = length field, last 8 bytes of the buffer */
    st Z+, r18
    st Z+, r19
    st Z+, r20
    st Z+, r21
    st Z+, r22
    st Z+, r23
    st Z+, r1
    st Z, r1
    sbiw r30, 63                /* Z is at buffer+63 here: back to the buffer start */
    movw r24, r12
    movw r22, r30
    rcall md5_nextBlock
md5_lastBlock_exit:
    pop_range 12, 17
    stack_free_large 64
    ret

View File

@ -24,113 +24,20 @@
#undef DEBUG
void md5_core(uint32_t* a, void* block, uint8_t as, uint8_t s, uint8_t i, uint8_t fi);
/*
#define ROTL32(x,n) (((x)<<(n)) | ((x)>>(32-(n))))
static
void md5_core(uint32_t* a, void* block, uint8_t as, uint8_t s, uint8_t i, uint8_t fi){
uint32_t t;
md5_func_t* funcs[]={md5_F, md5_G, md5_H, md5_I};
as &= 0x3;
// * a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). * /
#ifdef DEBUG
char funcc[]={'*', '-', '+', '~'};
uart_putstr("\r\n DBG: md5_core [");
uart_putc(funcc[fi]);
uart_hexdump(&as, 1); uart_putc(' ');
uart_hexdump(&k, 1); uart_putc(' ');
uart_hexdump(&s, 1); uart_putc(' ');
uart_hexdump(&i, 1); uart_putc(']');
#endif
t = a[as] + funcs[fi](a[(as+1)&3], a[(as+2)&3], a[(as+3)&3]) + *((uint32_t*)block) + md5_T[i] ;
a[as]=a[(as+1)&3] + ROTL32(t, s);
}
*/
#if 0
void md5_nextBlock(md5_ctx_t *state, void* block){
uint32_t a[4];
uint8_t m,n,i=0;
/* this requires other mixed sboxes */
#ifdef DEBUG
uart_putstr("\r\n DBG: md5_nextBlock: block:\r\n");
uart_hexdump(block, 16); uart_putstr("\r\n");
uart_hexdump(block+16, 16); uart_putstr("\r\n");
uart_hexdump(block+32, 16); uart_putstr("\r\n");
uart_hexdump(block+48, 16); uart_putstr("\r\n");
#endif
a[0]=state->a[0];
a[1]=state->a[1];
a[2]=state->a[2];
a[3]=state->a[3];
/* round 1 */
uint8_t s1t[]={7,12,17,22}; // 1,-1 1,4 2,-1 3,-2
for(m=0;m<4;++m){
for(n=0;n<4;++n){
md5_core(a, &(((uint32_t*)block)[m*4+n]), 4-n, s1t[n],i++,0);
}
}
/* round 2 */
uint8_t s2t[]={5,9,14,20}; // 1,-3 1,1 2,-2 2,4
for(m=0;m<4;++m){
for(n=0;n<4;++n){
md5_core(a, &(((uint32_t*)block)[(1+m*4+n*5)&0xf]), 4-n, s2t[n],i++,1);
}
}
/* round 3 */
uint8_t s3t[]={4,11,16,23}; // 0,4 1,3 2,0 3,-1
for(m=0;m<4;++m){
for(n=0;n<4;++n){
md5_core(a, &(((uint32_t*)block)[(5-m*4+n*3)&0xf]), 4-n, s3t[n],i++,2);
}
}
/* round 4 */
uint8_t s4t[]={6,10,15,21}; // 1,-2 1,2 2,-1 3,-3
for(m=0;m<4;++m){
for(n=0;n<4;++n){
md5_core(a, &(((uint32_t*)block)[(0-m*4+n*7)&0xf]), 4-n, s4t[n],i++,3);
}
}
state->a[0] += a[0];
state->a[1] += a[1];
state->a[2] += a[2];
state->a[3] += a[3];
state->counter++;
}
#endif
void md5_lastBlock(md5_ctx_t *state, void* block, uint16_t length_b){
uint16_t l;
uint8_t b[64];
while (length_b >= 512){
md5_nextBlock(state, block);
length_b -= 512;
block = ((uint8_t*)block) + 512/8;
}
memset(b, 0, 64);
memcpy(b, block, length_b/8);
/* insert padding one */
l=length_b/8;
if(length_b%8){
uint8_t t;
t = ((uint8_t*)block)[l];
t |= (0x80>>(length_b%8));
b[l]=t;
}else{
b[l]=0x80;
}
/* insert length value */
if(l+sizeof(uint64_t) >= 512/8){
md5_nextBlock(state, b);
state->counter--;
memset(b, 0, 64);
}
*((uint64_t*)&b[64-sizeof(uint64_t)]) = (state->counter * 512) + length_b;
md5_nextBlock(state, b);
void md5_ctx2hash(md5_hash_t* dest, const md5_ctx_t* state){
memcpy(dest, state->a, MD5_HASH_BYTES);
}
void md5(md5_hash_t* dest, const void* msg, uint32_t length_b){
md5_ctx_t ctx;
md5_init(&ctx);
while(length_b>=MD5_BLOCK_BITS){
md5_nextBlock(&ctx, msg);
msg = (uint8_t*)msg + MD5_BLOCK_BYTES;
length_b -= MD5_BLOCK_BITS;
}
md5_lastBlock(&ctx, msg, length_b);
md5_ctx2hash(dest, &ctx);
}

23
md5.c
View File

@ -84,7 +84,7 @@ void md5_core(uint32_t* a, void* block, uint8_t as, uint8_t s, uint8_t i, uint8_
a[as]=a[(as+1)&3] + ROTL32(t, s);
}
void md5_nextBlock(md5_ctx_t *state, void* block){
void md5_nextBlock(md5_ctx_t *state, const void* block){
uint32_t a[4];
uint8_t m,n,i=0;
/* this requires other mixed sboxes */
@ -136,7 +136,7 @@ void md5_nextBlock(md5_ctx_t *state, void* block){
state->counter++;
}
void md5_lastBlock(md5_ctx_t *state, void* block, uint16_t length_b){
void md5_lastBlock(md5_ctx_t *state, const void* block, uint16_t length_b){
uint16_t l;
uint8_t b[64];
while (length_b >= 512){
@ -160,8 +160,25 @@ void md5_lastBlock(md5_ctx_t *state, void* block, uint16_t length_b){
if(l+sizeof(uint64_t) >= 512/8){
md5_nextBlock(state, b);
state->counter--;
memset(b, 0, 64);
memset(b, 0, 64-8);
}
*((uint64_t*)&b[64-sizeof(uint64_t)]) = (state->counter * 512) + length_b;
md5_nextBlock(state, b);
}
/* Copies the MD5_HASH_BYTES bytes of the state words state->a into *dest. */
void md5_ctx2hash(md5_hash_t* dest, const md5_ctx_t* state){
	const void* words = state->a;
	memcpy(dest, words, MD5_HASH_BYTES);
}
/*
 * One-shot MD5: hashes length_b bits starting at msg and writes the digest
 * to *dest. Whole blocks go through md5_nextBlock, the remaining tail
 * (fewer than MD5_BLOCK_BITS bits) through md5_lastBlock.
 */
void md5(md5_hash_t* dest, const void* msg, uint32_t length_b){
	md5_ctx_t ctx;
	const uint8_t* cursor = (const uint8_t*)msg;

	md5_init(&ctx);
	for(; length_b >= MD5_BLOCK_BITS; length_b -= MD5_BLOCK_BITS){
		md5_nextBlock(&ctx, cursor);
		cursor += MD5_BLOCK_BYTES;
	}
	md5_lastBlock(&ctx, cursor, length_b);
	md5_ctx2hash(dest, &ctx);
}

15
md5.h
View File

@ -31,14 +31,25 @@
#include <stdint.h>
#define MD5_HASH_BITS 128
#define MD5_HASH_BYTES (MD5_HASH_BITS/8)
#define MD5_BLOCK_BITS 512
#define MD5_BLOCK_BYTES (MD5_BLOCK_BITS/8)
typedef struct md5_ctx_st {
uint32_t a[4];
uint32_t counter;
} md5_ctx_t;
typedef uint8_t md5_hash_t[MD5_HASH_BYTES];
void md5_init(md5_ctx_t *s);
void md5_nextBlock(md5_ctx_t *state, void* block);
void md5_lastBlock(md5_ctx_t *state, void* block, uint16_t length);
void md5_nextBlock(md5_ctx_t *state, const void* block);
void md5_lastBlock(md5_ctx_t *state, const void* block, uint16_t length);
void md5_ctx2hash(md5_hash_t* dest, const md5_ctx_t* state);
void md5(md5_hash_t* dest, const void* msg, uint32_t length_b);
#endif /*MD5_H_*/

View File

@ -84,7 +84,7 @@ void testrun_nessie_md5(void){
*/
void testrun_md5(void){
md5_ctx_t s;
md5_hash_t hash;
char* testv[]={
"",
"a",
@ -100,9 +100,8 @@ void testrun_md5(void){
uart_putstr("\r\n MD5 (\"");
uart_putstr(testv[i]);
uart_putstr("\") = \r\n\t");
md5_init(&s);
md5_lastBlock(&s, testv[i], strlen(testv[i])*8);
uart_hexdump(&(s.a[0]), 16);
md5(&hash, testv[i], strlen(testv[i])*8);
uart_hexdump(hash, 16);
}
}