bmw tiny is now 1910 bytes in size

This commit is contained in:
bg 2010-04-08 19:44:30 +00:00
parent 03f98ddd20
commit ad11f38f79
1 changed file with 71 additions and 98 deletions

View File

@ -154,10 +154,9 @@ sn:
movw r24, r4 movw r24, r4
lpm r20, Z+ lpm r20, Z+
rcall rotateleft32 rcall rotateleft32
eor r22, r16 rcall eor_r22_in_r16
eor r23, r17 movw r22, r16
eor r24, r18 movw r24, r18
eor r25, r19
pop r19 pop r19
pop r17 pop r17
pop_range 2, 5 pop_range 2, 5
@ -172,6 +171,7 @@ sn:
memxor_short: memxor_short:
; tst r20 ; tst r20
; breq memxor_exit ; breq memxor_exit
ldi r20, 64
10: ld r21, X 10: ld r21, X
ld r22, Z+ ld r22, Z+
eor r21, r22 eor r21, r22
@ -193,23 +193,14 @@ add_hx_to_w:
movw r26, h0 movw r26, h0
add r26, r16 add r26, r16
adc r27, r1 adc r27, r1
ld r22, Y rcall load32_from_Y
ldd r23, Y+1 sbiw r28, 4
ldd r24, Y+2
ldd r25, Y+3
lsl r20 lsl r20
rol r21 rol r21
brcs 30f brcs 30f
/* addition */ /* addition */
ld r0, X+ rcall add_X_to_32
add r22, r0 rjmp store32_to_Y;50f
ld r0, X+
adc r23, r0
ld r0, X+
adc r24, r0
ld r0, X+
adc r25, r0
rjmp 50f
30: /* subtract */ 30: /* subtract */
ld r0, X+ ld r0, X+
sub r22, r0 sub r22, r0
@ -220,11 +211,9 @@ add_hx_to_w:
ld r0, X+ ld r0, X+
sbc r25, r0 sbc r25, r0
50: 50:
st Y+, r22 rjmp store32_to_Y
st Y+, r23 ; rcall store32_to_Y
st Y+, r24 ; ret
st Y+, r25
ret
/******************************************************************************/ /******************************************************************************/
load32_from_X: load32_from_X:
@ -241,6 +230,13 @@ load32_from_Y:
ld r25, Y+ ld r25, Y+
ret ret
; store32_to_Y: write the four bytes held in r22, r23, r24, r25 (in that
; order) to the memory Y points at.
; Every store uses post-increment addressing, so on return Y is 4 bytes
; past its value on entry. r22..r25 are not modified.
store32_to_Y:
st Y+, r22
st Y+, r23
st Y+, r24
st Y+, r25
ret
add_X_to_32: add_X_to_32:
ld r0, X+ ld r0, X+
add r22, r0 add r22, r0
@ -292,7 +288,7 @@ f0:
; pop_range 22, 25 ; pop_range 22, 25
;--- END DBG ;--- END DBG
/* xor m into h */ /* xor m into h */
ldi r20, 64 ; ldi r20, 64
rcall memxor_short rcall memxor_short
movw r30, m0 movw r30, m0
movw r26, h0 movw r26, h0
@ -340,7 +336,7 @@ f0:
; pop_range 22, 25 ; pop_range 22, 25
;--- END DBG ;--- END DBG
/* xor m into h */ /* xor m into h */
ldi r20, 64 ; ldi r20, 64
movw r26, h0 movw r26, h0
movw r30, m0 movw r30, m0
rcall memxor_short rcall memxor_short
@ -351,38 +347,30 @@ f0:
ldi r21, 15 ldi r21, 15
mov r8, r21 mov r8, r21
50: 50:
ldd r22, Y+0 rcall load32_from_Y
ldd r23, Y+1 sbiw r28, 4
ldd r24, Y+2
ldd r25, Y+3
lpm r20, Z+ lpm r20, Z+
movw r2, r30 movw r2, r30
rcall sn rcall sn
movw r30, r2 movw r30, r2
rcall add_X_to_32 rcall add_X_to_32
rcall store32_to_Y
st Y+, r22
st Y+, r23
st Y+, r24
st Y+, r25
dec r8 dec r8
brne 50b brne 50b
;--- ;---
ldd r22, Y+0 rcall load32_from_Y
ldd r23, Y+1
ldd r24, Y+2
ldd r25, Y+3
clr r20 clr r20
rcall sn rcall sn
movw r30, r2 movw r30, r2
movw r26, h0 movw r26, h0
rcall add_X_to_32 rcall add_X_to_32
sbiw r26, 4 sbiw r26, 4
std Y+0, r22 st -Y, r25
std Y+1, r23 st -Y, r24
std Y+2, r24 st -Y, r23
std Y+3, r25 st -Y, r22
sbiw r28, 15*4 sbiw r28, 15*4
movw r20, h0 movw r20, h0
movw r22, m0 movw r22, m0
@ -464,10 +452,7 @@ load_rotate_add_M:
movw r26, m0 movw r26, m0
add r26, r0 add r26, r0
adc r27, r1 adc r27, r1
ld r22, X+ rcall load32_from_X
ld r23, X+
ld r24, X+
ld r25, X+
inc r20 inc r20
rcall rotateleft32 rcall rotateleft32
brts 10f brts 10f
@ -670,35 +655,23 @@ f2:
adiw r26, 63 adiw r26, 63
adiw r26, 1 adiw r26, 1
movw q16_0, r26 movw q16_0, r26
clr xl0 movw h0, r20
clr xl1 movw r28, r22
clr xl2 rcall load32_from_X
clr xl3 movw acc0, r22
ldi r17, 8 movw acc2, r24
10: ld r0, X+ ldi r17, 15
eor xl0, r0 10: rcall load32_from_X
ld r0, X+ rcall eor32_to_acc
eor xl1, r0 cpi r17, 9
ld r0, X+ brne 15f
eor xl2, r0 movw xl0, acc0
ld r0, X+ movw xl2, acc2
eor xl3, r0 15:
dec r17
brne 10b
;--- /* calc XH */
movw xh0, xl0
movw xh2, xl2
ldi r17, 8
10: ld r0, X+
eor xh0, r0
ld r0, X+
eor xh1, r0
ld r0, X+
eor xh2, r0
ld r0, X+
eor xh3, r0
dec r17 dec r17
brne 10b brne 10b
movw xh0, acc0
movw xh2, acc2
;--- DBG ;--- DBG
; push_range 22, 25 ; push_range 22, 25
; movw r22, xl0 ; movw r22, xl0
@ -711,8 +684,6 @@ f2:
;--- END DBG ;--- END DBG
;--- /* calc first half of h0..h15 */ ;--- /* calc first half of h0..h15 */
movw h0, r20
movw r28, r22
movw r26, q16_0 movw r26, q16_0
ldi r17, 16 ldi r17, 16
10: 10:
@ -912,11 +883,6 @@ bmw256_nextBlock:
ret ret
/******************************************************************************/ /******************************************************************************/
/*
param ctx: r24:r25
param msg: r22:r23
param len: r20:r21
*/
ctx0 = 2 ctx0 = 2
ctx1 = 3 ctx1 = 3
blc0 = 4 blc0 = 4
@ -926,6 +892,22 @@ len1 = 29
buf0 = 6 buf0 = 6
buf1 = 7 buf1 = 7
; load32_from_Z_stub: load four bytes relative to the pointer kept in
; ctx0:ctx1 (r2:r3) into r21, r22, r23, r24.
; Z is set to that pointer plus 60 and the bytes are read from Z+4..Z+7,
; i.e. from offsets 64..67 of the pointer. Z stays at pointer+60 on return.
; Note the destination is r21..r24, not r22..r25 as used by store32_to_Y.
; NOTE(review): presumably these four bytes are a 32-bit field of the hash
; context - confirm against the ctx structure layout.
load32_from_Z_stub:
movw r30, ctx0
adiw r30, 60
; displacement addressing leaves Z unchanged across the four loads
ldd r21, Z+4
ldd r22, Z+5
ldd r23, Z+6
ldd r24, Z+7
ret
/******************************************************************************/
/*
param ctx: r24:r25
param msg: r22:r23
param len: r20:r21
*/
.global bmw_small_lastBlock .global bmw_small_lastBlock
.global bmw224_lastBlock .global bmw224_lastBlock
.global bmw256_lastBlock .global bmw256_lastBlock
@ -1020,12 +1002,7 @@ bmw256_lastBlock:
st X+, r1 st X+, r1
dec r20 dec r20
brne 350b brne 350b
movw r30, ctx0 rcall load32_from_Z_stub
adiw r30, 60
ldd r21, Z+4
ldd r22, Z+5
ldd r23, Z+6
ldd r24, Z+7
subi r21, 1 subi r21, 1
sbc r22, r1 sbc r22, r1
sbc r23, r1 sbc r23, r1
@ -1035,12 +1012,7 @@ bmw256_lastBlock:
bmw_small_nextBlock(ctx, pctx.buffer); bmw_small_nextBlock(ctx, pctx.buffer);
*/ */
400: 400:
movw r30, ctx0 rcall load32_from_Z_stub
adiw r30, 60
ldd r21, Z+4
ldd r22, Z+5
ldd r23, Z+6
ldd r24, Z+7
410: 410:
clr r25 clr r25
lsl r21 lsl r21
@ -1195,19 +1167,20 @@ ctx0 = 2
ctx1 = 3 ctx1 = 3
msg0 = 4 msg0 = 4
msg1 = 5 msg1 = 5
len0 = 6 len0 = 28
len1 = 7 len1 = 29
len2 = 8 len2 = 8
len3 = 9 len3 = 9
dst0 = 10 dst0 = 6
dst1 = 11 dst1 = 7
.global bmw224 .global bmw224
bmw224: bmw224:
push r16 push r16
clr r16 clr r16
bmw_small_all: bmw_small_all:
push_range 2, 11 push_range 2, 9
push_range 28, 29
stack_alloc_large 64+4 stack_alloc_large 64+4
adiw r30, 1 adiw r30, 1
movw ctx0, r30 movw ctx0, r30
@ -1228,8 +1201,7 @@ bmw_small_all:
movw r24, ctx0 movw r24, ctx0
movw r22, msg0 movw r22, msg0
rcall bmw_small_nextBlock rcall bmw_small_nextBlock
ldi r20, 2 subi len1, 2
sub len1, r20
sbc len2, r1 sbc len2, r1
sbc len3, r1 sbc len3, r1
ldi r20, 64 ldi r20, 64
@ -1249,7 +1221,8 @@ bmw_small_all:
adc r31, r1 adc r31, r1
icall icall
stack_free_large 64+4 stack_free_large 64+4
pop_range 2, 11 pop_range 28, 29
pop_range 2, 9
pop r16 pop r16
ret ret