big change for small size (reduction)

This commit is contained in:
bg 2010-05-10 22:07:45 +00:00
parent 0747bb9f3d
commit e9975b387f
3 changed files with 111 additions and 88 deletions

View File

@ -1,5 +1,21 @@
# Makefile for the AVR-Crypto-Lib project
# author: Daniel Otte
#
# This file is part of the AVR-Crypto-Lib.
# Copyright (C) 2010 Daniel Otte (daniel.otte@rub.de)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
SHELL = sh
BLOCK_CIPHERS :=
@ -292,7 +308,7 @@ clean:
.PHONY: depclean
depclean: clean
rm $(DEP_DIR)*.d
rm -f $(DEP_DIR)*.d
#-------------------------------------------------------------------------------
# dependency inclusion

View File

@ -33,6 +33,8 @@ acc3 = 9
acc0 = 14
acc1 = 15
#define DEBUG 0
/******************************************************************************/
/*
param a: r22:r23:r24:r25
@ -178,10 +180,11 @@ sn:
param src: r30:r31 (Z)
param len: r20
*/
memxor_short:
memxor_64:
; tst r20
; breq memxor_exit
ldi r20, 64
memxor:
10: ld r21, X
ld r22, Z+
eor r21, r22
@ -245,11 +248,6 @@ mov32_to_acc:
movw acc2, r24
ret
eor_acc_from_Y_add_to_Z:
rcall load32_from_Y
rcall eor32_to_acc
rjmp add_acc_to_Z
/******************************************************************************/
/*
param q: r28:r29 (Y)
@ -297,19 +295,19 @@ load_acc_from_X:
ld acc3, X+
ret
add_acc_to_Z:
ld r0, Z
add_acc_to_X:
ld r0, X
add r0, acc0
st Z+, r0
ld r0, Z
st X+, r0
ld r0, X
adc r0, acc1
st Z+, r0
ld r0, Z
st X+, r0
ld r0, X
adc r0, acc2
st Z+, r0
ld r0, Z
st X+, r0
ld r0, X
adc r0, acc3
st Z+, r0
st X+, r0
ret
load_rotate_add_M:
@ -417,13 +415,14 @@ expand1:
*/
f2_1_shift_table:
.byte 0x2B, 0x64, 0x66, 0x03, 0x51, 0x55, 0x87, 0x55
; .byte 0x2B, 0x64, 0x66, 0x03, 0x51, 0x55, 0x87, 0x55
.byte 0x55, 0x87, 0x55, 0x51, 0x03, 0x66, 0x64, 0x2B
f2_2_shift_table:
.byte (2<<1), (7<<1), (4<<1), (3<<1), (4<<1)+1, (6<<1)+1, (6<<1)
; .byte (2<<1), (7<<1), (4<<1), (3<<1), (4<<1)+1, (6<<1)+1, (6<<1)
.byte (8<<1)+1, (6<<1), (6<<1)+1, (4<<1)+1, (3<<1), (4<<1), (7<<1), (2<<1)
expand2_rot_table:
.byte 3,7,13,16,19,23,27
; .byte 0 ; just for alignment
.byte 0 ; just for alignment
expand2:
rcall expand_intro
@ -552,7 +551,7 @@ f0:
movw m0, r30
/* xor m into h */
; ldi r20, 64
rcall memxor_short
rcall memxor_64
movw r30, m0
movw r26, h0
@ -614,7 +613,7 @@ add_hx_to_w:
; ldi r20, 64
movw r26, h0
movw r30, m0
rcall memxor_short
rcall memxor_64
sbiw r26, 60
;---
clr r17
@ -695,14 +694,17 @@ h0 = 18
h1 = 19
f2:
movw r26, r24
/* calc XL */
/* calc XL & XH */
adiw r26, 63
adiw r26, 1
movw q16_0, r26
movw h0, r20
;---
; push h0
; push h1
;---
movw r28, r22
rcall load32_from_X
rcall mov32_to_acc
rcall load_acc_from_X
ldi r17, 15
10: rcall load32_from_X
rcall eor32_to_acc
@ -725,13 +727,21 @@ f2:
; rcall print32
; pop_range 22, 25
;--- END DBG
/* copy m(Y) into h */
movw r26, h0
ldi r22, 64
10:
ld r23, Y+
st X+, r23
dec r22
brne 10b
;--- /* calc first half of h0..h15 */
movw r26, q16_0
movw r28, q16_0
movw r26, h0
ldi r30, lo8(f2_1_shift_table)
ldi r31, hi8(f2_1_shift_table)
ldi r17, 16
10:
rcall load32_from_Y
rcall mov32_to_acc
;---
movw r22, xh0
movw r24, xh2
@ -739,11 +749,7 @@ f2:
brge 15f
clr r1
rjmp 26f
15: ldi r30, lo8(f2_1_shift_table-9)
ldi r31, hi8(f2_1_shift_table-9)
add r30, r17
adc r31, r1
lpm r20, Z
15: lpm r20, Z+
mov r1, r20
andi r20, 0x0f
clt
@ -756,9 +762,9 @@ f2:
rcall shiftright32
rjmp 26f
25: rcall shiftleft32
26: rcall eor32_to_acc
26: rcall mov32_to_acc
;---
rcall load32_from_X
rcall load32_from_Y
mov r20, r1
clr r1
swap r20
@ -769,80 +775,81 @@ f2:
27: rcall shiftright32
28: rcall eor32_to_acc
;---
movw r30, h0
st Z+, acc0
st Z+, acc1
st Z+, acc2
st Z+, acc3
movw h0, r30
ld r0, X
eor r0, acc0
st X+, r0
ld r0, X
eor r0, acc1
st X+, r0
ld r0, X
eor r0, acc2
st X+, r0
ld r0, X
eor r0, acc3
st X+, r0
;---
dec r17
brne 10b
;-----
sbiw r26, 4*8 /* X points to q[24] */
movw r28, r26
sbiw r28, 4*8 /* Y points to q[24] */
movw r30, r28
sbiw r28, 63
sbiw r28, 33 /* Y points to q[0] */
sbiw r30, 63
sbiw r30, 1 /* Z points to h0 */
ldi r17, 8
10: movw acc0, xl0
movw acc2, xl2
rcall load32_from_X
rcall eor32_to_acc
rcall eor_acc_from_Y_add_to_Z
dec r17
brne 10b
sbiw r26, 9*4 /* X points to q[23] */
rcall load_acc_from_X
eor acc1, xl0
eor acc2, xl1
eor acc3, xl2
rcall eor_acc_from_Y_add_to_Z
;---
sbiw r26, 8*4 /* X points to q[16] */
mov h0, r30
ldi r17, 7
10:
ldi r30, lo8(f2_2_shift_table-1)
ldi r31, hi8(f2_2_shift_table-1)
add r30, r17
adc r31, r1
lpm r20, Z
rcall load_acc_from_X
movw r22, xl0
movw r26, r28
ldi r20, 8*4
/* xor q[24..31] into q[0..7] */
rcall memxor
/* xor q[23] into q[8] */
sbiw r30, 9*4
ldi r20, 4
rcall memxor
/* xor q[16..22] into q[9..15] */
sbiw r30, 8*4
ldi r20, 7*4
rcall memxor
movw r26, h0
ldi r17, 15
ldi r30, lo8(f2_2_shift_table)
ldi r31, hi8(f2_2_shift_table)
10: movw r22, xl0
movw r24, xl2
sbrc r17, 3
rjmp 20f
lpm r20, Z+
lsr r20
brcc 20f
brcs 15f
rcall shiftright32
rjmp 20f
15:
rcall shiftleft32
rjmp 21f
20: rcall shiftright32
21:
movw r30, h0
20:
rcall mov32_to_acc
rcall load32_from_Y
rcall eor32_to_acc
rcall eor_acc_from_Y_add_to_Z
movw h0, r30
rcall add_acc_to_X
dec r17
brne 10b
brpl 10b
;-----
sbiw r30, 8*4 /* Z points to h8 */
movw r26, r30
sbiw r26, 4*4 /* X points to h4 */
sbiw r26, 8*4 /* X points to h8 */
movw r28, r26
sbiw r28, 4*4 /* Y points to h4 */
ldi r17, 8
ldi r18, 9
10:
rcall load32_from_X
rcall load32_from_Y
mov r20, r18
rcall rotateleft32
rcall mov32_to_acc
rcall add_acc_to_Z
rcall add_acc_to_X
inc r18
cpi r17, 5
brne 20f
sbiw r26, 8*4
sbiw r28, 8*4
20: dec r17
brne 10b
exit:
;--- DBG
; pop r25
; pop r24

View File

@ -1,5 +1,5 @@
#!/usr/bin/ruby
# performnce to wiki
# performance to wiki
=begin
This file is part of the AVR-Crypto-Lib.