887 lines
15 KiB
ArmAsm
887 lines
15 KiB
ArmAsm
/* sha1-asm.S */
/*
    This file is part of the AVR-Crypto-Lib.
    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
|
|
/*
 * Author:	Daniel Otte
 *
 * License: GPLv3 or later
 */
|
|
; SHA1 implementation in assembler for AVR
|
|
/* sizes, in bits, used by the SHA-1 routines below */
.set SHA1_BLOCK_BITS, 512
.set SHA1_HASH_BITS,  160
|
|
|
|
/*
 * precall
 * Pushes r0, r1, r18-r27, r30 and r31 (in that order) and then clears r1.
 * postcall pops the same registers in reverse order.
 */
.macro precall
	.irp reg, r0, r1, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r30, r31
	push \reg
	.endr
	clr r1
.endm
|
|
|
|
/*
 * postcall
 * Pops r31, r30, r27-r18, r1 and r0, i.e. exactly what precall pushed,
 * in reverse order.
 */
.macro postcall
	.irp reg, r31, r30, r27, r26, r25, r24, r23, r22, r21, r20, r19, r18, r1, r0
	pop \reg
	.endr
.endm
|
|
|
|
|
|
/*
 * hexdump length
 * Sends CR and LF through uart_putc, then hands the memory X points to
 * over to uart_hexdump (pointer in r25:r24, byte count in r23:r22).
 * At most 16 bytes are passed per call; for longer areas X is advanced
 * by 16 and the macro expands itself again for the remaining bytes.
 * X is saved around the calls that are followed by further use of X.
 */
.macro hexdump length
	/* line break; X must survive the two uart_putc calls */
	push r27
	push r26
	ldi r25, '\r'
	mov r24, r25
	call uart_putc
	ldi r25, '\n'
	mov r24, r25
	call uart_putc
	pop r26
	pop r27
	movw r24, r26		/* first argument: address to dump */
.if \length <= 16
	/* last (or only) chunk */
	ldi r22, lo8(\length)
	ldi r23, hi8(\length)
	call uart_hexdump
.else
	/* full 16-byte chunk, then recurse for the rest */
	ldi r22, lo8(16)
	ldi r23, hi8(16)
	push r27
	push r26
	call uart_hexdump
	pop r26
	pop r27
	adiw r26, 16
	hexdump \length-16
.endif
.endm
|
|
|
|
/*
 * delay
 * Currently expands to nothing: the whole body is commented out.
 * The disabled body is a nested busy-wait that counts r1 and r0 down
 * from zero (256 x 256 iterations) with both registers saved on the stack.
 */
.macro delay
/*
	push r0
	push r1
	clr r0
1:	clr r1
2:	dec r1
	brne 2b
	dec r0
	brne 1b
	pop r1
	pop r0
*/
.endm
|
|
|
|
/*
 * dbg_hexdump length
 * Debug hook; X is expected to point at the block to dump.
 * Currently expands to nothing because the body (precall / hexdump /
 * postcall) is commented out.
 */
.macro dbg_hexdump length
/*
	precall
	hexdump \length
	postcall
*/
.endm
|
|
|
|
|
|
|
|
.section .text
|
|
|
|
SPL = 0x3D
|
|
SPH = 0x3E
|
|
SREG = 0x3F
|
|
|
|
|
|
;
|
|
; Layout of sha1_ctx_t:
;
; [h0][h1][h2][h3][h4][length]
; each hn is 32 bits wide, length is 64 bits wide
|
|
|
|
;###########################################################
|
|
|
|
.global sha1_ctx2hash
; === sha1_ctx2hash ===
; Writes the five 32-bit state words of a sha1_ctx structure to the
; destination as a 20-byte string. The words are copied in order, the
; four bytes of each word in reversed order.
;  param1: the 16-bit destination pointer
;          given in r25,r24 (r25 is most significant)
;  param2: the 16-bit pointer to sha1_ctx structure
;          given in r23,r22
;  modifies: r0, r20, r21, X (r26,r27), Z (r30,r31)
sha1_ctx2hash:
	movw r30, r24		/* Z = destination */
	movw r26, r22		/* X = first state word */
	ldi r21, 5		/* words left */
sha1_ctx2hash_word:
	adiw r26, 4		/* X = one past the current word */
	ldi r20, 4		/* bytes left in this word */
sha1_ctx2hash_byte:
	ld r0, -X		/* walk the word from its last byte down */
	st Z+, r0
	dec r20
	brne sha1_ctx2hash_byte
	adiw r26, 4		/* X is back at the word start; step to the next word */
	dec r21
	brne sha1_ctx2hash_word
	ret
|
|
|
|
;###########################################################
|
|
|
|
.global sha1
; === sha1 ===
; this function calculates SHA-1 hashes from messages in RAM
;  param1: the 16-bit hash destination pointer
;          given in r25,r24 (r25 is most significant)
;  param2: the 16-bit pointer to message
;          given in r23,r22
;  param3: 32-bit length value (length of message in bits)
;          given in r21,r20,r19,r18
;
; A sha1_ctx_t (5*4+8 bytes) is kept on the stack for the duration of
; the call. Register use inside the function:
;  r11..r8   remaining message length in bits
;  r13,r12   pointer to the next unprocessed message byte
;  r17,r16   pointer to the context on the stack
; r1 is expected to be zero on entry (sha1_init stores it as the zero
; byte) and is zero again after every sha1_nextBlock call.
sha1:
sha1_prolog:
	push r8
	push r9
	push r10
	push r11
	push r12
	push r13
	push r16
	push r17
	/* reserve room for the context below the saved registers */
	in r16, SPL
	in r17, SPH
	subi r16, lo8(5*4+8)
	sbci r17, hi8(5*4+8)
	in r0, SREG
	cli			/* SP must not be used while half updated */
	out SPL, r16
	out SPH, r17
	out SREG, r0

	push r25		/* destination pointer, needed again for sha1_ctx2hash */
	push r24
	/* r17:r16 = SP+1 = address of the context. A full 16-bit add is
	   required here: inc leaves the carry flag untouched, so it cannot
	   be followed by adc to propagate into the high byte. */
	subi r16, lo8(-1)
	sbci r17, hi8(-1)

	movw r8, r18		/* backup of length */
	movw r10, r20

	movw r12, r22		/* backup of msg-ptr */

	movw r24, r16
	rcall sha1_init
	/* while length >= 512: hash one full block */
1:
	tst r11
	brne 2f			/* length >= 2^24, certainly a full block left */
	tst r10
	brne 2f			/* length >= 2^16, certainly a full block left */
	mov r19, r9		/* cpi needs an upper register */
	cpi r19, 0x02
	brlo 4f			/* fewer than 512 bits left */
2:
	movw r24, r16
	movw r22, r12
	rcall sha1_nextBlock
	/* msg-ptr += 64 bytes; the backup copy is advanced because
	   sha1_nextBlock does not preserve r23:r22 */
	ldi r19, 64
	add r12, r19
	adc r13, r1
	/* length -= 512 */
	ldi r19, 0x02
	sub r9, r19
	sbc r10, r1
	sbc r11, r1
	rjmp 1b

4:
	/* pad and hash the remaining (less than 512) bits */
	movw r24, r16
	movw r22, r12
	movw r20, r8
	rcall sha1_lastBlock

	pop r24
	pop r25
	movw r22, r16
	rcall sha1_ctx2hash

sha1_epilog:
	/* release the context and restore the saved registers */
	in r30, SPL
	in r31, SPH
	adiw r30, 5*4+8
	in r0, SREG
	cli
	out SPL, r30
	out SPH, r31
	out SREG, r0
	pop r17
	pop r16
	pop r13
	pop r12
	pop r11
	pop r10
	pop r9
	pop r8
	ret
|
|
|
|
;###########################################################
|
|
|
|
|
|
.global sha1_lastBlock
; === sha1_lastBlock ===
; this function does padding & Co. for calculating SHA-1 hashes
;  param1: the 16-bit pointer to sha1_ctx structure
;          given in r25,r24 (r25 is most significant)
;  param2: an 16-bit pointer to the block to hash
;          given in r23,r22
;  param3: an 16-bit integer specifying the length of the block in bits
;          given in r21,r20
;
; While 512 or more bits remain, complete 64-byte blocks are handed to
; sha1_nextBlock. The remaining bits are copied into a 64-byte buffer on
; the stack, followed by the stuffing bit, zero bytes and the 64-bit
; total length (context length + remaining bits, most significant byte
; first). If the length field does not fit, the buffer is hashed once
; without it and a second, otherwise empty block carries the length.
; r1 is expected to be zero on entry; r0 and r1 are zero on return.
sha1_lastBlock_localSpace = (SHA1_BLOCK_BITS/8+1)


sha1_lastBlock:
	cpi r21, 0x02
	brlo sha1_lastBlock_prolog
	/* 512 bits or more: hash one complete block, then look again */
	push r25
	push r24
	push r23
	push r22
	push r21
	push r20
	rcall sha1_nextBlock
	pop r20
	pop r21
	pop r22
	pop r23
	pop r24
	pop r25
	subi r21, 2		/* length -= 512 bits */
	subi r22, lo8(-64)	/* block pointer += 64 bytes */
	sbci r23, hi8(-64)
	rjmp sha1_lastBlock
sha1_lastBlock_prolog:
	/* allocate the 64-byte buffer on the stack */
	in r30, SPL
	in r31, SPH
	in r0, SREG
	subi r30, lo8(64)
	sbci r31, hi8(64)
	cli
	out SPL, r30
	out SPH, r31
	out SREG, r0

	adiw r30, 1		/* SP points to next free byte on stack, buffer starts above it */
	mov r18, r20		/* r20 = LSB(length) */
	lsr r18
	lsr r18
	lsr r18
	bst r21, 0		/* bit 8 of the length becomes bit 5 of length/8 */
	bld r18, 5		/* now: r18 == length/8 (aka. length in bytes) */


	movw r26, r22		/* X points to begin of msg */
	tst r18
	breq sha1_lastBlock_post_copy
	mov r1, r18		/* r1 counts down to zero and is zero again afterwards */
sha1_lastBlock_copy_loop:
	ld r0, X+
	st Z+, r0
	dec r1
	brne sha1_lastBlock_copy_loop
sha1_lastBlock_post_copy:
sha1_lastBlock_insert_stuffing_bit:
	ldi r19, 0x80
	mov r0, r19
	ldi r19, 0x07
	and r19, r20		/* number of bits in a partial last byte */
	breq 2f			/* no bitmode */
1:
	lsr r0			/* move the stuffing bit behind the message bits */
	dec r19
	brne 1b
	ld r19, X		/* the partial byte */
	/* keep only the message bits of the partial byte:
	   -(2 * stuffing bit) has exactly the bits above the stuffing bit set.
	   r22 is free, the message pointer lives in X by now. */
	mov r22, r0
	lsl r22
	neg r22
	and r19, r22
	or r0, r19
2:
	st Z+, r0
	inc r18			/* r18 = bytes used in the buffer so far */

	/* is there room left for the 8 length bytes? */
	cpi r18, 64-8+1
	brsh 0f
	rjmp sha1_lastBlock_insert_zeros
0:
	/* no: fill this block up with zeros and hash it without the length */
	ldi r19, 64
	sub r19, r18
	breq 2f
1:
	st Z+, r1		/* r1 is zero, see the copy loop */
	dec r19
	brne 1b
2:
	sbiw r30, 63		/* Z back to the begin of the buffer (sbiw takes at most 63) */
	sbiw r30, 1
	movw r22, r30

	push r31
	push r30
	push r25
	push r24
	push r21
	push r20
	rcall sha1_nextBlock
	pop r20
	pop r21
	pop r24
	pop r25
	pop r30
	pop r31

	/* sha1_nextBlock added 512 to the length in the context;
	   take that back, this block was not a full block of message */
	movw r26, r24
	adiw r26, 4*5+1		/* we can skip the lowest byte */
	ld r19, X
	subi r19, hi8(512)
	st X+, r19
	ldi r18, 6
1:
	ld r19, X
	sbci r19, 0
	st X+, r19
	dec r18
	brne 1b

	/* r18 is zero now and Z points to the begin of the buffer again,
	   so the code below builds a block of 56 zero bytes plus length */

sha1_lastBlock_insert_zeros:
	ldi r19, 64-8
	sub r19, r18
	breq sha1_lastBlock_insert_length
	clr r1
1:
	st Z+, r1
	dec r19
	brne 1b

sha1_lastBlock_insert_length:
	movw r26, r24		/* X points to state */
	adiw r26, 5*4		/* X points to (state.length) */
	adiw r30, 8		/* Z points one after the last byte of block */
	ld r0, X+
	add r0, r20
	st -Z, r0
	ld r0, X+
	adc r0, r21
	st -Z, r0
	ldi r19, 6
1:
	ld r0, X+
	adc r0, r1		/* r1 is zero, only the carry is propagated */
	st -Z, r0
	dec r19
	brne 1b

	sbiw r30, 64-8		/* Z back to the begin of the buffer */
	movw r22, r30
	rcall sha1_nextBlock

sha1_lastBlock_epilog:
	/* release the buffer */
	in r30, SPL
	in r31, SPH
	in r0, SREG
	adiw r30, 63		/* adiw takes at most 63, so 64 is added in two steps */
	adiw r30, 1
	cli
	out SPL, r30
	out SPH, r31
	out SREG, r0
	clr r1
	clr r0
	ret
|
|
|
|
/**/
|
|
;###########################################################
|
|
|
|
.global sha1_nextBlock
; === sha1_nextBlock ===
; this is the core function for calculating SHA-1 hashes
;  param1: the 16-bit pointer to sha1_ctx structure
;          given in r25,r24 (r25 is most significant)
;  param2: an 16-bit pointer to 64 byte block to hash
;          given in r23,r22
;
; Adds the result of the 80 rounds to the five state words and adds 512
; to the length stored in the context.
; r1 must be zero on entry (it is used as xNULL) and is zero on return.
; r10-r17, r28 and r29 are saved and restored; r0, r18-r27, r30 and r31
; are overwritten.
;
; Stack frame (SP is the stack pointer after the frame was reserved):
;   SP+1  .. SP+20   a[0..4], working copy of the state (a, b, c, d, e)
;   SP+21 .. SP+84   w[0..15], message schedule used as a ring buffer
;   SP+85 .. SP+88   not used
sha1_nextBlock_localSpace = (16+5+1)*4 ; 16 32-bit values for w array, 5 32-bit values for a array, 1 spare (total 88 byte)

xtmp	= 0	/* index of the current entry in xTable / KTable / JumpTable */
xNULL	= 1	/* always zero */
W1	= 10	/* W2:W1 = address of w[0] */
W2	= 11
T1	= 12	/* T4..T1 = 32-bit accumulator T */
T2	= 13
T3	= 14
T4	= 15
LoopC	= 16	/* round counter, named t in FIPS 180-2 */
S	= 17	/* byte offset of w[t mod 16] */
tmp1	= 18
tmp2	= 19
tmp3	= 20
tmp4	= 21
F1	= 22	/* F4..F1 = 32-bit scratch value */
F2	= 23
F3	= 24
F4	= 25

/* byteorder: high number <--> high significance */
sha1_nextBlock:
	/* save the registers that must be preserved and reserve the frame */
	push r10
	push r11
	push r12
	push r13
	push r14
	push r15
	push r16
	push r17
	push r28
	push r29
	in r20, SPL
	in r21, SPH
	movw r18, r20			/* backup SP */
	subi r20, lo8(sha1_nextBlock_localSpace)	/* sbiw can do only up to 63 */
	sbci r21, hi8(sha1_nextBlock_localSpace)
	movw r26, r20			/* X points to free space on stack */
	in r0, SREG
	cli				/* we want to be uninterrupted while updating SP */
	out SPL, r20
	out SPH, r21
	out SREG, r0

	push r18
	push r19			/* push old SP on new stack */
	push r24
	push r25			/* param1 will be needed later */

	/* load a[] with state */
	movw r28, r24			/* load pointer to state in Y */
	adiw r26, 1			/* X = first byte of the frame = a[0] */

	ldi LoopC, 5*4
1:	ld tmp1, Y+
	st X+, tmp1
	dec LoopC
	brne 1b

	movw W1, r26			/* save pointer to w[0] */
	/* load w[] with the message, reversing the bytes of every word */
	movw r30, r22			/* mv param2 (pointer to msg) to Z */
	ldi LoopC, 16
1:
	ldd tmp1, Z+3
	st X+, tmp1
	ldd tmp1, Z+2
	st X+, tmp1
	ldd tmp1, Z+1
	st X+, tmp1
	ld tmp1, Z
	st X+, tmp1
	adiw r30, 4
	dec LoopC
	brne 1b

	/* LoopC is zero here: it starts as the round counter t = 0 */
	clr xtmp
sha1_nextBlock_mainloop:
	mov S, LoopC
	lsl S
	lsl S
	andi S, 0x3C			/* S is a bytepointer so *4 */
	/* load w[s] */
	movw r26, W1
	add r26, S			/* X points at w[s] */
	adc r27, xNULL
	ld T1, X+
	ld T2, X+
	ld T3, X+
	ld T4, X+

	/* Trace hook. It has no effect on the hash and dbg_hexdump expands
	   to nothing unless its body is enabled, so it is only assembled
	   when SHA1_DEBUG is defined for the preprocessor. */
#ifdef SHA1_DEBUG
	push r26
	push r27
	push T4
	push T3
	push T2
	push T1
	in r26, SPL
	in r27, SPH
	adiw r26, 1
	dbg_hexdump 4
	pop T1
	pop T2
	pop T3
	pop T4
	pop r27
	pop r26
#endif

	cpi LoopC, 16
	brlo sha1_nextBlock_mainloop_core	/* rounds 0..15 use the message words as they are */
	/* update w[s]: T = w[s] ^ w[s+2] ^ w[s+8] ^ w[s+13], indices mod 16 */
	ldi tmp1, 2*4
	rcall 1f
	ldi tmp1, 8*4
	rcall 1f
	ldi tmp1, 13*4
	rcall 1f
	rjmp 2f
1:	/* local subroutine: T ^= w[(S + tmp1) mod 64 bytes] */
	add tmp1, S
	andi tmp1, 0x3f
	movw r26, W1
	add r26, tmp1
	adc r27, xNULL
	ld tmp2, X+
	eor T1, tmp2
	ld tmp2, X+
	eor T2, tmp2
	ld tmp2, X+
	eor T3, tmp2
	ld tmp2, X+
	eor T4, tmp2
	ret
2:	/* now we just have to do a ROTL(T) and save T back */
	mov tmp2, T4
	rol tmp2			/* carry = most significant bit of T */
	rol T1
	rol T2
	rol T3
	rol T4
	movw r26, W1
	add r26, S
	adc r27, xNULL
	st X+, T1
	st X+, T2
	st X+, T3
	st X+, T4

sha1_nextBlock_mainloop_core:
	/* T already contains w[s] */
	movw r26, W1
	sbiw r26, 4*1			/* X points at a[4] aka e */
	ld tmp1, X+
	add T1, tmp1
	ld tmp1, X+
	adc T2, tmp1
	ld tmp1, X+
	adc T3, tmp1
	ld tmp1, X+
	adc T4, tmp1			/* T = w[s]+e */
	sbiw r26, 4*5			/* X points at a[0] aka a */
	ld F1, X+
	ld F2, X+
	ld F3, X+
	ld F4, X+
	mov tmp1, F4			/* X points at a[1] aka b */
	ldi tmp2, 5
1:
	rol tmp1			/* feeds the bit leaving F4 back into F1 */
	rol F1
	rol F2
	rol F3
	rol F4
	dec tmp2
	brne 1b

	add T1, F1
	adc T2, F2
	adc T3, F3
	adc T4, F4			/* T = ROTL(a,5) + e + w[s] */

	/* xTable[xtmp] is the round at which the next constant and
	   function take over; step xtmp when that round is reached */
	ldi r30, lo8(sha1_nextBlock_xTable)
	ldi r31, hi8(sha1_nextBlock_xTable)
	add r30, xtmp
	adc r31, xNULL
	lpm tmp1, Z
	cp tmp1, LoopC
	brne 1f
	inc xtmp
1:	ldi r30, lo8(sha1_nextBlock_KTable)
	ldi r31, hi8(sha1_nextBlock_KTable)
	lsl xtmp
	lsl xtmp			/* 4 bytes per table entry */
	add r30, xtmp
	adc r31, xNULL
	lsr xtmp
	lsr xtmp

	lpm tmp1, Z+
	add T1, tmp1
	lpm tmp1, Z+
	adc T2, tmp1
	lpm tmp1, Z+
	adc T3, tmp1
	lpm tmp1, Z+
	adc T4, tmp1
	/* T = ROTL(a,5) + e + kt + w[s] */

	/* Z is 4 bytes behind kt now; the jump table follows the four
	   constants directly and has 4 bytes per entry as well */
	movw r28, r26			/* copy X in Y */
	adiw r30, 3*4			/* now Z points to the right location in our jump-vector-table */
	lsr r31				/* icall takes a word address */
	ror r30

	/* every call returns one byte of f_t(b,c,d) in tmp1 and steps Y */
	icall
	mov F1, tmp1
	icall
	mov F2, tmp1
	icall
	mov F3, tmp1
	icall

	add T1, F1
	adc T2, F2
	adc T3, F3
	adc T4, tmp1			/* T = ROTL5(a) + f_t(b,c,d) + e + k_t + w[s] */
	/* update a[] */
sha1_nextBlock_update_a:
	/* first we move all vars in a[] "one up" e=d, d=c, c=b, b=a */
	movw r28, W1
	sbiw r28, 4			/* Y points at a[4] aka e */

	ldi tmp2, 4*4
1:
	ld tmp1, -Y
	std Y+4, tmp1
	dec tmp2
	brne 1b
	/* Y points at a[0] aka a */

	movw r28, W1
	sbiw r28, 5*4
	/* store T in a[0] aka a */
	st Y+, T1
	st Y+, T2
	st Y+, T3
	st Y+, T4
	/* Y points at a[1] aka b */

	/* rotate c right by two bits (same as left by 30) */
	ldd T1, Y+1*4
	ldd T2, Y+1*4+1
	ldd T3, Y+1*4+2
	ldd T4, Y+1*4+3
	mov tmp1, T1
	ldi tmp2, 2
1:	ror tmp1			/* feeds the bit leaving T1 back into T4 */
	ror T4
	ror T3
	ror T2
	ror T1
	dec tmp2
	brne 1b
	std Y+1*4+0, T1
	std Y+1*4+1, T2
	std Y+1*4+2, T3
	std Y+1*4+3, T4

	/* trace hook for a[], see the note at the first hook */
#ifdef SHA1_DEBUG
	push r27
	push r26
	movw r26, W1
	sbiw r26, 4*5
	dbg_hexdump 4*5
	pop r26
	pop r27
#endif

	inc LoopC
	cpi LoopC, 80
	brsh 1f
	rjmp sha1_nextBlock_mainloop
	/**************************************/
1:
	sbiw r28, 4			/* Y back from a[1] to a[0] */

	/* add a[] to state and inc length */
	pop r27
	pop r26				/* now X points to state (and Y still at a[0]) */
	ldi tmp4, 5
1:	clc
	ldi tmp3, 4
2:	ld tmp1, X
	ld tmp2, Y+
	adc tmp1, tmp2
	st X+, tmp1
	dec tmp3			/* dec leaves the carry alone */
	brne 2b
	dec tmp4
	brne 1b

	/* now length += 512 */
	adiw r26, 1			/* we skip the least significant byte */
	ld tmp1, X
	ldi tmp2, hi8(512)		/* 2 */
	add tmp1, tmp2
	st X+, tmp1
	ldi tmp2, 6
1:
	ld tmp1, X
	adc tmp1, xNULL
	st X+, tmp1
	dec tmp2
	brne 1b

; EPILOG
sha1_nextBlock_epilog:
	/* now we should clean up the stack */
	pop r21
	pop r20				/* r21:r20 = SP as it was before the frame */
	in r0, SREG
	cli				/* we want to be uninterrupted while updating SP */
	out SPL, r20
	out SPH, r21
	out SREG, r0

	clr r1
	pop r29
	pop r28
	pop r17
	pop r16
	pop r15
	pop r14
	pop r13
	pop r12
	pop r11
	pop r10
	ret
|
|
|
|
/*
 * Tables read by sha1_nextBlock. They have to stay adjacent and in this
 * order: the jump-table entry is addressed relative to the K constant
 * that was read just before (4 bytes per entry in both tables).
 */

/* round numbers at which sha1_nextBlock advances its table index */
sha1_nextBlock_xTable:
	.byte 1*20, 2*20, 3*20, 0

/* one 32-bit round constant per group of rounds */
sha1_nextBlock_KTable:
	.int 0x5a827999
	.int 0x6ed9eba1
	.int 0x8f1bbcdc
	.int 0xca62c1d6

/* one entry per group of rounds; each rjmp is padded to 4 bytes */
sha1_nextBlock_JumpTable:
	rjmp sha1_nextBlock_Ch
	nop
	rjmp sha1_nextBlock_Parity
	nop
	rjmp sha1_nextBlock_Maj
	nop
	rjmp sha1_nextBlock_Parity
|
|
|
|
/*
 * Ch, one byte per call: tmp1 = (b & c) ^ (~b & d)
 * Y points at the current byte of a[1] aka b and is advanced by one;
 * the matching bytes of c and d are 4 and 8 bytes further up.
 * tmp2 and tmp3 are overwritten.
 */
sha1_nextBlock_Ch:
	ld tmp2, Y+		/* b */
	ldd tmp1, Y+3		/* load from c */
	and tmp1, tmp2		/* b & c */
	com tmp2		/* ~b */
	ldd tmp3, Y+7		/* load from d */
	and tmp2, tmp3		/* ~b & d */
	eor tmp1, tmp2
	ret
|
|
|
|
/*
 * Maj, one byte per call: tmp1 = (b & c) ^ (b & d) ^ (c & d)
 * Y points at the current byte of a[1] aka b and is advanced by one.
 * tmp2, tmp3 and tmp4 are overwritten.
 */
sha1_nextBlock_Maj:
	ld tmp2, Y+		/* b */
	ldd tmp3, Y+3		/* load from c */
	ldd tmp4, Y+7		/* load from d */
	mov tmp1, tmp2
	and tmp1, tmp3		/* b & c */
	and tmp2, tmp4		/* b & d */
	and tmp3, tmp4		/* c & d */
	eor tmp1, tmp2
	eor tmp1, tmp3
	ret
|
|
|
|
/*
 * Parity, one byte per call: tmp1 = b ^ c ^ d
 * Y points at the current byte of a[1] aka b and is advanced by one.
 * tmp2 is overwritten.
 */
sha1_nextBlock_Parity:
	ldd tmp1, Y+4		/* load from c (Y is not advanced yet) */
	ld tmp2, Y+		/* b */
	eor tmp1, tmp2
	ldd tmp2, Y+7		/* load from d */
	eor tmp1, tmp2
	ret
|
|
/*
|
|
ch_str: .asciz "\r\nCh"
|
|
maj_str: .asciz "\r\nMaj"
|
|
parity_str: .asciz "\r\nParity"
|
|
*/
|
|
;###########################################################
|
|
|
|
.global sha1_init
;void sha1_init(sha1_ctx_t *state){
;	DEBUG_S("\r\nSHA1_INIT");
;	state->h[0] = 0x67452301;
;	state->h[1] = 0xefcdab89;
;	state->h[2] = 0x98badcfe;
;	state->h[3] = 0x10325476;
;	state->h[4] = 0xc3d2e1f0;
;	state->length = 0;
;}
; param1: (r25,r24) 16-bit pointer to sha1_ctx_t struct in ram
; modifies: X(r26,r27), Z(r30,r31), r22, r23
; The length bytes are written from r1, which therefore has to be zero.
sha1_init:
	ldi r30, lo8((sha1_init_vector))	/* Z = initial values in flash */
	ldi r31, hi8((sha1_init_vector))
	movw r26, r24				/* X = state */
	ldi r22, 5*4				/* bytes to copy */
sha1_init_copy_h:
	lpm r23, Z+
	st X+, r23
	dec r22
	brne sha1_init_copy_h
	ldi r22, 8				/* bytes of the length field */
sha1_init_clear_length:
	st X+, r1
	dec r22
	brne sha1_init_clear_length
	ret

/* initial values of h[0]..h[4] */
sha1_init_vector:
	.int 0x67452301
	.int 0xefcdab89
	.int 0x98badcfe
	.int 0x10325476
	.int 0xc3d2e1f0
|
|
|