2008-05-26 19:13:21 +00:00
|
|
|
/* sha256-asm.S */
|
|
|
|
/*
|
|
|
|
This file is part of the Crypto-avr-lib/microcrypt-lib.
|
|
|
|
Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
|
|
|
|
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
2008-07-03 04:11:34 +00:00
|
|
|
/*
|
|
|
|
* Author: Daniel Otte
|
|
|
|
*
|
|
|
|
* License: GPLv3 or later
|
|
|
|
*/
|
|
|
|
; sha-256 implementation in assembler
|
|
|
|
; sizes used by the SHA-256 routines below, expressed in bits
.equ SHA256_BLOCK_BITS, 512	; one message block
.equ SHA256_HASH_BITS, 256	; the resulting hash value
|
|
|
|
|
|
|
|
/*
 * precall
 * Pushes r0, r1, r18-r27, r30 and r31 (in that order) and then clears r1.
 * Intended to be paired with postcall, which pops the same registers in
 * reverse order.
 * NOTE(review): this looks like the avr-gcc call-clobbered register set plus
 * the zero register - confirm against the ABI in use.
 */
.macro precall
	.irp regno, 0, 1, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 30, 31
	push r\regno
	.endr
	clr r1
.endm
|
|
|
|
|
|
|
|
/*
 * postcall
 * Counterpart of precall: pops r31, r30, r27-r18, r1 and r0, i.e. exactly the
 * registers pushed by precall, in reverse order.
 */
.macro postcall
	.irp regno, 31, 30, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 1, 0
	pop r\regno
	.endr
.endm
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * hexdump length
 * Debug helper. Sends CR and LF through uart_putc (character in r24), then
 * calls uart_hexdump with r25:r24 = X and r23:r22 = number of bytes, at most
 * 16 per call. After a full 16-byte piece X is advanced by 16 and the macro
 * expands itself again for the remaining (length - 16) bytes.
 * r22-r25 are overwritten here; X is saved around the external calls.
 * uart_putc and uart_hexdump are defined outside this file.
 */
.macro hexdump length
	push r27			; keep X while the line break is sent
	push r26
	ldi r25, '\r'
	mov r24, r25
	call uart_putc
	ldi r25, '\n'
	mov r24, r25
	call uart_putc
	pop r26
	pop r27
	movw r24, r26			; address argument = X
.if (\length) <= 16
	/* last (or only) piece: dump what is left */
	ldi r22, lo8(\length)
	ldi r23, hi8(\length)
	call uart_hexdump
.else
	/* full piece of 16 bytes, then continue with the rest */
	ldi r22, lo8(16)
	ldi r23, hi8(16)
	push r27
	push r26
	call uart_hexdump
	pop r26
	pop r27
	adiw r26, 16
	hexdump \length-16
.endif
.endm
|
|
|
|
|
|
|
|
/*
 * dbg_hexdump length
 * Dumps length bytes starting at the address held in X.
 * The hexdump expansion is bracketed by precall/postcall, so the registers
 * saved by precall hold their previous values again afterwards.
 */
.macro dbg_hexdump length
	precall
	hexdump \length
	postcall
.endm
|
|
|
|
|
|
|
|
.text

; I/O addresses (as used with in/out below) of the stack pointer low byte,
; the stack pointer high byte and the status register
.equ SPL, 0x3D
.equ SPH, 0x3E
.equ SREG, 0x3F
|
|
|
|
|
|
|
|
|
|
|
|
;
|
|
|
|
;sha256_ctx_t is:
|
|
|
|
;
|
|
|
|
; [h0][h1][h2][h3][h4][h5][h6][h7][length]
|
|
|
|
; each hn is 32 bits wide, length is 64 bits wide
|
|
|
|
|
|
|
|
;###########################################################
|
|
|
|
|
|
|
|
.global sha256_ctx2hash
; === sha256_ctx2hash ===
; Copies the eight 32-bit state words of a context to the destination and
; reverses the byte order inside every word while doing so.
;  param1: 16-bit destination pointer, r25:r24 (r25 is most significant)
;  param2: 16-bit pointer to the sha256_ctx structure, r23:r22
;  modifies: r0, r20, r21, X (r27:r26), Z (r31:r30)
sha256_ctx2hash:
	movw r30, r24		; Z = destination
	movw r26, r22		; X = context
	sbiw r26, 4		; bias X so that the +8 below lands behind word 0
	ldi r21, 8		; number of words still to copy
.Lctx2hash_word:
	adiw r26, 8		; X = one byte past the current word
	ldi r20, 4		; bytes per word
.Lctx2hash_byte:
	ld r0, -X		; read the word backwards ...
	st Z+, r0		; ... and write it forwards
	dec r20
	brne .Lctx2hash_byte

	dec r21
	brne .Lctx2hash_word

	ret
|
|
|
|
|
|
|
|
;###########################################################
|
|
|
|
|
|
|
|
.global sha256
; === sha256 ===
; Calculates the SHA-256 hash of a message that lies completely in RAM.
;  param1: 16-bit hash destination pointer, r25:r24 (r25 is most significant)
;  param2: 16-bit pointer to the message, r23:r22
;  param3: 32-bit length of the message in bits, r21:r20:r19:r18
;
; A sha256_ctx (8*4 state bytes + 8 length bytes) is allocated on the stack.
; Register use inside the function:
;   r17:r16         pointer to the context on the stack
;   r11:r10:r9:r8   remaining message length in bits
;   r13:r12         pointer to the not yet hashed part of the message
; r1 is used as zero; sha256_nextBlock clears it before returning.
sha256:
sha256_prolog:
	push r8
	push r9
	push r10
	push r11
	push r12
	push r13
	push r16
	push r17
	in r16, SPL
	in r17, SPH
	subi r16, 8*4+8		/* reserve room for the context */
	sbci r17, 0
	in r0, SREG
	cli			/* SP must not be used while it is half updated */
	out SPL, r16
	out SPH, r17
	out SREG, r0

	push r25		/* the destination is needed again at the end */
	push r24
	/* r17:r16 = first byte of the context (SP before the pushes, plus 1).
	   inc/adc cannot be used for this: inc leaves the carry flag unchanged. */
	subi r16, lo8(-1)
	sbci r17, hi8(-1)

	movw r8, r18		/* backup of length */
	movw r10, r20

	movw r12, r22		/* backup of msg-ptr */

	movw r24, r16
	rcall sha256_init
	/* while (length >= 512) hash a complete block */
1:
	tst r11
	brne 2f			/* upper bytes set: surely >= 512 */
	tst r10
	brne 2f
	mov r19, r9
	cpi r19, 0x02
	brlo 4f			/* less than 512 bits left */
2:
	movw r24, r16
	movw r22, r12
	rcall sha256_nextBlock
	/* msg-ptr += 64; r23:r22 were clobbered by the call, so the backup in
	   r13:r12 is the value that has to move on */
	ldi r19, 64
	add r12, r19
	adc r13, r1
	/* length -= 512 */
	ldi r19, 0x02
	sub r9, r19
	sbc r10, r1
	sbc r11, r1
	rjmp 1b

4:
	/* the rest (0..511 bits) is padded and hashed by sha256_lastBlock */
	movw r24, r16
	movw r22, r12
	movw r20, r8
	rcall sha256_lastBlock

	pop r24
	pop r25
	movw r22, r16
	rcall sha256_ctx2hash

sha256_epilog:
	in r30, SPL
	in r31, SPH
	adiw r30, 8*4+8		/* release the context */
	in r0, SREG
	cli
	out SPL, r30
	out SPH, r31
	out SREG, r0
	pop r17
	pop r16
	pop r13
	pop r12
	pop r11
	pop r10
	pop r9
	pop r8
	ret
|
|
|
|
|
|
|
|
;###########################################################
|
|
|
|
|
|
|
|
|
|
|
|
; block MUST NOT be larger than 64 bytes

.global sha256_lastBlock
; === sha256_lastBlock ===
; Pads the final part of a message (stuffing bit, zeros, 64-bit length) and
; hashes it into the context.
;  param1: 16-bit pointer to the sha256_ctx structure, r25:r24
;          (r25 is most significant)
;  param2: 16-bit pointer to the (at most 64 byte) block, r23:r22
;  param3: 16-bit length of the block in bits, r21:r20
;
; A 64 byte scratch block is allocated on the stack and released again in
; the epilog. r0 and r1 are used as temporaries and are zero on return.
sha256_lastBlock_localSpace = (SHA256_BLOCK_BITS/8+1)

sha256_lastBlock:
	/* a block of exactly 512 bits is hashed as it is; afterwards an empty
	   remainder is padded */
	tst r20
	brne sha256_lastBlock_prolog
	cpi r21, 0x02
	brne sha256_lastBlock_prolog
	push r25
	push r24
	push r23
	push r22
	rcall sha256_nextBlock
	pop r22
	pop r23
	pop r24
	pop r25
	/* remaining length = 0. sha256_nextBlock returns with r20 and r21
	   overwritten, so both bytes have to be cleared here. */
	clr r21
	clr r20
sha256_lastBlock_prolog:
	/* allocate space on stack */
	in r30, SPL
	in r31, SPH
	in r1, SREG		/* r1 is only a temporary here */
	subi r30, lo8(64)
	sbci r31, hi8(64)
	cli
	out SPL, r30
	out SPH, r31
	out SREG,r1

	adiw r30, 1		/* SP points to next free byte on stack */
	mov r18, r20		/* r20 = LSB(length) */
	lsr r18
	lsr r18
	lsr r18
	bst r21, 0		/* bit 8 of the length becomes bit 5 of the byte count */
	bld r18, 5		/* now: r18 == length/8 (aka. length in bytes) */

	movw r26, r22		/* X points to begin of msg */
	tst r18
	breq sha256_lastBlock_post_copy
	mov r1, r18		/* r1 counts the bytes and ends up as zero */
sha256_lastBlock_copy_loop:
	ld r0, X+
	st Z+, r0
	dec r1
	brne sha256_lastBlock_copy_loop
sha256_lastBlock_post_copy:
sha256_lastBlock_insert_stuffing_bit:
	ldi r19, 0x80
	mov r0,r19
	ldi r19, 0x07
	and r19, r20		/* length % 8 != 0: we are in bitmode */
	breq 2f			/* no bitmode */
1:
	lsr r0			/* move the stuffing bit behind the message bits */
	dec r19
	brne 1b
	ld r19, X		/* the partially used message byte */
	/* the unused low bits of that byte are not masked out */
	or r0, r19
2:
	st Z+, r0
	inc r18

	/* is there still room for the 8 length bytes? */
	cpi r18, 64-8+1
	brsh 0f
	rjmp sha256_lastBlock_insert_zeros
0:
	/* no room: fill this block with zeros, hash it and put the length
	   into a second block */
	ldi r19, 64
	sub r19, r18
	breq 2f
1:
	st Z+, r1		/* r1 is zero, the copy loop has run */
	dec r19
	brne 1b
2:
	sbiw r30, 63
	sbiw r30, 1		/* Z points to begin of block again */
	movw r22, r30

	push r31
	push r30
	push r25
	push r24
	push r21
	push r20
	rcall sha256_nextBlock
	pop r20
	pop r21
	pop r24
	pop r25
	pop r30
	pop r31

	/* sha256_nextBlock added 512 to the length in the context, but this
	   block carried padding only: subtract the 512 again */
	movw r26, r24
	adiw r26, 4*8+1		/* we can skip the lowest byte */
	ld r19, X
	subi r19, hi8(512)
	st X+, r19
	ldi r18, 6
1:
	ld r19, X
	sbci r19, 0
	st X+, r19
	dec r18			/* dec does not touch the borrow in C */
	brne 1b

	/* r18 is zero now, i.e. the second block is still empty */

sha256_lastBlock_insert_zeros:
	ldi r19, 64-8
	sub r19, r18
	breq sha256_lastBlock_insert_length
	clr r1
1:
	st Z+, r1
	dec r19
	brne 1b

sha256_lastBlock_insert_length:
	movw r26, r24		/* X points to state */
	adiw r26, 8*4		/* X points to (state.length) */
	adiw r30, 8		/* Z points one after the last byte of block */
	/* block[63..56] = state.length + length of this block, stored with the
	   most significant byte first */
	ld r0, X+
	add r0, r20
	st -Z, r0
	ld r0, X+
	adc r0, r21
	st -Z, r0
	ldi r19, 6
1:
	ld r0, X+
	adc r0, r1
	st -Z, r0
	dec r19
	brne 1b

	sbiw r30, 64-8		/* Z points to begin of block */
	movw r22, r30
	rcall sha256_nextBlock

sha256_lastBlock_epilog:
	in r30, SPL
	in r31, SPH
	in r1, SREG
	adiw r30, 63		/* release the 64 byte block: 63 ... */
	adiw r30, 1		/* ... + 1, adiw takes at most 63 */
	cli
	out SPL, r30
	out SPH, r31
	out SREG,r1
	clr r1
	clr r0
	ret
|
|
|
|
|
|
|
|
/**/
|
|
|
|
;###########################################################
|
|
|
|
|
|
|
|
.global sha256_nextBlock
; === sha256_nextBlock ===
; Core of the implementation: hashes one 64 byte block into the context and
; adds 512 to the bit length stored in the context.
;  param1: 16-bit pointer to the sha256_ctx structure, r25:r24
;          (r25 is most significant)
;  param2: 16-bit pointer to the 64 byte block, r23:r22
;
; Stack frame: 64 32-bit words for the w array followed by 8 32-bit words
; for the a array (288 bytes).
; r4-r17, r28 and r29 are saved and restored; r0, r18-r27, r30 and r31 are
; overwritten; r1 is used as loop counter and is cleared before returning.
.equ sha256_nextBlock_localSpace, (64+8)*4

; register numbers of the 32-bit working values (xxx1 = least significant)
.equ Bck1, 12
.equ Bck2, 13
.equ Bck3, 14
.equ Bck4, 15
.equ Func1, 22
.equ Func2, 23
.equ Func3, 24
.equ Func4, 25
.equ Accu1, 16
.equ Accu2, 17
.equ Accu3, 18
.equ Accu4, 19
.equ XAccu1, 8
.equ XAccu2, 9
.equ XAccu3, 10
.equ XAccu4, 11
.equ T1, 4
.equ T2, 5
.equ T3, 6
.equ T4, 7
.equ LoopC, 1
/* byteorder: high number <--> high significance */
sha256_nextBlock:
	/* ---- prolog: save registers, allocate the w and a arrays ---- */
	push r4
	push r5
	push r6
	push r7
	push r8
	push r9
	push r10
	push r11
	push r12
	push r13
	push r14
	push r15
	push r16
	push r17
	push r28
	push r29
	in r20, SPL
	in r21, SPH
	movw r18, r20		/* r19:r18 = SP before the allocation */
	movw r30, r22		/* Z = message block */
	subi r20, lo8(sha256_nextBlock_localSpace)
	sbci r21, hi8(sha256_nextBlock_localSpace)
	movw r26, r20		/* X = new SP */
	in r0, SREG
	cli			/* no interrupts while SP is half updated */
	out SPL, r20
	out SPH, r21
	out SREG, r0
	push r18		/* old SP, popped again in the epilog */
	push r19
	push r24		/* context pointer, needed twice further down */
	push r25

	/* ---- w[0..15] = message words, byte order reversed per word ---- */
	adiw r26, 1		/* X = w[0] */
	ldi r20, 16
sha256_nextBlock_wcpyloop:
	ld r23, Z+
	ld r22, Z+
	ld r19, Z+
	ld r18, Z+
	st X+, r18
	st X+, r19
	st X+, r22
	st X+, r23
	dec r20
	brne sha256_nextBlock_wcpyloop

	/* ---- message schedule ----
	   for (i=16; i<64; ++i)
	     w[i] = sigma1(w[i-2]) + w[i-7] + sigma0(w[i-15]) + w[i-16];
	   Func  (r25..r22) operand and result of bitrotl / bitrotr
	   Accu  (r19..r16) sum that becomes w[i]
	   Bck   (r15..r12) copy of the word the sigma function works on
	   XAccu (r11..r8)  xor accumulator
	   LoopC (r1)       loop counter, X points to w[i] */
	ldi r20, 64-16
	mov LoopC, r20
sha256_nextBlock_wcalcloop:
	movw r30, r26
	sbiw r30, 63
	sbiw r30, 1		/* Z = X - 64 = w[i-16] */
	ld Accu1, Z+
	ld Accu2, Z+
	ld Accu3, Z+
	ld Accu4, Z+		/* Accu = w[i-16] */
	ld Bck1, Z+
	ld Bck2, Z+
	ld Bck3, Z+
	ld Bck4, Z+		/* Bck = w[i-15], Z = w[i-14] */

	/* sigma0(x) = ROTR(x,7) ^ ROTR(x,18) ^ SHR(x,3) */
	mov Func1, Bck2
	mov Func2, Bck3
	mov Func3, Bck4
	mov Func4, Bck1		/* bytes moved: rotated right by 8 */
	ldi r20, 1
	rcall bitrotl		/* one bit back to the left: ROTR 7 */
	movw XAccu1, Func1
	movw XAccu3, Func3
	movw Func1, Bck3
	movw Func3, Bck1	/* halves swapped: rotated by 16 */
	ldi r20, 2
	rcall bitrotr		/* ROTR 18 */
	eor XAccu1, Func1
	eor XAccu2, Func2
	eor XAccu3, Func3
	eor XAccu4, Func4
	ldi Func2, 3		/* Bck is not needed afterwards: shift it in place */
sha256_nextBlock_sigma0_shr:
	lsr Bck4
	ror Bck3
	ror Bck2
	ror Bck1
	dec Func2
	brne sha256_nextBlock_sigma0_shr
	eor XAccu1, Bck1
	eor XAccu2, Bck2
	eor XAccu3, Bck3
	eor XAccu4, Bck4	/* XAccu = sigma0(w[i-15]) */
	add Accu1, XAccu1
	adc Accu2, XAccu2
	adc Accu3, XAccu3
	adc Accu4, XAccu4

	ldd Func1, Z+7*4
	ldd Func2, Z+7*4+1
	ldd Func3, Z+7*4+2
	ldd Func4, Z+7*4+3	/* Func = w[i-7] */
	add Accu1, Func1
	adc Accu2, Func2
	adc Accu3, Func3
	adc Accu4, Func4

	ldd Bck1, Z+12*4
	ldd Bck2, Z+12*4+1
	ldd Bck3, Z+12*4+2
	ldd Bck4, Z+12*4+3	/* Bck = w[i-2] */

	/* sigma1(x) = ROTR(x,17) ^ ROTR(x,19) ^ SHR(x,10) */
	movw Func1, Bck3
	movw Func3, Bck1	/* rotated by 16 */
	ldi r20, 1
	rcall bitrotr		/* ROTR 17 */
	movw XAccu3, Func3
	movw XAccu1, Func1
	ldi r20, 2
	rcall bitrotr		/* two more bits: ROTR 19 */
	eor XAccu1, Func1
	eor XAccu2, Func2
	eor XAccu3, Func3
	eor XAccu4, Func4
	ldi Func2, 2		/* SHR 10 = drop the lowest byte, then shift by 2 */
sha256_nextBlock_sigma1_shr:
	lsr Bck4
	ror Bck3
	ror Bck2
	dec Func2
	brne sha256_nextBlock_sigma1_shr
	eor XAccu1, Bck2
	eor XAccu2, Bck3
	eor XAccu3, Bck4	/* XAccu = sigma1(w[i-2]) */
	add Accu1, XAccu1
	adc Accu2, XAccu2
	adc Accu3, XAccu3
	adc Accu4, XAccu4

	st X+, Accu1		/* w[i] = Accu */
	st X+, Accu2
	st X+, Accu3
	st X+, Accu4
	dec LoopC
	breq 3f
	rjmp sha256_nextBlock_wcalcloop	/* too far away for brne */
3:
	/* ---- a[0..7] = state; X points one byte behind w ---- */
	pop r31
	pop r30			/* Z = context pointer ... */
	push r30
	push r31		/* ... which stays on the stack for later */
	ldi r25, 8*4
sha256_nextBlock_init_a:
	ld r1, Z+
	st X+, r1
	dec r25
	brne sha256_nextBlock_init_a

	/* ---- compression loop ----
	   for (i=0; i<64; ++i){
	     t1 = a[7] + SIGMA1(a[4]) + CH(a[4],a[5],a[6]) + k[i] + w[i];
	     t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]);
	     a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3];
	     a[3]=a[2]; a[2]=a[1]; a[1]=a[0];
	     a[4] += t1;
	     a[0] = t1 + t2;
	   }
	   Y points to a[0], Z (needed by lpm) to k[i], X to w[i] */
	sbiw r26, 8*4		/* X was behind a[7], now at a[0] */
	movw r28, r26
	ldi r30, lo8(sha256_kv)
	ldi r31, hi8(sha256_kv)
	dec r27			/* X -= 256 (64*4): X = w[0] */
	ldi r25, 64
	mov LoopC, r25
sha256_nextBlock_round:
	/* CH(x,y,z) = (x&y)^((~x)&z) with x=a[4], y=a[5], z=a[6] */
	ldd T1, Y+5*4
	ldd T2, Y+5*4+1
	ldd T3, Y+5*4+2
	ldd T4, Y+5*4+3		/* T = y */
	ldd Func1, Y+4*4
	ldd Func2, Y+4*4+1
	ldd Func3, Y+4*4+2
	ldd Func4, Y+4*4+3	/* Func = x */
	ldd Bck1, Y+6*4
	ldd Bck2, Y+6*4+1
	ldd Bck3, Y+6*4+2
	ldd Bck4, Y+6*4+3	/* Bck = z */
	and T1, Func1
	and T2, Func2
	and T3, Func3
	and T4, Func4
	com Func1
	com Func2
	com Func3
	com Func4
	and Bck1, Func1
	and Bck2, Func2
	and Bck3, Func3
	and Bck4, Func4
	eor T1, Bck1
	eor T2, Bck2
	eor T3, Bck3
	eor T4, Bck4		/* T = CH(x,y,z) */

	/* SIGMA1(x) = ROTR(x,6) ^ ROTR(x,11) ^ ROTR(x,25) with x=a[4] */
	ldd Bck4, Y+4*4
	ldd Bck1, Y+4*4+1
	ldd Bck2, Y+4*4+2
	ldd Bck3, Y+4*4+3	/* Bck = x rotated right by 8 */
	movw Func1, Bck1
	movw Func3, Bck3
	ldi r20, 2
	rcall bitrotl		/* ROTR 6 */
	movw XAccu1, Func1
	movw XAccu3, Func3
	movw Func1, Bck1
	movw Func3, Bck3
	ldi r20, 3
	rcall bitrotr		/* ROTR 11 */
	eor XAccu1, Func1
	eor XAccu2, Func2
	eor XAccu3, Func3
	eor XAccu4, Func4
	movw Func1, Bck3
	movw Func3, Bck1	/* 16 more: rotated right by 24 */
	ldi r20, 1
	rcall bitrotr		/* ROTR 25 */
	eor XAccu1, Func1
	eor XAccu2, Func2
	eor XAccu3, Func3
	eor XAccu4, Func4	/* XAccu = SIGMA1(a[4]) */
	add T1, XAccu1
	adc T2, XAccu2
	adc T3, XAccu3
	adc T4, XAccu4

	/* T += a[7] + w[i] + k[i] */
	ldd XAccu1, Y+4*7
	ldd XAccu2, Y+4*7+1
	ldd XAccu3, Y+4*7+2
	ldd XAccu4, Y+4*7+3
	add T1, XAccu1
	adc T2, XAccu2
	adc T3, XAccu3
	adc T4, XAccu4		/* a[7] added */
	ld XAccu1, X+
	ld XAccu2, X+
	ld XAccu3, X+
	ld XAccu4, X+
	add T1, XAccu1
	adc T2, XAccu2
	adc T3, XAccu3
	adc T4, XAccu4		/* w[i] added */
	lpm XAccu1, Z+
	lpm XAccu2, Z+
	lpm XAccu3, Z+
	lpm XAccu4, Z+
	add T1, XAccu1
	adc T2, XAccu2
	adc T3, XAccu3
	adc T4, XAccu4		/* k[i] added: T = t1 */

	/* MAJ(x,y,z) = (x&y)^(x&z)^(y&z) with x=a[0], y=a[1], z=a[2] */
	ldd Func1, Y+4*0+0
	ldd Func2, Y+4*0+1
	ldd Func3, Y+4*0+2
	ldd Func4, Y+4*0+3	/* Func = x */
	ldd XAccu1, Y+4*1+0
	ldd XAccu2, Y+4*1+1
	ldd XAccu3, Y+4*1+2
	ldd XAccu4, Y+4*1+3	/* XAccu = y */
	and XAccu1, Func1
	and XAccu2, Func2
	and XAccu3, Func3
	and XAccu4, Func4	/* XAccu = x & y */
	ldd Bck1, Y+4*2+0
	ldd Bck2, Y+4*2+1
	ldd Bck3, Y+4*2+2
	ldd Bck4, Y+4*2+3	/* Bck = z */
	and Func1, Bck1
	and Func2, Bck2
	and Func3, Bck3
	and Func4, Bck4
	eor XAccu1, Func1
	eor XAccu2, Func2
	eor XAccu3, Func3
	eor XAccu4, Func4	/* XAccu = (x & y) ^ (x & z) */
	ldd Func1, Y+4*1+0
	ldd Func2, Y+4*1+1
	ldd Func3, Y+4*1+2
	ldd Func4, Y+4*1+3	/* Func = y */
	and Func1, Bck1
	and Func2, Bck2
	and Func3, Bck3
	and Func4, Bck4
	eor XAccu1, Func1
	eor XAccu2, Func2
	eor XAccu3, Func3
	eor XAccu4, Func4	/* XAccu = MAJ(x,y,z) */

	/* SIGMA0(x) = ROTR(x,2) ^ ROTR(x,13) ^ ROTR(x,22) with x=a[0] */
	ldd Bck1, Y+4*0+0
	ldd Bck2, Y+4*0+1
	ldd Bck3, Y+4*0+2
	ldd Bck4, Y+4*0+3
	movw Func1, Bck1
	movw Func3, Bck3
	ldi r20, 2
	rcall bitrotr
	movw Accu1, Func1
	movw Accu3, Func3	/* Accu = ROTR(x,2) */
	movw Func1, Bck3
	movw Func3, Bck1	/* rotated by 16 */
	ldi r20, 3
	rcall bitrotl		/* three bits back: ROTR 13 */
	eor Accu1, Func1
	eor Accu2, Func2
	eor Accu3, Func3
	eor Accu4, Func4
	mov Func1, Bck4
	mov Func2, Bck1
	mov Func3, Bck2
	mov Func4, Bck3		/* rotated right by 24 */
	ldi r20, 2
	rcall bitrotl		/* two bits back: ROTR 22 */
	eor Accu1, Func1
	eor Accu2, Func2
	eor Accu3, Func3
	eor Accu4, Func4	/* Accu = SIGMA0(a[0]) */
	add Accu1, XAccu1
	adc Accu2, XAccu2
	adc Accu3, XAccu3
	adc Accu4, XAccu4	/* Accu = t2 */

	/* a[7]=a[6]; ... a[1]=a[0]: move 28 bytes up by 4, highest first */
	ldi r21, 7*4
	adiw r28, 7*4
sha256_nextBlock_a_shift:
	ld r25, -Y		/* predecrement */
	std Y+4, r25
	dec r21
	brne sha256_nextBlock_a_shift	/* Y is back at a[0] when done */

	ldd Bck1, Y+4*4+0
	ldd Bck2, Y+4*4+1
	ldd Bck3, Y+4*4+2
	ldd Bck4, Y+4*4+3
	add Bck1, T1
	adc Bck2, T2
	adc Bck3, T3
	adc Bck4, T4
	std Y+4*4+0, Bck1
	std Y+4*4+1, Bck2
	std Y+4*4+2, Bck3
	std Y+4*4+3, Bck4	/* a[4] += t1 */
	add Accu1, T1
	adc Accu2, T2
	adc Accu3, T3
	adc Accu4, T4
	std Y+4*0+0, Accu1
	std Y+4*0+1, Accu2
	std Y+4*0+2, Accu3
	std Y+4*0+3, Accu4	/* a[0] = t1 + t2 */

	dec LoopC
	breq sha256_nextBlock_update_state
	rjmp sha256_nextBlock_round	/* too far away for brne */

sha256_nextBlock_update_state:
	/* ---- state[n] += a[n]; the context pointer is on top of the stack ---- */
	pop r31
	pop r30
	ldi r21, 8
sha256_nextBlock_update_loop:
	ldd Accu1, Z+0
	ldd Accu2, Z+1
	ldd Accu3, Z+2
	ldd Accu4, Z+3
	ld Func1, Y+
	ld Func2, Y+
	ld Func3, Y+
	ld Func4, Y+
	add Accu1, Func1
	adc Accu2, Func2
	adc Accu3, Func3
	adc Accu4, Func4
	st Z+, Accu1
	st Z+, Accu2
	st Z+, Accu3
	st Z+, Accu4
	dec r21
	brne sha256_nextBlock_update_loop

	/* ---- length += 512: add 2 to byte 1, ripple the carry upwards ---- */
	adiw r30, 1		/* byte 0 of the length does not change */
	ldi r21, 2
	ldi r22, 6
	ld r20, Z
	add r20, r21
	st Z+, r20
	clr r21			/* clr leaves the carry flag alone */
sha256_nextBlock_fix_length:
	brcc sha256_nextBlock_epilog
	ld r20, Z
	adc r20, r21
	st Z+, r20
	dec r22
	brne sha256_nextBlock_fix_length

sha256_nextBlock_epilog:
	/* ---- release the frame and restore the saved registers ---- */
	pop r21
	pop r20			/* r21:r20 = SP as saved in the prolog */
	in r0, SREG
	cli			/* no interrupts while SP is half updated */
	out SPL, r20
	out SPH, r21
	out SREG, r0

	clr r1
	pop r29
	pop r28
	pop r17
	pop r16
	pop r15
	pop r14
	pop r13
	pop r12
	pop r11
	pop r10
	pop r9
	pop r8
	pop r7
	pop r6
	pop r5
	pop r4
	ret
|
|
|
|
|
|
|
|
; Round constants k[0..63], kept in program memory and read with lpm.
; Every constant is one 32-bit value, emitted least significant byte first.
sha256_kv:
	.long 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
	.long 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
	.long 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
	.long 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
	.long 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
	.long 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
	.long 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
	.long 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
	.long 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
	.long 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
	.long 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
	.long 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
	.long 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
	.long 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
	.long 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
	.long 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
|
|
|
|
|
|
|
|
|
|
|
|
;###########################################################
|
|
|
|
|
|
|
|
.global sha256_init
; === sha256_init ===
; C equivalent:
;   uint32_t sha256_init_vector[]={
;       0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
;       0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 };
;
;   void sha256_init(sha256_ctx_t *state){
;       state->length=0;
;       memcpy(state->h, sha256_init_vector, 8*4);
;   }
;  param1: 16-bit pointer to the sha256_ctx_t structure in RAM, r25:r24
;  modifies: r1 (cleared), r22, r23, X (r27:r26), Z (r31:r30)
sha256_init:
	ldi r30, lo8((sha256_init_vector))	; Z = initial values in flash
	ldi r31, hi8((sha256_init_vector))
	movw r26, r24				; X = context
	ldi r22, 32				; 8 words of 4 bytes
1:
	lpm r23, Z+
	st X+, r23
	dec r22
	brne 1b
	ldi r22, 8				; 8 length bytes follow the state
	clr r1					; should be zero already
2:
	st X+, r1
	dec r22
	brne 2b
	ret

; initial hash values, least significant byte first
sha256_init_vector:
	.long 0x6A09E667
	.long 0xBB67AE85
	.long 0x3C6EF372
	.long 0xA54FF53A
	.long 0x510E527F
	.long 0x9B05688C
	.long 0x1F83D9AB
	.long 0x5BE0CD19
|
|
|
|
|
|
|
|
;###########################################################
|
|
|
|
|
|
|
|
.global rotl32
; === ROTL32 ===
; Rotates a 32-bit word to the left.
;  param1: the 32-bit word to rotate, r25:r24:r23:r22
;          (r25 is most significant); the result is left in the same registers
;  param2: 8-bit number of bit positions to rotate by, r20
;  modifies: r20, r21
;
; bitrotl is a second entry point for counts below 8 (same registers); it is
; called directly by sha256_nextBlock.
rotl32:
	cpi r20, 8
	brlo bitrotl
	/* 8 or more: rotate left by a whole byte and try again */
	mov r21, r25
	mov r25, r24
	mov r24, r23
	mov r23, r22
	mov r22, r21
	subi r20, 8
	rjmp rotl32		/* stay in the left rotation (this used to jump to rotr32) */
bitrotl:
	clr r21			/* collects the bits shifted out at the top */
	clc
bitrotl_loop:
	tst r20
	breq fixrotl
	rol r22
	rol r23
	rol r24
	rol r25
	rol r21			/* carry is 0 again afterwards: bit 7 of r21 is never set */
	dec r20
	rjmp bitrotl_loop
fixrotl:
	or r22, r21		/* put the collected bits back in at the bottom */
	ret
|
|
|
|
|
|
|
|
|
|
|
|
;###########################################################
|
|
|
|
|
|
|
|
.global rotr32
; === ROTR32 ===
; Rotates a 32-bit word to the right.
;  param1: the 32-bit word to rotate, r25:r24:r23:r22
;          (r25 is most significant); the result is left in the same registers
;  param2: 8-bit number of bit positions to rotate by, r20
;  modifies: r20, r21
;
; bitrotr is a second entry point for counts below 8 (same registers); it is
; called directly by sha256_nextBlock.
rotr32:
	cpi r20, 8
	brlo bitrotr
	/* 8 or more: rotate right by a whole byte and try again */
	mov r21, r22
	mov r22, r23
	mov r23, r24
	mov r24, r25
	mov r25, r21
	subi r20, 8
	rjmp rotr32
bitrotr:
	clr r21			/* collects the bits shifted out at the bottom */
	clc
1:
	tst r20
	breq 2f
	ror r25
	ror r24
	ror r23
	ror r22
	ror r21			/* carry is 0 again afterwards: bit 0 of r21 is never set */
	dec r20
	rjmp 1b
2:
	or r25, r21		/* put the collected bits back in at the top */
	ret
|
|
|
|
|
|
|
|
|
|
|
|
;###########################################################
|
|
|
|
|
|
|
|
.global change_endian32
; === change_endian32 ===
; Reverses the byte order of a 32-bit word.
;  param1: the 32-bit word, r25:r24:r23:r22 (r25 is most significant);
;          the result is left in the same registers
;  modifies: r20, r21
change_endian32:
	movw r20, r22		; r21:r20 = copy of the two low bytes
	mov r23, r24		; the two middle bytes change places ...
	mov r24, r21
	mov r22, r25		; ... and so do the two outer bytes
	mov r25, r20
	ret
|
|
|
|
|