517 lines
7.9 KiB
ArmAsm
517 lines
7.9 KiB
ArmAsm
/* twister-large-asm.S */
/*
    This file is part of the AVR-Crypto-Lib.
    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
|
|
/**
 * \file     twister-large-asm.S
 * \email    daniel.otte@rub.de
 * \author   Daniel Otte
 * \date     2008-12-27
 * \license  GPLv3 or later
 *
 */
|
|
|
|
#include "avr-asm-macros.S"
|
|
|
|
/* void checksum_update(twister_large_ctx_t* ctx, uint8_t col) */
|
|
/*
|
|
* param ctx: r24:r25
|
|
* param col: r22
|
|
*/
|
|
checksum_update:
|
|
push r16
|
|
push r28
|
|
push r29
|
|
lsl r22
|
|
lsl r22
|
|
lsl r22
|
|
mov r16, r22
|
|
movw r30, r24 /* X points to ctx->state.s */
|
|
|
|
ldd r18, Z+7*8
|
|
ldd r19, Z+6*8
|
|
ldd r20, Z+5*8
|
|
ldd r21, Z+4*8
|
|
ldd r22, Z+3*8
|
|
ldd r23, Z+2*8
|
|
ldd r24, Z+1*8
|
|
ldd r25, Z+0*8
|
|
|
|
adiw r30, 63
|
|
adiw r30, 1+3*8 /* Z points at ctx->checksum[0][8] */
|
|
movw r28, r30 /* Y points at ctx->checksum[0][8] */
|
|
andi r16, 63
|
|
add r30, r16 /* Z points at ctx->checksum[col][8]*/
|
|
adc r31, r1
|
|
ldi r26, 8
|
|
add r16, r26
|
|
andi r16, 63
|
|
add r28, r16
|
|
adc r29, r1 /* Y points at ctx->checksum[(col+1)%8][8]*/
|
|
|
|
ld r0, -Y
|
|
add r18, r0
|
|
ld r0, -Z
|
|
eor r0, r18
|
|
st Z, r0
|
|
|
|
ld r0, -Y
|
|
adc r19, r0
|
|
ld r0, -Z
|
|
eor r0, r19
|
|
st Z, r0
|
|
|
|
ld r0, -Y
|
|
adc r20, r0
|
|
ld r0, -Z
|
|
eor r0, r20
|
|
st Z, r0
|
|
|
|
ld r0, -Y
|
|
adc r21, r0
|
|
ld r0, -Z
|
|
eor r0, r21
|
|
st Z, r0
|
|
|
|
ld r0, -Y
|
|
adc r22, r0
|
|
ld r0, -Z
|
|
eor r0, r22
|
|
st Z, r0
|
|
|
|
ld r0, -Y
|
|
adc r23, r0
|
|
ld r0, -Z
|
|
eor r0, r23
|
|
st Z, r0
|
|
|
|
ld r0, -Y
|
|
adc r24, r0
|
|
ld r0, -Z
|
|
eor r0, r24
|
|
st Z, r0
|
|
|
|
ld r0, -Y
|
|
adc r25, r0
|
|
ld r0, -Z
|
|
eor r0, r25
|
|
st Z, r0
|
|
|
|
pop r29
|
|
pop r28
|
|
pop r16
|
|
ret
|
|
|
|
/*********************************************************************/
|
|
/* void twister_large_init(twister_large_ctx_t* ctx, uint16_t hashsize_b)*/
|
|
/*
|
|
* param ctx: r24:r25
|
|
* param hashsize_b: r22:r23
|
|
*/
|
|
.global twister384_init
|
|
twister384_init:
|
|
ldi r22, lo8(384)
|
|
ldi r23, hi8(384)
|
|
rjmp twister_large_init
|
|
|
|
.global twister512_init
|
|
twister512_init:
|
|
ldi r22, lo8(512)
|
|
ldi r23, hi8(512)
|
|
|
|
.global twister_large_init
|
|
twister_large_init:
|
|
movw r30, r24
|
|
ldi r24, 64
|
|
1:
|
|
st Z+, r1
|
|
dec r24
|
|
brne 1b
|
|
|
|
dec r1
|
|
ldi r24, 8
|
|
1:
|
|
st Z+, r1
|
|
dec r24
|
|
brne 1b
|
|
|
|
inc r1
|
|
ldi r24, 8+64
|
|
1:
|
|
st Z+, r1
|
|
dec r24
|
|
brne 1b
|
|
|
|
subi r30, lo8(1+8+8+8*7+64)
|
|
sbci r31, hi8(1+8+8+8*7+64)
|
|
st Z, r23
|
|
std Z+8, r22
|
|
ret
|
|
|
|
/*********************************************************************/
|
|
/* void twister_large_nextBlock(twister_state_t* ctx, void* msg) */
|
|
/*
|
|
* param ctx: r24:r25
|
|
* param msg: r22:r23
|
|
*/
|
|
CTX_SAVE0 = 14
|
|
CTX_SAVE1 = 15
|
|
TMP_SAVE0 = 12
|
|
TMP_SAVE1 = 13
|
|
MSG_SAVE0 = 28
|
|
MSG_SAVE1 = 29
|
|
.global twister_large_nextBlock
|
|
.global twister384_nextBlock
|
|
.global twister512_nextBlock
|
|
|
|
twister384_nextBlock:
|
|
twister512_nextBlock:
|
|
twister_large_nextBlock:
|
|
push_range 12, 15
|
|
push r28
|
|
push r29
|
|
stack_alloc_large 64
|
|
adiw r30, 1
|
|
movw TMP_SAVE0, r30
|
|
movw CTX_SAVE0, r24
|
|
movw MSG_SAVE0, r22
|
|
movw r26, CTX_SAVE0
|
|
ldi r18, 64/8
|
|
1:
|
|
ld r0, X+
|
|
st Z+, r0
|
|
ld r0, X+
|
|
st Z+, r0
|
|
ld r0, X+
|
|
st Z+, r0
|
|
ld r0, X+
|
|
st Z+, r0
|
|
ld r0, X+
|
|
st Z+, r0
|
|
ld r0, X+
|
|
st Z+, r0
|
|
ld r0, X+
|
|
st Z+, r0
|
|
ld r0, X+
|
|
st Z+, r0
|
|
dec r18
|
|
brne 1b
|
|
/* maxi round 1 */
|
|
movw r24, CTX_SAVE0
|
|
ldi r22, 0
|
|
rcall checksum_update
|
|
movw r22, MSG_SAVE0
|
|
movw r24, CTX_SAVE0
|
|
rcall twister_mini_round
|
|
|
|
movw r24, CTX_SAVE0
|
|
ldi r22, 1
|
|
rcall checksum_update
|
|
adiw MSG_SAVE0, 8
|
|
movw r22, MSG_SAVE0
|
|
movw r24, CTX_SAVE0
|
|
rcall twister_mini_round
|
|
|
|
movw r24, CTX_SAVE0
|
|
ldi r22, 2
|
|
rcall checksum_update
|
|
adiw MSG_SAVE0, 8
|
|
movw r22, MSG_SAVE0
|
|
movw r24, CTX_SAVE0
|
|
rcall twister_mini_round
|
|
|
|
movw r30, TMP_SAVE0
|
|
movw r26, CTX_SAVE0
|
|
ldi r18, 64
|
|
1:
|
|
ld r0, X
|
|
ld r23, Z
|
|
eor r0, r23
|
|
st X+, r0
|
|
st Z+, r0
|
|
dec r18
|
|
brne 1b
|
|
/* maxi round 2 */
|
|
movw r24, CTX_SAVE0
|
|
ldi r22, 3
|
|
rcall checksum_update
|
|
adiw MSG_SAVE0, 8
|
|
movw r22, MSG_SAVE0
|
|
movw r24, CTX_SAVE0
|
|
rcall twister_mini_round
|
|
|
|
movw r24, CTX_SAVE0
|
|
rcall twister_blank_round
|
|
|
|
movw r24, CTX_SAVE0
|
|
ldi r22, 4
|
|
rcall checksum_update
|
|
adiw MSG_SAVE0, 8
|
|
movw r22, MSG_SAVE0
|
|
movw r24, CTX_SAVE0
|
|
rcall twister_mini_round
|
|
|
|
movw r30, TMP_SAVE0
|
|
movw r26, CTX_SAVE0
|
|
ldi r18, 64
|
|
1:
|
|
ld r0, X
|
|
ld r23, Z
|
|
eor r0, r23
|
|
st X+, r0
|
|
st Z+, r0
|
|
dec r18
|
|
brne 1b
|
|
/* maxi round 3 */
|
|
movw r24, CTX_SAVE0
|
|
ldi r22, 5
|
|
rcall checksum_update
|
|
adiw MSG_SAVE0, 8
|
|
movw r22, MSG_SAVE0
|
|
movw r24, CTX_SAVE0
|
|
rcall twister_mini_round
|
|
|
|
movw r24, CTX_SAVE0
|
|
ldi r22, 6
|
|
rcall checksum_update
|
|
adiw MSG_SAVE0, 8
|
|
movw r22, MSG_SAVE0
|
|
movw r24, CTX_SAVE0
|
|
rcall twister_mini_round
|
|
|
|
movw r24, CTX_SAVE0
|
|
ldi r22, 7
|
|
rcall checksum_update
|
|
adiw MSG_SAVE0, 8
|
|
movw r22, MSG_SAVE0
|
|
movw r24, CTX_SAVE0
|
|
rcall twister_mini_round
|
|
|
|
movw r24, CTX_SAVE0
|
|
rcall twister_blank_round
|
|
|
|
movw r30, TMP_SAVE0
|
|
movw r26, CTX_SAVE0
|
|
ldi r18, 64
|
|
1:
|
|
ld r0, X
|
|
ld r23, Z+
|
|
eor r0, r23
|
|
st X+, r0
|
|
dec r18
|
|
brne 1b
|
|
|
|
adiw r26, 9
|
|
ldi r19, 2
|
|
ld r0, X
|
|
add r0, r19
|
|
st X+, r0
|
|
|
|
ld r0, X
|
|
adc r0, r1
|
|
st X+, r0
|
|
ld r0, X
|
|
adc r0, r1
|
|
st X+, r0
|
|
ld r0, X
|
|
adc r0, r1
|
|
st X+, r0
|
|
ld r0, X
|
|
adc r0, r1
|
|
st X+, r0
|
|
ld r0, X
|
|
adc r0, r1
|
|
st X+, r0
|
|
ld r0, X
|
|
adc r0, r1
|
|
st X+, r0
|
|
ld r0, X
|
|
adc r0, r1
|
|
st X+, r0
|
|
|
|
stack_free_large 64
|
|
pop r29
|
|
pop r28
|
|
pop_range 12, 15
|
|
ret
|
|
|
|
/*********************************************************************/
|
|
/* void twister_large_lastBlock(twister_state_t* ctx, void* msg, uint16_t length_b) */
|
|
/*
|
|
* param ctx: r24:r25
|
|
* param msg: r22:r23
|
|
* param length_b: r20:r21
|
|
*/
|
|
TMP_SAVE0 = 12
|
|
TMP_SAVE1 = 13
|
|
CTX_SAVE0 = 14
|
|
CTX_SAVE1 = 15
|
|
LEN_SAVE0 = 16
|
|
LEN_SAVE1 = 17
|
|
MSG_SAVE0 = 28
|
|
MSG_SAVE1 = 29
|
|
.global twister_large_lastBlock
|
|
.global twister384_lastBlock
|
|
.global twister512_lastBlock
|
|
|
|
twister384_lastBlock:
|
|
twister512_lastBlock:
|
|
twister_large_lastBlock:
|
|
push_range 12, 17
|
|
push r28
|
|
push r29
|
|
stack_alloc_large 64
|
|
adiw r30, 1
|
|
movw TMP_SAVE0, r30
|
|
movw CTX_SAVE0, r24
|
|
movw MSG_SAVE0, r22
|
|
movw LEN_SAVE0, r20
|
|
1:
|
|
cpi LEN_SAVE1, 2
|
|
brmi 2f
|
|
movw r24, CTX_SAVE0
|
|
movw r22, MSG_SAVE0
|
|
rcall twister_large_nextBlock
|
|
adiw MSG_SAVE0, 8
|
|
subi LEN_SAVE1, 2
|
|
rjmp 1b
|
|
2:
|
|
movw r18, LEN_SAVE0
|
|
lsr r19
|
|
ror r18
|
|
lsr r18
|
|
lsr r18
|
|
ldi r19, 63
|
|
movw r26, MSG_SAVE0
|
|
movw r30, TMP_SAVE0
|
|
ldi r20, 0x80
|
|
sub r19, r18 /* r18: bytes to copy, r19: bytes to clear */
|
|
|
|
ld r0, X+
|
|
3:
|
|
tst r18
|
|
breq 4f
|
|
31:
|
|
st Z+, r0
|
|
ld r0, X+
|
|
dec r18
|
|
brne 31b
|
|
4:
|
|
mov r18, LEN_SAVE0
|
|
andi r18, 0x07
|
|
ldi r20, 0x80
|
|
breq 5f
|
|
4:
|
|
lsr r20
|
|
dec r18
|
|
brne 4b
|
|
or r20, r0
|
|
rjmp 5f
|
|
|
|
5:
|
|
st Z+, r20
|
|
tst r19
|
|
breq 7f
|
|
6:
|
|
st Z+, r1
|
|
dec r19
|
|
brne 6b
|
|
7:
|
|
movw r24, CTX_SAVE0
|
|
movw r22, TMP_SAVE0
|
|
rcall twister_large_nextBlock
|
|
|
|
ldi r19, 2
|
|
clr r18
|
|
|
|
sub r18, LEN_SAVE0
|
|
sbc r19, LEN_SAVE1
|
|
movw r26, CTX_SAVE0
|
|
adiw r26, 63
|
|
adiw r26, 1+8
|
|
|
|
ld r0, X
|
|
sub r0, r18
|
|
st X+, r0
|
|
ld r0, X
|
|
sbc r0, r19
|
|
st X+, r0
|
|
ld r0, X
|
|
sbc r0, r1
|
|
st X+, r0
|
|
ld r0, X
|
|
sbc r0, r1
|
|
st X+, r0
|
|
ld r0, X
|
|
sbc r0, r1
|
|
st X+, r0
|
|
ld r0, X
|
|
sbc r0, r1
|
|
st X+, r0
|
|
ld r0, X
|
|
sbc r0, r1
|
|
st X+, r0
|
|
ld r0, X
|
|
sbc r0, r1
|
|
st X+, r0
|
|
|
|
sbiw r26, 8
|
|
movw r24, CTX_SAVE0
|
|
movw r22, r26
|
|
rcall twister_mini_round
|
|
|
|
movw r24, CTX_SAVE0
|
|
movw r22, CTX_SAVE0
|
|
ldi r16, 64+8+8
|
|
add r22, r16
|
|
adc r23, r1
|
|
movw r30, r22
|
|
ldi r26, 8
|
|
1:
|
|
ld r12, Z+
|
|
ld r13, Z+
|
|
ld r16, Z+
|
|
ld r17, Z+
|
|
ld r18, Z+
|
|
ld r19, Z+
|
|
ld r20, Z+
|
|
ld r21, Z+
|
|
st -Z, r12
|
|
st -Z, r13
|
|
st -Z, r16
|
|
st -Z, r17
|
|
st -Z, r18
|
|
st -Z, r19
|
|
st -Z, r20
|
|
st -Z, r21
|
|
adiw r30, 8
|
|
dec r26
|
|
brne 1b
|
|
|
|
movw r24, CTX_SAVE0
|
|
movw r22, CTX_SAVE0
|
|
ldi r26, 64+2*8
|
|
add r22, r26
|
|
adc r23, r1
|
|
rcall twister_small_nextBlock
|
|
|
|
stack_free_large 64
|
|
pop r29
|
|
pop r28
|
|
pop_range 12, 17
|
|
ret
|