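/*
 * avr-crypto-lib/twister/twister-large-asm.S
 *
 * Listing metadata: 517 lines, 7.9 KiB, highlighted as "ArmAsm"
 * (the code below is AVR assembly for the GNU assembler).
 */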

/* twister-large-asm.S */
/*
This file is part of the AVR-Crypto-Lib.
Copyright (C) 2006-2015 Daniel Otte (bg@nerilex.org)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* \file twister-large-asm.S
* \email bg@nerilex.org
* \author Daniel Otte
* \date 2008-12-27
* \license GPLv3 or later
*
*/
#include "avr-asm-macros.S"
/*
 * void checksum_update(twister_large_ctx_t *ctx, uint8_t col)
 *
 * param ctx: r24:r25
 * param col: r22
 *
 * Collects byte 0 of each of the eight 8-byte state rows (ctx+0*8 ..
 * ctx+7*8) in r25..r18, adds checksum row (col+1)%8 to that 8-byte value
 * through one carry chain (r18 and byte 7 of the row are the least
 * significant end), and XORs every sum byte into checksum row col.
 * The checksum rows start at ctx+80.
 *
 * File-local helper (no .global). r16, r28 and r29 are saved and restored;
 * r0, r18-r26, r30 and r31 are overwritten.
 */
CU_ROW_BYTES = 8
CU_ROWS_MASK = 63	/* 8 rows * 8 bytes, used to wrap the row offset */
CU_ROW0_END  = 88	/* offset of the byte just past checksum row 0 */
checksum_update:
	push r16
	push r28
	push r29
	/* r16 = col*8; must be taken before r22 is reused as an accumulator */
	mov r16, r22
	lsl r16
	lsl r16
	lsl r16
	movw r30, r24			/* Z = ctx */
	/* byte 0 of state row k goes to r(25-k) */
	ldd r25, Z+0*CU_ROW_BYTES
	ldd r24, Z+1*CU_ROW_BYTES
	ldd r23, Z+2*CU_ROW_BYTES
	ldd r22, Z+3*CU_ROW_BYTES
	ldd r21, Z+4*CU_ROW_BYTES
	ldd r20, Z+5*CU_ROW_BYTES
	ldd r19, Z+6*CU_ROW_BYTES
	ldd r18, Z+7*CU_ROW_BYTES
	/* adiw takes at most 63, so the 88-byte step is split in two */
	adiw r30, 63
	adiw r30, CU_ROW0_END-63	/* Z = one past checksum row 0 */
	movw r28, r30			/* Y = one past checksum row 0 */
	andi r16, CU_ROWS_MASK
	add r30, r16
	adc r31, r1			/* Z = one past checksum row col */
	ldi r26, CU_ROW_BYTES
	add r16, r26
	andi r16, CU_ROWS_MASK		/* wrap: row 7 is followed by row 0 */
	add r28, r16
	adc r29, r1			/* Y = one past checksum row (col+1)%8 */
	/* least significant byte: plain add starts the carry chain */
	ld r0, -Y
	add r18, r0
	ld r0, -Z
	eor r0, r18
	st Z, r0
	/* remaining seven bytes: ld/eor/st leave the carry flag untouched */
	.irp acc, r19, r20, r21, r22, r23, r24, r25
	ld r0, -Y
	adc \acc, r0
	ld r0, -Z
	eor r0, \acc
	st Z, r0
	.endr
	pop r29
	pop r28
	pop r16
	ret
/*********************************************************************/
/*
 * void twister_large_init(twister_large_ctx_t *ctx, uint16_t hashsize_b)
 *
 * param ctx:        r24:r25
 * param hashsize_b: r22:r23
 *
 * twister384_init and twister512_init take only ctx; they preset
 * hashsize_b to 384 / 512 and continue in twister_large_init.
 *
 * Context bytes written, in order:
 *   ctx+0   .. ctx+63   64 state bytes        = 0x00
 *   ctx+64  .. ctx+71    8 bytes              = 0xFF
 *   ctx+72  .. ctx+143   8 + 64 bytes         = 0x00
 * Afterwards ctx+7 receives the high byte and ctx+15 the low byte of
 * hashsize_b.
 *
 * Overwrites r24, r30, r31 (plus r22/r23 in the 384/512 entries).
 * r1 is briefly 0xFF but is zero again on return.
 */
TLI_STATE_BYTES   = 64
TLI_ONES_BYTES    = 8
TLI_TAIL_BYTES    = 8+64
TLI_REWIND        = 64+8+8+64-7	/* from end of context back to ctx+7 */
.global twister384_init
twister384_init:
	ldi r22, lo8(384)
	ldi r23, hi8(384)
	rjmp twister_large_init
.global twister512_init
twister512_init:
	ldi r23, hi8(512)
	ldi r22, lo8(512)
	/* falls through into twister_large_init */
.global twister_large_init
twister_large_init:
	movw r30, r24			/* Z walks over the context */
	ldi r24, TLI_STATE_BYTES
.Ltli_clear_state:
	st Z+, r1
	dec r24
	brne .Ltli_clear_state
	dec r1				/* r1: 0x00 -> 0xFF, used as fill value */
	ldi r24, TLI_ONES_BYTES
.Ltli_fill_ones:
	st Z+, r1
	dec r24
	brne .Ltli_fill_ones
	inc r1				/* r1 back to 0x00 */
	ldi r24, TLI_TAIL_BYTES
.Ltli_clear_tail:
	st Z+, r1
	dec r24
	brne .Ltli_clear_tail
	/* Z is one past the context; step back to ctx+7 */
	subi r30, lo8(TLI_REWIND)
	sbci r31, hi8(TLI_REWIND)
	st Z, r23			/* ctx+7  = hashsize_b >> 8 */
	std Z+8, r22			/* ctx+15 = hashsize_b & 0xff */
	ret
/*********************************************************************/
/*
 * void twister_large_nextBlock(twister_large_ctx_t *ctx, void *msg)
 *
 * param ctx: r24:r25
 * param msg: r22:r23
 *
 * Processes one 64-byte message block: eight message mini rounds of
 * 8 bytes each, grouped into three maxi rounds, each mini round preceded
 * by checksum_update for columns 0..7. A 64-byte stack buffer holds the
 * feed-forward copy of the state. Finally 512 is added to the 8-byte
 * length counter at ctx+72.
 *
 * twister_mini_round and twister_blank_round are defined outside this
 * file. NOTE(review): this code relies on them preserving r12-r15 and
 * r28/r29 - confirm against their implementation.
 * NOTE(review): "adiw r30, 1" after stack_alloc_large assumes the macro
 * (avr-asm-macros.S, not visible here) leaves the new stack pointer in Z.
 */
CTX_SAVE0 = 14
CTX_SAVE1 = 15
TMP_SAVE0 = 12
TMP_SAVE1 = 13
MSG_SAVE0 = 28
MSG_SAVE1 = 29
.global twister_large_nextBlock
.global twister384_nextBlock
.global twister512_nextBlock
twister384_nextBlock:
twister512_nextBlock:
twister_large_nextBlock:
	push_range 12, 15
	push r28
	push r29
	stack_alloc_large 64
	adiw r30, 1			/* Z = first byte of the stack buffer */
	movw TMP_SAVE0, r30
	movw CTX_SAVE0, r24
	movw MSG_SAVE0, r22
	/* copy the 64 state bytes into the stack buffer, 8 bytes per pass */
	movw r26, CTX_SAVE0
	ldi r18, 64/8
1:
	.rept 8
	ld r0, X+
	st Z+, r0
	.endr
	dec r18
	brne 1b
	/* maxi round 1 */
	movw r24, CTX_SAVE0
	ldi r22, 0
	rcall checksum_update
	movw r22, MSG_SAVE0
	movw r24, CTX_SAVE0
	rcall twister_mini_round
	movw r24, CTX_SAVE0
	ldi r22, 1
	rcall checksum_update
	adiw MSG_SAVE0, 8		/* next 8 message bytes */
	movw r22, MSG_SAVE0
	movw r24, CTX_SAVE0
	rcall twister_mini_round
	movw r24, CTX_SAVE0
	ldi r22, 2
	rcall checksum_update
	adiw MSG_SAVE0, 8
	movw r22, MSG_SAVE0
	movw r24, CTX_SAVE0
	rcall twister_mini_round
	/* feed forward: state ^= buffer, buffer = new state */
	movw r30, TMP_SAVE0
	movw r26, CTX_SAVE0
	ldi r18, 64
1:
	ld r0, X
	ld r23, Z
	eor r0, r23
	st X+, r0
	st Z+, r0
	dec r18
	brne 1b
	/* maxi round 2 */
	movw r24, CTX_SAVE0
	ldi r22, 3
	rcall checksum_update
	adiw MSG_SAVE0, 8
	movw r22, MSG_SAVE0
	movw r24, CTX_SAVE0
	rcall twister_mini_round
	movw r24, CTX_SAVE0
	rcall twister_blank_round
	movw r24, CTX_SAVE0
	ldi r22, 4
	rcall checksum_update
	adiw MSG_SAVE0, 8
	movw r22, MSG_SAVE0
	movw r24, CTX_SAVE0
	rcall twister_mini_round
	/* feed forward: state ^= buffer, buffer = new state */
	movw r30, TMP_SAVE0
	movw r26, CTX_SAVE0
	ldi r18, 64
1:
	ld r0, X
	ld r23, Z
	eor r0, r23
	st X+, r0
	st Z+, r0
	dec r18
	brne 1b
	/* maxi round 3 */
	movw r24, CTX_SAVE0
	ldi r22, 5
	rcall checksum_update
	adiw MSG_SAVE0, 8
	movw r22, MSG_SAVE0
	movw r24, CTX_SAVE0
	rcall twister_mini_round
	movw r24, CTX_SAVE0
	ldi r22, 6
	rcall checksum_update
	adiw MSG_SAVE0, 8
	movw r22, MSG_SAVE0
	movw r24, CTX_SAVE0
	rcall twister_mini_round
	movw r24, CTX_SAVE0
	ldi r22, 7
	rcall checksum_update
	adiw MSG_SAVE0, 8
	movw r22, MSG_SAVE0
	movw r24, CTX_SAVE0
	rcall twister_mini_round
	movw r24, CTX_SAVE0
	rcall twister_blank_round
	/* last feed forward: state ^= buffer; the buffer is not updated */
	movw r30, TMP_SAVE0
	movw r26, CTX_SAVE0
	ldi r18, 64
1:
	ld r0, X
	ld r23, Z+
	eor r0, r23
	st X+, r0
	dec r18
	brne 1b
	/*
	 * length counter (ctx+72 .. ctx+79) += 512: X is at ctx+64 here, so
	 * ctx+73 is byte 1 of the counter and adding 2 there adds 512.
	 */
	adiw r26, 9
	ldi r19, 2
	ld r0, X
	add r0, r19
	st X+, r0
	/*
	 * Propagate the carry through counter bytes 2..7 (ctx+74 .. ctx+79)
	 * only; one more step would read-modify-write ctx+80, which is the
	 * first checksum byte. ld/st do not change the carry flag.
	 */
	.rept 6
	ld r0, X
	adc r0, r1
	st X+, r0
	.endr
	stack_free_large 64
	pop r29
	pop r28
	pop_range 12, 15
	ret
/*********************************************************************/
/*
 * void twister_large_lastBlock(twister_large_ctx_t *ctx, void *msg, uint16_t length_b)
 *
 * param ctx:      r24:r25
 * param msg:      r22:r23
 * param length_b: r20:r21   (message length in bits)
 *
 * 1. While length_b >= 512, hands a full 64-byte block to
 *    twister_large_nextBlock and moves msg on by 64 bytes.
 * 2. Builds a padded 64-byte block on the stack: the length_b/8 whole
 *    message bytes, one byte carrying the pad bit 0x80 >> (length_b % 8)
 *    (ORed onto the partial message byte if length_b % 8 != 0), then
 *    zeros; processes it with twister_large_nextBlock.
 * 3. Subtracts 512 - length_b from the 8-byte length counter at ctx+72
 *    and runs twister_mini_round with that counter as message.
 * 4. Reverses the byte order inside each of the eight 8-byte checksum
 *    rows at ctx+80 and passes these 64 bytes to twister_small_nextBlock.
 *
 * twister_mini_round and twister_small_nextBlock are defined outside this
 * file. NOTE(review): this code relies on them preserving r14/r15 and on
 * twister_mini_round also preserving r16/r17 - confirm.
 * NOTE(review): "adiw r30, 1" after stack_alloc_large assumes the macro
 * (avr-asm-macros.S, not visible here) leaves the new stack pointer in Z.
 * NOTE(review): the copy loop prefetches one message byte, so one byte
 * beyond the last whole byte is always read (msg[0] even for length 0).
 */
TMP_SAVE0 = 12
TMP_SAVE1 = 13
CTX_SAVE0 = 14
CTX_SAVE1 = 15
LEN_SAVE0 = 16
LEN_SAVE1 = 17
MSG_SAVE0 = 28
MSG_SAVE1 = 29
.global twister_large_lastBlock
.global twister384_lastBlock
.global twister512_lastBlock
twister384_lastBlock:
twister512_lastBlock:
twister_large_lastBlock:
	push_range 12, 17
	push r28
	push r29
	stack_alloc_large 64
	adiw r30, 1			/* Z = first byte of the stack buffer */
	movw TMP_SAVE0, r30
	movw CTX_SAVE0, r24
	movw MSG_SAVE0, r22
	movw LEN_SAVE0, r20
	/* consume full blocks while length_b >= 512 (high byte >= 2) */
1:
	cpi LEN_SAVE1, 2
	brlo 2f				/* unsigned compare: length_b is uint16_t */
	movw r24, CTX_SAVE0
	movw r22, MSG_SAVE0
	rcall twister_large_nextBlock
	/* one block is 64 bytes; adiw takes at most 63, so step twice */
	adiw MSG_SAVE0, 63
	adiw MSG_SAVE0, 1
	subi LEN_SAVE1, 2		/* length_b -= 512 */
	rjmp 1b
2:
	/* r18 = length_b / 8 (length_b < 512 here, so the result fits a byte) */
	movw r18, LEN_SAVE0
	lsr r19
	ror r18
	lsr r18
	lsr r18
	ldi r19, 63
	movw r26, MSG_SAVE0
	movw r30, TMP_SAVE0
	sub r19, r18 /* r18: bytes to copy, r19: bytes to clear */
	ld r0, X+			/* prefetch; r0 always holds the next byte */
	tst r18
	breq 4f
3:
	st Z+, r0
	ld r0, X+
	dec r18
	brne 3b
4:
	/* r0 = message byte following the whole bytes; build the pad byte */
	mov r18, LEN_SAVE0
	andi r18, 0x07			/* sets Z flag for the breq below */
	ldi r20, 0x80			/* ldi leaves the flags alone */
	breq 6f				/* byte aligned: pad byte is plain 0x80 */
5:
	lsr r20
	dec r18
	brne 5b
	or r20, r0			/* keep the partial message byte's bits */
6:
	st Z+, r20
	tst r19
	breq 8f
7:
	st Z+, r1			/* zero fill up to 64 bytes */
	dec r19
	brne 7b
8:
	movw r24, CTX_SAVE0
	movw r22, TMP_SAVE0
	rcall twister_large_nextBlock
	/* r19:r18 = 512 - length_b */
	ldi r19, 2
	clr r18
	sub r18, LEN_SAVE0
	sbc r19, LEN_SAVE1
	/* length counter (ctx+72 .. ctx+79) -= r19:r18 */
	movw r26, CTX_SAVE0
	adiw r26, 63
	adiw r26, 1+8
	ld r0, X
	sub r0, r18
	st X+, r0
	ld r0, X
	sbc r0, r19
	st X+, r0
	/* borrow through the remaining six bytes; ld/st keep the carry flag */
	.rept 6
	ld r0, X
	sbc r0, r1
	st X+, r0
	.endr
	sbiw r26, 8			/* X = ctx+72, the length counter */
	movw r24, CTX_SAVE0
	movw r22, r26
	rcall twister_mini_round
	/*
	 * Reverse the bytes of each checksum row in place. r12/r13 and
	 * r16/r17 are free here: the buffer pointer and the length are no
	 * longer needed, and both pairs are restored by pop_range.
	 */
	movw r22, CTX_SAVE0
	ldi r16, 64+8+8
	add r22, r16
	adc r23, r1
	movw r30, r22			/* Z = ctx+80, first checksum row */
	ldi r26, 8			/* row counter */
9:
	ld r12, Z+
	ld r13, Z+
	ld r16, Z+
	ld r17, Z+
	ld r18, Z+
	ld r19, Z+
	ld r20, Z+
	ld r21, Z+
	st -Z, r12
	st -Z, r13
	st -Z, r16
	st -Z, r17
	st -Z, r18
	st -Z, r19
	st -Z, r20
	st -Z, r21
	adiw r30, 8			/* Z is back at the row start; next row */
	dec r26
	brne 9b
	/* feed the 64 checksum bytes to the small block function */
	movw r24, CTX_SAVE0
	movw r22, CTX_SAVE0
	ldi r26, 64+2*8
	add r22, r26
	adc r23, r1
	rcall twister_small_nextBlock
	stack_free_large 64
	pop r29
	pop r28
	pop_range 12, 17
	ret