avr-crypto-lib/twister/twister-asm.S

605 lines
9.8 KiB
ArmAsm

/* twister-asm.S */
/*
This file is part of the AVR-Crypto-Lib.
Copyright (C) 2006-2015 Daniel Otte (bg@nerilex.org)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* \file twister-asm.S
* \email bg@nerilex.org
* \author Daniel Otte
* \date 2008-12-22
* \license GPLv3 or later
*
*/
#include "avr-asm-macros.S"
/*
 * twister_sbox: 256-entry byte substitution table (one output byte per
 * possible input byte).
 * twister_blank_round indexes it as "table base + state byte" and fetches the
 * entry with lpm, i.e. the table is read from program memory.
 * The 256 values are the same as the AES (Rijndael) S-box.
 */
twister_sbox:
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
/*
 * twister_mini_round
 *
 * Message injection: XORs eight message bytes, taken in reverse order, into
 * the eight state bytes at ctx+56 .. ctx+63:
 *
 *     ctx[56 + i] ^= msg[7 - i]        for i = 0 .. 7
 *
 * There is no ret at the end of this routine: execution falls through into
 * the code that follows it in this file (twister_blank_round), with r24:r25
 * still holding ctx.
 *
 * param ctx: r24:r25
 * param msg: r22:r23
 *
 * Registers written by the injection itself: r21, r22, r23, X (r26:r27),
 * Z (r30:r31) and the status flags.
 */
.global twister_mini_round
twister_mini_round:
	movw r30, r22		/* Z = msg (taken before r22/r23 are reused) */
	adiw r30, 8		/* Z = msg + 8, walked backwards via pre-decrement */
	movw r26, r24		/* X = ctx */
	adiw r26, 8*7		/* X = ctx + 56 */
	ldi r21, 8		/* eight bytes to inject */
1:
	ld r23, -Z		/* next message byte, back to front */
	ld r22, X
	eor r22, r23
	st X+, r22
	dec r21
	brne 1b
	/* no ret: continues with the routine placed directly below */
/*
 * twister_blank_round
 *
 * Transforms the 64 state bytes ctx[0..63] in place, using the 8-byte counter
 * stored at ctx[64..71]:
 *
 *   1. counter injection:  ctx[1 + 8*i] ^= ctx[71 - i]      for i = 0 .. 7
 *   2. the counter, handled as a little-endian 64-bit integer, is
 *      decremented by one
 *   3. every state byte is replaced through twister_sbox; byte j of the
 *      8-byte row k is written to position (j - k) mod 8 of row k in a
 *      64-byte buffer on the stack
 *   4. every column c of that buffer (bytes c, c+8, ..., c+56) is multiplied
 *      by the circulant matrix whose first row is (2, 1, 1, 5, 7, 8, 6, 1),
 *      using gf256mul with reduction byte 0x4D, and the result is stored
 *      back to ctx[c], ctx[c+8], ..., ctx[c+56]
 *
 * param ctx: r24:r25
 *
 * r4..r17, r28 and r29 are saved and restored. r0, r18..r27, r30 and r31 are
 * overwritten. r1 is used as a constant zero and is never cleared here, so
 * it must be zero on entry.
 * NOTE(review): stack_alloc_large / stack_free_large come from
 * avr-asm-macros.S; this routine relies on stack_alloc_large leaving the new
 * stack pointer in the register pair passed to it (r28:r29) - confirm there.
 */
X_SAVE0 = 4
X_SAVE1 = 5
Y_SAVE0 = 6
Y_SAVE1 = 7
MDS0 = 8
MDS1 = 9
MDS2 = 10
MDS3 = 11
MDS4 = 12
MDS5 = 13
MDS6 = 14
MDS7 = 15
.global twister_blank_round
twister_blank_round:
	push_range 4, 17
	push r28
	push r29
	stack_alloc_large 64, r28, r29
	movw X_SAVE0, r24		/* keep ctx for the matrix step */

	/* ---- 1. counter injection ---- */
	movw r30, r24
	adiw r30, 63
	adiw r30, 1+8 /* Z points behind counter */
	movw r26, r24
	adiw r26, 1			/* X = ctx + 1 */
	ldi r22, 8
1: /* "add" counter */
	ld r16, -Z			/* counter bytes, highest address first */
	ld r21, X
	eor r21, r16
	st X, r21
	adiw r26, 8			/* same position in the next 8-byte row */
	dec r22
	brne 1b

	/* ---- 2. decrement counter ---- */
	/* Z is back at ctx+64 and r16 still holds the byte stored there. */
	subi r16, 1
	st Z+, r16
	ldi r17, 7
1:
	ld r16, Z
	sbci r16, 0			/* propagate the borrow; dec/ld/st leave C alone */
	st Z+, r16
	dec r17
	brne 1b

	/* ---- 3. substitution + row rotation into the stack buffer ---- */
	movw r26, r24			/* X = ctx (source) */
	adiw r28, 1 /* Y points to stack memory */
	movw Y_SAVE0, r28		/* remember buffer start for step 4 */
	movw r24, r28			/* r24:r25 = start of current buffer row */
	ldi r20, lo8(twister_sbox)
	ldi r21, hi8(twister_sbox)
	ldi r18, 8			/* r18 = 8 - row index */
1:
	ldi r19, 0			/* r19 = byte index inside the row */
2: /* sbox substitution */
	ld r0, X+
	movw r30, r20
	add r30, r0
	adc r31, r1
	lpm r0, Z			/* r0 = twister_sbox[state byte] */
	movw r28, r24
	mov r16, r18
	add r16, r19
	andi r16, 0x07			/* destination position = (r18 + r19) mod 8 */
	add r28, r16
	adc r29, r1
	st Y, r0
	inc r19
	cpi r19, 8
	brne 2b
	adiw r24, 8			/* next buffer row */
	dec r18
	brne 1b

	/* ---- 4. column-wise matrix multiplication ---- */
	/* load MDS-Table to MDS0:MDS7 */
	ldi r18, 1
	mov MDS1, r18
	mov MDS2, r18
	mov MDS7, r18
	ldi r18, 2
	mov MDS0, r18
	ldi r18, 5
	mov MDS3, r18
	ldi r18, 6
	mov MDS6, r18
	ldi r18, 7
	mov MDS4, r18
	ldi r18, 8
	mov MDS5, r18
	ldi r20, 0x4D /* reducer for gf256mul*/
	ldi r16, 0			/* r16 = column index */
1:
	movw r26, X_SAVE0
	add r26, r16
	adc r27, r1			/* X = ctx + column */
	/*
	 * Y = buffer + column. It depends only on the column index and neither
	 * the row loop nor gf256mul writes r28:r29, so it is set up once per
	 * column instead of once per row.
	 */
	movw r28, Y_SAVE0
	add r28, r16
	adc r29, r1
	ldi r17, 8			/* eight output rows per column */
2:
	mov r24, MDS0
	ld r22, Y
	rcall gf256mul
	mov r0, r24			/* r0 accumulates the dot product */
.irp i, 1, 2, 3, 4, 5, 6, 7
	mov r24, MDS\i
	ldd r22, Y+8*\i
	rcall gf256mul
	eor r0, r24
.endr
	st X, r0
	adiw r26, 8			/* next output row of this column */
	/* rotate the coefficient vector by one position for the next row */
	mov r0, MDS7
	mov MDS7, MDS6
	mov MDS6, MDS5
	mov MDS5, MDS4
	mov MDS4, MDS3
	mov MDS3, MDS2
	mov MDS2, MDS1
	mov MDS1, MDS0
	mov MDS0, r0
	dec r17
	brne 2b
	/* after eight rotations MDS0..MDS7 hold their initial values again */
	inc r16
	cpi r16, 8
	brne 1b

	stack_free_large 64
	pop r29
	pop r28
	pop_range 4, 17
	ret
/*********************************************************************/
/*
 * gf256mul
 *
 * Shift-and-xor multiplication of two bytes, one bit of the first operand
 * per loop pass, starting with its least significant bit:
 *   - if the current bit of A is set, the running multiple B is XORed into P
 *   - B is then shifted left; if a bit is shifted out, r20 is XORed into B
 * The loop stops as soon as the remaining bits of A are all zero.
 *
 * in:   r24 = first operand (copied to A)
 *       r22 = second operand (B)
 *       r20 = reduction byte XORed in on overflow of B (not modified)
 * out:  r24 = product (P)
 * overwrites: r22, r23, status flags
 */
A = 23
B = 22
P = 24
gf256mul:
	mov A, r24
	clr P
.Lgf256mul_next_bit:
	lsr A				/* C = current bit, Z = no bits left */
	breq .Lgf256mul_last_bit
	brcc .Lgf256mul_double
	eor P, B
.Lgf256mul_double:
	lsl B
	brcc .Lgf256mul_again
	eor B, r20			/* reduce after overflow */
.Lgf256mul_again:
	rjmp .Lgf256mul_next_bit
.Lgf256mul_last_bit:
	/* breq does not touch C, so C still holds the bit shifted out by lsr */
	brcc .Lgf256mul_done
	eor P, B
.Lgf256mul_done:
	ret
/*********************************************************************/
/* twister_ctx2hash */
/*
 * Writes hashsize_b bits of output to dest, 8 bytes (64 bits) per pass, plus
 * one final 4-byte pass when bit 5 of hashsize_b is set (a 32-bit remainder).
 *
 * Each pass:
 *   tmp[0..63] = ctx[0..63]            (tmp is a 64-byte stack buffer)
 *   twister_blank_round(ctx)
 *   ctx[0..63] ^= tmp[0..63]
 *   twister_blank_round(ctx)
 *   8-byte pass: dest gets tmp[k] ^ ctx[k] for k = 56, 48, 40, ..., 0
 *   4-byte pass: dest gets tmp[k] ^ ctx[k] for k = 24, 16, 8, 0
 *
 * The twisterNNN_ctx2hash entry points only load the fixed bit length into
 * r20:r21 and continue with the common code.
 *
 * param dest: r24:r25
 * param ctx: r22:r23
 * param hashsize_b: r20:r21
 *
 * r10..r17, r28 and r29 are saved and restored.
 * NOTE(review): stack_alloc_large is used without register arguments and Z
 * (r30:r31) is read right afterwards, so the macro is expected to leave the
 * new stack pointer in Z by default - confirm in avr-asm-macros.S.
 */
DEST_SAVE0 = 10
DEST_SAVE1 = 11
CTX_SAVE0 = 12
CTX_SAVE1 = 13
LEN_SAVE = 14
LEN32_SAVE = 15
TMP_SAVE0 = 16
TMP_SAVE1 = 17
.global twister_ctx2hash
.global twister_small_ctx2hash
.global twister_large_ctx2hash
.global twister224_ctx2hash
.global twister256_ctx2hash
.global twister384_ctx2hash
.global twister512_ctx2hash
twister224_ctx2hash:
	ldi r20, lo8(224)
	ldi r21, hi8(224)
	rjmp twister_ctx2hash
twister256_ctx2hash:
	ldi r20, lo8(256)
	ldi r21, hi8(256)
	rjmp twister_ctx2hash
twister384_ctx2hash:
	ldi r20, lo8(384)
	ldi r21, hi8(384)
	rjmp twister_ctx2hash
twister512_ctx2hash:
	ldi r20, lo8(512)
	ldi r21, hi8(512)
	/* no jump needed: the common code starts right here */
twister_large_ctx2hash:
twister_small_ctx2hash:
twister_ctx2hash:
	push_range 10, 17
	push r28
	push r29
	stack_alloc_large 64
	adiw r30, 1			/* first byte of the 64-byte buffer */
	movw TMP_SAVE0, r30
	movw DEST_SAVE0, r24
	movw CTX_SAVE0, r22

	/* LEN32_SAVE = 1 if a 32-bit remainder has to be produced, else 0 */
	clr LEN32_SAVE
	sbrc r20, 5
	inc LEN32_SAVE
	/* r20 = hashsize_b / 64 */
	lsr r21
	ror r20
	lsr r21
	ror r20 /* length is max 512 so we now only have to shift r20 */
	swap r20 /* this is faster than 4 shifts */
	andi r20, 0x0f
	/* LEN_SAVE = number of passes = full 64-bit chunks + remainder flag */
	add r20, LEN32_SAVE
	mov LEN_SAVE, r20

.Lctx2hash_next_pass:
	dec LEN_SAVE
	brmi .Lctx2hash_done

	/* tmp <- ctx-s */
	movw r26, CTX_SAVE0
	movw r30, TMP_SAVE0
	ldi r20, 64/4
.Lctx2hash_copy:
.rept 4
	ld r0, X+
	st Z+, r0
.endr
	dec r20
	brne .Lctx2hash_copy

	movw r24, CTX_SAVE0
	rcall twister_blank_round

	/* ctx-s ^= tmp */
	movw r26, CTX_SAVE0
	movw r30, TMP_SAVE0
	ldi r20, 64
.Lctx2hash_feed_forward:
	ld r0, X
	ld r21, Z+
	eor r0, r21
	st X+, r0
	dec r20
	brne .Lctx2hash_feed_forward

	movw r24, CTX_SAVE0
	rcall twister_blank_round

	movw r26, CTX_SAVE0
	/* only the very last pass can be the short 32-bit one */
	tst LEN_SAVE
	brne .Lctx2hash_emit8
	tst LEN32_SAVE
	brne .Lctx2hash_emit4

.Lctx2hash_emit8:
	adiw r26, 8*7			/* X = ctx + 56 */
	movw r30, TMP_SAVE0
	adiw r30, 8*7			/* Z = tmp + 56 */
	movw r28, DEST_SAVE0
	ldi r20, 8
.Lctx2hash_emit8_loop:
	ld r0, Z
	ld r21, X
	eor r0, r21
	st Y+, r0
	sbiw r26, 8			/* step back one 8-byte row */
	sbiw r30, 8
	dec r20
	brne .Lctx2hash_emit8_loop
	movw DEST_SAVE0, r28		/* dest advanced by 8 */
	rjmp .Lctx2hash_next_pass

.Lctx2hash_emit4:
	adiw r26, 8*3			/* X = ctx + 24 */
	movw r30, TMP_SAVE0
	adiw r30, 8*3			/* Z = tmp + 24 */
	movw r28, DEST_SAVE0
	ldi r20, 4
.Lctx2hash_emit4_loop:
	ld r0, Z
	ld r21, X
	eor r0, r21
	st Y+, r0
	sbiw r26, 8
	sbiw r30, 8
	dec r20
	brne .Lctx2hash_emit4_loop

.Lctx2hash_done:
	stack_free_large 64
	pop r29
	pop r28
	pop_range 10, 17
	ret
/*********************************************************************/
/* void twister_small_nextBlock(twister_state_t *ctx, void *msg) */
/*
 * Absorbs one 64-byte message block into the state, 8 message bytes per
 * twister_mini_round call:
 *
 *   tmp = ctx[0..63]                       (64-byte stack buffer)
 *   3 x mini_round;  ctx ^= tmp;  tmp = ctx
 *   3 x mini_round;  ctx ^= tmp;  tmp = ctx
 *   2 x mini_round;  blank_round; ctx ^= tmp
 *   length counter += 512
 *
 * param ctx: r24:r25
 * param msg: r22:r23
 *
 * r12..r15, r28 and r29 are saved and restored. r1 is used as a constant
 * zero and must be zero on entry.
 *
 * NOTE(review): stack_alloc_large is used without register arguments and Z
 * (r30:r31) is read right afterwards, so the macro is expected to leave the
 * new stack pointer in Z by default - confirm in avr-asm-macros.S.
 */
CTX_SAVE0 = 14
CTX_SAVE1 = 15
TMP_SAVE0 = 12
TMP_SAVE1 = 13
MSG_SAVE0 = 28
MSG_SAVE1 = 29
.global twister_small_nextBlock
.global twister224_nextBlock
.global twister256_nextBlock
twister224_nextBlock:
twister256_nextBlock:
twister_small_nextBlock:
	push_range 12, 15
	push r28
	push r29
	stack_alloc_large 64
	adiw r30, 1			/* first byte of the 64-byte buffer */
	movw TMP_SAVE0, r30
	movw CTX_SAVE0, r24
	movw MSG_SAVE0, r22

	/* tmp <- ctx[0..63] */
	movw r26, CTX_SAVE0
	ldi r18, 64/8
1:
.rept 8
	ld r0, X+
	st Z+, r0
.endr
	dec r18
	brne 1b

	/* mini rounds 1..3; r24:r25 and r22:r23 still hold ctx and msg */
	rcall twister_mini_round
.rept 2
	adiw MSG_SAVE0, 8
	movw r22, MSG_SAVE0
	movw r24, CTX_SAVE0
	rcall twister_mini_round
.endr

	/* ctx ^= tmp, and tmp <- new ctx */
	movw r30, TMP_SAVE0
	movw r26, CTX_SAVE0
	ldi r18, 64
1:
	ld r0, X
	ld r23, Z
	eor r0, r23
	st X+, r0
	st Z+, r0
	dec r18
	brne 1b

	/* mini rounds 4..6 */
.rept 3
	adiw MSG_SAVE0, 8
	movw r22, MSG_SAVE0
	movw r24, CTX_SAVE0
	rcall twister_mini_round
.endr

	/* ctx ^= tmp, and tmp <- new ctx */
	movw r30, TMP_SAVE0
	movw r26, CTX_SAVE0
	ldi r18, 64
1:
	ld r0, X
	ld r23, Z
	eor r0, r23
	st X+, r0
	st Z+, r0
	dec r18
	brne 1b

	/* mini rounds 7..8, followed by one blank round */
.rept 2
	adiw MSG_SAVE0, 8
	movw r22, MSG_SAVE0
	movw r24, CTX_SAVE0
	rcall twister_mini_round
.endr
	movw r24, CTX_SAVE0
	rcall twister_blank_round

	/* ctx ^= tmp */
	movw r30, TMP_SAVE0
	movw r26, CTX_SAVE0
	ldi r18, 64
1:
	ld r0, X
	ld r23, Z+
	eor r0, r23
	st X+, r0
	dec r18
	brne 1b

	/*
	 * length counter += 512.
	 * X is at ctx+64 here; ctx+73 is byte 1 of the little-endian integer
	 * that starts at ctx+72, so adding 2 there adds 0x200 = 512.
	 * The carry is propagated up to ctx+79, the last byte of an 8-byte
	 * field. The previous code carried into one more byte (ctx+80), which
	 * is a read-modify-write one byte past such a field.
	 * NOTE(review): assumes the length counter is a 64-bit field at
	 * ctx+72..79 (directly after the 8-byte counter at ctx+64..71) -
	 * confirm against the twister_state_t definition.
	 */
	adiw r26, 9
	ldi r19, 2
	ld r0, X
	add r0, r19
	st X+, r0
.rept 6
	ld r0, X
	adc r0, r1			/* ld/st do not change the carry flag */
	st X+, r0
.endr

	stack_free_large 64
	pop r29
	pop r28
	pop_range 12, 15
	ret