avr-crypto-lib/twister/twister-asm.S_shiftmul

352 lines
6.2 KiB
Plaintext

/* twister-asm.S */
/*
This file is part of the AVR-Crypto-Lib.
Copyright (C) 2006-2015 Daniel Otte (bg@nerilex.org)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* \file twister-asm.S
* \email bg@nerilex.org
* \author Daniel Otte
* \date 2008-12-22
* \license GPLv3 or later
*
*/
#include "avr-asm-macros.S"
twister_sbox:
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
/*
* param ctx: r24:r25
*
*/
X_SAVE0 = 4
X_SAVE1 = 5
Y_SAVE0 = 6
Y_SAVE1 = 7
MDS0 = 8
MDS1 = 9
MDS2 = 10
MDS3 = 11
MDS4 = 12
MDS5 = 13
MDS6 = 14
MDS7 = 15
IDX1 = 18
IDX2 = 19
TMP = 21
.global twister_blank_round
twister_blank_round:
push_range 4, 17
push r28
push r29
stack_alloc_large 64, r28, r29
movw r30, r24
adiw r30, 63
adiw r30, 1+8 /* Z points behind counter */
movw r26, r24
adiw r26, 1
ldi r22, 8
1: /* "add" counter */
ld r16, -Z
ld r21, X
eor r21, r16
st X, r21
adiw r26, 8
dec r22
brne 1b
/* decrement counter */
subi r16, 1
st Z+, r16
ldi r17, 7
1:
ld r16, Z
sbci r16, 0
st Z+, r16
dec r17
brne 1b
movw r26, r24
adiw r28, 1 /* Y points to stack memory */
movw Y_SAVE0, r28
ldi r20, lo8(twister_sbox)
ldi r21, hi8(twister_sbox)
ldi r19, 64
1: /* sbox substitution */
ld r0, X+
movw r30, r20
add r30, r0
adc r31, r1
lpm r0, Z
st Y+, r0
dec r19
brne 1b
/* load MDS-Table to MDS0:MDS7 */
/*
ldi r18, 1
mov MDS1, r18
mov MDS6, r18
mov MDS7, r18
ldi r18, 2
mov MDS0, r18
ldi r18, 5
mov MDS5, r18
ldi r18, 6
mov MDS2, r18
ldi r18, 7
mov MDS4, r18
ldi r18, 8
mov MDS3, r18
*/
ldi r18, 1
mov MDS1, r18
mov MDS2, r18
mov MDS7, r18
ldi r18, 2
mov MDS0, r18
ldi r18, 5
mov MDS3, r18
ldi r18, 6
mov MDS6, r18
ldi r18, 7
mov MDS4, r18
ldi r18, 8
mov MDS5, r18
; sbiw r28, 63
; sbiw r28, 1 /* Y points again at tmp buffer */
; movw Y_SAVE0, r28
; sbiw r26, 63
; sbiw r26, 1 /* X points again at state buffer */
movw X_SAVE0, r24
ldi r20, 0x4D /* reducer for gf256mul*/
ldi r16, 0
; rjmp 9f
1:
mov IDX1, r16
movw r26, X_SAVE0
add r26, r16
adc r27, r1
ldi r17, 0
2:
mov IDX2, r17
mov r24, MDS0
movw r28, Y_SAVE0
add r28, IDX1
adc r29, r1
ld r22, Y
rcall gf256mul
mov r0, r24
mov r24, MDS1
movw r28, Y_SAVE0
adiw r28, 8
inc IDX1
andi IDX1, 0x07
add r28, IDX1
adc r29, r1
ld r22, Y
rcall gf256mul
eor r0, r24
mov r24, MDS2
movw r28, Y_SAVE0
adiw r28, 8*2
inc IDX1
andi IDX1, 0x07
add r28, IDX1
adc r29, r1
ld r22, Y
rcall gf256mul
eor r0, r24
mov r24, MDS3
movw r28, Y_SAVE0
adiw r28, 8*3
inc IDX1
andi IDX1, 0x07
add r28, IDX1
adc r29, r1
ld r22, Y
rcall gf256mul
eor r0, r24
mov r24, MDS4
movw r28, Y_SAVE0
adiw r28, 8*4
inc IDX1
andi IDX1, 0x07
add r28, IDX1
adc r29, r1
ld r22, Y
rcall gf256mul
eor r0, r24
mov r24, MDS5
movw r28, Y_SAVE0
adiw r28, 8*5
inc IDX1
andi IDX1, 0x07
add r28, IDX1
adc r29, r1
ld r22, Y
rcall gf256mul
eor r0, r24
mov r24, MDS6
movw r28, Y_SAVE0
adiw r28, 8*6
inc IDX1
andi IDX1, 0x07
add r28, IDX1
adc r29, r1
ld r22, Y
rcall gf256mul
eor r0, r24
mov r24, MDS7
movw r28, Y_SAVE0
adiw r28, 8*7
inc IDX1
andi IDX1, 0x07
add r28, IDX1
adc r29, r1
ld r22, Y
rcall gf256mul
eor r0, r24
st X, r0
adiw r26, 8
/*
mov r0, MDS0
mov MDS0, MDS1
mov MDS1, MDS2
mov MDS2, MDS3
mov MDS3, MDS4
mov MDS4, MDS5
mov MDS5, MDS6
mov MDS6, MDS7
mov MDS7, r0
*/
mov r0, MDS7
mov MDS7, MDS6
mov MDS6, MDS5
mov MDS5, MDS4
mov MDS4, MDS3
mov MDS3, MDS2
mov MDS2, MDS1
mov MDS1, MDS0
mov MDS0, r0
cpi r17, 7
breq 8f
inc r17
rjmp 2b
8:
mov r0, MDS7
mov MDS7, MDS6
mov MDS6, MDS5
mov MDS5, MDS4
mov MDS4, MDS3
mov MDS3, MDS2
mov MDS2, MDS1
mov MDS1, MDS0
mov MDS0, r0
cpi r16, 7
breq 9f
inc r16
rjmp 1b
9:
stack_free_large 64
pop r29
pop r28
pop_range 4, 17
ret
A = 23
B = 22
P = 24
gf256mul:
mov A, r24
clr P
1:
lsr A
breq 4f
brcc 2f
eor P, B
2:
lsl B
brcc 3f
eor B, r20
3:
rjmp 1b
4:
brcc 2f
eor P, B
2:
ret