avr-crypto-lib/aes/aes_dec-asm.S

/* aes_dec-asm.S */
/*
This file is part of the AVR-Crypto-Lib.
Copyright (C) 2006-2015 Daniel Otte (bg@nerilex.org)

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* \file aes_dec-asm.S
* \email bg@nerilex.org
* \author Daniel Otte
* \date 2009-01-10
* \license GPLv3 or later
*
*/
#include "avr-asm-macros.S"
A = 28
B = 29
P = 0
xREDUCER = 25
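/*
gf256mul: P = A * B in GF(2^8), computed by right-to-left shift-and-add
("Russian peasant") multiplication. xREDUCER must hold 0x1b, the low
byte of the AES polynomial x^8 + x^4 + x^3 + x + 1. A and B are
destroyed; in this file the routine is only called with A = 4 or A = 8.
*/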
gf256mul:
clr P
1:
lsr A
breq 4f
brcc 2f
eor P, B
2:
lsl B
brcc 3f
eor B, xREDUCER
3:
rjmp 1b
4:
brcc 2f
eor P, B
2:
ret
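/*
Entry points: each loads the round count for its key size (14 for
AES-256, 12 for AES-192, 10 for AES-128) and continues in the shared
decryption core.
*/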
.global aes256_dec
aes256_dec:
ldi r20, 14
rjmp aes_decrypt_core
.global aes192_dec
aes192_dec:
ldi r20, 12
rjmp aes_decrypt_core
.global aes128_dec
aes128_dec:
ldi r20, 10
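/* falls through into aes_decrypt_core */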
/*
void aes_decrypt_core(aes_cipher_state_t *state, const aes_genctx_t *ks, uint8_t rounds)
*/
T0 = 2
T1 = 3
T2 = 4
T3 = 5
T4 = 6
T5 = 7
ST00 = 8
ST01 = 9
ST02 = 10
ST03 = 11
ST10 = 12
ST11 = 13
ST12 = 14
ST13 = 15
ST20 = 16
ST21 = 17
ST22 = 18
ST23 = 19
ST30 = 20
ST31 = 21
ST32 = 22
ST33 = 23
CTR = 24
/*
* param state: r24:r25
* param ks: r22:r23
* param rounds: r20
*/
.global aes_decrypt_core
aes_decrypt_core:
push_range 2, 17 /* save the call-saved registers used below */
push r28
push r29
push r24 /* save the state pointer for the final write-back */
push r25
movw r26, r22
movw r30, r24
mov CTR, r20
inc r20
swap r20 /* r20*16 */
add r26, r20
adc r27, r1
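/*
X = ks + 16*(rounds+1), i.e. one byte past the end of the key schedule
(swap works as *16 here because rounds+1 <= 15); the round keys are
consumed in reverse order below via ld -X.
*/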
clt /* T is set later to mark the final round */
; ldi CTR, 2
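/* load the 16 state bytes from *state into ST00..ST33 */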
.irp param, ST00, ST01, ST02, ST03, ST10, ST11, ST12, ST13, ST20, ST21, ST22, ST23, ST30, ST31, ST32, ST33
ld \param, Z+
.endr
ldi xREDUCER, 0x1b /* load reducer for gf256mul */
ldi r31, hi8(aes_invsbox) /* relies on aes_invsbox being 256-byte aligned, so only r30 changes per lookup */
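/* initial key addition: XOR the last round key into the state */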
.irp param, ST33, ST32, ST31, ST30, ST23, ST22, ST21, ST20, ST13, ST12, ST11, ST10, ST03, ST02, ST01, ST00
ld r0, -X
eor \param, r0
.endr
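/*
Round loop: each pass performs InvSubBytes+InvShiftRows and AddRoundKey,
then InvMixColumns unless T is set, which marks the final round.
*/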
1:
dec CTR
brne 2f
set
2:
/* InvSubBytes fused with InvShiftRows: each byte is substituted while being moved to its un-shifted position */
.irp param, ST00, ST10, ST20, ST30
mov r30, \param
lpm \param, Z
.endr
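/* the ST*1 bytes rotate by one position */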
mov r30, ST31
lpm T0, Z
mov r30, ST21
lpm ST31, Z
mov r30, ST11
lpm ST21, Z
mov r30, ST01
lpm ST11, Z
mov ST01, T0
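/* the ST*2 bytes swap pairwise (rotation by two) */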
mov r30, ST32
lpm T0, Z
mov r30, ST22
lpm T1, Z
mov r30, ST12
lpm ST32, Z
mov r30, ST02
lpm ST22, Z
mov ST12, T0
mov ST02, T1
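/* the ST*3 bytes rotate by one position in the opposite direction (rotation by three) */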
mov r30, ST03
lpm T0, Z
mov r30, ST13
lpm ST03, Z
mov r30, ST23
lpm ST13, Z
mov r30, ST33
lpm ST23, Z
mov ST33, T0
/* key addition */
.irp param, ST33, ST32, ST31, ST30, ST23, ST22, ST21, ST20, ST13, ST12, ST11, ST10, ST03, ST02, ST01, ST00
ld r0, -X
eor \param, r0
.endr
brtc 2f /* T clear: more rounds to go, continue with InvMixColumns */
exit:
pop r31 /* restore the state pointer (saved in the prologue) into Z */
pop r30
st Z+, ST00
st Z+, ST01
st Z+, ST02
st Z+, ST03
st Z+, ST10
st Z+, ST11
st Z+, ST12
st Z+, ST13
st Z+, ST20
st Z+, ST21
st Z+, ST22
st Z+, ST23
st Z+, ST30
st Z+, ST31
st Z+, ST32
st Z+, ST33
pop r29
pop r28
pop_range 2, 17
ret
2:
/* inv column (row) mixing */
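/*
Each group of four bytes (s0..s3) is transformed with (all products in
GF(2^8)):
t = s3 ^ s2, u = s0 ^ s1
v = 9*(t^u) ^ 4*(s3^s1)
w = 9*(t^u) ^ 4*(s2^s0)
s0' = s0 ^ 2*(s0^s1) ^ w
s1' = s1 ^ 2*(s2^s1) ^ v
s2' = s2 ^ 2*(s3^s2) ^ w
s3' = s3 ^ 2*(s0^s3) ^ v
which expands to the usual InvMixColumns matrix (14 11 13 9) but needs
only three gf256mul calls plus a few xtime (lsl + conditional reduce)
steps per group.
*/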
/* invMixCol (Row) 1 */
/* preparing */
mov T0, ST03
eor T0, ST02 ; T0 = t
mov T1, ST00
eor T1, ST01 ; T1 = u
mov T2, T0
eor T2, T1
mov B, T2
ldi A, 0x08
rcall gf256mul
eor T2, P ; T2 = v'
mov B, ST02
eor B, ST00
ldi A, 0x04
rcall gf256mul
mov T3, P
eor T3, T2 ; T3 = w
mov B, ST03
eor B, ST01
ldi A, 0x04
rcall gf256mul
eor T2, P ; T2 = v
/* now the big move */
mov T4, ST00
eor T4, ST03
lsl T4
brcc 3f
eor T4, xREDUCER
3: eor T4, T2
eor ST03, T4
mov T4, ST02
eor T4, ST01
lsl T4
brcc 3f
eor T4, xREDUCER
3: eor T4, T2
eor ST01, T4
lsl T0
brcc 3f
eor T0, xREDUCER
3: eor T0, T3
eor ST02, T0
lsl T1
brcc 3f
eor T1, xREDUCER
3: eor T1, T3
eor ST00, T1
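/* the same sequence repeats for the three remaining byte groups */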
/* invMixCol (Row) 2 */
/* preparing */
mov T0, ST13
eor T0, ST12 ; T0 = t
mov T1, ST10
eor T1, ST11 ; T1 = u
mov T2, T0
eor T2, T1
mov B, T2
ldi A, 0x08
rcall gf256mul
eor T2, P ; T2 = v'
mov B, ST12
eor B, ST10
ldi A, 0x04
rcall gf256mul
mov T3, P
eor T3, T2 ; T3 = w
mov B, ST13
eor B, ST11
ldi A, 0x04
rcall gf256mul
eor T2, P ; T2 = v
/* now the big move */
mov T4, ST10
eor T4, ST13
lsl T4
brcc 3f
eor T4, xREDUCER
3: eor T4, T2
eor ST13, T4
mov T4, ST12
eor T4, ST11
lsl T4
brcc 3f
eor T4, xREDUCER
3: eor T4, T2
eor ST11, T4
lsl T0
brcc 3f
eor T0, xREDUCER
3: eor T0, T3
eor ST12, T0
lsl T1
brcc 3f
eor T1, xREDUCER
3: eor T1, T3
eor ST10, T1
/* invMixCol (Row) 3 */
/* preparing */
mov T0, ST23
eor T0, ST22 ; T0 = t
mov T1, ST20
eor T1, ST21 ; T1 = u
mov T2, T0
eor T2, T1
mov B, T2
ldi A, 0x08
rcall gf256mul
eor T2, P ; T2 = v'
mov B, ST22
eor B, ST20
ldi A, 0x04
rcall gf256mul
mov T3, P
eor T3, T2 ; T3 = w
mov B, ST23
eor B, ST21
ldi A, 0x04
rcall gf256mul
eor T2, P ; T2 = v
/* now the big move */
mov T4, ST20
eor T4, ST23
lsl T4
brcc 3f
eor T4, xREDUCER
3: eor T4, T2
eor ST23, T4
mov T4, ST22
eor T4, ST21
lsl T4
brcc 3f
eor T4, xREDUCER
3: eor T4, T2
eor ST21, T4
lsl T0
brcc 3f
eor T0, xREDUCER
3: eor T0, T3
eor ST22, T0
lsl T1
brcc 3f
eor T1, xREDUCER
3: eor T1, T3
eor ST20, T1
/* invMixCol (Row) 4 */
/* preparing */
mov T0, ST33
eor T0, ST32 ; T0 = t
mov T1, ST30
eor T1, ST31 ; T1 = u
mov T2, T0
eor T2, T1
mov B, T2
ldi A, 0x08
rcall gf256mul
eor T2, P ; T2 = v'
mov B, ST32
eor B, ST30
ldi A, 0x04
rcall gf256mul
mov T3, P
eor T3, T2 ; T3 = w
mov B, ST33
eor B, ST31
ldi A, 0x04
rcall gf256mul
eor T2, P ; T2 = v
/* now the big move */
mov T4, ST30
eor T4, ST33
lsl T4
brcc 3f
eor T4, xREDUCER
3: eor T4, T2
eor ST33, T4
mov T4, ST32
eor T4, ST31
lsl T4
brcc 3f
eor T4, xREDUCER
3: eor T4, T2
eor ST31, T4
lsl T0
brcc 3f
eor T0, xREDUCER
3: eor T0, T3
eor ST32, T0
lsl T1
brcc 3f
eor T1, xREDUCER
3: eor T1, T3
eor ST30, T1
rjmp 1b