avr-crypto-lib/aes/aes_dec-asm_faster.S

458 lines
9.2 KiB
ArmAsm

/* aes_dec-asm_faster.S */
/*
This file is part of the AVR-Crypto-Lib.
Copyright (C) 2008, 2009 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* \file aes_dec-asm_faster.S
* \email daniel.otte@rub.de
* \author Daniel Otte
* \date 2009-01-10
* \license GPLv3 or later
*
*/
#include "avr-asm-macros.S"
/*
 * File-level register-number aliases. Each symbol is the NUMBER of an AVR
 * general purpose register, so "lpm P, Z" assembles as "lpm r0, Z".
 */
.set A,        28	/* r28 - not referenced by the code in view */
.set B,        29	/* r29 - not referenced by the code in view */
.set P,        0	/* r0  - receives table look-up results (lpm P, Z) */
.set xREDUCER, 25	/* r25 - loaded with 0x1b and XORed in after a left shift that carries out */
/*
 * Key-size specific entry points.
 *
 * Each stub only loads the number of rounds into r20 and continues in
 * aes_decrypt_core. r24:r25 and r22:r23 are passed through untouched, so
 * they carry whatever aes_decrypt_core expects in them.
 */
.set AES256_DEC_ROUNDS, 14
.set AES192_DEC_ROUNDS, 12
.set AES128_DEC_ROUNDS, 10

.global aes256_dec
aes256_dec:
	ldi	r20, AES256_DEC_ROUNDS
	rjmp	aes_decrypt_core

.global aes192_dec
aes192_dec:
	ldi	r20, AES192_DEC_ROUNDS
	rjmp	aes_decrypt_core

.global aes128_dec
aes128_dec:
	ldi	r20, AES128_DEC_ROUNDS
	/*
	 * No jump here on purpose: execution falls through into
	 * aes_decrypt_core, which must remain the next code emitted after
	 * this stub (symbol definitions and comments in between emit nothing).
	 */
/*
void aes_decrypt_core(aes_cipher_state_t *state, const aes_genctx_t *ks, uint8_t rounds)
*/
/*
 * Register-number aliases used by aes_decrypt_core.
 *
 * T0..T5   scratch registers r2..r7 (T5 is not referenced by the code in view)
 * STij     the 16 state bytes in r8..r23; aes_decrypt_core loads them from
 *          memory in the order ST00, ST01, ST02, ST03, ST10, ... ST33, so
 *          STij holds state byte 4*i + j
 * CTR      r24, remaining-rounds counter
 */
.set T0,   2
.set T1,   3
.set T2,   4
.set T3,   5
.set T4,   6
.set T5,   7

.set ST00, 8
.set ST01, 9
.set ST02, 10
.set ST03, 11

.set ST10, 12
.set ST11, 13
.set ST12, 14
.set ST13, 15

.set ST20, 16
.set ST21, 17
.set ST22, 18
.set ST23, 19

.set ST30, 20
.set ST31, 21
.set ST32, 22
.set ST33, 23

.set CTR,  24
/*
 * NOTE: the two macro definitions below emit no code by themselves, so the
 * fall-through from aes128_dec into aes_decrypt_core is not disturbed.
 */

/*
 * aes_dec_xtime reg
 *
 * Multiplies \reg by 0x02 in GF(2^8) in place: shift left by one and, if a
 * bit was shifted out, XOR with xREDUCER (which the caller has loaded with
 * 0x1b). Changes the status flags.
 * Per the AVR instruction set manual a taken brcc costs 2 cycles and a
 * non-taken one 1 cycle, so together with the 1-cycle eor both paths take
 * the same time.
 */
.macro aes_dec_xtime reg
	lsl	\reg
	brcc	3f
	eor	\reg, xREDUCER
3:
.endm

/*
 * aes_dec_inv_mix_column row0, row1, row2, row3
 *
 * Applies the inverse MixColumns transformation in place to the four state
 * bytes a0..a3 held in the given registers. With all products taken in
 * GF(2^8):
 *
 *   t = a3 ^ a2                 u = a0 ^ a1
 *   v = 9*(t ^ u) ^ 4*(a3 ^ a1)
 *   w = 9*(t ^ u) ^ 4*(a2 ^ a0)
 *
 *   a3 ^= 2*(a0 ^ a3) ^ v       a1 ^= 2*(a2 ^ a1) ^ v
 *   a2 ^= 2*t ^ w               a0 ^= 2*u ^ w
 *
 * which yields the 0x0e / 0x0b / 0x0d / 0x09 coefficients of InvMixColumns.
 * The multiplications by 9 and 4 are table look-ups in flash; only the high
 * address byte of each table is loaded, the operand itself is used as the
 * low address byte, so both tables have to start on a 256-byte boundary.
 *
 * Needs:    xREDUCER = 0x1b
 * Clobbers: T0, T1, T2, T3, T4, P (r0), r30, r31, status flags
 *
 * The update order matters: a3 and a1 are rewritten first, using values of
 * a0 and a2 that have not been modified yet; a2 and a0 then only need the
 * t and u values captured in T0/T1 at the very beginning.
 */
.macro aes_dec_inv_mix_column row0, row1, row2, row3
	/* preparing */
	ldi	r31, hi8(lut_gf256mul_0x09)
	mov	T0, \row3
	eor	T0, \row2		/* T0 = t */
	mov	T1, \row0
	eor	T1, \row1		/* T1 = u */
	mov	r30, T0
	eor	r30, T1
	lpm	T2, Z			/* T2 = 9*(t ^ u) */
	ldi	r31, hi8(lut_gf256mul_0x04)
	mov	r30, \row2
	eor	r30, \row0
	lpm	T3, Z
	eor	T3, T2			/* T3 = w */
	mov	r30, \row3
	eor	r30, \row1
	lpm	P, Z
	eor	T2, P			/* T2 = v */
	/* now the big move */
	mov	T4, \row0
	eor	T4, \row3
	aes_dec_xtime T4
	eor	T4, T2
	eor	\row3, T4		/* a3 ^= 2*(a0 ^ a3) ^ v */
	mov	T4, \row2
	eor	T4, \row1
	aes_dec_xtime T4
	eor	T4, T2
	eor	\row1, T4		/* a1 ^= 2*(a2 ^ a1) ^ v */
	aes_dec_xtime T0
	eor	T0, T3
	eor	\row2, T0		/* a2 ^= 2*t ^ w */
	aes_dec_xtime T1
	eor	T1, T3
	eor	\row0, T1		/* a0 ^= 2*u ^ w */
.endm

/*
 * void aes_decrypt_core(aes_cipher_state_t *state, const aes_genctx_t *ks, uint8_t rounds)
 *
 * Decrypts the 16 bytes at *state in place.
 *
 * param state:  r24:r25  16 bytes, read at entry and overwritten at exit
 * param ks:     r22:r23  round keys; (rounds + 1) * 16 bytes are read,
 *                        starting with the last byte and walking backwards
 * param rounds: r20      number of rounds; (rounds + 1) * 16 is formed with
 *                        a nibble swap, so rounds + 1 must be below 16
 *
 * Relies on r1 being zero (used as the zero operand of adc) and on
 * aes_invsbox, defined outside this file, starting on a 256-byte boundary
 * in flash (only hi8 of its address is loaded into r31).
 *
 * Saved and restored: r2..r17
 * Clobbered:          r0, r18..r27, r30, r31, status register including T
 *
 * r28/r29 are never written by this routine and are therefore not saved.
 */
.global aes_decrypt_core
aes_decrypt_core:
	push_range 2, 17
	push	r24			/* keep the state pointer for the write-back at exit */
	push	r25
	movw	r26, r22		/* X = ks */
	movw	r30, r24		/* Z = state */
	mov	CTR, r20		/* CTR reuses r24, which is already copied and pushed */
	inc	r20
	swap	r20			/* r20 = (rounds + 1) * 16 */
	add	r26, r20
	adc	r27, r1			/* X = one past the last round-key byte */
	clt				/* T = 0: not the final round yet */

	/* load the state into registers */
	.irp param, ST00, ST01, ST02, ST03, ST10, ST11, ST12, ST13, ST20, ST21, ST22, ST23, ST30, ST31, ST32, ST33
		ld	\param, Z+
	.endr

	ldi	xREDUCER, 0x1b		/* load reducer */

	/* initial key addition with the last round key, read backwards */
	.irp param, ST33, ST32, ST31, ST30, ST23, ST22, ST21, ST20, ST13, ST12, ST11, ST10, ST03, ST02, ST01, ST00
		ld	r0, -X
		eor	\param, r0
	.endr

1:
	dec	CTR
	brne	2f
	set				/* counter reached zero: this pass is the last one */
2:
	ldi	r31, hi8(aes_invsbox)

	/*
	 * Inverse byte substitution combined with the inverse row shift:
	 * every byte is replaced by its aes_invsbox entry and stored in the
	 * register it moves to.
	 */
	/* bytes STi0 stay where they are */
	.irp param, ST00, ST10, ST20, ST30
		mov	r30, \param
		lpm	\param, Z
	.endr
	/* bytes STi1 move from word i to word i+1 (ST31 wraps to ST01) */
	mov	r30, ST31
	lpm	T0, Z
	mov	r30, ST21
	lpm	ST31, Z
	mov	r30, ST11
	lpm	ST21, Z
	mov	r30, ST01
	lpm	ST11, Z
	mov	ST01, T0
	/* bytes STi2 are exchanged between words 0 <-> 2 and 1 <-> 3 */
	mov	r30, ST32
	lpm	T0, Z
	mov	r30, ST22
	lpm	T1, Z
	mov	r30, ST12
	lpm	ST32, Z
	mov	r30, ST02
	lpm	ST22, Z
	mov	ST12, T0
	mov	ST02, T1
	/* bytes STi3 move from word i to word i-1 (ST03 wraps to ST33) */
	mov	r30, ST03
	lpm	T0, Z
	mov	r30, ST13
	lpm	ST03, Z
	mov	r30, ST23
	lpm	ST13, Z
	mov	r30, ST33
	lpm	ST23, Z
	mov	ST33, T0

	/* key addition */
	.irp param, ST33, ST32, ST31, ST30, ST23, ST22, ST21, ST20, ST13, ST12, ST11, ST10, ST03, ST02, ST01, ST00
		ld	r0, -X
		eor	\param, r0
	.endr

	brtc	2f			/* T clear: more rounds to go, continue with the column mixing */

exit:
	pop	r31			/* Z = state pointer saved at entry (r25 was pushed last) */
	pop	r30
	.irp param, ST00, ST01, ST02, ST03, ST10, ST11, ST12, ST13, ST20, ST21, ST22, ST23, ST30, ST31, ST32, ST33
		st	Z+, \param
	.endr
	pop_range 2, 17
	ret

2:
	/* inverse column mixing, one 4-byte word of the state at a time */
	aes_dec_inv_mix_column ST00, ST01, ST02, ST03	/* word 1 */
	aes_dec_inv_mix_column ST10, ST11, ST12, ST13	/* word 2 */
	aes_dec_inv_mix_column ST20, ST21, ST22, ST23	/* word 3 */
	aes_dec_inv_mix_column ST30, ST31, ST32, ST33	/* word 4 */
	rjmp	1b
/*
 * lut_gf256mul_0x09: 256-entry look-up table, entry i is i multiplied by
 * 0x09 in GF(2^8) with the AES reduction (0x1b), e.g. entry 0x20 is 0x3B.
 *
 * The table is read with lpm after loading only hi8(lut_gf256mul_0x09) into
 * r31 and the operand into r30, hence the 256-byte alignment: the low
 * address byte of entry 0 has to be zero.
 * NOTE(review): lpm addresses flash through the 16-bit Z register only, so
 * this presumably requires the table to be linked into the low 64 KiB of
 * program memory - confirm for large devices.
 *
 * The table must stay exactly 256 bytes long: lut_gf256mul_0x04 follows
 * immediately and inherits its alignment from this one.
 */
.balign 256
lut_gf256mul_0x09:
/* entries 0x00 .. 0x3F */
.byte 0x00, 0x09, 0x12, 0x1B, 0x24, 0x2D, 0x36, 0x3F
.byte 0x48, 0x41, 0x5A, 0x53, 0x6C, 0x65, 0x7E, 0x77
.byte 0x90, 0x99, 0x82, 0x8B, 0xB4, 0xBD, 0xA6, 0xAF
.byte 0xD8, 0xD1, 0xCA, 0xC3, 0xFC, 0xF5, 0xEE, 0xE7
.byte 0x3B, 0x32, 0x29, 0x20, 0x1F, 0x16, 0x0D, 0x04
.byte 0x73, 0x7A, 0x61, 0x68, 0x57, 0x5E, 0x45, 0x4C
.byte 0xAB, 0xA2, 0xB9, 0xB0, 0x8F, 0x86, 0x9D, 0x94
.byte 0xE3, 0xEA, 0xF1, 0xF8, 0xC7, 0xCE, 0xD5, 0xDC
/* entries 0x40 .. 0x7F */
.byte 0x76, 0x7F, 0x64, 0x6D, 0x52, 0x5B, 0x40, 0x49
.byte 0x3E, 0x37, 0x2C, 0x25, 0x1A, 0x13, 0x08, 0x01
.byte 0xE6, 0xEF, 0xF4, 0xFD, 0xC2, 0xCB, 0xD0, 0xD9
.byte 0xAE, 0xA7, 0xBC, 0xB5, 0x8A, 0x83, 0x98, 0x91
.byte 0x4D, 0x44, 0x5F, 0x56, 0x69, 0x60, 0x7B, 0x72
.byte 0x05, 0x0C, 0x17, 0x1E, 0x21, 0x28, 0x33, 0x3A
.byte 0xDD, 0xD4, 0xCF, 0xC6, 0xF9, 0xF0, 0xEB, 0xE2
.byte 0x95, 0x9C, 0x87, 0x8E, 0xB1, 0xB8, 0xA3, 0xAA
/* entries 0x80 .. 0xBF */
.byte 0xEC, 0xE5, 0xFE, 0xF7, 0xC8, 0xC1, 0xDA, 0xD3
.byte 0xA4, 0xAD, 0xB6, 0xBF, 0x80, 0x89, 0x92, 0x9B
.byte 0x7C, 0x75, 0x6E, 0x67, 0x58, 0x51, 0x4A, 0x43
.byte 0x34, 0x3D, 0x26, 0x2F, 0x10, 0x19, 0x02, 0x0B
.byte 0xD7, 0xDE, 0xC5, 0xCC, 0xF3, 0xFA, 0xE1, 0xE8
.byte 0x9F, 0x96, 0x8D, 0x84, 0xBB, 0xB2, 0xA9, 0xA0
.byte 0x47, 0x4E, 0x55, 0x5C, 0x63, 0x6A, 0x71, 0x78
.byte 0x0F, 0x06, 0x1D, 0x14, 0x2B, 0x22, 0x39, 0x30
/* entries 0xC0 .. 0xFF */
.byte 0x9A, 0x93, 0x88, 0x81, 0xBE, 0xB7, 0xAC, 0xA5
.byte 0xD2, 0xDB, 0xC0, 0xC9, 0xF6, 0xFF, 0xE4, 0xED
.byte 0x0A, 0x03, 0x18, 0x11, 0x2E, 0x27, 0x3C, 0x35
.byte 0x42, 0x4B, 0x50, 0x59, 0x66, 0x6F, 0x74, 0x7D
.byte 0xA1, 0xA8, 0xB3, 0xBA, 0x85, 0x8C, 0x97, 0x9E
.byte 0xE9, 0xE0, 0xFB, 0xF2, 0xCD, 0xC4, 0xDF, 0xD6
.byte 0x31, 0x38, 0x23, 0x2A, 0x15, 0x1C, 0x07, 0x0E
.byte 0x79, 0x70, 0x6B, 0x62, 0x5D, 0x54, 0x4F, 0x46
/*
 * lut_gf256mul_0x04: 256-entry look-up table, entry i is i multiplied by
 * 0x04 in GF(2^8) with the AES reduction (0x1b), e.g. entry 0x40 is 0x1B.
 *
 * Read with lpm using hi8(lut_gf256mul_0x04) as the high address byte and
 * the operand as the low address byte, so it has to start on a 256-byte
 * boundary. There is no alignment directive of its own here: the table
 * directly follows the 256-byte lut_gf256mul_0x09, which is aligned with
 * .balign 256. Do not insert anything between the two tables.
 */
lut_gf256mul_0x04:
/* entries 0x00 .. 0x3F */
.byte 0x00, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C
.byte 0x20, 0x24, 0x28, 0x2C, 0x30, 0x34, 0x38, 0x3C
.byte 0x40, 0x44, 0x48, 0x4C, 0x50, 0x54, 0x58, 0x5C
.byte 0x60, 0x64, 0x68, 0x6C, 0x70, 0x74, 0x78, 0x7C
.byte 0x80, 0x84, 0x88, 0x8C, 0x90, 0x94, 0x98, 0x9C
.byte 0xA0, 0xA4, 0xA8, 0xAC, 0xB0, 0xB4, 0xB8, 0xBC
.byte 0xC0, 0xC4, 0xC8, 0xCC, 0xD0, 0xD4, 0xD8, 0xDC
.byte 0xE0, 0xE4, 0xE8, 0xEC, 0xF0, 0xF4, 0xF8, 0xFC
/* entries 0x40 .. 0x7F */
.byte 0x1B, 0x1F, 0x13, 0x17, 0x0B, 0x0F, 0x03, 0x07
.byte 0x3B, 0x3F, 0x33, 0x37, 0x2B, 0x2F, 0x23, 0x27
.byte 0x5B, 0x5F, 0x53, 0x57, 0x4B, 0x4F, 0x43, 0x47
.byte 0x7B, 0x7F, 0x73, 0x77, 0x6B, 0x6F, 0x63, 0x67
.byte 0x9B, 0x9F, 0x93, 0x97, 0x8B, 0x8F, 0x83, 0x87
.byte 0xBB, 0xBF, 0xB3, 0xB7, 0xAB, 0xAF, 0xA3, 0xA7
.byte 0xDB, 0xDF, 0xD3, 0xD7, 0xCB, 0xCF, 0xC3, 0xC7
.byte 0xFB, 0xFF, 0xF3, 0xF7, 0xEB, 0xEF, 0xE3, 0xE7
/* entries 0x80 .. 0xBF */
.byte 0x36, 0x32, 0x3E, 0x3A, 0x26, 0x22, 0x2E, 0x2A
.byte 0x16, 0x12, 0x1E, 0x1A, 0x06, 0x02, 0x0E, 0x0A
.byte 0x76, 0x72, 0x7E, 0x7A, 0x66, 0x62, 0x6E, 0x6A
.byte 0x56, 0x52, 0x5E, 0x5A, 0x46, 0x42, 0x4E, 0x4A
.byte 0xB6, 0xB2, 0xBE, 0xBA, 0xA6, 0xA2, 0xAE, 0xAA
.byte 0x96, 0x92, 0x9E, 0x9A, 0x86, 0x82, 0x8E, 0x8A
.byte 0xF6, 0xF2, 0xFE, 0xFA, 0xE6, 0xE2, 0xEE, 0xEA
.byte 0xD6, 0xD2, 0xDE, 0xDA, 0xC6, 0xC2, 0xCE, 0xCA
/* entries 0xC0 .. 0xFF */
.byte 0x2D, 0x29, 0x25, 0x21, 0x3D, 0x39, 0x35, 0x31
.byte 0x0D, 0x09, 0x05, 0x01, 0x1D, 0x19, 0x15, 0x11
.byte 0x6D, 0x69, 0x65, 0x61, 0x7D, 0x79, 0x75, 0x71
.byte 0x4D, 0x49, 0x45, 0x41, 0x5D, 0x59, 0x55, 0x51
.byte 0xAD, 0xA9, 0xA5, 0xA1, 0xBD, 0xB9, 0xB5, 0xB1
.byte 0x8D, 0x89, 0x85, 0x81, 0x9D, 0x99, 0x95, 0x91
.byte 0xED, 0xE9, 0xE5, 0xE1, 0xFD, 0xF9, 0xF5, 0xF1
.byte 0xCD, 0xC9, 0xC5, 0xC1, 0xDD, 0xD9, 0xD5, 0xD1