a first look at aes assembly

This commit is contained in:
bg 2009-01-10 22:39:34 +00:00
parent a75cfaf73e
commit 5e274071cf
6 changed files with 760 additions and 11 deletions

452
aes_enc-asm.S Normal file
View File

@ -0,0 +1,452 @@
/* aes_enc-asm.S */
/*
This file is part of the Crypto-avr-lib/microcrypt-lib.
Copyright (C) 2008, 2009 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* \file aes_enc-asm.S
* \email daniel.otte@rub.de
* \author Daniel Otte
* \date 2009-01-10
* \license GPLv3 or later
*
*/
#include "avr-asm-macros.S"
/*
* gf256mul: shift-and-add multiplication of two bytes, reduced with a
* caller supplied constant (GF(2^8) multiplication when the constant is the
* low byte of the field polynomial; aes_encrypt_core in this file uses 0x1b).
*
* param a: r28 (A), consumed bit by bit starting at the LSB; zero on return
* param b: r29 (B), doubled (and reduced) once per remaining bit of a; clobbered
* param reducer: r25 (xREDUCER), XORed into B whenever doubling B carries out
* return: r0 (P), XOR of every B value selected by a set bit of a
*
* Only r0, r28, r29 and the status flags are written.
*
* NOTE(review): each conditional branch skips exactly one single-word eor, so
* both sides of every branch look cycle-balanced on AVR (taken branch = 2
* cycles, untaken branch + eor = 2 cycles) - confirm against the instruction
* set manual before restructuring the loop.
*/
A = 28
B = 29
P = 0
xREDUCER = 25
gf256mul:
clr P /* P = 0: running product */
1:
lsr A /* C = lowest remaining bit of A, Z = no bits of A left afterwards */
breq 4f /* that was the last set bit (or A was 0): finish without doubling B */
brcc 2f /* bit clear: skip the accumulation */
eor P, B /* bit set: P ^= B */
2:
lsl B /* B = B * 2, C = bit shifted out of B */
brcc 3f /* no overflow: B needs no reduction */
eor B, xREDUCER /* overflow: fold the reducer back into B */
3:
rjmp 1b
4:
/* reached straight from the breq above, so C still holds the bit shifted out by lsr */
brcc 2f
eor P, B /* last bit of A was set: P ^= B */
2:
ret
/*
* Key-size specific entry points. Each one only loads the round count into
* r20 (the 'rounds' parameter of aes_encrypt_core) and continues there;
* r24:r25 and r22:r23 are passed through untouched as 'state' and 'ks'.
* NOTE(review): the C prototypes of these entry points are not visible in this
* file - confirm the argument order against the header.
*/
.global aes256_enc
aes256_enc:
ldi r20, 14 /* 14 rounds */
rjmp aes_encrypt_core
.global aes192_enc
aes192_enc:
ldi r20, 12 /* 12 rounds */
rjmp aes_encrypt_core
.global aes128_enc
aes128_enc:
ldi r20, 10 /* 10 rounds */
/* no jump here: aes128_enc falls through into aes_encrypt_core, which must follow directly */
/*
void aes_encrypt_core(aes_cipher_state_t* state, const aes_genctx_t* ks, uint8_t rounds)

Encrypts the 16 bytes at 'state' in place. Starting at 'ks', rounds+1
consecutive 16-byte round keys are read (one per key addition); 'rounds'
must be at least 1.

The whole state is kept in registers. STcr holds the byte loaded from
state[4*c + r], i.e. ST00..ST03 are the first four bytes of the block.

Flow per iteration:
  add round key -> (exit if T set) -> count down, set T on the last round
  -> S-box substitution + row rotation -> (skip mixing if T set) -> mix

The T flag marks the last round and is left set on return.
Saved and restored: r2-r17, r28, r29. Clobbered: r0, r18-r27, r30, r31, flags.
NOTE(review): 'adc r31, r1' relies on r1 being zero (avr-gcc zero register
convention) - nothing in this file establishes that.
*/
T0= 2
T1= 3
T2= 4
T3= 5
SBOX_SAVE0 = 6
SBOX_SAVE1 = 7
ST00 = 8
ST01 = 9
ST02 = 10
ST03 = 11
ST10 = 12
ST11 = 13
ST12 = 14
ST13 = 15
ST20 = 16
ST21 = 17
ST22 = 18
ST23 = 19
ST30 = 20
ST31 = 21
ST32 = 22
ST33 = 23
CTR = 24

/*
* aes_sbox_subst dst, src:  dst = flash byte at aes_sbox + src
* Expects the address of aes_sbox in SBOX_SAVE0:SBOX_SAVE1; clobbers Z and flags.
* dst and src may be the same register.
*/
.macro aes_sbox_subst dst, src
	movw r30, SBOX_SAVE0
	add r30, \src
	adc r31, r1
	lpm \dst, Z
.endm

/*
* aes_mix_column s0, s1, s2, s3:  replaces the four bytes by
*   s0' = 2*s0 ^ 3*s1 ^   s2 ^   s3
*   s1' =   s0 ^ 2*s1 ^ 3*s2 ^   s3
*   s2' =   s0 ^   s1 ^ 2*s2 ^ 3*s3
*   s3' = 3*s0 ^   s1 ^   s2 ^ 2*s3
* where '*' is gf256mul. Expects xREDUCER to be loaded; clobbers A, B, r0,
* T0-T3 and flags. The results are collected in T0-T3 and only copied back
* at the end because every output needs all four original inputs.
*/
.macro aes_mix_column s0, s1, s2, s3
	ldi A, 2
	mov B, \s0
	rcall gf256mul
	mov T0, r0
	ldi A, 3
	mov B, \s1
	rcall gf256mul
	eor T0, r0
	eor T0, \s2
	eor T0, \s3

	mov T1, \s0
	ldi A, 2
	mov B, \s1
	rcall gf256mul
	eor T1, r0
	ldi A, 3
	mov B, \s2
	rcall gf256mul
	eor T1, r0
	eor T1, \s3

	mov T2, \s0
	eor T2, \s1
	ldi A, 2
	mov B, \s2
	rcall gf256mul
	eor T2, r0
	ldi A, 3
	mov B, \s3
	rcall gf256mul
	eor T2, r0

	ldi A, 3
	mov B, \s0
	rcall gf256mul
	mov T3, r0
	eor T3, \s1
	eor T3, \s2
	ldi A, 2
	mov B, \s3
	rcall gf256mul
	eor T3, r0

	mov \s0, T0
	mov \s1, T1
	mov \s2, T2
	mov \s3, T3
.endm

/*
* param state: r24:r25
* param ks: r22:r23
* param rounds: r20
*/
.global aes_encrypt_core
aes_encrypt_core:
	push_range 2, 17
	push r28
	push r29
	push r24                /* keep 'state' for the write-back at the end */
	push r25
	/* the parameters must be moved away before the state load below
	   overwrites r20-r23 (ST30..ST33) */
	movw r26, r22           /* X = ks */
	movw r30, r24           /* Z = state */
	mov CTR, r20
	clt                     /* T clear: not the last round yet */
.irp param,ST00, ST01, ST02, ST03, ST10, ST11, ST12, ST13, ST20, ST21, ST22, ST23, ST30, ST31, ST32, ST33
	ld \param, Z+
.endr
	/* loop invariants: loaded once, neither register pair is written inside the loop */
	ldi r30, lo8(aes_sbox)
	ldi r31, hi8(aes_sbox)
	movw SBOX_SAVE0, r30
	ldi xREDUCER, 0x1b      /* load reducer (r25 is free: 'state' is on the stack) */

	/* key whitening / add round key: X walks through the round keys */
.Laes_enc_add_round_key:
.irp param,ST00, ST01, ST02, ST03, ST10, ST11, ST12, ST13, ST20, ST21, ST22, ST23, ST30, ST31, ST32, ST33
	ld r0, X+
	eor \param, r0
.endr
	brtc .Laes_enc_next_round
	rjmp .Laes_enc_store    /* T set: that was the final key addition */
.Laes_enc_next_round:
	dec CTR
	brne .Laes_enc_sub_shift
	set                     /* counter ran out: the round starting now is the last one */
.Laes_enc_sub_shift:
	/* SBOX substitution and shifting */
	/* bytes STc0 stay in place */
	aes_sbox_subst ST00, ST00
	aes_sbox_subst ST10, ST10
	aes_sbox_subst ST20, ST20
	aes_sbox_subst ST30, ST30
	/* bytes STc1 move one position: STc1 <- S(ST(c+1)1) */
	aes_sbox_subst T0, ST01
	aes_sbox_subst ST01, ST11
	aes_sbox_subst ST11, ST21
	aes_sbox_subst ST21, ST31
	mov ST31, T0
	/* bytes STc2 move two positions */
	aes_sbox_subst T0, ST02
	aes_sbox_subst T1, ST12
	aes_sbox_subst ST02, ST22
	aes_sbox_subst ST12, ST32
	mov ST22, T0
	mov ST32, T1
	/* bytes STc3 move three positions */
	aes_sbox_subst T0, ST03
	aes_sbox_subst T1, ST13
	aes_sbox_subst T2, ST23
	aes_sbox_subst ST03, ST33
	mov ST13, T0
	mov ST23, T1
	mov ST33, T2

	/* mixcols (or rows in our case); skipped in the last round */
	brtc .Laes_enc_mix
	rjmp .Laes_enc_add_round_key
.Laes_enc_mix:
	aes_mix_column ST00, ST01, ST02, ST03
	aes_mix_column ST10, ST11, ST12, ST13
	aes_mix_column ST20, ST21, ST22, ST23
	aes_mix_column ST30, ST31, ST32, ST33
	/* mix columns (rows) done */
	/* add key */
	rjmp .Laes_enc_add_round_key

.Laes_enc_store:
	/* r24 was pushed before r25, so r25 (high byte) comes off the stack first */
	pop r31
	pop r30                 /* Z = state */
.irp param,ST00, ST01, ST02, ST03, ST10, ST11, ST12, ST13, ST20, ST21, ST22, ST23, ST30, ST31, ST32, ST33
	st Z+, \param
.endr
	pop r29
	pop r28
	pop_range 2, 17
	ret

225
aes_keyschedule-asm.S Normal file
View File

@ -0,0 +1,225 @@
/* aes_keyschedule-asm */
/*
This file is part of the Crypto-avr-lib/microcrypt-lib.
Copyright (C) 2008, 2009 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* \file aes_keyschedule-asm.S
* \email daniel.otte@rub.de
* \author Daniel Otte
* \date 2009-01-09
* \license GPLv3 or later
*
*/
#include "avr-asm-macros.S"
/*
* Key-size specific entry points, called as aesNNN_init(key, ctx) (see the
* test code calling aes192_init(key, &ctx)).
* They rearrange the arguments for aes_init(key, keysize_b, ctx):
*   key stays in r24:r25,
*   ctx is moved from r22:r23 to r20:r21,
*   r22:r23 is loaded with the key size in bits.
*/
.global aes256_init
aes256_init:
movw r20, r22 /* ctx -> third argument */
ldi r23, hi8(256)
ldi r22, lo8(256) /* keysize_b = 256 */
rjmp aes_init
.global aes192_init
aes192_init:
movw r20, r22 /* ctx -> third argument */
ldi r23, hi8(192)
ldi r22, lo8(192) /* keysize_b = 192 */
rjmp aes_init
.global aes128_init
aes128_init:
movw r20, r22 /* ctx -> third argument */
clr r23
ldi r22, 128 /* keysize_b = 128 */
/* no jump here: aes128_init falls through into aes_init, which must follow directly */
/*
* aes_init: expands 'key' into the round keys stored at 'ctx'.
* The C code below is the reference the assembly follows. One difference:
* the assembly updates the round constant with 'lsl' and replaces it by 0x1b
* when the shift carries out, where the C text shows a plain rc<<=1.
*/
/*
void aes_init(const void* key, uint16_t keysize_b, aes_genctx_t* ctx){
uint8_t hi,i,nk, next_nk;
uint8_t rc=1;
uint8_t tmp[4];
nk=keysize_b>>5; / * 4, 6, 8 * /
hi=4*(nk+6+1);
memcpy(ctx, key, keysize_b/8);
next_nk = nk;
for(i=nk;i<hi;++i){
*((uint32_t*)tmp) = ((uint32_t*)(ctx->key[0].ks))[i-1];
if(i!=next_nk){
if(nk==8 && i%8==4){
tmp[0] = pgm_read_byte(aes_sbox+tmp[0]);
tmp[1] = pgm_read_byte(aes_sbox+tmp[1]);
tmp[2] = pgm_read_byte(aes_sbox+tmp[2]);
tmp[3] = pgm_read_byte(aes_sbox+tmp[3]);
}
} else {
next_nk += nk;
aes_rotword(tmp);
tmp[0] = pgm_read_byte(aes_sbox+tmp[0]);
tmp[1] = pgm_read_byte(aes_sbox+tmp[1]);
tmp[2] = pgm_read_byte(aes_sbox+tmp[2]);
tmp[3] = pgm_read_byte(aes_sbox+tmp[3]);
tmp[0] ^= rc;
rc<<=1;
}
((uint32_t*)(ctx->key[0].ks))[i] = ((uint32_t*)(ctx->key[0].ks))[i-nk]
^ *((uint32_t*)tmp);
}
}
*/
/*
* Register roles:
*   SBOX_SAVE0:SBOX_SAVE1  address of aes_sbox
*   XRC      round constant (rc in the C code)
*   NK       nk; shares r22 with the low byte of keysize_b
*   C1       byte counter of the key copy, afterwards the word index i
*   NEXT_NK  next_nk
*   HI       hi; shares r23 with the high byte of keysize_b
*   T0..T3   tmp[0..3]; they overwrite the ctx (r20:r21) and key (r24:r25)
*            parameters, which have been copied to Y/Z and X by then
*   X        reads word i-1, Y reads word i-nk, Z writes word i
*   T flag   set while nk == 8, cleared again before returning
*   r16      scratch
* r14-r17, r28 and r29 are saved and restored.
* NOTE(review): 'adc r31, r1' relies on r1 being zero (avr-gcc zero register
* convention) - nothing in this file establishes that.
*/
SBOX_SAVE0 = 14
SBOX_SAVE1 = 15
XRC = 17
NK = 22
C1 = 18
NEXT_NK = 19
HI = 23
T0 = 20
T1 = 21
T2 = 24
T3 = 25
/*
* param key: r24:r25
* param keysize_b: r22:r23
* param ctx: r20:r21
*/
.global aes_init
aes_init:
push_range 14, 17
push r28
push r29
movw r30, r20 /* Z = ctx: destination of the key copy */
movw r28, r20 /* Y = ctx: will read word i-nk, starting at word 0 */
movw r26, r24 /* X = key */
/* 16-bit shift right by one, then two 8-bit shifts of the low byte */
lsr r23
ror r22
lsr r22
lsr r22 /* r22 contains keysize_b/8 */
mov C1, r22
1: /* copy key to ctx */
ld r0, X+
st Z+, r0
dec C1
brne 1b
/* NK is r22: keysize_b/8 shifted twice more gives keysize_b/32 */
lsr NK
lsr NK
bst NK,3 /* set T if NK==8 */
mov NEXT_NK, NK
/* HI = 4*(NK+7), the end value of the word index */
mov HI, NK
subi HI, -7
lsl HI
lsl HI
/* Z points just behind the copied key, i.e. at word nk; X = the word before it */
movw r26, r30
sbiw r26, 4
mov C1, NK /* i = nk */
ldi r30, lo8(aes_sbox)
ldi r31, hi8(aes_sbox)
movw SBOX_SAVE0, r30
ldi XRC, 1
1:
/* tmp = word i-1; afterwards X points at word i */
ld T0, X+
ld T1, X+
ld T2, X+
ld T3, X+
cp NEXT_NK, C1
breq 2f /* i == next_nk: rotate, substitute and add the round constant */
brtc 5f /* nk != 8: tmp is used unchanged */
/* nk == 8: substitute (without rotation) when i%8 == 4 */
mov r16, C1
andi r16, 0x07
cpi r16, 0x04
brne 5f
movw r30, SBOX_SAVE0
add r30, T0
adc r31, r1
lpm T0, Z
movw r30, SBOX_SAVE0
add r30, T1
adc r31, r1
lpm T1, Z
movw r30, SBOX_SAVE0
add r30, T2
adc r31, r1
lpm T2, Z
movw r30, SBOX_SAVE0
add r30, T3
adc r31, r1
lpm T3, Z
rjmp 5f
2:
add NEXT_NK, NK
/* substitution and rotation combined: new tmp[k] = sbox[old tmp[k+1]],
   the substituted old tmp[0] is parked in r16 and becomes tmp[3] */
movw r30, SBOX_SAVE0
add r30, T0
adc r31, r1
lpm r16, Z
movw r30, SBOX_SAVE0
add r30, T1
adc r31, r1
lpm T0, Z
movw r30, SBOX_SAVE0
add r30, T2
adc r31, r1
lpm T1, Z
movw r30, SBOX_SAVE0
add r30, T3
adc r31, r1
lpm T2, Z
mov T3, r16
eor T0, XRC /* tmp[0] ^= rc */
/* next round constant: double it, and use 0x1b once the shift carries out */
lsl XRC
brcc 3f
ldi XRC, 0x1b
3:
5:
/* word i = word (i-nk) ^ tmp; Z = X = address of word i, Y advances through word i-nk */
movw r30, r26
ld r0, Y+
eor r0, T0
st Z+, r0
ld r0, Y+
eor r0 ,T1
st Z+, r0
ld r0, Y+
eor r0, T2
st Z+, r0
ld r0, Y+
eor r0, T3
st Z+, r0
/*
st Z+, T0
st Z+, T1
st Z+, T2
st Z+, T3
*/
inc C1 /* ++i */
cp C1, HI
breq 6f /* i == hi: all words written */
rjmp 1b
6:
clt /* do not leave the nk==8 marker set */
pop r29
pop r28
pop_range 14, 17
ret

View File

@ -1,7 +1,7 @@
/* aes_keyschedule.c */
/*
This file is part of the Crypto-avr-lib/microcrypt-lib.
Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
Copyright (C) 2008, 2009 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -26,7 +26,6 @@
*/
#include <stdint.h>
#include "gf256mul.h"
#include "aes.h"
#include "aes_keyschedule.h"
#include "aes_sbox.h"
@ -43,9 +42,13 @@ void aes_rotword(void* a){
((uint8_t*)a)[3] = t;
}
/*
 * Round constants of the AES key schedule, indexed by the number of
 * constants already consumed. The table is placed in program memory and is
 * read with pgm_read_byte(); it has to be const, otherwise current avr-gcc
 * refuses to put it into the read-only progmem section.
 */
const uint8_t rc_tab[] PROGMEM = { 0x01, 0x02, 0x04, 0x08,
                                   0x10, 0x20, 0x40, 0x80,
                                   0x1b, 0x36 };
void aes_init(const void* key, uint16_t keysize_b, aes_genctx_t* ctx){
uint8_t hi,i,nk, next_nk;
uint8_t rc=1;
uint8_t rc=0;
uint8_t tmp[4];
nk=keysize_b>>5; /* 4, 6, 8 */
hi=4*(nk+6+1);
@ -67,8 +70,8 @@ void aes_init(const void* key, uint16_t keysize_b, aes_genctx_t* ctx){
tmp[1] = pgm_read_byte(aes_sbox+tmp[1]);
tmp[2] = pgm_read_byte(aes_sbox+tmp[2]);
tmp[3] = pgm_read_byte(aes_sbox+tmp[3]);
tmp[0] ^= rc;
rc = gf256mul(2,rc,0x1b);
tmp[0] ^= pgm_read_byte(rc_tab+rc);
rc++;
}
((uint32_t*)(ctx->key[0].ks))[i] = ((uint32_t*)(ctx->key[0].ks))[i-nk]
^ *((uint32_t*)tmp);

View File

@ -1,14 +1,13 @@
# Makefile for AES
ALGO_NAME := AES_C
ALGO_NAME := AES
# comment out the following line to remove AES from the build process
BLOCK_CIPHERS += $(ALGO_NAME)
$(ALGO_NAME)_OBJ := aes_enc.o aes_dec.o aes_sbox.o aes_invsbox.o aes.o \
aes_keyschedule.o gf256mul.o \
aes128_enc.o aes128_dec.o aes192_enc.o aes192_dec.o \
aes256_enc.o aes256_dec.o
$(ALGO_NAME)_OBJ := aes_enc-asm.o aes_dec.o aes_sbox.o aes_invsbox.o aes.o \
aes_keyschedule-asm.o gf256mul.o \
aes128_dec.o aes192_dec.o aes256_dec.o
$(ALGO_NAME)_TEST_BIN := main-aes-test.o debug.o uart.o serial-tools.o \
nessie_bc_test.o nessie_common.o cli.o performance_test.o
$(ALGO_NAME)_NESSIE_TEST := test nessie

16
mkfiles/aes_c.mk Normal file
View File

@ -0,0 +1,16 @@
# Makefile for AES (variant registered under the name AES_C)
ALGO_NAME := AES_C
# comment out the following line to remove AES_C from the build process
BLOCK_CIPHERS += $(ALGO_NAME)
# object list of this variant: plain .o files only, none of the *-asm.o objects
$(ALGO_NAME)_OBJ := aes_enc.o aes_dec.o aes_sbox.o aes_invsbox.o aes.o \
aes_keyschedule.o gf256mul.o \
aes128_enc.o aes128_dec.o aes192_enc.o aes192_dec.o \
aes256_enc.o aes256_dec.o
# NOTE(review): presumably the additional objects of the test program - confirm in the main Makefile
$(ALGO_NAME)_TEST_BIN := main-aes-test.o debug.o uart.o serial-tools.o \
nessie_bc_test.o nessie_common.o cli.o performance_test.o
$(ALGO_NAME)_NESSIE_TEST := test nessie
$(ALGO_NAME)_PERFORMANCE_TEST := performance

View File

@ -94,7 +94,7 @@ void testrun_test_aes(void){
}
void testrun_testkey_aes(void){
void testrun_testkey_aes128(void){
uint8_t key[16] = { 0x2b, 0x7e, 0x15, 0x16,
0x28, 0xae, 0xd2, 0xa6,
0xab, 0xf7, 0x15, 0x88,
@ -113,6 +113,60 @@ void testrun_testkey_aes(void){
}
}
/*
 * Key schedule check for AES-192: expands a fixed 24-byte key with
 * aes192_init() and prints the key followed by the 13 round keys
 * (index 00..12, 16 bytes each) as hex dumps on the UART.
 */
void testrun_testkey_aes192(void){
	aes192_ctx_t ctx;
	uint8_t rk_idx;
	uint8_t key[24] = {
		0x8e, 0x73, 0xb0, 0xf7, 0xda, 0x0e, 0x64, 0x52,
		0xc8, 0x10, 0xf3, 0x2b, 0x80, 0x90, 0x79, 0xe5,
		0x62, 0xf8, 0xea, 0xd2, 0x52, 0x2c, 0x6b, 0x7b
	};

	/* start from an all-zero context so stale data cannot show up in the dump */
	memset(&ctx, 0, sizeof(ctx));
	aes192_init(key, &ctx);

	uart_putstr_P(PSTR("\r\n\r\n keyschedule test (FIPS 197):\r\n key: "));
	uart_hexdump(key, 24);

	rk_idx = 0;
	while(rk_idx < 13){
		uart_putstr_P(PSTR("\r\n index: "));
		/* two decimal digits */
		uart_putc('0' + rk_idx / 10);
		uart_putc('0' + rk_idx % 10);
		uart_putstr_P(PSTR(" roundkey "));
		uart_hexdump(ctx.key[rk_idx].ks, 16);
		++rk_idx;
	}
}
/*
 * Key schedule check for AES-256: expands a fixed 32-byte key with
 * aes256_init() and prints the key followed by the 15 round keys
 * (index 00..14, 16 bytes each) as hex dumps on the UART.
 */
void testrun_testkey_aes256(void){
	aes256_ctx_t ctx;
	uint8_t rk_idx;
	uint8_t key[32] = {
		0x60, 0x3d, 0xeb, 0x10, 0x15, 0xca, 0x71, 0xbe,
		0x2b, 0x73, 0xae, 0xf0, 0x85, 0x7d, 0x77, 0x81,
		0x1f, 0x35, 0x2c, 0x07, 0x3b, 0x61, 0x08, 0xd7,
		0x2d, 0x98, 0x10, 0xa3, 0x09, 0x14, 0xdf, 0xf4
	};

	/* start from an all-zero context so stale data cannot show up in the dump */
	memset(&ctx, 0, sizeof(ctx));
	aes256_init(key, &ctx);

	uart_putstr_P(PSTR("\r\n\r\n keyschedule test (FIPS 197):\r\n key: "));
	uart_hexdump(key, 32);

	rk_idx = 0;
	while(rk_idx < 15){
		uart_putstr_P(PSTR("\r\n index: "));
		/* two decimal digits */
		uart_putc('0' + rk_idx / 10);
		uart_putc('0' + rk_idx % 10);
		uart_putstr_P(PSTR(" roundkey "));
		uart_hexdump(ctx.key[rk_idx].ks, 16);
		++rk_idx;
	}
}
/* Runs the key schedule dumps for all three key sizes, in the order 128, 192, 256 bit. */
void testrun_testkey_aes(void){
testrun_testkey_aes128();
testrun_testkey_aes192();
testrun_testkey_aes256();
}
/*****************************************************************************/
void testrun_performance_aes128(void){
uint64_t t;
char str[16];