From 5e274071cfce142ba67387bea5ad017b42b8001c Mon Sep 17 00:00:00 2001 From: bg Date: Sat, 10 Jan 2009 22:39:34 +0000 Subject: [PATCH] a first look at aes assembly --- aes_enc-asm.S | 452 +++++++++++++++++++++++++++++++++++++++ aes_keyschedule-asm.S | 225 +++++++++++++++++++ aes_keyschedule.c | 13 +- mkfiles/aes.mk | 9 +- mkfiles/aes_c.mk | 16 ++ test_src/main-aes-test.c | 56 ++++- 6 files changed, 760 insertions(+), 11 deletions(-) create mode 100644 aes_enc-asm.S create mode 100644 aes_keyschedule-asm.S create mode 100644 mkfiles/aes_c.mk diff --git a/aes_enc-asm.S b/aes_enc-asm.S new file mode 100644 index 0000000..ed04b6f --- /dev/null +++ b/aes_enc-asm.S @@ -0,0 +1,452 @@ +/* aes_enc-asm.S */ +/* + This file is part of the Crypto-avr-lib/microcrypt-lib. + Copyright (C) 2008, 2009 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/ +/** + * \file aes_enc-asm.S + * \email daniel.otte@rub.de + * \author Daniel Otte + * \date 2009-01-10 + * \license GPLv3 or later + * + */ + +#include "avr-asm-macros.S" + + +/* + * param a: r28 (A), destroyed + * param b: r29 (B), destroyed + * param reducer: r25 (xREDUCER); the product is returned in r0 (P) + */ +A = 28 +B = 29 +P = 0 +xREDUCER = 25 + +gf256mul: /* shift-and-xor multiply: P = A * B, B is folded with xREDUCER whenever its top bit shifts out */ + clr P +1: + lsr A /* next multiplier bit -> carry */ + breq 4f /* no bits left in A: handle the last carry and return */ + brcc 2f + eor P, B +2: + lsl B + brcc 3f + eor B, xREDUCER +3: + rjmp 1b +4: + brcc 2f + eor P, B +2: + ret + +.global aes256_enc +aes256_enc: + ldi r20, 14 /* round count handed to aes_encrypt_core */ + rjmp aes_encrypt_core + +.global aes192_enc +aes192_enc: + ldi r20, 12 + rjmp aes_encrypt_core + +.global aes128_enc +aes128_enc: + ldi r20, 10 /* no jump: execution falls through into aes_encrypt_core */ + + +/* + void aes_encrypt_core(aes_cipher_state_t* state, const aes_genctx_t* ks, uint8_t rounds) +*/ +T0= 2 +T1= 3 +T2= 4 +T3= 5 +SBOX_SAVE0 = 6 +SBOX_SAVE1 = 7 +ST00 = 8 +ST01 = 9 +ST02 = 10 +ST03 = 11 +ST10 = 12 +ST11 = 13 +ST12 = 14 +ST13 = 15 +ST20 = 16 +ST21 = 17 +ST22 = 18 +ST23 = 19 +ST30 = 20 +ST31 = 21 +ST32 = 22 +ST33 = 23 +CTR = 24 +/* + * param state: r24:r25 + * param ks: r22:r23 + * param rounds: r20 + */ +.global aes_encrypt_core +aes_encrypt_core: + push_range 2, 17 + push r28 /* r28/r29 are the gf256mul operands A and B */ + push r29 + push r24 /* keep the state pointer for the write-back at exit */ + push r25 + movw r26, r22 /* X -> round keys */ + movw r30, r24 /* Z -> state */ + mov CTR, r20 + clt /* T flag clear: not yet in the final round */ + + .irp param,ST00, ST01, ST02, ST03, ST10, ST11, ST12, ST13, ST20, ST21, ST22, ST23, ST30, ST31, ST32, ST33 + ld \param, Z+ + .endr + /* key whitening */ +1: + .irp param,ST00, ST01, ST02, ST03, ST10, ST11, ST12, ST13, ST20, ST21, ST22, ST23, ST30, ST31, ST32, ST33 + ld r0, X+ + eor \param, r0 + .endr + + brtc 2f + rjmp exit /* T set: the last round key has just been added */ +2: dec CTR + brne 3f + set /* CTR reached zero: T marks the final round */ +3: + + ldi r30, lo8(aes_sbox) + ldi r31, hi8(aes_sbox) + movw SBOX_SAVE0, r30 + /* encryption loop */ + + /* SBOX substitution and shifting */ + movw r30, SBOX_SAVE0 + add r30, ST00 + adc r31, r1 /* assumes r1 == 0 (avr-gcc zero register): adds the carry only */ + lpm ST00, Z /* ST00 = aes_sbox[ST00], read from program memory */ + movw r30, SBOX_SAVE0 + add r30, ST10 + adc r31, r1 + lpm ST10, Z + movw r30, SBOX_SAVE0 + add r30, ST20 + adc r31, r1 + lpm ST20, Z + movw r30, SBOX_SAVE0 + add r30, ST30 + adc r31, r1 + lpm ST30, Z + + movw r30, 
SBOX_SAVE0 + add r30, ST01 + adc r31, r1 + lpm T0, Z + movw r30, SBOX_SAVE0 + add r30, ST11 + adc r31, r1 + lpm ST01, Z + movw r30, SBOX_SAVE0 + add r30, ST21 + adc r31, r1 + lpm ST11, Z + movw r30, SBOX_SAVE0 + add r30, ST31 + adc r31, r1 + lpm ST21, Z + mov ST31, T0 /* STx1 bytes: substituted and rotated by one position */ + + movw r30, SBOX_SAVE0 + add r30, ST02 + adc r31, r1 + lpm T0, Z + movw r30, SBOX_SAVE0 + add r30, ST12 + adc r31, r1 + lpm T1, Z + movw r30, SBOX_SAVE0 + add r30, ST22 + adc r31, r1 + lpm ST02, Z + movw r30, SBOX_SAVE0 + add r30, ST32 + adc r31, r1 + lpm ST12, Z + mov ST22, T0 + mov ST32, T1 /* STx2 bytes: substituted and rotated by two positions */ + + movw r30, SBOX_SAVE0 + add r30, ST03 + adc r31, r1 + lpm T0, Z + movw r30, SBOX_SAVE0 + add r30, ST13 + adc r31, r1 + lpm T1, Z + movw r30, SBOX_SAVE0 + add r30, ST23 + adc r31, r1 + lpm T2, Z + movw r30, SBOX_SAVE0 + add r30, ST33 + adc r31, r1 + lpm ST03, Z + mov ST13, T0 + mov ST23, T1 + mov ST33, T2 /* STx3 bytes: substituted and rotated by three positions */ + + /* mixcols (or rows in our case) */ + brtc 2f + rjmp 1b /* final round: skip the mixing step, go straight to the key addition */ +2: + ldi xREDUCER, 0x1b /* load reducer */ + + ldi A, 2 + mov B, ST00 + rcall gf256mul + mov T0, r0 + ldi A, 3 + mov B, ST01 + rcall gf256mul + eor T0, r0 + eor T0, ST02 + eor T0, ST03 /* T0 = 2*ST00 ^ 3*ST01 ^ ST02 ^ ST03 */ + + mov T1, ST00 + ldi A, 2 + mov B, ST01 + rcall gf256mul + eor T1, r0 + ldi A, 3 + mov B, ST02 + rcall gf256mul + eor T1, r0 + eor T1, ST03 /* T1 = ST00 ^ 2*ST01 ^ 3*ST02 ^ ST03 */ + + mov T2, ST00 + eor T2, ST01 + ldi A, 2 + mov B, ST02 + rcall gf256mul + eor T2, r0 + ldi A, 3 + mov B, ST03 + rcall gf256mul + eor T2, r0 /* T2 = ST00 ^ ST01 ^ 2*ST02 ^ 3*ST03 */ + + ldi A, 3 + mov B, ST00 + rcall gf256mul + mov T3, r0 + eor T3, ST01 + eor T3, ST02 + ldi A, 2 + mov B, ST03 + rcall gf256mul + eor T3, r0 /* T3 = 3*ST00 ^ ST01 ^ ST02 ^ 2*ST03 */ + + mov ST00, T0 + mov ST01, T1 + mov ST02, T2 + mov ST03, T3 /* the same mixing is repeated for ST1x, ST2x and ST3x below */ + + ldi A, 2 + mov B, ST10 + rcall gf256mul + mov T0, r0 + ldi A, 3 + mov B, ST11 + rcall gf256mul + eor T0, r0 + eor T0, ST12 + eor T0, ST13 + + mov T1, ST10 + ldi A, 2 + mov B, ST11 + rcall gf256mul + eor T1, r0 + ldi A, 3 + mov B, ST12 + rcall gf256mul + eor T1, r0 + eor T1, ST13 + + mov T2, ST10 + eor T2, ST11 + ldi A, 2 + mov B, ST12 + rcall gf256mul + eor T2, r0 + ldi A, 3 + mov B, 
ST13 + rcall gf256mul + eor T2, r0 + + ldi A, 3 + mov B, ST10 + rcall gf256mul + mov T3, r0 + eor T3, ST11 + eor T3, ST12 + ldi A, 2 + mov B, ST13 + rcall gf256mul + eor T3, r0 + + mov ST10, T0 + mov ST11, T1 + mov ST12, T2 + mov ST13, T3 + + ldi A, 2 + mov B, ST20 + rcall gf256mul + mov T0, r0 + ldi A, 3 + mov B, ST21 + rcall gf256mul + eor T0, r0 + eor T0, ST22 + eor T0, ST23 + + mov T1, ST20 + ldi A, 2 + mov B, ST21 + rcall gf256mul + eor T1, r0 + ldi A, 3 + mov B, ST22 + rcall gf256mul + eor T1, r0 + eor T1, ST23 + + mov T2, ST20 + eor T2, ST21 + ldi A, 2 + mov B, ST22 + rcall gf256mul + eor T2, r0 + ldi A, 3 + mov B, ST23 + rcall gf256mul + eor T2, r0 + + ldi A, 3 + mov B, ST20 + rcall gf256mul + mov T3, r0 + eor T3, ST21 + eor T3, ST22 + ldi A, 2 + mov B, ST23 + rcall gf256mul + eor T3, r0 + + mov ST20, T0 + mov ST21, T1 + mov ST22, T2 + mov ST23, T3 + + ldi A, 2 + mov B, ST30 + rcall gf256mul + mov T0, r0 + ldi A, 3 + mov B, ST31 + rcall gf256mul + eor T0, r0 + eor T0, ST32 + eor T0, ST33 + + mov T1, ST30 + ldi A, 2 + mov B, ST31 + rcall gf256mul + eor T1, r0 + ldi A, 3 + mov B, ST32 + rcall gf256mul + eor T1, r0 + eor T1, ST33 + + mov T2, ST30 + eor T2, ST31 + ldi A, 2 + mov B, ST32 + rcall gf256mul + eor T2, r0 + ldi A, 3 + mov B, ST33 + rcall gf256mul + eor T2, r0 + + ldi A, 3 + mov B, ST30 + rcall gf256mul + mov T3, r0 + eor T3, ST31 + eor T3, ST32 + ldi A, 2 + mov B, ST33 + rcall gf256mul + eor T3, r0 + + mov ST30, T0 + mov ST31, T1 + mov ST32, T2 + mov ST33, T3 + + /* mix columns (rows) done */ + + /* add key*/ + rjmp 1b + +exit: + pop r31 /* the state pointer was pushed as r24 then r25: pop it back into Z */ + pop r30 + st Z+, ST00 /* write the 16 state bytes back over the input block */ + st Z+, ST01 + st Z+, ST02 + st Z+, ST03 + st Z+, ST10 + st Z+, ST11 + st Z+, ST12 + st Z+, ST13 + st Z+, ST20 + st Z+, ST21 + st Z+, ST22 + st Z+, ST23 + st Z+, ST30 + st Z+, ST31 + st Z+, ST32 + st Z+, ST33 + + pop r29 + pop r28 + pop_range 2, 17 + ret + + + + + + + diff --git a/aes_keyschedule-asm.S b/aes_keyschedule-asm.S new file mode 100644 index 0000000..dab7104 --- /dev/null +++ 
b/aes_keyschedule-asm.S @@ -0,0 +1,225 @@ +/* aes_keyschedule-asm */ +/* + This file is part of the Crypto-avr-lib/microcrypt-lib. + Copyright (C) 2008, 2009 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * \file aes_keyschedule-asm.S + * \email daniel.otte@rub.de + * \author Daniel Otte + * \date 2009-01-09 + * \license GPLv3 or later + * + */ + +#include "avr-asm-macros.S" + +.global aes256_init +aes256_init: + movw r20, r22 /* second argument moves to r20:r21, where aes_init expects ctx */ + ldi r23, hi8(256) + ldi r22, lo8(256) /* r22:r23 = key size in bits */ + rjmp aes_init + +.global aes192_init +aes192_init: + movw r20, r22 + ldi r23, hi8(192) + ldi r22, lo8(192) + rjmp aes_init + +.global aes128_init +aes128_init: + movw r20, r22 + clr r23 + ldi r22, 128 /* no jump: execution falls through into aes_init */ + +/* +void aes_init(const void* key, uint16_t keysize_b, aes_genctx_t* ctx){ + uint8_t hi,i,nk, next_nk; + uint8_t rc=1; + uint8_t tmp[4]; + nk=keysize_b>>5; / * 4, 6, 8 * / + hi=4*(nk+6+1); + memcpy(ctx, key, keysize_b/8); + next_nk = nk; + for(i=nk;i<hi;++i){ + *((uint32_t*)tmp) = ((uint32_t*)(ctx->key[0].ks))[i-1]; + if(i!=next_nk){ + if(nk==8 && i%8==4){ + tmp[0] = pgm_read_byte(aes_sbox+tmp[0]); + tmp[1] = pgm_read_byte(aes_sbox+tmp[1]); + tmp[2] = pgm_read_byte(aes_sbox+tmp[2]); + tmp[3] = pgm_read_byte(aes_sbox+tmp[3]); + } + } else { + next_nk += nk; + aes_rotword(tmp); + tmp[0] = pgm_read_byte(aes_sbox+tmp[0]); + tmp[1] = pgm_read_byte(aes_sbox+tmp[1]); + tmp[2] = pgm_read_byte(aes_sbox+tmp[2]); + tmp[3] = pgm_read_byte(aes_sbox+tmp[3]); + tmp[0] ^= rc; 
+ rc<<=1; + } + ((uint32_t*)(ctx->key[0].ks))[i] = ((uint32_t*)(ctx->key[0].ks))[i-nk] + ^ *((uint32_t*)tmp); + } +} +*/ + +SBOX_SAVE0 = 14 +SBOX_SAVE1 = 15 +XRC = 17 +NK = 22 +C1 = 18 +NEXT_NK = 19 +HI = 23 +T0 = 20 +T1 = 21 +T2 = 24 +T3 = 25 +/* + * param key: r24:r25 + * param keysize_b: r22:r23 + * param ctx: r20:r21 + */ +.global aes_init +aes_init: + push_range 14, 17 + push r28 + push r29 + movw r30, r20 /* Z -> ctx, write pointer */ + movw r28, r20 /* Y -> ctx, later reads word i-NK */ + movw r26, r24 /* X -> key */ + lsr r23 + ror r22 + lsr r22 + lsr r22 /* r22 contains keysize_b/8 */ + mov C1, r22 + +1: /* copy key to ctx */ + ld r0, X+ + st Z+, r0 + dec C1 + brne 1b + + lsr NK + lsr NK /* NK = key length in 32-bit words (4, 6 or 8) */ + bst NK,3 /* set T if NK==8 */ + mov NEXT_NK, NK + mov HI, NK + subi HI, -7 + lsl HI + lsl HI /* HI = 4*(NK+7): index one past the last word to generate */ + movw r26, r30 + sbiw r26, 4 /* X -> last word written so far (word i-1) */ + mov C1, NK /* C1 is the word index i */ + ldi r30, lo8(aes_sbox) + ldi r31, hi8(aes_sbox) + movw SBOX_SAVE0, r30 + ldi XRC, 1 /* first round constant */ +1: + ld T0, X+ + ld T1, X+ + ld T2, X+ + ld T3, X+ + cp NEXT_NK, C1 + breq 2f /* i == next_nk: rotate, substitute and add the round constant */ + brtc 5f /* the extra substitution below is only done when NK==8 */ + mov r16, C1 + andi r16, 0x07 + cpi r16, 0x04 + brne 5f + movw r30, SBOX_SAVE0 + add r30, T0 + adc r31, r1 /* assumes r1 == 0 (avr-gcc zero register): adds the carry only */ + lpm T0, Z + movw r30, SBOX_SAVE0 + add r30, T1 + adc r31, r1 + lpm T1, Z + movw r30, SBOX_SAVE0 + add r30, T2 + adc r31, r1 + lpm T2, Z + movw r30, SBOX_SAVE0 + add r30, T3 + adc r31, r1 + lpm T3, Z + rjmp 5f +2: + add NEXT_NK, NK + movw r30, SBOX_SAVE0 + add r30, T0 + adc r31, r1 + lpm r16, Z + movw r30, SBOX_SAVE0 + add r30, T1 + adc r31, r1 + lpm T0, Z + movw r30, SBOX_SAVE0 + add r30, T2 + adc r31, r1 + lpm T1, Z + movw r30, SBOX_SAVE0 + add r30, T3 + adc r31, r1 + lpm T2, Z + mov T3, r16 /* substituted bytes end up rotated by one position */ + eor T0, XRC + lsl XRC + brcc 3f + ldi XRC, 0x1b /* 0x80 shifted out: the constant sequence continues with 0x1b */ +3: +5: + movw r30, r26 /* X already advanced past word i-1, so Z -> word i */ + + ld r0, Y+ /* word i = word i-NK ^ tmp, one byte at a time */ + eor r0, T0 + st Z+, r0 + ld r0, Y+ + eor r0 ,T1 + st Z+, r0 + ld r0, Y+ + eor r0, T2 + st Z+, r0 + ld r0, Y+ + eor r0, T3 + st Z+, r0 + +/* + st Z+, T0 + st Z+, T1 + st Z+, T2 + st Z+, T3 +*/ + + inc C1 + cp C1, HI + breq 6f + rjmp 1b +6: + + clt /* drop the NK==8 marker again */ + pop r29 + pop r28 + pop_range 14, 17 + ret + + + + diff --git a/aes_keyschedule.c b/aes_keyschedule.c index 
d53c406..cd92c9c 100644 --- a/aes_keyschedule.c +++ b/aes_keyschedule.c @@ -1,7 +1,7 @@ /* aes_keyschedule.c */ /* This file is part of the Crypto-avr-lib/microcrypt-lib. - Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + Copyright (C) 2008, 2009 Daniel Otte (daniel.otte@rub.de) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -26,7 +26,6 @@ */ #include -#include "gf256mul.h" #include "aes.h" #include "aes_keyschedule.h" #include "aes_sbox.h" @@ -43,9 +42,13 @@ void aes_rotword(void* a){ ((uint8_t*)a)[3] = t; } +const uint8_t rc_tab[] PROGMEM = { 0x01, 0x02, 0x04, 0x08, /* round constants in flash, read via pgm_read_byte; const is required for PROGMEM data by current avr-gcc */ + 0x10, 0x20, 0x40, 0x80, + 0x1b, 0x36 }; + void aes_init(const void* key, uint16_t keysize_b, aes_genctx_t* ctx){ uint8_t hi,i,nk, next_nk; - uint8_t rc=1; + uint8_t rc=0; /* index of the next rc_tab entry */ uint8_t tmp[4]; nk=keysize_b>>5; /* 4, 6, 8 */ hi=4*(nk+6+1); @@ -67,8 +70,8 @@ void aes_init(const void* key, uint16_t keysize_b, aes_genctx_t* ctx){ tmp[1] = pgm_read_byte(aes_sbox+tmp[1]); tmp[2] = pgm_read_byte(aes_sbox+tmp[2]); tmp[3] = pgm_read_byte(aes_sbox+tmp[3]); - tmp[0] ^= rc; - rc = gf256mul(2,rc,0x1b); + tmp[0] ^= pgm_read_byte(rc_tab+rc); /* table lookup replaces the gf256mul(2,rc,0x1b) update */ + rc++; } ((uint32_t*)(ctx->key[0].ks))[i] = ((uint32_t*)(ctx->key[0].ks))[i-nk] ^ *((uint32_t*)tmp); diff --git a/mkfiles/aes.mk b/mkfiles/aes.mk index 1839453..47d1da6 100644 --- a/mkfiles/aes.mk +++ b/mkfiles/aes.mk @@ -1,14 +1,13 @@ # Makefile for AES -ALGO_NAME := AES_C +ALGO_NAME := AES # comment out the following line for removement of AES from the build process BLOCK_CIPHERS += $(ALGO_NAME) -$(ALGO_NAME)_OBJ := aes_enc-asm.o aes_dec.o aes_sbox.o aes_invsbox.o aes.o \ - aes_keyschedule-asm.o gf256mul.o \ - aes128_dec.o aes192_dec.o aes256_dec.o $(ALGO_NAME)_TEST_BIN := main-aes-test.o debug.o 
uart.o serial-tools.o \ nessie_bc_test.o nessie_common.o cli.o performance_test.o $(ALGO_NAME)_NESSIE_TEST := test nessie diff --git a/mkfiles/aes_c.mk b/mkfiles/aes_c.mk new file mode 100644 index 0000000..1839453 --- /dev/null +++ b/mkfiles/aes_c.mk @@ -0,0 +1,16 @@ +# Makefile for AES +ALGO_NAME := AES_C + +# comment out the following line for removement of AES from the build process +BLOCK_CIPHERS += $(ALGO_NAME) + + +$(ALGO_NAME)_OBJ := aes_enc.o aes_dec.o aes_sbox.o aes_invsbox.o aes.o \ + aes_keyschedule.o gf256mul.o \ + aes128_enc.o aes128_dec.o aes192_enc.o aes192_dec.o \ + aes256_enc.o aes256_dec.o +$(ALGO_NAME)_TEST_BIN := main-aes-test.o debug.o uart.o serial-tools.o \ + nessie_bc_test.o nessie_common.o cli.o performance_test.o +$(ALGO_NAME)_NESSIE_TEST := test nessie +$(ALGO_NAME)_PERFORMANCE_TEST := performance + diff --git a/test_src/main-aes-test.c b/test_src/main-aes-test.c index a051ce8..ba0e09c 100644 --- a/test_src/main-aes-test.c +++ b/test_src/main-aes-test.c @@ -94,7 +94,7 @@ void testrun_test_aes(void){ } -void testrun_testkey_aes(void){ +void testrun_testkey_aes128(void){ /* key schedule dump for the 128-bit key */ uint8_t key[16] = { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, @@ -113,6 +113,60 @@ void testrun_testkey_aes(void){ } } +void testrun_testkey_aes192(void){ /* expands a 192-bit key and dumps all 13 round keys over the UART */ + uint8_t key[24] = { 0x8e, 0x73, 0xb0, 0xf7, + 0xda, 0x0e, 0x64, 0x52, + 0xc8, 0x10, 0xf3, 0x2b, + 0x80, 0x90, 0x79, 0xe5, + 0x62, 0xf8, 0xea, 0xd2, + 0x52, 0x2c, 0x6b, 0x7b}; + aes192_ctx_t ctx; + uint8_t i; + memset(&ctx, 0, sizeof(aes192_ctx_t)); /* start from a zeroed context so every dumped byte comes from aes192_init */ + aes192_init(key, &ctx); + uart_putstr_P(PSTR("\r\n\r\n keyschedule test (FIPS 197):\r\n key: ")); + uart_hexdump(key, 24); + for(i=0; i<13; ++i){ + uart_putstr_P(PSTR("\r\n index: ")); + uart_putc('0'+i/10); /* index printed as two decimal digits */ + uart_putc('0'+i%10); + uart_putstr_P(PSTR(" roundkey ")); + uart_hexdump(ctx.key[i].ks, 16); + } +} + + +void testrun_testkey_aes256(void){ /* expands a 256-bit key and dumps all 15 round keys over the UART */ + uint8_t key[32] = { 0x60, 0x3d, 0xeb, 0x10, + 0x15, 0xca, 0x71, 0xbe, + 0x2b, 0x73, 0xae, 0xf0, 
+ 0x85, 0x7d, 0x77, 0x81, + 0x1f, 0x35, 0x2c, 0x07, + 0x3b, 0x61, 0x08, 0xd7, + 0x2d, 0x98, 0x10, 0xa3, + 0x09, 0x14, 0xdf, 0xf4}; + aes256_ctx_t ctx; + uint8_t i; + memset(&ctx, 0, sizeof(aes256_ctx_t)); /* start from a zeroed context so every dumped byte comes from aes256_init */ + aes256_init(key, &ctx); + uart_putstr_P(PSTR("\r\n\r\n keyschedule test (FIPS 197):\r\n key: ")); + uart_hexdump(key, 32); + for(i=0; i<15; ++i){ + uart_putstr_P(PSTR("\r\n index: ")); + uart_putc('0'+i/10); /* index printed as two decimal digits */ + uart_putc('0'+i%10); + uart_putstr_P(PSTR(" roundkey ")); + uart_hexdump(ctx.key[i].ks, 16); + } +} + +void testrun_testkey_aes(void){ /* runs the key schedule dumps for all three key sizes */ + testrun_testkey_aes128(); + testrun_testkey_aes192(); + testrun_testkey_aes256(); +} +/*****************************************************************************/ + void testrun_performance_aes128(void){ uint64_t t; char str[16];