Browse Source

new and more compact aes

master
bg nerilex 4 years ago
parent
commit
d9352fc79f
2 changed files with 392 additions and 0 deletions
  1. +185
    -0
      aes/aes_aleph_enc-asm.S
  2. +207
    -0
      aes/aes_aleph_keyschedule-asm.S

+ 185
- 0
aes/aes_aleph_enc-asm.S View File

@@ -0,0 +1,185 @@
/* aes_aleph_enc-asm.S */
/*
This file is part of the AVR-Crypto-Lib.
Copyright (C) 2006-2015 Daniel Otte (bg@nerilex.org)

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* \file aes_aleph_enc-asm.S
* \email bg@nerilex.org
* \author Daniel Otte
* \date 2009-01-10
* \license GPLv3 or later
*
*/

#include "avr-asm-macros.S"


/*
 * xtime: double the GF(2^8) element in r24.
 *
 * in:       r24 = value to double
 *           r27 = reduction constant; the caller in this file (mix_rows)
 *                 loads 0x1B into r27 before its first call
 * out:      r24 = (r24 << 1), XORed with r27 if bit 7 of the input was set
 * clobbers: SREG flags (lsl / eor)
 *
 * NOTE(review): a taken brcc costs 2 cycles while a non-taken brcc plus the
 * eor also costs 2, so both paths appear to take the same time. Keep this
 * instruction order unless the timing has been re-checked.
 */
xtime:
lsl r24 /* shift left; the old bit 7 ends up in the carry flag */
brcc 1f /* carry clear: no reduction needed */
eor r24, r27 /* carry set: fold the overflow back in with the constant in r27 */
1:
ret

/*
 * Destination offsets used by sub_shift_bytes, read sequentially with lpm.
 *
 * One line per pass of the outer loop in sub_shift_bytes. In pass r the
 * bytes state[r], state[r+4], state[r+8], state[r+12] are pushed in that
 * order and therefore popped in the reverse order; the four entries of line r
 * are the byte offsets (relative to the start of the state) at which the
 * popped bytes are stored, in pop order.
 * Example, second line: state[13]->9, state[9]->5, state[5]->1, state[1]->13.
 */
shift_offset_table:
.byte 12, 8, 4, 0
.byte 9, 5, 1, 13
.byte 6, 2, 14, 10
.byte 3, 15, 11, 7

/*
 * Fixed-key-size entry points.
 *
 * Each one only loads the round count into r20 and continues in
 * aes_encrypt_core; r22:r23 and r24:r25 are passed through untouched.
 * aes128_enc has no jump: it must stay directly in front of
 * aes_encrypt_core because it falls through into it.
 */
#define AES256_ROUNDS 14
#define AES192_ROUNDS 12
#define AES128_ROUNDS 10

.global aes256_enc
aes256_enc:
    ldi r20, AES256_ROUNDS
    rjmp aes_encrypt_core

.global aes192_enc
aes192_enc:
    ldi r20, AES192_ROUNDS
    rjmp aes_encrypt_core

.global aes128_enc
aes128_enc:
    ldi r20, AES128_ROUNDS

/*
void aes_encrypt_core(aes_cipher_state_t *state, const aes_genctx_t *ks, uint8_t rounds)

Transforms the 16 bytes at `state` in place: one initial round-key addition
followed by `rounds` rounds, each consuming the next 16 bytes of `ks`.
The last round skips the column mixing step.
*/
/*
* param state: r24:r25
* param ks: r22:r23
* param rounds: r20
*
* Register use inside the routine:
*   r3      remaining rounds (decremented once per round)
*   Y       pointer to the state
*   r22:r23 pointer to the round key used by the next key addition
*   T flag  cleared on entry, set once the last round has started
*
* External dependencies (not defined in this file):
*   memxor    - presumably memxor(dest, src, n) with dest in r24:r25, src in
*               r22:r23 and n in r20:r21; this routine additionally relies on
*               memxor leaving X just past the source and not changing T
*   aes_sbox  - 256-byte table in flash; indexed by replacing only the low
*               byte of Z, which assumes the table starts on a 256-byte
*               boundary (TODO confirm against its definition)
* Also assumes r1 == 0 (used as the zero operand of adc).
*/

.global aes_encrypt_core
aes_encrypt_core:
push r3
push r16
push r17
push r28
push r29
mov r3, r20 /* r3 = number of rounds still to do */
clt /* T = 0: last round not reached yet */
movw r28, r24 /* Y = state */
x:
/* top of the per-round loop: add the current round key to the state */
movw r24, r28 /* first memxor argument = state */
key_add:

clr r21
ldi r20, 16 /* r21:r20 = 16 bytes to xor */
call memxor
movw r22, r26 /* switch to next roundkey; r26 points after the end of src after memxor ;-) */

brtc sub_shift_bytes /* T clear: more rounds to do; T set: that was the final key addition */
4:
/* epilogue: restore registers in reverse push order */
pop r29
pop r28
pop r17
pop r16
pop r3
ret

sub_shift_bytes:
/*
 * Byte permutation of the state driven by shift_offset_table.
 * No S-box lookup happens here; the substitution is done in mix_rows.
 */
ldi r30, lo8(shift_offset_table)
ldi r31, hi8(shift_offset_table)
ldi r20, 4 /* 4 passes, one per group of bytes with equal index mod 4 */
movw r24, r28 /* r24:r25 = start of state, base for the store addresses */
1:
ldi r21, 4
2:
/* push state[r], state[r+4], state[r+8], state[r+12] */
ld r16, Y
adiw r28, 4
push r16
dec r21
brne 2b

ldi r21, 4
2:
/* pop them (reverse order) and store each at base + next table entry */
pop r16
movw r26, r24
lpm r0, Z+ /* r0 = destination offset */
add r26, r0
adc r27, r1 /* propagate the carry; r1 is assumed to be zero */
st X, r16
dec r21
brne 2b

sbiw r28, 15 /* Y advanced by 16 above; -15 leaves it one byte further than at the start of this pass */

dec r20
brne 1b

sbiw r28, 4 /* set Y back to the start of state */

dec r3
brne mix_rows
set /* round counter reached zero: this is the last round */

mix_rows:
/*
 * For each 4-byte group of the state: replace every byte by its aes_sbox
 * entry and, unless T is set, mix the four bytes:
 *   t = a0^a1^a2^a3;  a_i = a_i ^ t ^ xtime(a_i ^ a_(i+1 mod 4))
 */
ldi r31, hi8(aes_sbox) /* Z high byte selects the table; low byte is loaded per lookup */
ldi r27, 0x1B /* reduction constant consumed by xtime */
ldi r20, 4 /* 4 groups of 4 bytes */
1:
ldd r30, Y+0
lpm r16, Z /* r16 = sbox[a0] */
ldd r30, Y+1
lpm r17, Z /* r17 = sbox[a1] */
ldd r30, Y+2
lpm r18, Z /* r18 = sbox[a2] */
ldd r30, Y+3
lpm r19, Z /* r19 = sbox[a3] */

brts 2f /* last round: store the substituted bytes without mixing */
mov r26, r16 /* keep the original a0 for the a3 term below */

mov r24, r16
eor r24, r17 /* r24 = a0 ^ a1 */

mov r21, r24
eor r21, r18
eor r21, r19 /* r21 = t = a0 ^ a1 ^ a2 ^ a3 */

rcall xtime
eor r16, r24
eor r16, r21 /* a0 ^= xtime(a0^a1) ^ t */

mov r24, r17
eor r24, r18
rcall xtime
eor r17, r24
eor r17, r21 /* a1 ^= xtime(a1^a2) ^ t */

mov r24, r18
eor r24, r19
rcall xtime
eor r18, r24
eor r18, r21 /* a2 ^= xtime(a2^a3) ^ t */

mov r24, r19
eor r24, r26
rcall xtime
eor r19, r24
eor r19, r21 /* a3 ^= xtime(a3^a0_original) ^ t */
2:
st Y+, r16
st Y+, r17
st Y+, r18
st Y+, r19
dec r20
brne 1b
sbiw r28, 16 /* Y back to the start of state */
rjmp x /* next key addition (and, if T is set, the exit) */


+ 207
- 0
aes/aes_aleph_keyschedule-asm.S View File

@@ -0,0 +1,207 @@
/* aes_aleph_keyschedule-asm.S */
/*
This file is part of the AVR-Crypto-Lib.
Copyright (C) 2006-2015 Daniel Otte (bg@nerilex.org)

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* \file aes_aleph_keyschedule-asm.S
* \email bg@nerilex.org
* \author Daniel Otte
* \date 2009-01-09
* \license GPLv3 or later
*
*/

#include "avr-asm-macros.S"

/*
 * Fixed-key-size wrappers around aes_init.
 *
 * On entry r24:r25 and r22:r23 hold the two arguments. Each wrapper copies
 * r22:r23 to r20:r21, loads the key size in bits into r23:r22 and continues
 * in aes_init. aes128_init has no jump: it falls through, so nothing that
 * emits code may be placed between it and aes_init.
 */
#define AES256_KEY_BITS 256
#define AES192_KEY_BITS 192
#define AES128_KEY_BITS 128

.global aes256_init
aes256_init:
    movw r20, r22
    ldi r23, hi8(AES256_KEY_BITS)
    ldi r22, lo8(AES256_KEY_BITS)
    rjmp aes_init

.global aes192_init
aes192_init:
    movw r20, r22
    ldi r23, hi8(AES192_KEY_BITS)
    ldi r22, lo8(AES192_KEY_BITS)
    rjmp aes_init

.global aes128_init
aes128_init:
    movw r20, r22
    clr r23
    ldi r22, AES128_KEY_BITS

/*
void aes_init(const void *key, uint16_t keysize_b, aes_genctx_t *ctx){
uint8_t hi,i,nk, next_nk;
uint8_t rc=1;
uint8_t tmp[4];
nk=keysize_b>>5; / * 4, 6, 8 * /
hi=4*(nk+6+1);
memcpy(ctx, key, keysize_b/8);
next_nk = nk;
for(i=nk;i<hi;++i){
*((uint32_t*)tmp) = ((uint32_t*)(ctx->key[0].ks))[i-1];
if(i!=next_nk){
if(nk==8 && i%8==4){
tmp[0] = pgm_read_byte(aes_sbox+tmp[0]);
tmp[1] = pgm_read_byte(aes_sbox+tmp[1]);
tmp[2] = pgm_read_byte(aes_sbox+tmp[2]);
tmp[3] = pgm_read_byte(aes_sbox+tmp[3]);
}
} else {
next_nk += nk;
aes_rotword(tmp);
tmp[0] = pgm_read_byte(aes_sbox+tmp[0]);
tmp[1] = pgm_read_byte(aes_sbox+tmp[1]);
tmp[2] = pgm_read_byte(aes_sbox+tmp[2]);
tmp[3] = pgm_read_byte(aes_sbox+tmp[3]);
tmp[0] ^= rc;
rc = (rc&0x80) ? 0x1b : rc<<1;
}
((uint32_t*)(ctx->key[0].ks))[i] = ((uint32_t*)(ctx->key[0].ks))[i-nk]
^ *((uint32_t*)tmp);
}
}
*/

/*
 * Register aliases used by aes_init and substitute.
 * The values are plain register numbers (e.g. XRC = 17 means r17).
 */
SBOX_SAVE0 = 14 /* not referenced by any instruction in this file */
SBOX_SAVE1 = 15 /* not referenced by any instruction in this file */
XRC = 17 /* round constant XORed into the first byte of the temporary word */
NK = 22 /* key length in 32-bit words; same register as the low byte of keysize_b */
C1 = 18 /* byte counter for the key copy, then index of the word being generated */
NEXT_NK = 19 /* word index at which the next rotate/substitute/XRC step is due */
HI = 23 /* loop bound: 4*(NK+7) words; same register as the high byte of keysize_b */
T0 = 20 /* T0..T3: the four bytes of the temporary word */
T1 = 21
T2 = 24
T3 = 25
/*
 * void aes_init(const void *key, uint16_t keysize_b, aes_genctx_t *ctx)
 *
 * Copies the keysize_b/8 key bytes to ctx and then appends generated 32-bit
 * words until 4*(NK+7) words are present, where NK = keysize_b/32.
 *
 * param key: r24:r25
 * param keysize_b: r22:r23
 * param ctx: r20:r21
 *
 * Pointer roles in the expansion loop (C1 = index i of the word being made):
 *   X -> word i-1 when loaded, word i afterwards
 *   Y -> word i-NK
 *   Z -> scratch (table pointer inside substitute, store pointer afterwards)
 *
 * Only r16, r17, r28 and r29 of the callee-saved registers are written, so
 * only those are saved. The T flag is used as the "NK == 8" marker and is
 * cleared before returning.
 */
.global aes_init
aes_init:
    push_range 16, 17
    push r28
    push r29
    movw r30, r20               /* Z = ctx (copy destination) */
    movw r28, r20               /* Y = ctx (will track word i-NK) */
    movw r26, r24               /* X = key (copy source) */
    lsr r23
    ror r22
    lsr r22
    lsr r22                     /* r22 contains keysize_b/8 */
    mov C1, r22

1:  /* copy key to ctx */
    ld r0, X+
    st Z+, r0
    dec C1
    brne 1b
    lsr NK
    lsr NK
    /* NK is now the number of 32-bit words in the supplied key */
    bst NK, 3                   /* set T if NK==8 */
    mov NEXT_NK, NK
    mov HI, NK
    subi HI, -7                 /* HI += 7 */
    lsl HI
    lsl HI                      /* HI = 4*(NK+7) */
    movw r26, r30
    sbiw r26, 4                 /* X = last word of the copied key */
    mov C1, NK
    ldi XRC, 1
1:  /* generate word number C1 */
    ld T0, X+
    ld T1, X+
    ld T2, X+
    ld T3, X+                   /* T0..T3 = previous word; X now at word C1 */
    cp NEXT_NK, C1
    breq 2f
    brtc 5f                     /* NK != 8: plain word */
    mov r16, C1
    andi r16, 0x07
    cpi r16, 0x04
    brne 5f
    rcall substitute            /* NK == 8 and C1 % 8 == 4: substitute only */
    rjmp 5f
2:  /* C1 == NEXT_NK: substitute, rotate by one byte, add round constant */
    add NEXT_NK, NK
    rcall substitute
    mov r16, T0
    mov T0, T1
    mov T1, T2
    mov T2, T3
    mov T3, r16
    eor T0, XRC
    lsl XRC
    brcc 3f
    ldi XRC, 0x1b               /* constant overflowed 8 bits: wrap to 0x1b */
3:
5:
    movw r30, r26               /* Z = address of word C1 */

    /* word[C1] = word[C1-NK] ^ temporary word */
    ld r0, Y+
    eor r0, T0
    st Z+, r0
    ld r0, Y+
    eor r0, T1
    st Z+, r0
    ld r0, Y+
    eor r0, T2
    st Z+, r0
    ld r0, Y+
    eor r0, T3
    st Z+, r0

    inc C1
    cp C1, HI
    brne 1b                     /* more words to generate */

    clt
    pop r29
    pop r28
    pop_range 16, 17
    ret
/*
 * substitute: replace each of T0..T3 by its aes_sbox entry.
 *
 * in/out:   T0, T1, T2, T3
 * clobbers: r30, r31 (Z)
 *
 * Only the low byte of Z is changed per lookup, so this assumes aes_sbox
 * (defined elsewhere) starts on a 256-byte boundary - TODO confirm.
 */
.macro aes_ks_sbox_byte reg
    mov r30, \reg
    lpm \reg, Z
.endm

substitute:
    ldi r31, hi8(aes_sbox)
    aes_ks_sbox_byte T0
    aes_ks_sbox_byte T1
    aes_ks_sbox_byte T2
    aes_ks_sbox_byte T3
    ret

Loading…
Cancel
Save