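/*
 * avr-crypto-lib/cscipher/cscipher_tiny_asm.S
 *
 * Repository listing metadata: 400 lines, 6.6 KiB, highlighted as "ArmAsm".
 * The code below is AVR assembly in GNU as syntax, not ARM assembly.
 */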

/* cscipher_tiny_asm.S */
/*
This file is part of the AVR-Crypto-Lib.
Copyright (C) 2006-2010 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "avr-asm-macros.S"
/*
uint8_t p(uint8_t a){
a ^= pgm_read_byte(fg_table+(a&0xf))&0xf0;
a ^= pgm_read_byte(fg_table+(a>>4)) &0x0f;
a ^= pgm_read_byte(fg_table+(a&0xf))&0xf0;
return a;
}
*/
/*
 * fg_table: 16-byte lookup table used only by p().
 * p() reads it from program memory with lpm, indexing it with a 4-bit
 * nibble of its argument, and keeps either the high nibble (mask 0xf0)
 * or the low nibble (mask 0x0f) of the fetched entry.
 */
fg_table:
.byte 0xfa, 0xd6, 0xb0, 0xb2, 0x7b, 0x5e, 0x71, 0x78
.byte 0xed, 0xd4, 0xa5, 0xb3, 0xef, 0xdc, 0xe7, 0xf9
/*
 * uint8_t p(uint8_t a)
 *
 * Three chained nibble lookups through fg_table (program memory):
 *   a ^= fg_table[a & 0x0f] & 0xf0;
 *   a ^= fg_table[a >> 4]   & 0x0f;
 *   a ^= fg_table[a & 0x0f] & 0xf0;
 *
 * In:      r24 = a; r1 must hold 0 (it is the zero operand of adc)
 * Out:     r24 = result, r25 = 0
 * Clobber: r26:r27 (left holding the address of fg_table), r30:r31, SREG
 */

/*
 * One lookup step of p().
 *   use_high_nibble  0: index the table with the low nibble of r24
 *                    1: index the table with the high nibble of r24
 *   keep_mask        bits of the fetched entry that are xored into r24
 * Expects X = address of fg_table; leaves Z = address of the entry read.
 */
.macro p_fg_step use_high_nibble, keep_mask
	movw r30, r26            /* Z = table base */
	mov r25, r24
.if \use_high_nibble
	swap r25                 /* bring the high nibble down */
.endif
	andi r25, 0x0F           /* r25 = 4-bit index */
	add r30, r25
	adc r31, r1              /* Z = &fg_table[index] */
	lpm r25, Z
	andi r25, \keep_mask
	eor r24, r25
.endm

.global p
p:
	ldi r26, lo8(fg_table)
	ldi r27, hi8(fg_table)   /* X keeps the table base for all three steps */
	p_fg_step 0, 0xF0
	p_fg_step 1, 0x0F
	p_fg_step 0, 0xF0
	clr r25                  /* high byte of the return value */
	ret
/*
 * ks_const: key-schedule constants, 9 rows of 8 bytes.
 * cscipher_init xors row CNT (CNT = 0..8, byte offset CNT*8) into its
 * 8-byte work buffer via memxor_P, i.e. the table is read from program
 * memory.
 */
ks_const:
.byte 0x29,0x0d,0x61,0x40,0x9c,0xeb,0x9e,0x8f
.byte 0x1f,0x85,0x5f,0x58,0x5b,0x01,0x39,0x86
.byte 0x97,0x2e,0xd7,0xd6,0x35,0xae,0x17,0x16
.byte 0x21,0xb6,0x69,0x4e,0xa5,0x72,0x87,0x08
.byte 0x3c,0x18,0xe6,0xe7,0xfa,0xad,0xb8,0x89
.byte 0xb7,0x00,0xf7,0x6f,0x73,0x84,0x11,0x63
.byte 0x3f,0x96,0x7f,0x6e,0xbf,0x14,0x9d,0xac
.byte 0xa4,0x0e,0x7e,0xf6,0x20,0x4a,0x62,0x30
.byte 0x03,0xc5,0x4b,0x5a,0x46,0xa3,0x44,0x65
/*
 * cscipher_init - derive the 9 x 8-byte round keys from a 16-byte key.
 *
 * In:   r24:r25 = pointer to the 16-byte key (read only)
 *       r22:r23 = pointer to the context; 72 bytes are written to it
 *       r1 must hold 0.
 *       (presumably void cscipher_init(const void *key, cscipher_ctx_t *ctx);
 *        confirm against the C header)
 * Out:  nothing
 *
 * Stack frame (24 bytes, addressed through Y):
 *       Y+0  .. Y+15  tmp_key, working copy of the key (two 8-byte halves)
 *       Y+16 .. Y+23  tmp, the 8-byte work buffer of the current round
 *
 * For CNT = 0..8:
 *       tmp   = (CNT odd) ? tmp_key[8..15] : tmp_key[0..7]
 *       tmp  ^= ks_const[CNT]
 *       tmp[i] = p(tmp[i])                          for i = 0..7
 *       t      = bit transpose of tmp (see the T transformation below)
 *       other half of tmp_key ^= t; those 8 bytes are also appended to ctx
 *
 * Register use: the running ctx write pointer must survive the external
 * call to memxor_P, so it is kept in r14:r15. Those are call-saved in the
 * avr-gcc ABI, whereas r18..r27 and r30:r31 may be clobbered by any callee.
 * r14, r15, r17 and Y are saved and restored here.
 */
CTX_0 = 14
CTX_1 = 15
CNT = 17
.global cscipher_init
cscipher_init:
	push_range 14, 15
	push CNT
	push_range 28, 29
	stack_alloc 24, 28, 29
	adiw r28, 1              /* Y = first byte of the frame (SP points one below it) */
	movw r30, r24            /* Z = key */
	movw CTX_0, r22          /* ctx write pointer */
	/* copy key to local tmp_key */
	ldi r22, 16
5:	ld r23, Z+
	st Y+, r23
	dec r22
	brne 5b
	sbiw r28, 16             /* Y points at tmp_key */
	ldi CNT, 0xff            /* becomes 0 on the first inc below */
10:	/* main loop, one pass per round key */
	inc CNT
	/* copy part of tmp_key to tmp: low half for even CNT, high half for odd */
	ldi r23, 8
11:	ldd r22, Y+0
	sbrc CNT, 0
	ldd r22, Y+8
	std Y+16, r22
	adiw r28, 1
	dec r23
	brne 11b
	adiw r28, 8              /* Y points at tmp */
	/* xor ks constant into tmp: memxor_P(tmp, ks_const + CNT*8, 8) */
	movw r24, r28
	ldi r22, lo8(ks_const)
	ldi r23, hi8(ks_const)
	mov r21, CNT
	swap r21                 /* CNT <= 8, so this is CNT * 16 */
	lsr r21                  /* CNT * 8 */
	add r22, r21
	adc r23, r1
	clr r21                  /* r21:r20 = length 8 */
	ldi r20, 8
	call memxor_P
	/* do P transformation: tmp[i] = p(tmp[i]); p leaves r22 untouched */
	ldi r22, 8
20:	ld r24, Y
	rcall p
	st Y+, r24
	dec r22
	brne 20b
	sbiw r28, 8              /* Y points at tmp */
	/* X = the tmp_key half that was NOT copied into tmp */
	movw r26, r28
	sbiw r26, 8              /* even CNT: tmp_key + 8 */
	sbrc CNT, 0
	sbiw r26, 8              /* odd CNT: tmp_key + 0 */
	/*
	 * do T transformation: each outer pass shifts every tmp byte left by
	 * one and collects the eight shifted-out top bits in r21 (tmp[0]'s bit
	 * ends up in bit 7). The initial contents of r21 and the bits shifted
	 * into tmp are don't-cares: r21 is fully replaced after eight rol's,
	 * and a shifted-in bit never reaches bit 7 within the eight passes.
	 */
	movw r30, CTX_0
	ldi r22, 8
30:	ldi r23, 8
35:	ld r24, Y
	rol r24
	rol r21
	st Y+, r24
	dec r23
	brne 35b
	sbiw r28, 8              /* Y points at tmp */
	ld r24, X
	eor r21, r24
	st X+, r21               /* update tmp_key half */
	st Z+, r21               /* emit round-key byte */
	dec r22
	brne 30b
	sbiw r28, 16             /* Y points at tmp_key (again) */
	movw CTX_0, r30          /* remember where the next round key goes */
	sbrs CNT, 3              /* finished once CNT == 8 (9 round keys written) */
	rjmp 10b
	stack_free 24
	pop_range 28, 29
	pop CNT
	pop_range 14, 15
	ret
/*
 * round_const: two rows of 8 bytes in program memory.
 * cscipher_enc reads a row with lpm and cscipher_dec passes a row to
 * memxor_P; the row at offset 0 or at offset 8 is selected per sub-round
 * (see the C reference code in the comments next to those functions).
 */
round_const:
.byte 0xb7, 0xe1, 0x51, 0x62, 0x8a, 0xed, 0x2a, 0x6a
.byte 0xbf, 0x71, 0x58, 0x80, 0x9c, 0xf4, 0xf3, 0xc7
/*
void cscipher_enc(void *buffer, const cscipher_ctx_t *ctx){
uint8_t i,j,k;
uint8_t tmp[8];
for(i=0; i<8; ++i){
for(j=0; j<3; ++j){
if(j==0){
memxor(buffer, ctx->keys[i], 8);
}else{
memxor_P(buffer, round_const+((j==1)?0:8), 8);
}
for(k=0; k<4; ++k){
((uint16_t*)tmp)[k] = m(((uint16_t*)buffer)[k]);
}
for(k=0; k<4; ++k){
((uint8_t*)buffer)[k] = tmp[2*k];
((uint8_t*)buffer)[k+4] = tmp[2*k+1];
}
}
}
memxor(buffer, ctx->keys[8], 8);
}
*/
/*
 * Register aliases shared by cscipher_enc and cscipher_dec.
 * The values are register numbers. CTX_0/CTX_1 re-assign symbols that
 * cscipher_init used earlier in this file; the values below apply to the
 * code that follows.
 */
/* r2..r9: the 8-byte tmp[] block; also written through data addresses 2..9 */
TMP_0 = 2
TMP_1 = 3
TMP_2 = 4
TMP_3 = 5
TMP_4 = 6
TMP_5 = 7
TMP_6 = 8
TMP_7 = 9
/* r10:r11: pointer to the round key currently in use inside the context */
CTX_0 = 10
CTX_1 = 11
/* r16: outer (round) counter, r17: inner (sub-round) counter */
CNT_0 = 16
CNT_1 = 17
/* r12:r13: saved write pointer into tmp[] (X is clobbered by p) */
DST_0 = 12
DST_1 = 13
/* r14:r15: saved read pointer into key / round constant (Z is clobbered by p) */
SRC_0 = 14
SRC_1 = 15
/*
 * cscipher_enc - encrypt one 8-byte block in place (C reference: see the
 * comment above).
 *
 * In:   r24:r25 = buffer (8 bytes, read and written)
 *       r22:r23 = ctx (round keys, 8 bytes each, read sequentially)
 *       r1 must hold 0.
 *
 * Structure: 8 rounds (CNT_0) of 3 sub-rounds (CNT_1 = 2, 1, 0). Sub-round
 * CNT_1 == 2 xors the next round key (read from RAM with ld), CNT_1 == 1
 * xors round_const+0 and CNT_1 == 0 xors round_const+8 (both read with lpm).
 * The T flag selects ld (T clear) or lpm (T set). A final memxor with the
 * following 8 key bytes is done as a tail call.
 *
 * The results of a sub-round are stored through X at data addresses 2..9
 * and afterwards read back as registers r2..r9 (TMP_0..TMP_7). This only
 * works where the register file is mapped into data space at 0x00..0x1f.
 *
 * push_range / pop_range come from avr-asm-macros.S; presumably they
 * push / pop the inclusive register range given.
 */
.global cscipher_enc
cscipher_enc:
/* save the call-saved registers used below (r2..r17 and Y) */
push_range 2, 17
push_range 28, 29
movw r28, r24 /* Y = buffer */
movw CTX_0, r22 /* CTX = first round key */
ldi CNT_0, 8 /* 8 rounds */
/* main loop: one round = 3 sub-rounds */
10: ldi CNT_1, 2
clt /* T = 0: first sub-round reads the key from RAM */
/* sub loop */
20: ldi r27, 0
ldi r26, TMP_0 /* X = data address 2, i.e. register r2 (TMP_0) */
movw DST_0, r26
/* select the 8 bytes to xor in: Z = round_const, round_const+8 or CTX */
ldi r30, lo8(round_const)
ldi r31, hi8(round_const)
sbrs CNT_1, 0 /* CNT_1 == 1: keep round_const+0 */
adiw r30, 8 /* CNT_1 == 0 (or 2): round_const+8 */
sbrc CNT_1, 1 /* CNT_1 == 2: override with the round key */
movw r30, CTX_0
movw SRC_0, r30
ldi r21, 4 /* 4 byte pairs per sub-round */
/* xor and m transformation */
25: ld r24, Y+ /* r24 = buffer[2k] */
ld r25, Y+ /* r25 = buffer[2k+1] */
movw r30, SRC_0
brts 30f /* T set: source is in program memory */
ld r22, Z+
ld r23, Z+
rjmp 35f
30: lpm r22, Z+
lpm r23, Z+
35:
movw SRC_0, r30 /* p clobbers Z, keep the advanced source pointer */
eor r24, r22
eor r25, r23
movw r22, r24 /* r22 = lo, r23 = hi of the xored pair */
/*
 * r25 = lo rotated left by one bit. rol shifts the incoming carry into
 * bit 0, so this is a pure rotate only when C is clear here.
 * NOTE(review): C appears to be always clear at this point (the last
 * carry-affecting instruction is an adiw/sbiw on a pointer or the adc
 * inside p) - confirm before reordering anything above.
 */
mov r25, r22
rol r25
adc r25, r1
mov r22, r25
andi r22, 0x55
eor r22, r24
eor r22, r23 /* r22 = (rotl(lo) & 0x55) ^ lo ^ hi */
eor r23, r25 /* r23 = hi ^ rotl(lo) */
mov r24, r23
rcall p /* p preserves r21..r23; clobbers r25, X, Z */
mov r23, r24 /* r23 = p(hi ^ rotl(lo)) */
mov r24, r22
rcall p /* r24 = p((rotl(lo) & 0x55) ^ lo ^ hi) */
movw r26, DST_0
st X+, r24 /* tmp[2k] */
st X+, r23 /* tmp[2k+1] */
movw DST_0, r26
dec r21
brne 25b
sbrc CNT_1, 1 /* key sub-round: advance CTX to the next round key */
movw CTX_0, SRC_0
sbiw r28, 8 /* Y back at the start of buffer */
/* buffer[k] = tmp[2k], buffer[k+4] = tmp[2k+1] */
std Y+0, TMP_0
std Y+4, TMP_1
std Y+1, TMP_2
std Y+5, TMP_3
std Y+2, TMP_4
std Y+6, TMP_5
std Y+3, TMP_6
std Y+7, TMP_7
set /* T = 1: remaining sub-rounds read round_const with lpm */
dec CNT_1
brpl 20b /* run for CNT_1 = 2, 1, 0 */
dec CNT_0
brne 10b
/* final key addition: memxor(buffer, CTX, 8) as a tail call */
movw r24, r28
movw r22, CTX_0
clr r21
ldi r20, 8
pop_range 28, 29
pop_range 2, 17
rjmp memxor
/*
void cscipher_dec(void *buffer, const cscipher_ctx_t *ctx){
uint8_t i=7,j,k;
uint8_t tmp[8];
memxor(buffer, ctx->keys[8], 8);
do{
for(j=0; j<3; ++j){
for(k=0; k<4; ++k){
tmp[2*k] = ((uint8_t*)buffer)[k];
tmp[2*k+1] = ((uint8_t*)buffer)[4+k];
}
for(k=0; k<4; ++k){
((uint16_t*)buffer)[k] = m_inv(((uint16_t*)tmp)[k]);
}
if(j==2){
memxor(buffer, ctx->keys[i], 8);
}else{
memxor_P(buffer, round_const+((j==1)?0:8), 8);
}
}
}while(i--);
}
*/
/*
 * cscipher_dec - decrypt one 8-byte block in place (C reference: see the
 * comment above).
 *
 * In:   r24:r25 = buffer (8 bytes, read and written)
 *       r22:r23 = ctx (round keys, 8 bytes each)
 *       r1 must hold 0.
 *
 * Structure: xor the key at ctx+64 into the buffer, then 8 rounds (CNT_0 =
 * 7..0) of 3 sub-rounds (CNT_1 = 3, 2, 1). Each sub-round un-permutes the
 * buffer, applies the inverse m transformation to the 4 byte pairs and
 * then xors in round_const+8 (CNT_1 == 3), round_const+0 (CNT_1 == 2) or
 * the previous round key (CNT_1 == 1, CTX is stepped back by 8 first).
 *
 * The results of a sub-round are stored through X at data addresses 2..9
 * and afterwards read back as registers r2..r9 (TMP_0..TMP_7). This only
 * works where the register file is mapped into data space at 0x00..0x1f.
 *
 * Everything that must survive the calls to memxor / memxor_P (Y, CTX,
 * CNT_0, CNT_1) lives in r10..r17 and r28:r29, which are call-saved in the
 * avr-gcc ABI.
 */
.global cscipher_dec
cscipher_dec:
/* save the call-saved registers used below (r2..r17 and Y) */
push_range 2, 17
push_range 28, 29
movw r28, r24 /* Y = buffer */
movw r26, r22
/* X = ctx + 64; split in two because adiw takes an immediate of 0..63 */
adiw r26, 7*8
adiw r26, 8
movw CTX_0, r26
/* memxor(buffer, ctx + 64, 8); r24:r25 still hold buffer */
movw r22, r26
clr r21
ldi r20, 8
call memxor
ldi CNT_0, 7
10:
ldi CNT_1, 3
20:
clr r27
ldi r26, TMP_0 /* X = data address 2, i.e. register r2 (TMP_0) */
movw DST_0, r26
ldi r21, 4 /* 4 byte pairs per sub-round */
30:
ldd r23, Y+4 /* r23 = buffer[k+4] (hi byte of the pair) */
ld r24, Y+ /* r24 = buffer[k] (lo byte of the pair) */
/* m_inv transformation */
; mov r23, r25
rcall p /* p preserves r21..r23; clobbers r25, X, Z */
mov r22, r24 /* r22 = p(lo) */
mov r24, r23
rcall p /* r24 = p(hi) */
eor r22, r24 /* r22 = p(lo) ^ p(hi) */
mov r25, r24 /* r25 = p(hi) */
/*
 * rol followed by adc with r1 is used as an 8-bit rotate left. rol shifts
 * the incoming carry into bit 0, so this holds only when C is clear.
 * NOTE(review): C appears to be clear at both rol's (after the adc inside
 * p, and after the first adc below, which cannot carry) - confirm before
 * reordering.
 */
mov r24, r22
rol r24
adc r24, r1
andi r24, 0xaa
eor r24, r22 /* r24 = (rotl(r22) & 0xaa) ^ r22: first output byte */
mov r22, r24
rol r22
adc r22, r1
eor r25, r22 /* r25 = p(hi) ^ rotl(r24): second output byte */
movw r26, DST_0
st X+, r24 /* tmp[2k] */
st X+, r25 /* tmp[2k+1] */
movw DST_0, r26
dec r21
brne 30b
sbiw r28, 4 /* Y back at the start of buffer */
/* write the 8 result bytes back in order */
std Y+0, TMP_0
std Y+1, TMP_1
std Y+2, TMP_2
std Y+3, TMP_3
std Y+4, TMP_4
std Y+5, TMP_5
std Y+6, TMP_6
std Y+7, TMP_7
/* common arguments for memxor / memxor_P: dest = buffer, length = 8 */
movw r24, r28
clr r21
ldi r20, 8
sbrc CNT_1, 1 /* CNT_1 == 3 or 2: xor a round constant instead */
rjmp 40f
/* CNT_1 == 1: step CTX back to the previous round key and xor it in */
movw r26, CTX_0
sbiw r26, 8
movw CTX_0, r26
movw r22, r26
call memxor
rjmp 45f
40:
ldi r26, lo8(round_const)
ldi r27, hi8(round_const)
sbrc CNT_1, 0 /* CNT_1 == 2: keep round_const+0 */
adiw r26, 8 /* CNT_1 == 3: round_const+8 */
movw r22, r26
call memxor_P
45:
dec CNT_1
brne 20b /* run for CNT_1 = 3, 2, 1 */
dec CNT_0
brpl 10b /* run for CNT_0 = 7..0 */
/* label 90 is not referenced anywhere in this function */
90:
pop_range 28, 29
pop_range 2, 17
ret