/*
 * avr-crypto-lib/skein/threefish512_dec_asm.S
 * (337 lines, 6.3 KiB)
 */

/* threefish512_dec_asm.S */
/*
This file is part of the AVR-Crypto-Lib.
Copyright (C) 2006-2015 Daniel Otte (bg@nerilex.org)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* \author Daniel Otte
* \email bg@nerilex.org
* \date 2009-03-24
* \license GPLv3 or later
*/
#include "avr-asm-macros.S"
/******************************************************************************/
/*
#define X(a) (((uint64_t*)data)[(a)])
static
void permute_inv8(void *data){
uint64_t t;
t = X(6);
X(6) = X(4);
X(4) = X(2);
X(2) = X(0);
X(0) = t;
t = X(7);
X(7) = X(3);
X(3) = t;
}
static
void add_key_8(void *data, const threefish512_ctx_t *ctx, uint8_t s){
uint8_t i;
for(i=0; i<5; ++i){
X(i) -= ctx->k[(s+i)%9];
}
X(5) -= ctx->k[(s+5)%9] + ctx->t[s%3];
X(6) -= ctx->k[(s+6)%9] + ctx->t[(s+1)%3];
X(7) -= ctx->k[(s+7)%9] + s;
}
void threefish512_dec(void *data, const threefish512_ctx_t *ctx){
uint8_t i=0,s=18;
uint8_t r0[8] = {0x41, 0x4b, 0x59, 0x41, 0x32, 0x42, 0x60, 0x5a};
uint8_t r1[8] = {0x63, 0x32, 0x33, 0x61, 0x14, 0x2a, 0x24, 0x4a};
uint8_t r2[8] = {0x59, 0x13, 0x51, 0x10, 0x72, 0x29, 0x53, 0x62};
uint8_t r3[8] = {0x43, 0x11, 0x2a, 0x52, 0x19, 0x33, 0x49, 0x7b};
do{
if(i%4==0){
add_key_8(data, ctx, s);
--s;
}
permute_inv8(data);
threefish_invmix((uint8_t*)data + 0, r0[i%8]);
threefish_invmix((uint8_t*)data + 16, r1[i%8]);
threefish_invmix((uint8_t*)data + 32, r2[i%8]);
threefish_invmix((uint8_t*)data + 48, r3[i%8]);
++i;
}while(i!=72);
add_key_8(data, ctx, s);
}
*/
/*
 * Register aliases used by threefish512_dec. Each value is an AVR register
 * number (e.g. I = 2 means r2). Pairs are listed low byte first because they
 * are moved with movw.
 */
I = 2 /* round counter: cleared on entry, incremented once per round */
S = 3 /* subkey number: starts at 18, decremented after each key subtraction */
DATA0 = 4 /* r4:r5 = copy of the data pointer argument (low byte) */
DATA1 = 5 /* high byte of the data pointer */
CTX0 = 6 /* r6:r7 = copy of the ctx pointer argument (low byte) */
CTX1 = 7 /* high byte of the ctx pointer */
IDX0 = 8 /* IDX0..IDX7: byte offsets / constants fetched from flash via lpm */
IDX1 = 9
IDX2 = 10 /* r10:r11 is also used as a 16-bit scratch pointer (movw IDX2, r30) */
IDX3 = 11
IDX4 = 12
IDX5 = 13
IDX6 = 14
IDX7 = 15
/*
 * void threefish512_dec(void *data, const threefish512_ctx_t *ctx)
 *
 * Decrypts the 64-byte block at `data` in place (see the C reference in the
 * comment block earlier in this file).
 *
 * param data: r24:r25
 * param ctx:  r22:r23
 *
 * ctx layout as used here: key words at ctx + 0 (8 bytes each, indexed
 * through threefish512_slut9), tweak words at ctx + 9*8 (indexed through
 * threefish512_slut3).
 *
 * Register use: see the I/S/DATA/CTX/IDX aliases. r28, r29 and the range
 * handed to push_range (macro from avr-asm-macros.S, presumably r2..r17)
 * are saved on entry and restored on exit.
 * Relies on r1 == 0 for every `adc`/`sbc ..., r1` (sub_z_from_x8 clears r1
 * again before returning).
 *
 * Loop structure:
 *   1: every 4th round (I & 3 == 0) subtract subkey S from the data;
 *      if S was 0 this was the last subkey -> return, else --S
 *   4: inverse word permutation, four inverse mixes, ++I, back to 1
 */
.global threefish512_dec
threefish512_dec:
    push r28
    push r29
    push_range 2, 17
    movw DATA0, r24
    movw CTX0, r22
    clr I
    ldi r26, 18                 /* S cannot be loaded with ldi (r3 < r16) */
    mov S, r26
1:
    /* subkey is applied only when I is a multiple of 4 */
    mov r30, I
    andi r30, 0x03
    breq 2f
    rjmp 4f                     /* target is out of conditional-branch range */
2:
    /* IDX0..IDX7 = byte offsets of the key words (S+0)%9 .. (S+7)%9 */
    ldi r30, lo8(threefish512_slut9)
    ldi r31, hi8(threefish512_slut9)
    add r30, S
    adc r31, r1
    lpm IDX0, Z+
    lpm IDX1, Z+
    lpm IDX2, Z+
    lpm IDX3, Z+
    lpm IDX4, Z+
    lpm IDX5, Z+
    lpm IDX6, Z+
    lpm IDX7, Z

    /* X walks over data words 0..7; each call subtracts one key word and
       advances X by 8 */
    movw r30, CTX0
    movw r26, DATA0
    add r30, IDX0
    adc r31, r1
    rcall sub_z_from_x8
    movw r30, CTX0
    add r30, IDX1
    adc r31, r1
    rcall sub_z_from_x8
    movw r30, CTX0
    add r30, IDX2
    adc r31, r1
    rcall sub_z_from_x8
    movw r30, CTX0
    add r30, IDX3
    adc r31, r1
    rcall sub_z_from_x8
    movw r30, CTX0
    add r30, IDX4
    adc r31, r1
    rcall sub_z_from_x8
    movw r30, CTX0
    add r30, IDX5
    adc r31, r1
    rcall sub_z_from_x8
    movw r30, CTX0
    add r30, IDX6
    adc r31, r1
    rcall sub_z_from_x8
    movw r30, CTX0
    add r30, IDX7
    adc r31, r1
    rcall sub_z_from_x8

    /* now the remaining key: tweak words on data words 5 and 6, then the
       subkey number on data word 7 */
    sbiw r26, 3*8               /* X = data + 64 - 24 = data word 5 */
    ldi r30, lo8(threefish512_slut3)
    ldi r31, hi8(threefish512_slut3)
    add r30, S
    adc r31, r1
    lpm IDX0, Z+                /* offset of tweak word S%3 */
    lpm IDX1, Z                 /* offset of tweak word (S+1)%3 */
    movw r30, CTX0
    adiw r30, 7*8               /* make Z pointing to (extended tweak) */
    adiw r30, 2*8               /* split in two: adiw immediate is limited to 63 */
    movw IDX2, r30              /* keep the tweak base in IDX2:IDX3 */
    add r30, IDX0
    adc r31, r1
    rcall sub_z_from_x8         /* data word 5 -= tweak[S%3] */
    movw r30, IDX2
    add r30, IDX1
    adc r31, r1
    rcall sub_z_from_x8         /* data word 6 -= tweak[(S+1)%3] */

    /* data word 7 -= S (64-bit subtract, borrow rippled through with r1 == 0) */
    ld r0, X
    sub r0, S
    st X+, r0
    ld r0, X
    sbc r0, r1
    st X+, r0
    ld r0, X
    sbc r0, r1
    st X+, r0
    ld r0, X
    sbc r0, r1
    st X+, r0
    ld r0, X
    sbc r0, r1
    st X+, r0
    ld r0, X
    sbc r0, r1
    st X+, r0
    ld r0, X
    sbc r0, r1
    st X+, r0
    ld r0, X
    sbc r0, r1
    st X+, r0

    /* subkey 0 was the final one: restore registers and return */
    tst S
    brne 3f
exit:
    pop_range 2, 17
    pop r29
    pop r28
    ret
3:
    dec S
4:
    /* now the permutation: three exchanges rotate words 0,2,4,6
       (0<-6, 6<-4, 4<-2, 2<-0), one exchange swaps words 3 and 7 */
    movw r26, DATA0
    movw r30, DATA0
    adiw r30, 6*8
    rcall xchg_zx8              /* word 0 <-> word 6 */
    movw r26, DATA0
    adiw r26, 6*8
    movw r30, DATA0
    adiw r30, 4*8
    rcall xchg_zx8              /* word 6 <-> word 4 */
    movw r26, DATA0
    adiw r26, 2*8
    movw r30, DATA0
    adiw r30, 4*8
    rcall xchg_zx8              /* word 2 <-> word 4 */
    movw r26, DATA0
    adiw r26, 3*8
    movw r30, DATA0
    adiw r30, 7*8
    rcall xchg_zx8              /* word 3 <-> word 7 */

    /* call mix: fetch the four constants for round I%8. The rc0..rc3 tables
       are laid out back to back, 8 bytes apart. */
    ldi r30, lo8(threefish512_rc0)
    ldi r31, hi8(threefish512_rc0)
    mov r26, I
    andi r26, 0x07
    add r30, r26
    adc r31, r1
    lpm r22, Z                  /* rc0[I%8] -> argument for the first call */
    adiw r30, 8
    lpm IDX0, Z                 /* rc1[I%8] */
    adiw r30, 8
    lpm IDX1, Z                 /* rc2[I%8], parked on the stack ... */
    push IDX1
    adiw r30, 8
    lpm IDX1, Z                 /* ... so IDX1 can hold rc3[I%8] */

    /* four inverse mixes on the 16-byte pairs at data+0/16/32/48.
       IDX0/IDX1 must survive the calls - assumes threefish_invmix_asm
       preserves them (TODO confirm against its source).
       `call` is used for the external routine; the original author left
       the question "no rcall?" open. */
    movw r24, DATA0
    call threefish_invmix_asm
    movw r24, DATA0
    adiw r24, 16
    mov r22, IDX0
    call threefish_invmix_asm
    movw r24, DATA0
    adiw r24, 32
    pop r22                     /* rc2[I%8] pushed above */
    call threefish_invmix_asm
    movw r24, DATA0
    adiw r24, 48
    mov r22, IDX1
    call threefish_invmix_asm
    inc I
    rjmp 1b
/*
 * Flash lookup tables read with lpm by threefish512_dec.
 *
 * Size note: 27 + 23 + 4*8 = 82 bytes in total, i.e. an even number, so the
 * code that follows the tables stays word aligned. Keep the total even when
 * editing.
 */

/*
 * threefish512_slut9[j] = (j % 9) * 8: byte offset of key word j%9.
 * Read at indices S .. S+7 with S <= 18, so 26 entries are needed; 27 present.
 */
threefish512_slut9:
    .byte 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38
    .byte 0x40, 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30
    .byte 0x38, 0x40, 0x00, 0x08, 0x10, 0x18, 0x20, 0x28
    .byte 0x30, 0x38, 0x40

/*
 * threefish512_slut3[j] = (j % 3) * 8: byte offset of tweak word j%3.
 * Read at indices S and S+1 with S <= 18, so 20 entries are needed; 23 present.
 */
threefish512_slut3:
    .byte 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08
    .byte 0x10, 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00
    .byte 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08

/* old round constants
threefish512_rc0: .byte 0x41, 0x4b, 0x59, 0x41, 0x32, 0x42, 0x60, 0x5a
threefish512_rc1: .byte 0x63, 0x32, 0x33, 0x61, 0x14, 0x2a, 0x24, 0x4a
threefish512_rc2: .byte 0x59, 0x13, 0x51, 0x10, 0x72, 0x29, 0x53, 0x62
threefish512_rc3: .byte 0x43, 0x11, 0x2a, 0x52, 0x19, 0x33, 0x49, 0x7b
*/

/*
 * Per-round constants, one table per 16-byte word pair, indexed by I % 8 and
 * passed in r22 to threefish_invmix_asm. threefish512_dec steps from one
 * table to the next with `adiw r30, 8`, so the four tables must stay
 * contiguous and 8 bytes each.
 * NOTE(review): the byte encoding is defined by threefish_invmix_asm, which
 * is not in this file.
 */
threefish512_rc0: .byte 0x10, 0x31, 0x2b, 0x59, 0x54, 0x21, 0x41, 0x6a
threefish512_rc1: .byte 0x43, 0x4b, 0x62, 0x4a, 0x11, 0x61, 0x33, 0x44
threefish512_rc2: .byte 0x70, 0x59, 0x12, 0x42, 0x7a, 0x44, 0x2a, 0x23
threefish512_rc3: .byte 0x3a, 0x53, 0x21, 0x30, 0x70, 0x59, 0x52, 0x5b
/*
 * sub_z_from_x8: 64-bit in-place subtraction, least significant byte first.
 *
 *   (8 bytes at X) -= (8 bytes at Z)
 *
 * in:       X (r26:r27) = minuend / destination, Z (r30:r31) = subtrahend
 * out:      X and Z each advanced by 8
 * clobbers: r0, flags; r1 is used as scratch and cleared again before return
 */
sub_z_from_x8:
    /* lowest byte: plain subtract starts the borrow chain */
    ld r0, Z+
    ld r1, X
    sub r1, r0
    st X+, r1
    /* remaining seven bytes: ld/st leave the carry flag untouched, so the
       borrow ripples from one sbc to the next */
    .rept 7
    ld r0, Z+
    ld r1, X
    sbc r1, r0
    st X+, r1
    .endr
    clr r1                      /* restore r1 == 0 for the caller */
    ret
/*
 * xchg_zx8: exchange the 8 bytes at X with the 8 bytes at Z.
 *
 * in:       X (r26:r27), Z (r30:r31)
 * out:      X and Z each advanced by 8
 * clobbers: T0 (same register as IDX0), T1 (r0), CNT (r24), flags
 */
T0 = IDX0
T1 = 0
CNT = 24
xchg_zx8:
    ldi CNT, 8                  /* bytes left to swap */
1:  ld T0, X
    ld T1, Z
    st X+, T1
    st Z+, T0
    dec CNT
    brne 1b
    ret