threefish decryption (256, 512 and 1024 bit) in assembler

This commit is contained in:
bg 2009-03-30 23:42:48 +00:00
parent c239a90de4
commit 877bd61c0f
8 changed files with 1453 additions and 12 deletions

View File

@ -74,9 +74,27 @@ for i in -3..4
printf("%+d: %4d\n", i, ss_hist[i])
end
puts "\ntransformed:"
(0..shift_values.length-1).each{|i|
puts " for 256 bit:" if i==0
puts " for 512 bit:" if i==16
puts " for 1024 bit:" if i==16+32
a = transform_shift(shift_values[i])
a[0] = transform_singleshift(a[0])
printf("0x%01x%01x, ", a[1], a[0])
puts("") if (i%8==7)
}
puts "\ntransformed (decryption):"
(0..shift_values.length-1).each{|i|
puts " for 256 bit:" if i==0
puts " for 512 bit:" if i==16
puts " for 1024 bit:" if i==16+32
a = transform_shift(shift_values[(i/8)*8+7-(i%8)])
a[0] = transform_singleshift(a[0])
printf("0x%01x%01x, ", a[1], a[0])
puts("") if (i%8==7)
}

View File

@ -6,8 +6,8 @@ BLOCK_CIPHERS += $(ALGO_NAME)
$(ALGO_NAME)_OBJ := threefish256_enc_asm.o threefish512_enc_asm.o threefish1024_enc_asm.o\
threefish_mix.o threefish_mix_4c.o threefish_invmix_c.o\
threefish256_dec.o threefish512_dec.o threefish1024_dec.o
threefish_mix.o threefish_invmix.o \
threefish256_dec_asm.o threefish512_dec_asm.o threefish1024_dec_asm.o
$(ALGO_NAME)_TEST_BIN := main-threefish-test.o debug.o uart.o hexdigit_tab.o \
nessie_bc_test.o dbz_strings.o nessie_common.o cli.o string-extras.o performance_test.o
$(ALGO_NAME)_NESSIE_TEST := test nessie

View File

@ -125,6 +125,10 @@ void testrun_stdtest_threefish256(void){
threefish256_enc(data, &ctx);
cli_putstr_P(PSTR("\r\ncipher: "));
cli_hexdump_block(data, 32, 4, 16);
cli_putstr_P(PSTR("\r\ndecipher: "));
threefish256_dec(data, &ctx);
cli_hexdump_block(data, 32, 4, 16);
/* second test */
for(i=0; i<32; ++i){
key[i] = 0x10+i;
@ -143,6 +147,9 @@ void testrun_stdtest_threefish256(void){
threefish256_enc(data, &ctx);
cli_putstr_P(PSTR("\r\ncipher: "));
cli_hexdump_block(data, 32, 4, 16);
cli_putstr_P(PSTR("\r\ndecipher: "));
threefish256_dec(data, &ctx);
cli_hexdump_block(data, 32, 4, 16);
}
void testrun_stdtest_threefish512(void){
@ -168,6 +175,10 @@ void testrun_stdtest_threefish512(void){
threefish512_enc(data, &ctx);
cli_putstr_P(PSTR("\r\ncipher: "));
cli_hexdump_block(data, 64, 4, 16);
threefish512_dec(data, &ctx);
cli_putstr_P(PSTR("\r\ndecipher: "));
cli_hexdump_block(data, 64, 4, 16);
for(i=0; i<64; ++i){
key[i] = 0x10+i;
@ -188,6 +199,10 @@ void testrun_stdtest_threefish512(void){
threefish512_enc(data, &ctx);
cli_putstr_P(PSTR("\r\ncipher: "));
cli_hexdump_block(data, 64, 4, 16);
threefish512_dec(data, &ctx);
cli_putstr_P(PSTR("\r\ndecipher: "));
cli_hexdump_block(data, 64, 4, 16);
}
void testrun_stdtest_threefish1024(void){
@ -211,6 +226,9 @@ void testrun_stdtest_threefish1024(void){
threefish1024_enc(data, &ctx);
cli_putstr_P(PSTR("\r\ncipher: "));
cli_hexdump_block(data, 128, 4, 16);
threefish1024_dec(data, &ctx);
cli_putstr_P(PSTR("\r\ndecipher: "));
cli_hexdump_block(data, 128, 4, 16);
for(i=0; i<128; ++i){
key[i] = 0x10+i;
@ -229,6 +247,9 @@ void testrun_stdtest_threefish1024(void){
threefish1024_enc(data, &ctx);
cli_putstr_P(PSTR("\r\ncipher: "));
cli_hexdump_block(data, 128, 4, 16);
threefish1024_dec(data, &ctx);
cli_putstr_P(PSTR("\r\ndecipher: "));
cli_hexdump_block(data, 128, 4, 16);
}
@ -268,6 +289,12 @@ void testrun_performance_threefish256(void){
ultoa((unsigned long)t, str, 10);
cli_putstr(str);
startTimer(1);
threefish256_dec(data, &ctx);
t = stopTimer();
cli_putstr_P(PSTR("\r\n\tdecrypt time: "));
ultoa((unsigned long)t, str, 10);
cli_putstr(str);
cli_putstr_P(PSTR("\r\n"));
}
@ -301,6 +328,13 @@ void testrun_performance_threefish512(void){
ultoa((unsigned long)t, str, 10);
cli_putstr(str);
startTimer(1);
threefish512_dec(data, &ctx);
t = stopTimer();
cli_putstr_P(PSTR("\r\n\tdecrypt time: "));
ultoa((unsigned long)t, str, 10);
cli_putstr(str);
cli_putstr_P(PSTR("\r\n"));
}
@ -334,6 +368,13 @@ void testrun_performance_threefish1024(void){
ultoa((unsigned long)t, str, 10);
cli_putstr(str);
startTimer(1);
threefish1024_dec(data, &ctx);
t = stopTimer();
cli_putstr_P(PSTR("\r\n\tdecrypt time: "));
ultoa((unsigned long)t, str, 10);
cli_putstr(str);
cli_putstr_P(PSTR("\r\n"));
}

474
threefish1024_dec_asm.S Normal file
View File

@ -0,0 +1,474 @@
/* threefish1024_dec_asm.S */
/*
This file is part of the AVR-Crypto-Lib.
Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* \author Daniel Otte
* \email daniel.otte@rub.de
* \date 2009-03-24
* \license GPLv3 or later
*/
#include "avr-asm-macros.S"
/******************************************************************************/
/*
void permute_inv16(void* data){
uint64_t t;
t = X(15);
X(15) = X(7);
X(7) = X(9);
X(9) = X(1);
X(1) = t;
t = X(11);
X(11) = X(5);
X(5) = X(13);
X(13) = X(3);
X(3) = t;
t = X(4);
X(4) = X(6);
X(6) = t;
t = X(14);
X(14) = X(12);
X(12) = X(10);
X(10) = X(8);
X(8) = t;
}
void add_key_16(void* data, const threefish1024_ctx_t* ctx, uint8_t s){
uint8_t i;
for(i=0; i<13; ++i){
X(i) -= ctx->k[(s+i)%17];
}
X(13) -= ctx->k[(s+13)%17] + ctx->t[s%3];
X(14) -= ctx->k[(s+14)%17] + ctx->t[(s+1)%3];
X(15) -= ctx->k[(s+15)%17] + s;
}
void threefish1024_dec(void* data, const threefish1024_ctx_t* ctx){
uint8_t i=0,s=20;
uint8_t r0[8] = {0x69, 0x72, 0x21, 0x34, 0x42, 0x41, 0x31, 0x79};
uint8_t r1[8] = {0x61, 0x19, 0x1a, 0x19, 0x53, 0x10, 0x31, 0x53};
uint8_t r2[8] = {0x33, 0x40, 0x22, 0x69, 0x31, 0x22, 0x6a, 0x5b};
uint8_t r3[8] = {0x72, 0x6b, 0x31, 0x60, 0x74, 0x71, 0x2b, 0x50};
uint8_t r4[8] = {0x5b, 0x23, 0x53, 0x63, 0x54, 0x3b, 0x2a, 0x20};
uint8_t r5[8] = {0x60, 0x22, 0x52, 0x11, 0x11, 0x14, 0x2b, 0x3a};
uint8_t r6[8] = {0x7b, 0x02, 0x50, 0x43, 0x73, 0x40, 0x64, 0x5a};
uint8_t r7[8] = {0x70, 0x70, 0x29, 0x51, 0x42, 0x7a, 0x71, 0x14};
do{
if(i%4==0){
add_key_16(data, ctx, s);
--s;
}
permute_inv16(data);
threefish_invmix((uint8_t*)data + 0, r0[i%8]);
threefish_invmix((uint8_t*)data + 16, r1[i%8]);
threefish_invmix((uint8_t*)data + 32, r2[i%8]);
threefish_invmix((uint8_t*)data + 48, r3[i%8]);
threefish_invmix((uint8_t*)data + 64, r4[i%8]);
threefish_invmix((uint8_t*)data + 80, r5[i%8]);
threefish_invmix((uint8_t*)data + 96, r6[i%8]);
threefish_invmix((uint8_t*)data +112, r7[i%8]);
++i;
}while(i!=80);
add_key_16(data, ctx, s);
}
*/
I = 2
S = 3
DATA0 = 4
DATA1 = 5
CTX0 = 6
CTX1 = 7
IDX0 = 8
IDX1 = 9
IDX2 = 10
IDX3 = 11
IDX4 = 12
IDX5 = 13
IDX6 = 14
IDX7 = 15
/*
 * threefish1024_dec -- Threefish-1024 decryption of one block, in place.
 * Assembler counterpart of the C reference shown in the comment above.
 *
 * param data: r24:r25 (block of 16 64-bit words, 128 bytes)
 * param ctx: r22:r23 (key words are read at byte offsets 0x00..0x80,
 *                     tweak words at byte offsets 17*8 + 0x00..0x10)
 *
 * Register roles:
 *   I          round counter, starts at 0 and counts up
 *   S          subkey counter, starts at 20 and counts down to 0
 *   DATA0/1    copy of the data pointer
 *   CTX0/1     copy of the ctx pointer
 *   IDX0..IDX7 byte offsets read from the lookup tables, later reused for
 *              the rotation constants
 *
 * r1 is used as a zero operand in adc/sbc throughout, so it is assumed to
 * be 0 on entry (avr-gcc convention -- confirm for non-C callers).
 * push_range/pop_range come from avr-asm-macros.S; presumably they save
 * and restore r2..r17 -- verify against that file.
 */
.global threefish1024_dec
threefish1024_dec:
push r28
push r29
push_range 2, 17
movw DATA0, r24
movw CTX0, r22
clr I
ldi r26, 20
mov S, r26
/* main loop: a subkey is subtracted only when I % 4 == 0 */
1:
mov r30, I
andi r30, 0x03
breq 2f
rjmp 4f
2:
/* fetch the byte offsets of key words (S+0)%17 .. (S+7)%17 */
ldi r30, lo8(threefish1024_slut17)
ldi r31, hi8(threefish1024_slut17)
add r30, S
adc r31, r1
lpm IDX0, Z+
lpm IDX1, Z+
lpm IDX2, Z+
lpm IDX3, Z+
lpm IDX4, Z+
lpm IDX5, Z+
lpm IDX6, Z+
lpm IDX7, Z
/* X walks through data words 0..7; each call subtracts one key word */
movw r30, CTX0
movw r26, DATA0
add r30, IDX0
adc r31, r1
rcall sub_z_from_x8
movw r30, CTX0
add r30, IDX1
adc r31, r1
rcall sub_z_from_x8
movw r30, CTX0
add r30, IDX2
adc r31, r1
rcall sub_z_from_x8
movw r30, CTX0
add r30, IDX3
adc r31, r1
rcall sub_z_from_x8
movw r30, CTX0
add r30, IDX4
adc r31, r1
rcall sub_z_from_x8
movw r30, CTX0
add r30, IDX5
adc r31, r1
rcall sub_z_from_x8
movw r30, CTX0
add r30, IDX6
adc r31, r1
rcall sub_z_from_x8
movw r30, CTX0
add r30, IDX7
adc r31, r1
rcall sub_z_from_x8
/* second half */
/* offsets of key words (S+8)%17 .. (S+15)%17, applied to data words 8..15 */
ldi r30, lo8(threefish1024_slut17)
ldi r31, hi8(threefish1024_slut17)
add r30, S
adc r31, r1
adiw r30, 8
lpm IDX0, Z+
lpm IDX1, Z+
lpm IDX2, Z+
lpm IDX3, Z+
lpm IDX4, Z+
lpm IDX5, Z+
lpm IDX6, Z+
lpm IDX7, Z
movw r30, CTX0
add r30, IDX0
adc r31, r1
rcall sub_z_from_x8
movw r30, CTX0
add r30, IDX1
adc r31, r1
rcall sub_z_from_x8
movw r30, CTX0
add r30, IDX2
adc r31, r1
rcall sub_z_from_x8
movw r30, CTX0
add r30, IDX3
adc r31, r1
rcall sub_z_from_x8
movw r30, CTX0
add r30, IDX4
adc r31, r1
rcall sub_z_from_x8
movw r30, CTX0
add r30, IDX5
adc r31, r1
rcall sub_z_from_x8
movw r30, CTX0
add r30, IDX6
adc r31, r1
rcall sub_z_from_x8
movw r30, CTX0
add r30, IDX7
adc r31, r1
rcall sub_z_from_x8
/* now the remaining key */
/* X is at data+128 here; step back three words so it points to word 13 */
sbiw r26, 3*8
ldi r30, lo8(threefish1024_slut3)
ldi r31, hi8(threefish1024_slut3)
add r30, S
adc r31, r1
lpm IDX0, Z+
lpm IDX1, Z
movw r30, CTX0
adiw r30, 7*8 /* make Z pointing to (extended tweak) */
adiw r30, 7*8
adiw r30, 3*8
/* keep the tweak base address (ctx + 17*8) in IDX2:IDX3 */
movw IDX2, r30
/* word 13 -= t[S % 3] */
add r30, IDX0
adc r31, r1
rcall sub_z_from_x8
/* word 14 -= t[(S+1) % 3] */
movw r30, IDX2
add r30, IDX1
adc r31, r1
rcall sub_z_from_x8
/* word 15 -= S, as a 64-bit subtraction with borrow propagation */
ld r0, X
sub r0, S
st X+, r0
ld r0, X
sbc r0, r1
st X+, r0
ld r0, X
sbc r0, r1
st X+, r0
ld r0, X
sbc r0, r1
st X+, r0
ld r0, X
sbc r0, r1
st X+, r0
ld r0, X
sbc r0, r1
st X+, r0
ld r0, X
sbc r0, r1
st X+, r0
ld r0, X
sbc r0, r1
st X+, r0
/* the subkey with S == 0 is the last one: restore registers and return */
tst S
brne 3f
exit:
pop_range 2, 17
pop r29
pop r28
ret
3:
dec S
4:
/* now the permutation */
/* each cycle of the inverse permutation is realised as a chain of
   8-byte swaps through xchg_zx8 */
movw r26, DATA0 /* X1 <-> X15 */
adiw r26, 1*8
movw r30, DATA0
adiw r30, 7*8+4
adiw r30, 7*8+4
rcall xchg_zx8
movw r26, DATA0 /* X15 <-> X7 */
adiw r26, 7*8+4
adiw r26, 7*8+4
movw r30, DATA0
adiw r30, 7*8
rcall xchg_zx8
movw r26, DATA0 /* X9 <-> X7 */
adiw r26, 7*8
adiw r26, 2*8
movw r30, DATA0
adiw r30, 7*8
rcall xchg_zx8
/* --- */
movw r26, DATA0 /* X3 <-> X11 */
adiw r26, 3*8
movw r30, DATA0
adiw r30, 7*8
adiw r30, 4*8
rcall xchg_zx8
movw r26, DATA0 /* X11 <-> X5 */
adiw r26, 7*8
adiw r26, 4*8
movw r30, DATA0
adiw r30, 5*8
rcall xchg_zx8
movw r26, DATA0 /* X13 <-> X5 */
adiw r26, 7*8
adiw r26, 6*8
movw r30, DATA0
adiw r30, 5*8
rcall xchg_zx8
/* --- */
movw r26, DATA0 /* X8 <-> X14 */
adiw r26, 7*8
adiw r26, 1*8
movw r30, DATA0
adiw r30, 7*8
adiw r30, 7*8
rcall xchg_zx8
movw r26, DATA0 /* X14 <-> X12 */
adiw r26, 7*8
adiw r26, 7*8
movw r30, DATA0
adiw r30, 7*8
adiw r30, 5*8
rcall xchg_zx8
movw r26, DATA0 /* X10 <-> X12 */
adiw r26, 7*8
adiw r26, 3*8
movw r30, DATA0
adiw r30, 7*8
adiw r30, 5*8
rcall xchg_zx8
/* --- */
movw r26, DATA0 /* X4 <-> X6 */
adiw r26, 4*8
movw r30, DATA0
adiw r30, 6*8
rcall xchg_zx8
/* call mix */
/* load the eight rotation constants for round I % 8; the rc tables are
   laid out back to back, so stepping Z by 8 moves to the next table */
ldi r30, lo8(threefish1024_rc0)
ldi r31, hi8(threefish1024_rc0)
mov r26, I
andi r26, 0x07
add r30, r26
adc r31, r1
lpm r22, Z
adiw r30, 8
lpm IDX0, Z
adiw r30, 8
lpm IDX1, Z
adiw r30, 8
lpm IDX2, Z
adiw r30, 8
lpm IDX3, Z
adiw r30, 8
lpm IDX4, Z
adiw r30, 8
lpm IDX5, Z
adiw r30, 8
lpm IDX6, Z
/* IDX2..IDX6 (r10..r14) are overwritten by threefish_invmix_asm, so the
   constants held there go on the stack, pushed in reverse order of use */
push IDX6
push IDX5
push IDX4
push IDX3
push IDX2
movw r24, DATA0
call threefish_invmix_asm /* no rcall? */
movw r24, DATA0
adiw r24, 16
mov r22, IDX0
call threefish_invmix_asm /* no rcall? */
movw r24, DATA0
adiw r24, 32
mov r22, IDX1
call threefish_invmix_asm /* no rcall? */
movw r24, DATA0
adiw r24, 48
pop r22
call threefish_invmix_asm /* no rcall? */
/* adiw takes an immediate of at most 63, hence the two-step additions */
movw r24, DATA0
adiw r24, 63
adiw r24, 1
pop r22
call threefish_invmix_asm /* no rcall? */
movw r24, DATA0
adiw r24, 63
adiw r24, 17
pop r22
call threefish_invmix_asm /* no rcall? */
movw r24, DATA0
adiw r24, 63
adiw r24, 33
pop r22
call threefish_invmix_asm /* no rcall? */
movw r24, DATA0
adiw r24, 63
adiw r24, 49
pop r22
call threefish_invmix_asm /* no rcall? */
inc I
/* label 9 is not referenced anywhere in the visible code */
9:
rjmp 1b
/* Byte offset of key word (n % 17) for n = 0..36. Read with lpm starting
   at index S (and S+8), 8 entries at a time, which replaces the modulo
   computation of the C reference. */
threefish1024_slut17:
.byte 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38
.byte 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78
.byte 0x80, 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30
.byte 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70
.byte 0x78, 0x80, 0x00, 0x08, 0x10
/* Byte offset of tweak word (n % 3); entries S and S+1 are read. */
threefish1024_slut3:
.byte 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08
.byte 0x10, 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00
.byte 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08, 0x10
.byte 0x00
/* Encoded rotation constants, one table per word pair, indexed by I % 8.
   The tables must stay contiguous and 8 bytes each: the loader steps
   from one to the next with adiw r30, 8. Values match r0..r7 of the C
   reference comment. */
threefish1024_rc0: .byte 0x69, 0x72, 0x21, 0x34, 0x42, 0x41, 0x31, 0x79
threefish1024_rc1: .byte 0x61, 0x19, 0x1a, 0x19, 0x53, 0x10, 0x31, 0x53
threefish1024_rc2: .byte 0x33, 0x40, 0x22, 0x69, 0x31, 0x22, 0x6a, 0x5b
threefish1024_rc3: .byte 0x72, 0x6b, 0x31, 0x60, 0x74, 0x71, 0x2b, 0x50
threefish1024_rc4: .byte 0x5b, 0x23, 0x53, 0x63, 0x54, 0x3b, 0x2a, 0x20
threefish1024_rc5: .byte 0x60, 0x22, 0x52, 0x11, 0x11, 0x14, 0x2b, 0x3a
threefish1024_rc6: .byte 0x7b, 0x02, 0x50, 0x43, 0x73, 0x40, 0x64, 0x5a
threefish1024_rc7: .byte 0x70, 0x70, 0x29, 0x51, 0x42, 0x7a, 0x71, 0x14
/*
 * sub_z_from_x8 -- 64-bit little-endian subtraction in memory:
 *   (8 bytes at X) -= (8 bytes at Z)
 * X and Z are both advanced by 8. r0 and r1 serve as scratch; r1 is
 * cleared again before returning so it can keep acting as zero register.
 */
sub_z_from_x8:
/* least significant byte: plain subtract starts the borrow chain */
ld r0, Z+
ld r1, X
sub r1, r0
st X+, r1
/* remaining seven bytes: subtract with borrow (ld/st leave flags alone) */
.rept 7
ld r0, Z+
ld r1, X
sbc r1, r0
st X+, r1
.endr
clr r1
ret
/*
 * xchg_zx8 -- exchange the 8 bytes at X with the 8 bytes at Z.
 * X and Z are both advanced by 8 on return.
 * Scratch registers: T0 (same register as IDX0), T1 (r0), CNT (r24).
 */
T0 = IDX0
T1 = 0
CNT = 24
xchg_zx8:
ldi CNT, 8
/* swap one byte per iteration */
1: ld T0, X
ld T1, Z
st X+, T1
st Z+, T0
dec CNT
brne 1b
ret

279
threefish256_dec_asm.S Normal file
View File

@ -0,0 +1,279 @@
/* threefish256_dec_asm.S */
/*
This file is part of the AVR-Crypto-Lib.
Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* \author Daniel Otte
* \email daniel.otte@rub.de
* \date 2009-03-16
* \license GPLv3 or later
*/
#include "avr-asm-macros.S"
/******************************************************************************/
/*
void permute_4(void* data){
uint64_t t;
t = X(1);
X(1) = X(3);
X(3) = t;
}
void add_key_4(void* data, const threefish256_ctx_t* ctx, uint8_t s){
X(0) -= ctx->k[(s+0)%5];
X(1) -= ctx->k[(s+1)%5] + ctx->t[s%3];
X(2) -= ctx->k[(s+2)%5] + ctx->t[(s+1)%3];
X(3) -= ctx->k[(s+3)%5] + s;
}
void threefish256_dec(void* data, const threefish256_ctx_t* ctx){
uint8_t i=0,s=18;
uint8_t r0[8] = {0x73, 0x13, 0x7b, 0x32, 0x72, 0x2b, 0x44, 0x1b};
uint8_t r1[8] = {0x62, 0x52, 0x43, 0x24, 0x54, 0x6a, 0x34, 0x70};
do{
if(i%4==0){
add_key_4(data, ctx, s);
--s;
}
permute_4(data);
threefish_invmix(data, r0[i%8]);
threefish_invmix((uint8_t*)data + 16, r1[i%8]);
++i;
}while(i!=72);
add_key_4(data, ctx, s);
}
*/
I = 2
S = 3
DATA0 = 4
DATA1 = 5
CTX0 = 6
CTX1 = 7
IDX0 = 8
IDX1 = 9
IDX2 = 10
IDX3 = 11
/*
 * threefish256_dec -- Threefish-256 decryption of one block, in place.
 * Assembler counterpart of the C reference shown in the comment above.
 *
 * param data: r24:r25 (block of 4 64-bit words, 32 bytes)
 * param ctx: r22:r23 (key words are read at byte offsets 0x00..0x20,
 *                     tweak words at byte offsets 5*8 + 0x00..0x10)
 *
 * Register roles:
 *   I          round counter, starts at 0 and counts up
 *   S          subkey counter, starts at 18 and counts down to 0
 *   DATA0/1    copy of the data pointer
 *   CTX0/1     copy of the ctx pointer
 *   IDX0..IDX3 byte offsets read from the lookup tables, scratch for the
 *              permutation, and holder of the second rotation constant
 *
 * r1 is used as a zero operand in adc/sbc throughout, so it is assumed to
 * be 0 on entry (avr-gcc convention -- confirm for non-C callers).
 * push_range/pop_range come from avr-asm-macros.S; presumably they save
 * and restore r2..r17 -- verify against that file.
 */
.global threefish256_dec
threefish256_dec:
push r28
push r29
push_range 2, 17
movw DATA0, r24
movw CTX0, r22
clr I
ldi r26, 18
mov S, r26
/* main loop: a subkey is subtracted only when I % 4 == 0 */
1:
mov r30, I
andi r30, 0x03
breq 2f
rjmp 4f
2:
/* fetch the byte offsets of key words (S+0)%5 .. (S+3)%5 */
ldi r30, lo8(threefish256_slut5)
ldi r31, hi8(threefish256_slut5)
add r30, S
adc r31, r1
lpm IDX0, Z+
lpm IDX1, Z+
lpm IDX2, Z+
lpm IDX3, Z
/* X walks through data words 0..3; each call subtracts one key word */
movw r30, CTX0
movw r26, DATA0
add r30, IDX0
adc r31, r1
rcall sub_z_from_x8
movw r30, CTX0
add r30, IDX1
adc r31, r1
rcall sub_z_from_x8
movw r30, CTX0
add r30, IDX2
adc r31, r1
rcall sub_z_from_x8
movw r30, CTX0
add r30, IDX3
adc r31, r1
rcall sub_z_from_x8
/* now the remaining key */
/* X is at data+32 here; step back three words so it points to word 1 */
sbiw r26, 3*8
ldi r30, lo8(threefish256_slut3)
ldi r31, hi8(threefish256_slut3)
add r30, S
adc r31, r1
lpm IDX0, Z+
lpm IDX1, Z
movw r30, CTX0
adiw r30, 5*8
/* keep the tweak base address (ctx + 5*8) in IDX2:IDX3 */
movw IDX2, r30
/* word 1 -= t[S % 3] */
add r30, IDX0
adc r31, r1
rcall sub_z_from_x8
/* word 2 -= t[(S+1) % 3] */
movw r30, IDX2
add r30, IDX1
adc r31, r1
rcall sub_z_from_x8
/* word 3 -= S, as a 64-bit subtraction with borrow propagation */
ld r0, X
sub r0, S
st X+, r0
ld r0, X
sbc r0, r1
st X+, r0
ld r0, X
sbc r0, r1
st X+, r0
ld r0, X
sbc r0, r1
st X+, r0
ld r0, X
sbc r0, r1
st X+, r0
ld r0, X
sbc r0, r1
st X+, r0
ld r0, X
sbc r0, r1
st X+, r0
/* most significant byte: must also be sbc. The original used adc here,
   which added the pending borrow instead of subtracting it. */
ld r0, X
sbc r0, r1
st X+, r0
/* the subkey with S == 0 is the last one: restore registers and return */
tst S
brne 3f
exit:
pop_range 2, 17
pop r29
pop r28
ret
3:
dec S
4:
/* now the permutation */
/* swap word 1 (X = data+8) with word 3 (Z = data+24), byte by byte */
movw r26, DATA0
adiw r26, 8
movw r30, r26
adiw r30, 16
ld IDX0, X
ld IDX1, Z
st X+, IDX1
st Z+, IDX0
ld IDX0, X
ld IDX1, Z
st X+, IDX1
st Z+, IDX0
ld IDX0, X
ld IDX1, Z
st X+, IDX1
st Z+, IDX0
ld IDX0, X
ld IDX1, Z
st X+, IDX1
st Z+, IDX0
ld IDX0, X
ld IDX1, Z
st X+, IDX1
st Z+, IDX0
ld IDX0, X
ld IDX1, Z
st X+, IDX1
st Z+, IDX0
ld IDX0, X
ld IDX1, Z
st X+, IDX1
st Z+, IDX0
ld IDX0, X
ld IDX1, Z
st X+, IDX1
st Z+, IDX0
/* call mix */
/* rotation constants for round I % 8: rc0 into r22, rc1 into IDX0
   (rc1 directly follows rc0, 8 bytes further on) */
ldi r30, lo8(threefish256_rc0)
ldi r31, hi8(threefish256_rc0)
mov r26, I
andi r26, 0x07
add r30, r26
adc r31, r1
lpm r22, Z
adiw r30, 8
lpm IDX0, Z
movw r24, DATA0
call threefish_invmix_asm /* no rcall? */
movw r24, DATA0
adiw r24, 16
mov r22, IDX0
call threefish_invmix_asm /* no rcall? */
inc I
rjmp 1b
/* Byte offset of key word (n % 5) for n = 0..22. Read with lpm starting
   at index S, 4 entries at a time, which replaces the modulo computation
   of the C reference. */
threefish256_slut5:
.byte 0x00, 0x08, 0x10, 0x18, 0x20, 0x00, 0x08, 0x10
.byte 0x18, 0x20, 0x00, 0x08, 0x10, 0x18, 0x20, 0x00
.byte 0x08, 0x10, 0x18, 0x20, 0x00, 0x08, 0x10
/* Byte offset of tweak word (n % 3); entries S and S+1 are read. */
threefish256_slut3:
.byte 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08
.byte 0x10, 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00
.byte 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08
/* Encoded rotation constants, indexed by I % 8. rc1 must directly follow
   rc0: the loader reaches it with adiw r30, 8. Values match r0/r1 of the
   C reference comment. */
threefish256_rc0: .byte 0x73, 0x13, 0x7b, 0x32, 0x72, 0x2b, 0x44, 0x1b
threefish256_rc1: .byte 0x62, 0x52, 0x43, 0x24, 0x54, 0x6a, 0x34, 0x70
/*
 * sub_z_from_x8 -- 64-bit little-endian subtraction in memory:
 *   (8 bytes at X) -= (8 bytes at Z)
 * X and Z are both advanced by 8. r0 and r1 serve as scratch; r1 is
 * cleared again before returning so it can keep acting as zero register.
 */
sub_z_from_x8:
/* least significant byte: plain subtract starts the borrow chain */
ld r0, Z+
ld r1, X
sub r1, r0
st X+, r1
/* remaining seven bytes: subtract with borrow (ld/st leave flags alone) */
.rept 7
ld r0, Z+
ld r1, X
sbc r1, r0
st X+, r1
.endr
clr r1
ret

330
threefish512_dec_asm.S Normal file
View File

@ -0,0 +1,330 @@
/* threefish512_dec_asm.S */
/*
This file is part of the AVR-Crypto-Lib.
Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* \author Daniel Otte
* \email daniel.otte@rub.de
* \date 2009-03-24
* \license GPLv3 or later
*/
#include "avr-asm-macros.S"
/******************************************************************************/
/*
#define X(a) (((uint64_t*)data)[(a)])
static
void permute_inv8(void* data){
uint64_t t;
t = X(6);
X(6) = X(4);
X(4) = X(2);
X(2) = X(0);
X(0) = t;
t = X(7);
X(7) = X(3);
X(3) = t;
}
static
void add_key_8(void* data, const threefish512_ctx_t* ctx, uint8_t s){
uint8_t i;
for(i=0; i<5; ++i){
X(i) -= ctx->k[(s+i)%9];
}
X(5) -= ctx->k[(s+5)%9] + ctx->t[s%3];
X(6) -= ctx->k[(s+6)%9] + ctx->t[(s+1)%3];
X(7) -= ctx->k[(s+7)%9] + s;
}
void threefish512_dec(void* data, const threefish512_ctx_t* ctx){
uint8_t i=0,s=18;
uint8_t r0[8] = {0x41, 0x4b, 0x59, 0x41, 0x32, 0x42, 0x60, 0x5a};
uint8_t r1[8] = {0x63, 0x32, 0x33, 0x61, 0x14, 0x2a, 0x24, 0x4a};
uint8_t r2[8] = {0x59, 0x13, 0x51, 0x10, 0x72, 0x29, 0x53, 0x62};
uint8_t r3[8] = {0x43, 0x11, 0x2a, 0x52, 0x19, 0x33, 0x49, 0x7b};
do{
if(i%4==0){
add_key_8(data, ctx, s);
--s;
}
permute_inv8(data);
threefish_invmix((uint8_t*)data + 0, r0[i%8]);
threefish_invmix((uint8_t*)data + 16, r1[i%8]);
threefish_invmix((uint8_t*)data + 32, r2[i%8]);
threefish_invmix((uint8_t*)data + 48, r3[i%8]);
++i;
}while(i!=72);
add_key_8(data, ctx, s);
}
*/
I = 2
S = 3
DATA0 = 4
DATA1 = 5
CTX0 = 6
CTX1 = 7
IDX0 = 8
IDX1 = 9
IDX2 = 10
IDX3 = 11
IDX4 = 12
IDX5 = 13
IDX6 = 14
IDX7 = 15
/*
 * threefish512_dec -- Threefish-512 decryption of one block, in place.
 * Assembler counterpart of the C reference shown in the comment above.
 *
 * param data: r24:r25 (block of 8 64-bit words, 64 bytes)
 * param ctx: r22:r23 (key words are read at byte offsets 0x00..0x40,
 *                     tweak words at byte offsets 9*8 + 0x00..0x10)
 *
 * Register roles:
 *   I          round counter, starts at 0 and counts up
 *   S          subkey counter, starts at 18 and counts down to 0
 *   DATA0/1    copy of the data pointer
 *   CTX0/1     copy of the ctx pointer
 *   IDX0..IDX7 byte offsets read from the lookup tables, later reused for
 *              the rotation constants
 *
 * r1 is used as a zero operand in adc/sbc throughout, so it is assumed to
 * be 0 on entry (avr-gcc convention -- confirm for non-C callers).
 * push_range/pop_range come from avr-asm-macros.S; presumably they save
 * and restore r2..r17 -- verify against that file.
 */
.global threefish512_dec
threefish512_dec:
push r28
push r29
push_range 2, 17
movw DATA0, r24
movw CTX0, r22
clr I
ldi r26, 18
mov S, r26
/* main loop: a subkey is subtracted only when I % 4 == 0 */
1:
mov r30, I
andi r30, 0x03
breq 2f
rjmp 4f
2:
/* fetch the byte offsets of key words (S+0)%9 .. (S+7)%9 */
ldi r30, lo8(threefish512_slut9)
ldi r31, hi8(threefish512_slut9)
add r30, S
adc r31, r1
lpm IDX0, Z+
lpm IDX1, Z+
lpm IDX2, Z+
lpm IDX3, Z+
lpm IDX4, Z+
lpm IDX5, Z+
lpm IDX6, Z+
lpm IDX7, Z
/* X walks through data words 0..7; each call subtracts one key word */
movw r30, CTX0
movw r26, DATA0
add r30, IDX0
adc r31, r1
rcall sub_z_from_x8
movw r30, CTX0
add r30, IDX1
adc r31, r1
rcall sub_z_from_x8
movw r30, CTX0
add r30, IDX2
adc r31, r1
rcall sub_z_from_x8
movw r30, CTX0
add r30, IDX3
adc r31, r1
rcall sub_z_from_x8
movw r30, CTX0
add r30, IDX4
adc r31, r1
rcall sub_z_from_x8
movw r30, CTX0
add r30, IDX5
adc r31, r1
rcall sub_z_from_x8
movw r30, CTX0
add r30, IDX6
adc r31, r1
rcall sub_z_from_x8
movw r30, CTX0
add r30, IDX7
adc r31, r1
rcall sub_z_from_x8
/* now the remaining key */
/* X is at data+64 here; step back three words so it points to word 5 */
sbiw r26, 3*8
ldi r30, lo8(threefish512_slut3)
ldi r31, hi8(threefish512_slut3)
add r30, S
adc r31, r1
lpm IDX0, Z+
lpm IDX1, Z
movw r30, CTX0
adiw r30, 7*8 /* make Z pointing to (extended tweak) */
adiw r30, 2*8
/* keep the tweak base address (ctx + 9*8) in IDX2:IDX3 */
movw IDX2, r30
/* word 5 -= t[S % 3] */
add r30, IDX0
adc r31, r1
rcall sub_z_from_x8
/* word 6 -= t[(S+1) % 3] */
movw r30, IDX2
add r30, IDX1
adc r31, r1
rcall sub_z_from_x8
/* word 7 -= S, as a 64-bit subtraction with borrow propagation */
ld r0, X
sub r0, S
st X+, r0
ld r0, X
sbc r0, r1
st X+, r0
ld r0, X
sbc r0, r1
st X+, r0
ld r0, X
sbc r0, r1
st X+, r0
ld r0, X
sbc r0, r1
st X+, r0
ld r0, X
sbc r0, r1
st X+, r0
ld r0, X
sbc r0, r1
st X+, r0
ld r0, X
sbc r0, r1
st X+, r0
/* the subkey with S == 0 is the last one: restore registers and return */
tst S
brne 3f
exit:
pop_range 2, 17
pop r29
pop r28
ret
3:
dec S
4:
/* now the permutation */
/* the 4-cycle over words 0, 2, 4, 6 is realised as three swaps */
/* X0 <-> X6 */
movw r26, DATA0
movw r30, DATA0
adiw r30, 6*8
rcall xchg_zx8
/* X6 <-> X4 */
movw r26, DATA0
adiw r26, 6*8
movw r30, DATA0
adiw r30, 4*8
rcall xchg_zx8
/* X2 <-> X4 */
movw r26, DATA0
adiw r26, 2*8
movw r30, DATA0
adiw r30, 4*8
rcall xchg_zx8
/* X3 <-> X7 */
movw r26, DATA0
adiw r26, 3*8
movw r30, DATA0
adiw r30, 7*8
rcall xchg_zx8
/* call mix */
/* rotation constants for round I % 8: rc0 -> r22, rc1 -> IDX0,
   rc2 -> stack, rc3 -> IDX1 (the tables are contiguous, 8 bytes each) */
ldi r30, lo8(threefish512_rc0)
ldi r31, hi8(threefish512_rc0)
mov r26, I
andi r26, 0x07
add r30, r26
adc r31, r1
lpm r22, Z
adiw r30, 8
lpm IDX0, Z
adiw r30, 8
lpm IDX1, Z
push IDX1
adiw r30, 8
lpm IDX1, Z
movw r24, DATA0
call threefish_invmix_asm /* no rcall? */
movw r24, DATA0
adiw r24, 16
mov r22, IDX0
call threefish_invmix_asm /* no rcall? */
movw r24, DATA0
adiw r24, 32
/* rc2, pushed above */
pop r22
;mov r22, IDX0
call threefish_invmix_asm /* no rcall? */
movw r24, DATA0
adiw r24, 48
mov r22, IDX1
call threefish_invmix_asm /* no rcall? */
inc I
rjmp 1b
/* Byte offset of key word (n % 9) for n = 0..26. Read with lpm starting
   at index S, 8 entries at a time, which replaces the modulo computation
   of the C reference. */
threefish512_slut9:
.byte 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38
.byte 0x40, 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30
.byte 0x38, 0x40, 0x00, 0x08, 0x10, 0x18, 0x20, 0x28
.byte 0x30, 0x38, 0x40
/* Byte offset of tweak word (n % 3); entries S and S+1 are read. */
threefish512_slut3:
.byte 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08
.byte 0x10, 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00
.byte 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08
/* Encoded rotation constants, one table per word pair, indexed by I % 8.
   The tables must stay contiguous and 8 bytes each: the loader steps
   from one to the next with adiw r30, 8. Values match r0..r3 of the C
   reference comment. */
threefish512_rc0: .byte 0x41, 0x4b, 0x59, 0x41, 0x32, 0x42, 0x60, 0x5a
threefish512_rc1: .byte 0x63, 0x32, 0x33, 0x61, 0x14, 0x2a, 0x24, 0x4a
threefish512_rc2: .byte 0x59, 0x13, 0x51, 0x10, 0x72, 0x29, 0x53, 0x62
threefish512_rc3: .byte 0x43, 0x11, 0x2a, 0x52, 0x19, 0x33, 0x49, 0x7b
/*
 * sub_z_from_x8 -- 64-bit little-endian subtraction in memory:
 *   (8 bytes at X) -= (8 bytes at Z)
 * X and Z are both advanced by 8. r0 and r1 serve as scratch; r1 is
 * cleared again before returning so it can keep acting as zero register.
 */
sub_z_from_x8:
/* least significant byte: plain subtract starts the borrow chain */
ld r0, Z+
ld r1, X
sub r1, r0
st X+, r1
/* remaining seven bytes: subtract with borrow (ld/st leave flags alone) */
.rept 7
ld r0, Z+
ld r1, X
sbc r1, r0
st X+, r1
.endr
clr r1
ret
/*
 * xchg_zx8 -- exchange the 8 bytes at X with the 8 bytes at Z.
 * X and Z are both advanced by 8 on return.
 * Scratch registers: T0 (same register as IDX0), T1 (r0), CNT (r24).
 */
T0 = IDX0
T1 = 0
CNT = 24
xchg_zx8:
ldi CNT, 8
/* swap one byte per iteration */
1: ld T0, X
ld T1, Z
st X+, T1
st Z+, T0
dec CNT
brne 1b
ret

299
threefish_invmix.S Normal file
View File

@ -0,0 +1,299 @@
/* threefish_invmix.S */
/*
This file is part of the AVR-Crypto-Lib.
Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* \author Daniel Otte
* \email daniel.otte@rub.de
* \date 2009-03-21
* \license GPLv3 or later
*/
#include "avr-asm-macros.S"
/*
#define X0 (((uint64_t*)data)[0])
#define X1 (((uint64_t*)data)[1])
void threefish_invmix(void* data, uint8_t rot){
uint64_t x;
x = X1;
x ^= X0;
X1 = ((x>>rot)|(x<<(64-rot)));
X0 -= X1;
}
*/
A0 = 10
A1 = 11
A2 = 12
A3 = 13
A4 = 14
A5 = 15
A6 = 16
A7 = 17
B0 = 18
B1 = 19
B2 = 20
B3 = 21
B4 = 22
B5 = 23
B6 = 24
B7 = 25
vROT = 27
/*
 * threefish_invmix_asm -- inverse MIX step on a pair of 64-bit words,
 * in place (see the C reference in the comment above).
 *
 * param data: r24:r25 (word X0 at bytes 0..7, word X1 at bytes 8..15)
 * param rot: r22 (encoded rotation: bits 4..6 select a whole-byte
 *                 rotation, bit 3 selects the direction of the bit
 *                 rotation, bits 0..2 give the bit rotation count)
 *
 * Nothing is saved or restored here: r0, A0..A7 (r10..r17),
 * B0..B7 (r18..r25), r26, vROT (r27), Y (r28:r29), Z (r30:r31) and the
 * T flag are all overwritten, so callers must preserve what they need.
 * r1 is used as a zero operand (adc r31, r1).
 */
.global threefish_invmix_asm
threefish_invmix_asm:
movw r28, r24
mov vROT,r22
/* A = X0, B = X1 */
ldd A0, Y+ 0
ldd A1, Y+ 1
ldd A2, Y+ 2
ldd A3, Y+ 3
ldd A4, Y+ 4
ldd A5, Y+ 5
ldd A6, Y+ 6
ldd A7, Y+ 7
ldd B0, Y+ 8
ldd B1, Y+ 9
ldd B2, Y+10
ldd B3, Y+11
ldd B4, Y+12
ldd B5, Y+13
ldd B6, Y+14
ldd B7, Y+15
/* B ^= A */
eor B0, A0
eor B1, A1
eor B2, A2
eor B3, A3
eor B4, A4
eor B5, A5
eor B6, A6
eor B7, A7
/* r26 = bits 4..6 of rot; used as index into the jump table. pm_lo8 and
   pm_hi8 yield a word address and every rjmp entry is one word, so the
   index can be added directly. */
mov r26, vROT
swap r26
andi r26, 0x07
ldi r30, pm_lo8(byte_rot_jmptable)
ldi r31, pm_hi8(byte_rot_jmptable)
add r30, r26
adc r31, r1
ijmp
/* every byte rotation variant comes back here */
post_byterot:
/* T = bit 3 of rot: set -> rotate left, clear -> rotate right;
   vROT keeps only the bit count (0..7) */
bst vROT, 3
andi vROT, 0x07
brts 1f
rjmp bit_rotr
1: rjmp bit_rotl
/* both bit rotation loops come back here */
post_bitrot:
/* A -= B, then store A as X0 and B as X1 */
sub A0, B0
sbc A1, B1
sbc A2, B2
sbc A3, B3
sbc A4, B4
sbc A5, B5
sbc A6, B6
sbc A7, B7
std Y+ 0, A0
std Y+ 1, A1
std Y+ 2, A2
std Y+ 3, A3
std Y+ 4, A4
std Y+ 5, A5
std Y+ 6, A6
std Y+ 7, A7
std Y+ 8, B0
std Y+ 9, B1
std Y+10, B2
std Y+11, B3
std Y+12, B4
std Y+13, B5
std Y+14, B6
std Y+15, B7
/* label exit is not referenced anywhere in the visible code */
exit:
ret
/* dispatch table for the byte rotation; entry n handles a rotation by n
   bytes. Only indices 0..7 can occur because the index is masked with
   0x07; the ninth entry is therefore never selected. */
byte_rot_jmptable:
rjmp post_byterot;ret; rjmp byte_rotr_0
rjmp byte_rotr_1
rjmp byte_rotr_2
rjmp byte_rotr_3
rjmp byte_rotr_4
rjmp byte_rotr_5
rjmp byte_rotr_6
rjmp byte_rotr_7
rjmp post_byterot;ret; rjmp byte_rotr_0
/* In the two-line diagrams below the upper row lists the destination
   byte positions and the lower row the source byte each one receives. */
; 0 1 2 3 4 5 6 7
; 1 2 3 4 5 6 7 0
;.global byte_rotr_1
;.global byte_rotr_0
byte_rotr_1: /* 10 words */
mov r0, B0
mov B0, B1
mov B1, B2
mov B2, B3
mov B3, B4
mov B4, B5
mov B5, B6
mov B6, B7
mov B7, r0
byte_rotr_0:
rjmp post_byterot
; 0 1 2 3 4 5 6 7
; 2 3 4 5 6 7 0 1
;.global byte_rotr_2
byte_rotr_2: /* 11 words */
mov r0, B0
mov B0, B2
mov B2, B4
mov B4, B6
mov B6, r0
mov r0, B1
mov B1, B3
mov B3, B5
mov B5, B7
mov B7, r0
rjmp post_byterot
; 0 1 2 3 4 5 6 7
; 3 4 5 6 7 0 1 2
;.global byte_rotr_3
byte_rotr_3: /* 10 words */
mov r0, B0
mov B0, B3
mov B3, B6
mov B6, B1
mov B1, B4
mov B4, B7
mov B7, B2
mov B2, B5
mov B5, r0
rjmp post_byterot
; 0 1 2 3 4 5 6 7
; 4 5 6 7 0 1 2 3
;.global byte_rotr_4
byte_rotr_4: /* 13 words */
mov r0, B0
mov B0, B4
mov B4, r0
mov r0, B1
mov B1, B5
mov B5, r0
mov r0, B2
mov B2, B6
mov B6, r0
mov r0, B3
mov B3, B7
mov B7, r0
rjmp post_byterot
; 0 1 2 3 4 5 6 7
; 5 6 7 0 1 2 3 4
;.global byte_rotr_5
byte_rotr_5: /* 10 words */
mov r0, B0
mov B0, B5
mov B5, B2
mov B2, B7
mov B7, B4
mov B4, B1
mov B1, B6
mov B6, B3
mov B3, r0
rjmp post_byterot
; 0 1 2 3 4 5 6 7
; 6 7 0 1 2 3 4 5
;.global byte_rotr_6
byte_rotr_6: /* 11 words */
mov r0, B0
mov B0, B6
mov B6, B4
mov B4, B2
mov B2, r0
mov r0, B1
mov B1, B7
mov B7, B5
mov B5, B3
mov B3, r0
rjmp post_byterot
; 0 1 2 3 4 5 6 7
; 7 0 1 2 3 4 5 6
;.global byte_rotr_7
byte_rotr_7: /* 10 words */
mov r0, B7
mov B7, B6
mov B6, B5
mov B5, B4
mov B4, B3
mov B3, B2
mov B2, B1
mov B1, B0
mov B0, r0
rjmp post_byterot
/* rotate B left by vROT bits, one bit per pass */
;.global bit_rotl
bit_rotl:
tst vROT
brne 1f
rjmp post_bitrot
/* copy the top bit of B7 into carry so it wraps around into B0 */
1: mov r0, B7
rol r0
rol B0
rol B1
rol B2
rol B3
rol B4
rol B5
rol B6
rol B7
dec vROT
rjmp bit_rotl
/* rotate B right by vROT bits, one bit per pass */
;.global bit_rotr
bit_rotr:
tst vROT
brne 1f
rjmp post_bitrot
/* copy the bottom bit of B0 into carry so it wraps around into B7 */
1: mov r0, B0
ror r0
ror B7
ror B6
ror B5
ror B4
ror B3
ror B2
ror B1
ror B0
dec vROT
rjmp bit_rotr

View File

@ -145,8 +145,8 @@ byte_rot_jmptable:
; 0 1 2 3 4 5 6 7
; 1 2 3 4 5 6 7 0
.global byte_rotr_1
.global byte_rotr_0
;.global byte_rotr_1
;.global byte_rotr_0
byte_rotr_1: /* 10 words */
mov r0, B0
mov B0, B1
@ -162,7 +162,7 @@ byte_rotr_0:
; 0 1 2 3 4 5 6 7
; 2 3 4 5 6 7 0 1
.global byte_rotr_2
;.global byte_rotr_2
byte_rotr_2: /* 11 words */
mov r0, B0
mov B0, B2
@ -178,7 +178,7 @@ byte_rotr_2: /* 11 words */
; 0 1 2 3 4 5 6 7
; 3 4 5 6 7 0 1 2
.global byte_rotr_3
;.global byte_rotr_3
byte_rotr_3: /* 10 words */
mov r0, B0
mov B0, B3
@ -193,7 +193,7 @@ byte_rotr_3: /* 10 words */
; 0 1 2 3 4 5 6 7
; 4 5 6 7 0 1 2 3
.global byte_rotr_4
;.global byte_rotr_4
byte_rotr_4: /* 13 words */
mov r0, B0
mov B0, B4
@ -214,7 +214,7 @@ byte_rotr_4: /* 13 words */
; 0 1 2 3 4 5 6 7
; 5 6 7 0 1 2 3 4
.global byte_rotr_5
;.global byte_rotr_5
byte_rotr_5: /* 10 words */
mov r0, B0
mov B0, B5
@ -229,7 +229,7 @@ byte_rotr_5: /* 10 words */
; 0 1 2 3 4 5 6 7
; 6 7 0 1 2 3 4 5
.global byte_rotr_6
;.global byte_rotr_6
byte_rotr_6: /* 11 words */
mov r0, B0
mov B0, B6
@ -246,7 +246,7 @@ byte_rotr_6: /* 11 words */
; 0 1 2 3 4 5 6 7
; 7 0 1 2 3 4 5 6
.global byte_rotr_7
;.global byte_rotr_7
byte_rotr_7: /* 10 words */
mov r0, B7
mov B7, B6
@ -259,7 +259,7 @@ byte_rotr_7: /* 10 words */
mov B0, r0
rjmp post_byterot
.global bit_rotl
;.global bit_rotl
bit_rotl:
tst vROT
brne 1f
@ -277,7 +277,7 @@ bit_rotl:
dec vROT
rjmp bit_rotl
.global bit_rotr
;.global bit_rotr
bit_rotr:
tst vROT
brne 1f