+threefish256 asm +ubi256 asm +skein256 asm

This commit is contained in:
bg 2009-03-21 15:15:02 +00:00
parent 92725df162
commit 4147d732ef
18 changed files with 2201 additions and 60 deletions

View File

@ -35,7 +35,7 @@
#define UART_LINE_BUFFER_SIZE 40
#define UART_XON_XOFF
#define UART_XON_XOFF_THRESHOLD_1 (UART_RXBUFSIZE - 24)
#define UART_XON_XOFF_THRESHOLD_2 (UART_RXBUFSIZE - 30)
#define UART_XON_XOFF_THRESHOLD_2 (UART_RXBUFSIZE - 60)
#undef UART_LEDS
/*

View File

@ -19,7 +19,7 @@
=end
$debug = false
require 'rubygems'
require 'serialport'
def init_system
@ -49,7 +49,18 @@ def get_md
return line
end
# Writes md_string to the serial port ($sp) one character at a time and
# sleeps for 0.1 s after every 20th character (indices 19, 39, 59, ...).
# NOTE(review): the pause presumably keeps the target's receive buffer from
# overflowing - confirm against the UART settings on the device side.
def send_md(md_string)
  (0..md_string.length-1).each do |pos|
    $sp.print(md_string[pos].chr)
    sleep(0.1) if pos % 20 == 19
  end
end
def run_test(filename)
errors = 0
if not File.exist?(filename)
puts("ERROR file "+filename+" does not exist!")
end
@ -68,7 +79,7 @@ def run_test(filename)
end while not (file.eof or (/[\s]*Msg[\s]*=.*/.match(lb)))
return if file.eof
puts("DBG sending: "+lb) if $debug
$sp.print(lb.strip)
send_md(lb.strip)
avr_md = get_md()
begin
lb=file.gets()
@ -78,10 +89,16 @@ def run_test(filename)
a.upcase!
b.upcase!
puts("") if (pos%$linewidth==0 and $linewidth!=0)
putc((a==b)?'*':'!')
#putc((a==b)?'*':'!')
if(a==b)
putc('*')
else
putc('!')
errors += 1;
end
pos += 1
end
return errors
end
if ARGV.size < 6
@ -92,17 +109,29 @@ EOF
end
puts("\nPort: "+ARGV[0]+ "@"+ARGV[1]+" "+ARGV[2]+"N"+ARGV[3]+"\n");
puts("serial port interface version: " + SerialPort::VERSION);
$linewidth = 64
$sp = SerialPort.new(ARGV[0], ARGV[1].to_i, ARGV[2].to_i, ARGV[3].to_i, SerialPort::NONE);
# Serial line settings: baud rate, data bits and stop bits come from the
# command line (ARGV[1..3]); parity is fixed to "none".
$params = { "baud" => ARGV[1].to_i,
"data_bits" => ARGV[2].to_i,
"stop_bits" => ARGV[3].to_i,
"parity" => SerialPort::NONE }
$sp = SerialPort.new(ARGV[0], $params)
# Previous positional-argument form of the constructor call, kept for reference:
#$sp = SerialPort.new(ARGV[0], ARGV[1].to_i, ARGV[2].to_i, ARGV[3].to_i, SerialPort::NONE);
$sp.read_timeout=1000; # NOTE(review): ruby-serialport documents this value in milliseconds, i.e. 1 second - the former "5 minutes" note did not match; confirm the intended timeout
$sp.flow_control = SerialPort::SOFT
$algo_select = ARGV[4]
#irb
init_system()
for i in (5..(ARGV.size-1))
run_test(ARGV[i])
puts("")
errors = run_test(ARGV[i])
if errors == 0
puts("[ok]")
else
puts("[errors: "+errors.to_s+"]")
end
end
$sp.print("EXIT\r");

View File

@ -1,12 +1,13 @@
# Makefile for Skein
ALGO_NAME := SKEIN_C
ALGO_NAME := SKEIN
# comment out the following line to remove Skein from the build process
HASHES += $(ALGO_NAME)
$(ALGO_NAME)_OBJ := threefish256_enc.o threefish512_enc.o threefish1024_enc.o \
ubi256.o ubi512.o ubi1024.o memxor.o skein256.o skein512.o skein1024.o
$(ALGO_NAME)_OBJ := threefish_mix.o threefish256_enc_asm.o ubi256_asm.o skein256_asm.o \
threefish_mix_4c.o threefish512_enc.o threefish1024_enc.o \
ubi512.o ubi1024.o memxor.o skein512.o skein1024.o
$(ALGO_NAME)_TEST_BIN := main-skein-test.o debug.o uart.o hexdigit_tab.o \
dbz_strings.o nessie_common.o cli.o string-extras.o performance_test.o \
hfal-basic.o hfal_skein256.o hfal_skein512.o hfal_skein1024.o shavs.o

View File

@ -1,11 +1,12 @@
# Makefile for threefish
ALGO_NAME := THREEFISH_C
ALGO_NAME := THREEFISH
# comment out the following line to remove threefish from the build process
BLOCK_CIPHERS += $(ALGO_NAME)
$(ALGO_NAME)_OBJ := threefish256_enc.o threefish512_enc.o threefish1024_enc.o
$(ALGO_NAME)_OBJ := threefish256_enc_asm.o threefish512_enc.o threefish1024_enc.o\
threefish_mix.o threefish_mix_4c.o
$(ALGO_NAME)_TEST_BIN := main-threefish-test.o debug.o uart.o hexdigit_tab.o \
nessie_bc_test.o dbz_strings.o nessie_common.o cli.o string-extras.o performance_test.o
$(ALGO_NAME)_NESSIE_TEST := test nessie

View File

@ -1,12 +1,12 @@
# Makefile for UBI
ALGO_NAME := UBI_C
ALGO_NAME := UBI
# comment out the following line to remove ubi from the build process
HASHES += $(ALGO_NAME)
$(ALGO_NAME)_OBJ := threefish256_enc.o threefish512_enc.o threefish1024_enc.o \
ubi256.o ubi512.o ubi1024.o memxor.o
$(ALGO_NAME)_OBJ := threefish_mix.o threefish256_enc_asm.o ubi256_asm.o threefish512_enc.o threefish1024_enc.o \
threefish_mix_4c.o ubi512.o ubi1024.o memxor.o
$(ALGO_NAME)_TEST_BIN := main-ubi-test.o debug.o uart.o hexdigit_tab.o \
dbz_strings.o nessie_common.o cli.o string-extras.o performance_test.o
$(ALGO_NAME)_NESSIE_TEST := test nessie

343
skein256_asm.S Normal file
View File

@ -0,0 +1,343 @@
/* skein256_asm.S */
/*
This file is part of the AVR-Crypto-Lib.
Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* \author Daniel Otte
* \email daniel.otte@rub.de
* \date 2009-03-16
* \license GPLv3 or later
*/
#include "avr-asm-macros.S"
/******************************************************************************/
/*
void skein256_init(skein256_ctx_t* ctx, uint16_t outsize_b){
skein_config_t conf;
uint8_t null[UBI256_BLOCKSIZE_B];
memset(null, 0, UBI256_BLOCKSIZE_B);
memset(&conf, 0, sizeof(skein_config_t));
conf.schema[0] = 'S';
conf.schema[1] = 'H';
conf.schema[2] = 'A';
conf.schema[3] = '3';
conf.version = 1;
conf.out_length = outsize_b;
ctx->outsize_b = outsize_b;
ubi256_init(&(ctx->ubictx), null, UBI_TYPE_CFG);
ubi256_lastBlock(&(ctx->ubictx), &conf, 256);
ubi256_init(&(ctx->ubictx), ctx->ubictx.g, UBI_TYPE_MSG);
}
*/
/*
* param ctx: r24:r25
* param outsize_b: r22:r23
*/
UBICTX0 = 2
UBICTX1 = 3
CONF0 = 4
CONF1 = 5
.global skein256_init
/*
 * skein256_init - assembly version of the C reference in the comment above.
 *   in: r24:r25 = ctx, r22:r23 = outsize_b
 *
 * Stores outsize_b in the first two bytes of ctx, then runs
 * ubi256_init / ubi256_lastBlock / ubi256_init on the UBI context that
 * starts at ctx+2.
 *
 * Stack frame (64-22 = 42 bytes), addressed through Z:
 *   bytes  0..31  configuration block ("conf"): 10 explicit bytes + zeros
 *   bytes 10..41  32 zero bytes, used as the all-zero chaining value ("null")
 * The two regions overlap on purpose: everything after the first 10
 * configuration bytes is zero anyway.
 * NOTE(review): relies on stack_alloc (avr-asm-macros.S, not shown here)
 * leaving the new stack pointer in r30:r31 - confirm against the macro.
 */
skein256_init:
push_range 2, 5
stack_alloc 64-22
adiw r30, 1
/* CONF = first byte of the stack frame */
movw CONF0, r30
/* ctx->outsize_b = outsize_b; X then points at the UBI context (ctx+2) */
movw r26, r24
st X+, r22
st X+, r23
movw UBICTX0, r26
/* conf bytes 0..3: schema "SHA3" */
ldi r24, 'S'
st Z+, r24
ldi r24, 'H'
st Z+, r24
ldi r24, 'A'
st Z+, r24
ldi r24, '3'
st Z+, r24
/* conf byte 4 = 1 (version), bytes 5..7 = 0 */
ldi r24, 1
st Z+, r24
st Z+, r1
st Z+, r1
st Z+, r1
/* conf bytes 8..9 = outsize_b (low byte first) */
st Z+, r22
st Z+, r23
/* zero the remaining 32 bytes of the frame (bytes 10..41); 24 means r24 */
ldi 24, 22+10
1: st Z+, r1
dec r24
brne 1b
/* ubi256_init(ubictx, <32 zero bytes>, 4): Z-32 = frame byte 10 */
sbiw r30, 32
movw r24, UBICTX0
movw r22, r30
ldi r20, 4
rcall ubi256_init
/* ubi256_lastBlock(ubictx, conf, 256): r21:r20 = 0x0100 */
movw r24, UBICTX0
movw r22, CONF0
ldi r21, 1
clr r20
rcall ubi256_lastBlock
/* ubi256_init(ubictx, ubictx+16, 48); ubictx+16 is ctx->ubictx.g in the C reference */
movw r24, UBICTX0
adiw r24, 16
movw r22, r24
movw r24, UBICTX0
ldi r20, 48
rcall ubi256_init
stack_free 64-22
pop_range 2, 5
ret
/******************************************************************************/
.global skein256_nextBlock
/*
 * skein256_nextBlock: r24:r25 = ctx; all other argument registers are
 * passed through untouched.
 * Advances the context pointer by 2 (skein256_init stores the two
 * outsize_b bytes in front of the UBI context) and tail-jumps to
 * ubi256_nextBlock, which returns directly to our caller.
 */
skein256_nextBlock:
adiw r24, 2
rjmp ubi256_nextBlock
/******************************************************************************/
.global skein256_lastBlock
/*
 * skein256_lastBlock: r24:r25 = ctx; all other argument registers are
 * passed through untouched.
 * Advances the context pointer by 2 (skein256_init stores the two
 * outsize_b bytes in front of the UBI context) and tail-jumps to
 * ubi256_lastBlock, which returns directly to our caller.
 */
skein256_lastBlock:
adiw r24, 2
rjmp ubi256_lastBlock
/******************************************************************************/
/*
void skein256_ctx2hash(void* dest, skein256_ctx_t* ctx){
ubi256_ctx_t uctx;
uint16_t outsize_b;
uint64_t counter=0;
uint8_t outbuffer[UBI256_BLOCKSIZE_B];
ubi256_init(&(ctx->ubictx), ctx->ubictx.g, UBI_TYPE_OUT);
outsize_b = ctx->outsize_b;
while(1){
memcpy(&uctx, &(ctx->ubictx), sizeof(ubi256_ctx_t));
ubi256_lastBlock(&uctx, &counter, 64);
ubi256_ctx2hash(outbuffer, &uctx);
if(outsize_b<=UBI256_BLOCKSIZE){
memcpy(dest, outbuffer, (outsize_b+7)/8);
break;
}else{
memcpy(dest, outbuffer, UBI256_BLOCKSIZE_B);
dest = (uint8_t*)dest + UBI256_BLOCKSIZE_B;
outsize_b -= UBI256_BLOCKSIZE;
counter++;
}
}
}
*/
/*
* param dest: r24:r25
* param ctx: r22:r23
*/
OUTSIZE_B0 = 16
OUTSIZE_B1 = 17
UCTX0 = 14
UCTX1 = 15
UBICTX0 = 12
UBICTX1 = 13
DEST0 = 10
DEST1 = 11
.global skein256_ctx2hash
/*
 * skein256_ctx2hash - write the hash value of a finished context to dest.
 *   in: r24:r25 = dest, r22:r23 = ctx
 *
 * Assembly version of the C reference in the comment above: the UBI
 * context is switched to output mode (type 63), then for every 256 bits of
 * requested output a private copy of the context processes the 64-bit
 * block counter and its chaining value is copied out.
 *
 * Stack frame (88 bytes), addressed relative to UCTX:
 *   +0  .. +47   uctx       working copy of the UBI context
 *   +48 .. +55   counter    64-bit output block counter, little endian
 *   +56 .. +87   outbuffer  one 32-byte output block
 *
 * Register roles (all saved/restored here): DEST = r10:r11,
 * UBICTX = r12:r13 (ctx+2), UCTX = r14:r15, OUTSIZE_B = r16:r17.
 *
 * Fixes relative to the first version of this routine:
 *   - the main loop had no "1:" label, so the closing "rjmp 1b" bound to a
 *     label of the preceding routine (any output > 256 bits went astray);
 *   - after a full block was copied, the source pointer (X) instead of the
 *     destination pointer (Z) was stored as the new dest;
 *   - the high byte of outsize_b was compared with a signed branch.
 */
skein256_ctx2hash:
	push_range 10, 17
	stack_alloc_large 88              /* uctx || counter || outbuffer */
	movw DEST0, r24
	adiw r30, 1
	movw UCTX0, r30
	/* counter = 0 */
	adiw r30, 48
	st Z+, r1
	st Z+, r1
	st Z+, r1
	st Z+, r1
	st Z+, r1
	st Z+, r1
	st Z+, r1
	st Z+, r1
	/* outsize_b = ctx->outsize_b; UBICTX = ctx + 2 */
	movw r26, r22
	ld OUTSIZE_B0, X+
	ld OUTSIZE_B1, X+
	movw UBICTX0, r26
	/* ubi256_init(ubictx, ubictx + 16, 63) */
	movw r24, UBICTX0
	adiw r24, 16
	movw r22, r24
	movw r24, UBICTX0
	ldi r20, 63
	rcall ubi256_init
/* main loop: one output block per pass */
1:	/* copy ubictx into uctx (48 bytes) */
	movw r30, UCTX0
	movw r26, UBICTX0
	ldi r24, 48
2:	ld r25, X+
	st Z+, r25
	dec r24
	brne 2b
	/* ubi256_lastBlock(uctx, &counter, 64) */
	movw r24, UCTX0
	adiw r24, 48
	movw r22, r24
	movw r24, UCTX0
	clr r21
	ldi r20, 64
	rcall ubi256_lastBlock
	/* copy the chaining value (uctx + 16) to outbuffer (uctx + 56) */
	movw r26, UCTX0
	adiw r26, 16
	movw r30, UCTX0
	adiw r30, 56
	ldi r24, 32
2:	ld r25, X+
	st Z+, r25
	dec r24
	brne 2b
	/* outsize_b <= 256 ?  -> last iteration (unsigned comparison) */
	cpi OUTSIZE_B1, 2
	brsh 5f                           /* high byte >= 2: more than 256 bits left */
	cpi OUTSIZE_B1, 1
	brlo 3f                           /* high byte == 0: at most 255 bits left */
	tst OUTSIZE_B0
	breq 3f                           /* exactly 256 bits left */
5:	/* more than 256 bits left: copy the whole outbuffer to dest */
	movw r30, DEST0
	movw r26, UCTX0
	adiw r26, 56
	ldi r24, 32
6:	ld r25, X+
	st Z+, r25
	dec r24
	brne 6b
	/* dest += 32: Z is the write pointer and now sits behind the copied block */
	movw DEST0, r30
	/* outsize_b -= 256 */
	dec OUTSIZE_B1
	/* counter++ (64-bit little endian; dec/ld/st leave the carry flag alone) */
	movw r30, UCTX0
	adiw r30, 48
	ldi r24, 1
	ld r25, Z
	add r25, r24
	st Z+, r25
	ldi r24, 7
6:	ld r25, Z
	adc r25, r1
	st Z+, r25
	dec r24
	brne 6b
	rjmp 1b
3:	/* last iteration: copy (outsize_b + 7) / 8 bytes, at most 32 */
	movw r24, OUTSIZE_B0
	adiw r24, 7
	lsr r25
	ror r24
	lsr r24
	lsr r24
	movw r30, DEST0
	movw r26, UCTX0
	adiw r26, 56
	tst r24
	breq 8f                           /* nothing to copy for outsize_b == 0 */
7:	ld r25, X+
	st Z+, r25
	dec r24
	brne 7b
8:
	stack_free_large 88
	pop_range 10, 17
	ret
/******************************************************************************/
/*
void skein256(void* dest, uint16_t outlength_b, const void* msg, uint32_t length_b){
skein256_ctx_t ctx;
skein256_init(&ctx, outlength_b);
while(length_b>SKEIN256_BLOCKSIZE){
skein256_nextBlock(&ctx, msg);
msg = (uint8_t*)msg + SKEIN256_BLOCKSIZE_B;
length_b -= SKEIN256_BLOCKSIZE;
}
skein256_lastBlock(&ctx, msg, length_b);
skein256_ctx2hash(dest, &ctx);
}
*/
/*
* param dest: r24:r25
* param outlength_b: r22:r23
* param msg: r20:r21
* param length_b: r16:r19
*/
LENGTH_B0 = 2
LENGTH_B1 = 3
LENGTH_B2 = 4
LENGTH_B3 = 5
DEST0 = 6
DEST1 = 7
MSG0 = 8
MSG1 = 9
CTX0 = 10
CTX1 = 11
.global skein256
/*
 * skein256 - one-shot hash; assembly version of the C reference in the
 * comment above.
 *   in: r24:r25 = dest, r22:r23 = outlength_b, r20:r21 = msg,
 *       r16..r19 = length_b (32 bit, in bits)
 *
 * A 50-byte context lives on the stack (2 bytes written by skein256_init
 * in front of the UBI context it sets up at ctx+2).
 * Unlike the C reference, blocks are fed through skein256_nextBlock only
 * while the upper 16 bits of length_b are non-zero; the remaining 16-bit
 * length is handed to skein256_lastBlock in one call.
 * NOTE(review): this relies on ubi256_lastBlock (not shown here) accepting
 * lengths above 256 bits - confirm against its implementation.
 */
skein256:
push_range 2, 11
stack_alloc 50
adiw r30, 1
/* CTX = first byte of the stack frame; park the arguments in saved registers */
movw CTX0, r30
movw DEST0, r24
movw MSG0, r20
movw LENGTH_B0, r16
movw LENGTH_B2, r18
/* call skein256_init; r22:r23 still holds outlength_b */
movw r24, r30
rcall skein256_init
/* loop head: leave the loop once length_b fits into 16 bits */
1: tst LENGTH_B2
brne 4f
tst LENGTH_B3
brne 4f
/* call skein256_lastBlock with the low 16 bits of length_b */
movw r24, CTX0
movw r22, MSG0
movw r20, LENGTH_B0
rcall skein256_lastBlock
/* call skein256_ctx2hash */
movw r24, DEST0
movw r22, CTX0
rcall skein256_ctx2hash
/* return */
stack_free 50
pop_range 2, 11
ret
4: /* process preceding blocks */
movw r24, CTX0
movw r22, MSG0
rcall skein256_nextBlock
/* msg += 32 bytes */
movw r24, MSG0
adiw r24, 32
movw MSG0, r24
/* length_b -= 256: subtract 1 from the 24-bit value in bytes 1..3 */
mov r24, LENGTH_B1
mov r25, LENGTH_B2
sbiw r24, 1
sbc LENGTH_B3, r1
mov LENGTH_B1, r24
mov LENGTH_B2, r25
rjmp 1b

View File

@ -53,16 +53,19 @@ void testrun_stdtest_skein256(uint16_t outsize_b){
message[i] = 0xFF-i;
cli_putstr_P(PSTR("\r\nmessage: "));
cli_hexdump(message, 1);
skein256(hash, outsize_b, message, 8);
cli_putstr_P(PSTR("\r\nhash:"));
cli_hexdump_block(hash, (outsize_b+7)/8, 4, 16);
cli_putstr_P(PSTR("\r\nmessage:"));
cli_hexdump_block(message, 32, 4, 16);
skein256(hash, outsize_b, message, 32*8);
cli_putstr_P(PSTR("\r\nhash:"));
cli_hexdump_block(hash, (outsize_b+7)/8, 4, 16);
cli_putstr_P(PSTR("\r\nmessage:"));
cli_hexdump_block(message, 64, 4, 16);
skein256(hash, outsize_b, message, 64*8);
cli_putstr_P(PSTR("\r\nhash:"));
cli_hexdump_block(hash, (outsize_b+7)/8, 4, 16);
@ -84,11 +87,13 @@ void testrun_stdtest_skein512(uint16_t outsize_b){
cli_hexdump_block(hash, (outsize_b+7)/8, 4, 16);
cli_putstr_P(PSTR("\r\nmessage:"));
cli_hexdump_block(message, 64, 4, 16);
skein512(hash, outsize_b, message, 64*8);
cli_putstr_P(PSTR("\r\nhash:"));
cli_hexdump_block(hash, (outsize_b+7)/8, 4, 16);
cli_putstr_P(PSTR("\r\nmessage:"));
cli_hexdump_block(message, 128, 4, 16);
skein512(hash, outsize_b, message, 128*8);
cli_putstr_P(PSTR("\r\nhash:"));
cli_hexdump_block(hash, (outsize_b+7)/8, 4, 16);
@ -110,6 +115,7 @@ void testrun_stdtest_skein1024(uint16_t outsize_b){
cli_hexdump_block(hash, (outsize_b+7)/8, 4, 16);
cli_putstr_P(PSTR("\r\nmessage:"));
cli_hexdump_block(message, 128, 4, 16);
skein1024(hash, outsize_b, message, 128*8);
cli_putstr_P(PSTR("\r\nhash:"));
cli_hexdump_block(hash, (outsize_b+7)/8, 4, 16);
@ -228,7 +234,12 @@ int main (void){
for(;;){
cli_putstr_P(PSTR("\r\n\r\nCrypto-VS ("));
cli_putstr(algo_name);
cli_putstr_P(PSTR("; "));
cli_putstr(__DATE__);
cli_putstr_P(PSTR(" "));
cli_putstr(__TIME__);
cli_putstr_P(PSTR(")\r\nloaded and running\r\n"));
cmd_interface(cmdlist);
}
}

View File

@ -61,7 +61,17 @@ void testrun_stdtest_threefish256(void){
threefish256_enc(data, &ctx);
cli_putstr_P(PSTR("\r\ncipher: "));
cli_hexdump(data, 32);
/*
cli_hexdump_rev(data, 8);
cli_putc(' ');
cli_hexdump_rev(data+8, 8);
cli_putc(' ');
cli_hexdump_rev(data+16, 8);
cli_putc(' ');
cli_hexdump_rev(data+24, 8);
cli_putc(' ');
*/
/* second test */
for(i=0; i<32; ++i){
key[i] = 0x10+i;
data[i] = 0xFF-i;
@ -323,18 +333,33 @@ void testrun_performance_threefish(void){
testrun_performance_threefish1024();
}
/**
 * Runs threefish256_init() on an all-zero key and an all-zero tweak and
 * hex-dumps the resulting context on the CLI: 5*8 bytes of ctx.k followed
 * by 3*8 bytes of ctx.t.
 */
void init_test(void){
	uint8_t zero_key[32] = {0};
	uint8_t zero_tweak[16] = {0};
	threefish256_ctx_t schedule;

	threefish256_init(zero_key, zero_tweak, &schedule);

	cli_putstr_P(PSTR("\r\n ctx: \r\n\tk:"));
	cli_hexdump(schedule.k, 5*8);

	cli_putstr_P(PSTR("\r\n\tt:"));
	cli_hexdump(schedule.t, 3*8);
}
/*****************************************************************************
* main *
*****************************************************************************/
const char nessie_str[] PROGMEM = "nessie";
const char test_str[] PROGMEM = "test";
const char inittest_str[] PROGMEM = "inittest";
const char performance_str[] PROGMEM = "performance";
const char echo_str[] PROGMEM = "echo";
cmdlist_entry_t cmdlist[] PROGMEM = {
// { nessie_str, NULL, testrun_nessie_noekeon},
{ test_str, NULL, testrun_stdtest_threefish},
{ inittest_str, NULL, init_test},
{ performance_str, NULL, testrun_performance_threefish},
{ echo_str, (void*)1, (void_fpt)echo_ctrl},
{ NULL, NULL, NULL}

View File

@ -54,6 +54,8 @@ typedef struct{
} threefish1024_ctx_t;
void threefish_mix(void* data, uint8_t rot);
void threefish256_init_c(void* key, void* tweak, threefish256_ctx_t* ctx);
void threefish256_init(void* key, void* tweak, threefish256_ctx_t* ctx);
void threefish512_init(void* key, void* tweak, threefish512_ctx_t* ctx);

View File

@ -30,16 +30,6 @@
#include <string.h>
#include "threefish.h"
#define X0 (((uint64_t*)data)[0])
#define X1 (((uint64_t*)data)[1])
static
void mix(void* data, uint8_t rot){
uint64_t x;
x = X1;
X0 += x;
X1 = ((x<<rot)|(x>>(64-rot))) ^ X0;
}
#define X(a) (((uint64_t*)data)[(a)])
static
@ -108,14 +98,14 @@ void threefish1024_enc(void* data, threefish1024_ctx_t* ctx){
add_key_16(data, ctx, s);
++s;
}
mix((uint8_t*)data + 0, r0[i%8]);
mix((uint8_t*)data + 16, r1[i%8]);
mix((uint8_t*)data + 32, r2[i%8]);
mix((uint8_t*)data + 48, r3[i%8]);
mix((uint8_t*)data + 64, r4[i%8]);
mix((uint8_t*)data + 80, r5[i%8]);
mix((uint8_t*)data + 96, r6[i%8]);
mix((uint8_t*)data +112, r7[i%8]);
threefish_mix((uint8_t*)data + 0, r0[i%8]);
threefish_mix((uint8_t*)data + 16, r1[i%8]);
threefish_mix((uint8_t*)data + 32, r2[i%8]);
threefish_mix((uint8_t*)data + 48, r3[i%8]);
threefish_mix((uint8_t*)data + 64, r4[i%8]);
threefish_mix((uint8_t*)data + 80, r5[i%8]);
threefish_mix((uint8_t*)data + 96, r6[i%8]);
threefish_mix((uint8_t*)data +112, r7[i%8]);
permute_16(data);
++i;
}while(i!=80);

View File

@ -30,16 +30,6 @@
#include <string.h>
#include "threefish.h"
#define X0 (((uint64_t*)data)[0])
#define X1 (((uint64_t*)data)[1])
static
void mix(void* data, uint8_t rot){
uint64_t x;
x = X1;
X0 += x;
X1 = ((x<<rot)|(x>>(64-rot))) ^ X0;
}
#define X(a) (((uint64_t*)data)[(a)])
static
void permute_4(void* data){
@ -82,8 +72,8 @@ void threefish256_enc(void* data, threefish256_ctx_t* ctx){
add_key_4(data, ctx, s);
++s;
}
mix(data, r0[i%8]);
mix((uint8_t*)data + 16, r1[i%8]);
threefish_mix(data, r0[i%8]);
threefish_mix((uint8_t*)data + 16, r1[i%8]);
permute_4(data);
++i;
}while(i!=72);

411
threefish256_enc_asm.S Normal file
View File

@ -0,0 +1,411 @@
/* threefish256_enc_asm.S */
/*
This file is part of the AVR-Crypto-Lib.
Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* \author Daniel Otte
* \email daniel.otte@rub.de
* \date 2009-03-16
* \license GPLv3 or later
*/
#include "avr-asm-macros.S"
/******************************************************************************/
A0 = 14
A1 = 15
A2 = 16
A3 = 17
A4 = 18
A5 = 19
A6 = 20
A7 = 21
/*
#define THREEFISH_KEY_CONST 0x5555.5555.5555.5555.LL / * 2**64/3 * /
#define K(s) (((uint64_t*)key)[(s)])
#define T(s) (((uint64_t*)tweak)[(s)])
void threefish256_init(void* key, void* tweak, threefish256_ctx_t* ctx){
memcpy(ctx->k, key, 4*8);
memcpy(ctx->t, tweak, 2*8);
uint8_t i;
ctx->k[4] = THREEFISH_KEY_CONST;
for(i=0; i<4; ++i){
ctx->k[4] ^= K(i);
}
ctx->t[2] = T(0) ^ T(1);
}
*/
/*
* param key: r24:r25
* param tweak: r22:r23
* param ctx: r20:r21
*/
.global threefish256_init
/*
 * threefish256_init - build the Threefish-256 context.
 *   in: r24:r25 = key (32 bytes), r22:r23 = tweak (16 bytes), r20:r21 = ctx
 *
 * Output layout written through Z, 64 bytes in total:
 *   k[0..3] = key, k[4] = 0x5555555555555555 ^ k[0] ^ k[1] ^ k[2] ^ k[3],
 *   t[0..1] = tweak, t[2] = t[0] ^ t[1]
 * A0..A7 (r14..r21) hold the running 64-bit XOR; r0 is scratch.
 * The .irp blocks expand to the same straight-line code as writing every
 * byte lane out by hand.
 */
threefish256_init:
	push_range 14, 17
	movw r30, r20                     /* Z = ctx */
	movw r26, r24                     /* X = key */
	ldi r24, 4                        /* four key words to go */
	/* A7..A0 = 0x55 in every byte (A7/A6 are r21/r20, free once Z is set) */
	ldi A7, 0x55
	mov A6, A7
	movw A4, A6
	movw A2, A6
	movw A0, A6
1:
	/* copy one key word to ctx and fold it into the accumulator */
	.irp lane, A0, A1, A2, A3, A4, A5, A6, A7
	ld r0, X+
	st Z+, r0
	eor \lane, r0
	.endr
	dec r24
	brne 1b
	/* k[4] = accumulator */
	.irp lane, A0, A1, A2, A3, A4, A5, A6, A7
	st Z+, \lane
	.endr
	/* now the tweak */
	movw r26, r22                     /* X = tweak */
	/* t[0]: load into the accumulator ... */
	.irp lane, A0, A1, A2, A3, A4, A5, A6, A7
	ld \lane, X+
	.endr
	/* ... and store it */
	.irp lane, A0, A1, A2, A3, A4, A5, A6, A7
	st Z+, \lane
	.endr
	/* t[1]: copy it and XOR it onto t[0] in the accumulator */
	.irp lane, A0, A1, A2, A3, A4, A5, A6, A7
	ld r0, X+
	eor \lane, r0
	st Z+, r0
	.endr
	/* t[2] = t[0] ^ t[1] */
	.irp lane, A0, A1, A2, A3, A4, A5, A6, A7
	st Z+, \lane
	.endr
	pop_range 14, 17
	ret
/******************************************************************************/
/*
#define X(a) (((uint64_t*)data)[(a)])
void permute_4(void* data){
uint64_t t;
t = X(1);
X(1) = X(3);
X(3) = t;
}
void add_key_4(void* data, threefish256_ctx_t* ctx, uint8_t s){ /* s: 0..19 * /
X(0) += ctx->k[(s+0)%5];
X(1) += ctx->k[(s+1)%5] + ctx->t[s%3];
X(2) += ctx->k[(s+2)%5] + ctx->t[(s+1)%3];
X(3) += ctx->k[(s+3)%5] + s;
}
void threefish256_enc(void* data, threefish256_ctx_t* ctx){
uint8_t i=0,s=0;
uint8_t r0[8] = { 5, 36, 13, 58, 26, 53, 11, 59};
uint8_t r1[8] = {56, 28, 46, 44, 20, 35, 42, 50};
do{
if(i%4==0){
add_key_4(data, ctx, s);
++s;
}
threefish_mix(data, r0[i%8]);
threefish_mix((uint8_t*)data + 16, r1[i%8]);
permute_4(data);
++i;
}while(i!=72);
add_key_4(data, ctx, s);
}
*/
I = 2
S = 3
DATA0 = 4
DATA1 = 5
CTX0 = 6
CTX1 = 7
IDX0 = 8
IDX1 = 9
IDX2 = 10
IDX3 = 11
/*
* param data: r24:r25
* param ctx: r22:r23
*/
.global threefish256_enc
/*
 * threefish256_enc - encrypt one 32-byte block in place.
 *   in: r24:r25 = data, r22:r23 = ctx
 *
 * Register roles: I (r2) = round counter, S (r3) = subkey counter,
 * DATA (r4:r5), CTX (r6:r7), IDX0..IDX3 (r8..r11) = table offsets and
 * scratch.  r2..r17 and r28/r29 are saved here because this routine and
 * threefish_mix_asm overwrite them.
 *
 * Flow: whenever I is a multiple of 4 a subkey is added (add_key_4 in the
 * C reference above); after the 19th subkey (S == 19) the routine returns,
 * otherwise one round (two mixes + word permutation) follows.
 * The .rept/.irp blocks expand to the same straight-line code as the
 * hand-unrolled form.
 */
threefish256_enc:
	push r28
	push r29
	push_range 2, 17
	movw DATA0, r24
	movw CTX0, r22
	clr I
	clr S
1:
	/* subkey due?  (I & 3) == 0 */
	mov r30, I
	andi r30, 0x03
	breq 2f
	rjmp 4f
2:
	/* IDX0..IDX3 = byte offsets of k[(S+0..3) % 5] inside ctx */
	ldi r30, lo8(threefish256_slut5)
	ldi r31, hi8(threefish256_slut5)
	mov r26, S
	add r30, r26
	adc r31, r1
	lpm IDX0, Z+
	lpm IDX1, Z+
	lpm IDX2, Z+
	lpm IDX3, Z
	/* X(0) += k[(S+0) % 5]; X walks on to the next data word by itself */
	movw r30, CTX0
	movw r26, DATA0
	add r30, IDX0
	adc r31, r1
	rcall add_z_to_x8
	/* X(1..3) += k[(S+1..3) % 5] */
	.irp koff, IDX1, IDX2, IDX3
	movw r30, CTX0
	add r30, \koff
	adc r31, r1
	rcall add_z_to_x8
	.endr
	/* now the tweak words: step X back from data+32 to X(1) */
	sbiw r26, 3*8
	ldi r30, lo8(threefish256_slut3)
	ldi r31, hi8(threefish256_slut3)
	add r30, S
	adc r31, r1
	lpm IDX0, Z+                      /* offset of t[S % 3] */
	lpm IDX1, Z                       /* offset of t[(S+1) % 3] */
	movw r30, CTX0
	adiw r30, 5*8
	movw IDX2, r30                    /* IDX3:IDX2 = address of ctx->t */
	add r30, IDX0
	adc r31, r1
	rcall add_z_to_x8                 /* X(1) += t[S % 3] */
	movw r30, IDX2
	add r30, IDX1
	adc r31, r1
	rcall add_z_to_x8                 /* X(2) += t[(S+1) % 3] */
	/* X(3) += S, carry rippled through the upper seven bytes */
	ld r0, X
	add r0, S
	st X+, r0
	.rept 7
	ld r0, X
	adc r0, r1
	st X+, r0
	.endr
	inc S
	mov r26, S
	cpi r26, 19
	brmi 4f                           /* fewer than 19 subkeys added: next round */
exit:
	pop_range 2, 17
	pop r29
	pop r28
	ret
4:
	/* call mix: r22 = rc0[I & 7], IDX0 = rc1[I & 7] (8 bytes further on) */
	ldi r30, lo8(threefish256_rc0)
	ldi r31, hi8(threefish256_rc0)
	mov r26, I
	andi r26, 0x07
	add r30, r26
	adc r31, r1
	lpm r22, Z
	adiw r30, 8
	lpm IDX0, Z
	movw r24, DATA0
	call threefish_mix_asm /* no rcall? */
	movw r24, DATA0
	adiw r24, 16
	mov r22, IDX0
	call threefish_mix_asm /* no rcall? */
	/* now the permutation: swap X(1) (data+8) with X(3) (data+24) */
	movw r26, DATA0
	adiw r26, 8
	movw r30, r26
	adiw r30, 16
	.rept 8
	ld IDX0, X
	ld IDX1, Z
	st X+, IDX1
	st Z+, IDX0
	.endr
	inc I
	rjmp 1b
/*
 * Lookup tables in program memory, read with lpm by threefish256_enc.
 *
 * threefish256_slut5[n] = (n % 5) * 8: byte offset of key word n % 5.
 * Read at indices S .. S+3.
 */
threefish256_slut5:
.byte 0x00, 0x08, 0x10, 0x18, 0x20, 0x00, 0x08, 0x10
.byte 0x18, 0x20, 0x00, 0x08, 0x10, 0x18, 0x20, 0x00
.byte 0x08, 0x10, 0x18, 0x20, 0x00, 0x08, 0x10
/* threefish256_slut3[n] = (n % 3) * 8: byte offset of tweak word n % 3. Read at indices S and S+1. */
threefish256_slut3:
.byte 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08
.byte 0x10, 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00
.byte 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08
/* Plain rotation amounts (r0[] / r1[] of the C reference), kept for reference: */
;threefish256_rc0: .byte 5, 36, 13, 58, 26, 53, 11, 59
;threefish256_rc1: .byte 56, 28, 46, 44, 20, 35, 42, 50
/*
 * The same amounts in the encoding passed to threefish_mix_asm in r22:
 * high nibble = whole bytes to rotate left, bit 3 = direction of the bit
 * rotation (set: right, clear: left), bits 2..0 = number of bits.
 * Example: 0x1b = left 1 byte, right 3 bits = left 5.
 * rc1 must follow rc0 directly: it is read at rc0 + 8.
 */
threefish256_rc0: .byte 0x1b, 0x44, 0x2b, 0x72, 0x32, 0x7b, 0x13, 0x73
threefish256_rc1: .byte 0x70, 0x34, 0x6a, 0x54, 0x24, 0x43, 0x52, 0x62
/*
 * add_z_to_x8 - 64-bit little-endian addition in memory: [X] += [Z].
 *   in:  X = destination word, Z = source word
 *   out: X and Z both advanced by 8
 * Uses r0 and r1 as scratch; r1 is cleared again before returning.
 * ld/st leave the carry flag untouched, so the carry ripples through all
 * eight byte steps.  The .rept block expands to the same seven
 * add-with-carry steps as writing them out by hand.
 */
add_z_to_x8:
	/* lowest byte: plain add */
	ld r0, Z+
	ld r1, X
	add r1, r0
	st X+, r1
	/* bytes 1..7: add with carry */
	.rept 7
	ld r0, Z+
	ld r1, X
	adc r1, r0
	st X+, r1
	.endr
	clr r1
	ret

350
threefish256_enc_small.S Normal file
View File

@ -0,0 +1,350 @@
/* threefish256_enc_small.S */
/*
This file is part of the AVR-Crypto-Lib.
Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* \author Daniel Otte
* \email daniel.otte@rub.de
* \date 2009-03-16
* \license GPLv3 or later
*/
#include "avr-asm-macros.S"
/******************************************************************************/
A0 = 14
A1 = 15
A2 = 16
A3 = 17
A4 = 18
A5 = 19
A6 = 20
A7 = 21
/*
#define THREEFISH_KEY_CONST 0x5555.5555.5555.5555.LL / * 2**64/3 * /
#define K(s) (((uint64_t*)key)[(s)])
#define T(s) (((uint64_t*)tweak)[(s)])
void threefish256_init(void* key, void* tweak, threefish256_ctx_t* ctx){
memcpy(ctx->k, key, 4*8);
memcpy(ctx->t, tweak, 2*8);
uint8_t i;
ctx->k[4] = THREEFISH_KEY_CONST;
for(i=0; i<4; ++i){
ctx->k[4] ^= K(i);
}
ctx->t[2] = T(0) ^ T(1);
}
*/
/*
* param key: r24:r25
* param tweak: r22:r23
* param ctx: r20:r21
*/
.global threefish256_init
/*
 * threefish256_init - build the Threefish-256 context.
 *   in: r24:r25 = key (32 bytes), r22:r23 = tweak (16 bytes), r20:r21 = ctx
 *
 * Output layout written through Z, 64 bytes in total:
 *   k[0..3] = key, k[4] = 0x5555555555555555 ^ k[0] ^ k[1] ^ k[2] ^ k[3],
 *   t[0..1] = tweak, t[2] = t[0] ^ t[1]
 * A0..A7 (r14..r21) hold the running 64-bit XOR; r0 is scratch.
 * The .irp blocks expand to the same straight-line code as writing every
 * byte lane out by hand.
 */
threefish256_init:
	push_range 14, 17
	movw r30, r20                     /* Z = ctx */
	movw r26, r24                     /* X = key */
	ldi r24, 4                        /* four key words to go */
	/* A7..A0 = 0x55 in every byte (A7/A6 are r21/r20, free once Z is set) */
	ldi A7, 0x55
	mov A6, A7
	movw A4, A6
	movw A2, A6
	movw A0, A6
1:
	/* copy one key word to ctx and fold it into the accumulator */
	.irp lane, A0, A1, A2, A3, A4, A5, A6, A7
	ld r0, X+
	st Z+, r0
	eor \lane, r0
	.endr
	dec r24
	brne 1b
	/* k[4] = accumulator */
	.irp lane, A0, A1, A2, A3, A4, A5, A6, A7
	st Z+, \lane
	.endr
	/* now the tweak */
	movw r26, r22                     /* X = tweak */
	/* t[0]: load into the accumulator ... */
	.irp lane, A0, A1, A2, A3, A4, A5, A6, A7
	ld \lane, X+
	.endr
	/* ... and store it */
	.irp lane, A0, A1, A2, A3, A4, A5, A6, A7
	st Z+, \lane
	.endr
	/* t[1]: copy it and XOR it onto t[0] in the accumulator */
	.irp lane, A0, A1, A2, A3, A4, A5, A6, A7
	ld r0, X+
	eor \lane, r0
	st Z+, r0
	.endr
	/* t[2] = t[0] ^ t[1] */
	.irp lane, A0, A1, A2, A3, A4, A5, A6, A7
	st Z+, \lane
	.endr
	pop_range 14, 17
	ret
/******************************************************************************/
/*
#define X(a) (((uint64_t*)data)[(a)])
void permute_4(void* data){
uint64_t t;
t = X(1);
X(1) = X(3);
X(3) = t;
}
void add_key_4(void* data, threefish256_ctx_t* ctx, uint8_t s){ /* s: 0..19 * /
X(0) += ctx->k[(s+0)%5];
X(1) += ctx->k[(s+1)%5] + ctx->t[s%3];
X(2) += ctx->k[(s+2)%5] + ctx->t[(s+1)%3];
X(3) += ctx->k[(s+3)%5] + s;
}
void threefish256_enc(void* data, threefish256_ctx_t* ctx){
uint8_t i=0,s=0;
uint8_t r0[8] = { 5, 36, 13, 58, 26, 53, 11, 59};
uint8_t r1[8] = {56, 28, 46, 44, 20, 35, 42, 50};
do{
if(i%4==0){
add_key_4(data, ctx, s);
++s;
}
threefish_mix(data, r0[i%8]);
threefish_mix((uint8_t*)data + 16, r1[i%8]);
permute_4(data);
++i;
}while(i!=72);
add_key_4(data, ctx, s);
}
*/
I = 2
S = 3
DATA0 = 4
DATA1 = 5
CTX0 = 6
CTX1 = 7
IDX0 = 8
IDX1 = 9
IDX2 = 10
IDX3 = 11
/*
* param data: r24:r25
* param ctx: r22:r23
*/
.global threefish256_enc
/*
 * threefish256_enc (size-optimised variant) - encrypt one 32-byte block
 * in place.
 *   in: r24:r25 = data, r22:r23 = ctx
 *
 * Register roles: I (r2) = round counter, S (r3) = subkey counter,
 * DATA (r4:r5), CTX (r6:r7), IDX0..IDX3 (r8..r11) = table offsets and
 * scratch, r16 = loop counter.  r2..r17 and r28/r29 are saved here.
 *
 * Flow: whenever I is a multiple of 4 a subkey is added (add_key_4 in the
 * C reference above); after the 19th subkey (S == 19) the routine returns,
 * otherwise one round (two mixes + word permutation) follows.
 */
threefish256_enc:
push r28
push r29
push_range 2, 17
movw DATA0, r24
movw CTX0, r22
clr I
clr S
1:
/* subkey due?  (I & 3) == 0 */
mov r30, I
andi r30, 0x03
breq 2f
rjmp 4f
2:
/* IDX0..IDX3 = byte offsets of k[(S+0..3) % 5] inside ctx */
ldi r30, lo8(threefish256_slut5)
ldi r31, hi8(threefish256_slut5)
mov r26, S
add r30, r26
adc r31, r1
lpm IDX0, Z+
lpm IDX1, Z+
lpm IDX2, Z+
lpm IDX3, Z
/* X(0..3) += k[(S+0..3) % 5]; add_z_to_x8 advances X by one word per call */
movw r30, CTX0
movw r26, DATA0
add r30, IDX0
adc r31, r1
rcall add_z_to_x8
movw r30, CTX0
add r30, IDX1
adc r31, r1
rcall add_z_to_x8
movw r30, CTX0
add r30, IDX2
adc r31, r1
rcall add_z_to_x8
movw r30, CTX0
add r30, IDX3
adc r31, r1
rcall add_z_to_x8
/* now the tweak words: step X back from data+32 to X(1) */
sbiw r26, 3*8
ldi r30, lo8(threefish256_slut3)
ldi r31, hi8(threefish256_slut3)
add r30, S
adc r31, r1
lpm IDX0, Z+
lpm IDX1, Z
/* IDX3:IDX2 = address of ctx->t (ctx + 5*8) */
movw r30, CTX0
adiw r30, 5*8
movw IDX2, r30
/* X(1) += t[S % 3] */
add r30, IDX0
adc r31, r1
rcall add_z_to_x8
/* X(2) += t[(S+1) % 3] */
movw r30, IDX2
add r30, IDX1
adc r31, r1
rcall add_z_to_x8
/* X(3) += S; ldi/ld/st/dec/brne leave the carry flag untouched, so the
   carry ripples through the seven upper bytes inside the loop */
ld r0, X
add r0, S
st X+, r0
ldi r16, 7
3:
ld r0, X
adc r0, r1
st X+, r0
dec r16
brne 3b
inc S
mov r26, S
cpi r26, 19
/* fewer than 19 subkeys added: continue with the next round */
brmi 4f
exit:
pop_range 2, 17
pop r29
pop r28
ret
4:
/* call mix: r22 = rc0[I & 7], IDX0 = rc1[I & 7] (8 bytes further on) */
ldi r30, lo8(threefish256_rc0)
ldi r31, hi8(threefish256_rc0)
mov r26, I
andi r26, 0x07
add r30, r26
adc r31, r1
lpm r22, Z
adiw r30, 8
lpm IDX0, Z
/* mix words 0/1 with rc0, then words 2/3 (data+16) with rc1 */
movw r24, DATA0
call threefish_mix_asm /* no rcall? */
movw r24, DATA0
adiw r24, 16
mov r22, IDX0
call threefish_mix_asm /* no rcall? */
/* now the permutation: swap X(1) (data+8) with X(3) (data+24), byte by byte */
movw r26, DATA0
adiw r26, 8
movw r30, r26
adiw r30, 16
ldi r16, 8
3: ld IDX0, X
ld IDX1, Z
st X+, IDX1
st Z+, IDX0
dec r16
brne 3b
inc I
rjmp 1b
/*
 * Lookup tables in program memory, read with lpm by threefish256_enc.
 *
 * threefish256_slut5[n] = (n % 5) * 8: byte offset of key word n % 5.
 * Read at indices S .. S+3.
 */
threefish256_slut5:
.byte 0x00, 0x08, 0x10, 0x18, 0x20, 0x00, 0x08, 0x10
.byte 0x18, 0x20, 0x00, 0x08, 0x10, 0x18, 0x20, 0x00
.byte 0x08, 0x10, 0x18, 0x20, 0x00, 0x08, 0x10
/* threefish256_slut3[n] = (n % 3) * 8: byte offset of tweak word n % 3. Read at indices S and S+1. */
threefish256_slut3:
.byte 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08
.byte 0x10, 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00
.byte 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08
/*
 * Rotation amounts r0[] = 5, 36, 13, 58, 26, 53, 11, 59 and
 * r1[] = 56, 28, 46, 44, 20, 35, 42, 50 of the C reference, in the
 * encoding passed to threefish_mix_asm in r22: high nibble = whole bytes
 * to rotate left, bit 3 = direction of the bit rotation (set: right,
 * clear: left), bits 2..0 = number of bits.
 * Example: 0x1b = left 1 byte, right 3 bits = left 5.
 * rc1 must follow rc0 directly: it is read at rc0 + 8.
 */
threefish256_rc0: .byte 0x1b, 0x44, 0x2b, 0x72, 0x32, 0x7b, 0x13, 0x73
threefish256_rc1: .byte 0x70, 0x34, 0x6a, 0x54, 0x24, 0x43, 0x52, 0x62
/*
 * add_z_to_x8 - 64-bit little-endian addition in memory: [X] += [Z].
 *   in:  X = destination word, Z = source word
 *   out: X and Z both advanced by 8
 * Uses r0, r1 and r16 (loop counter) as scratch; r1 is cleared again
 * before returning.
 */
add_z_to_x8:
/* lowest byte: plain add */
ld r0, Z+
ld r1, X
add r1, r0
st X+, r1
/* bytes 1..7: add with carry; ldi, ld, st, dec and brne do not modify the
   carry flag, so it survives from one pass to the next */
ldi r16, 7
1:
ld r0, Z+
ld r1, X
adc r1, r0
st X+, r1
dec r16
brne 1b
clr r1
ret

View File

@ -30,18 +30,10 @@
#include <string.h>
#include "threefish.h"
#define X0 (((uint64_t*)data)[0])
#define X1 (((uint64_t*)data)[1])
static
void mix(void* data, uint8_t rot){
uint64_t x;
x = X1;
X0 += x;
X1 = ((x<<rot)|(x>>(64-rot))) ^ X0;
}
#define X(a) (((uint64_t*)data)[(a)])
static
void permute_8(void* data){
uint64_t t;
@ -107,10 +99,10 @@ void threefish512_enc(void* data, threefish512_ctx_t* ctx){
add_key_8(data, ctx, s);
++s;
}
mix((uint8_t*)data + 0, r0[i%8]);
mix((uint8_t*)data + 16, r1[i%8]);
mix((uint8_t*)data + 32, r2[i%8]);
mix((uint8_t*)data + 48, r3[i%8]);
threefish_mix((uint8_t*)data + 0, r0[i%8]);
threefish_mix((uint8_t*)data + 16, r1[i%8]);
threefish_mix((uint8_t*)data + 32, r2[i%8]);
threefish_mix((uint8_t*)data + 48, r3[i%8]);
permute_8(data);
++i;
}while(i!=72);

303
threefish_mix.S Normal file
View File

@ -0,0 +1,303 @@
/* threefish_mix.S */
/*
This file is part of the AVR-Crypto-Lib.
Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* \author Daniel Otte
* \email daniel.otte@rub.de
* \date 2009-03-16
* \license GPLv3 or later
*/
#include "avr-asm-macros.S"
/*
#define B0 (((uint64_t*)data)[0])
#define B1 (((uint64_t*)data)[1])
static
void mix(void* data, uint8_t rot){
uint64_t x;
x = B1;
B0 += x;
B1 = ((x<<rot)|(x>>(64-rot))) ^ B0;
}
*/
A0 = 10
A1 = 11
A2 = 12
A3 = 13
A4 = 14
A5 = 15
A6 = 16
A7 = 17
B0 = 18
B1 = 19
B2 = 20
B3 = 21
B4 = 22
B5 = 23
B6 = 24
B7 = 25
vROT = 27
/*
* param data: r24:r25
* param rot: r22
*/
.global threefish_mix_asm
/*
 * threefish_mix_asm - Threefish MIX step on two adjacent 64-bit words
 * (see the C reference "mix" in the comment above).
 *   in: r24:r25 = data (16 bytes), r22 = rotation in encoded form:
 *         bits 6..4  index into byte_rot_jmptable (whole-byte rotation)
 *         bit  3     set: finish with bit_rotr, clear: finish with bit_rotl
 *         bits 2..0  number of single-bit rotations
 *
 * A0..A7 (r10..r17) hold the first word, B0..B7 (r18..r25) the second,
 * least significant byte first.
 * Nothing is saved in here: r0, r10..r27, Y (r28:r29) and Z (r30:r31) are
 * overwritten, so the caller has to preserve whatever it still needs.
 * r1 is expected to be zero on entry.
 */
threefish_mix_asm:
movw r28, r24
mov vROT,r22
/* load both words */
ldd A0, Y+ 0
ldd A1, Y+ 1
ldd A2, Y+ 2
ldd A3, Y+ 3
ldd A4, Y+ 4
ldd A5, Y+ 5
ldd A6, Y+ 6
ldd A7, Y+ 7
ldd B0, Y+ 8
ldd B1, Y+ 9
ldd B2, Y+10
ldd B3, Y+11
ldd B4, Y+12
ldd B5, Y+13
ldd B6, Y+14
ldd B7, Y+15
/* A += B (64 bit) */
add A0, B0
adc A1, B1
adc A2, B2
adc A3, B3
adc A4, B4
adc A5, B5
adc A6, B6
adc A7, B7
/* byte rotation: jump to byte_rot_jmptable[(rot >> 4) & 7];
   pm_lo8/pm_hi8 yield the word address that ijmp needs */
mov r26, vROT
swap r26
andi r26, 0x07
ldi r30, pm_lo8(byte_rot_jmptable)
ldi r31, pm_hi8(byte_rot_jmptable)
add r30, r26
adc r31, r1
ijmp
/* the byte rotation routines come back here */
post_byterot:
/* T flag = direction bit, vROT = remaining bit count (0..7) */
bst vROT, 3
andi vROT, 0x07
brts 1f
rjmp bit_rotl
1: rjmp bit_rotr
/* the bit rotation routines come back here */
post_bitrot:
/* B = rotated B ^ (A + B) */
eor B0, A0
eor B1, A1
eor B2, A2
eor B3, A3
eor B4, A4
eor B5, A5
eor B6, A6
eor B7, A7
/* write both words back */
std Y+ 0, A0
std Y+ 1, A1
std Y+ 2, A2
std Y+ 3, A3
std Y+ 4, A4
std Y+ 5, A5
std Y+ 6, A6
std Y+ 7, A7
std Y+ 8, B0
std Y+ 9, B1
std Y+10, B2
std Y+11, B3
std Y+12, B4
std Y+13, B5
std Y+14, B6
std Y+15, B7
exit:
ret
/*
 * Jump table for the whole-byte part of the rotation; entered via ijmp
 * from threefish_mix_asm.  Entry k rotates B left by k bytes, which is
 * carried out as a right rotation by 8-k bytes; entry 0 does nothing.
 * The trailing ninth entry (index 8) cannot be reached through the 3-bit
 * index mask used in threefish_mix_asm.
 * Text after ';' on the rjmp lines is a comment (an alternative that is
 * not assembled).
 */
byte_rot_jmptable:
rjmp post_byterot;ret; rjmp byte_rotr_0
rjmp byte_rotr_7
rjmp byte_rotr_6
rjmp byte_rotr_5
rjmp byte_rotr_4
rjmp byte_rotr_3
rjmp byte_rotr_2
rjmp byte_rotr_1
rjmp post_byterot;ret; rjmp byte_rotr_0
/*
 * Whole-byte rotations of B7..B0 towards the least significant byte
 * ("right").  Each routine is preceded by two comment rows: the upper row
 * lists the destination registers B0..B7, the lower row the register each
 * one receives its value from.  r0 is the only scratch register; every
 * routine ends by jumping to post_byterot.
 */
; 0 1 2 3 4 5 6 7
; 1 2 3 4 5 6 7 0
byte_rotr_1: /* 10 words */
mov r0, B0
mov B0, B1
mov B1, B2
mov B2, B3
mov B3, B4
mov B4, B5
mov B5, B6
mov B6, B7
mov B7, r0
/* byte_rotr_0 (no rotation) is just the shared jump back */
byte_rotr_0:
rjmp post_byterot
; 0 1 2 3 4 5 6 7
; 2 3 4 5 6 7 0 1
/* two independent cycles: even registers, then odd registers */
byte_rotr_2: /* 11 words */
mov r0, B0
mov B0, B2
mov B2, B4
mov B4, B6
mov B6, r0
mov r0, B1
mov B1, B3
mov B3, B5
mov B5, B7
mov B7, r0
rjmp post_byterot
; 0 1 2 3 4 5 6 7
; 3 4 5 6 7 0 1 2
/* a single cycle through all eight registers */
byte_rotr_3: /* 10 words */
mov r0, B0
mov B0, B3
mov B3, B6
mov B6, B1
mov B1, B4
mov B4, B7
mov B7, B2
mov B2, B5
mov B5, r0
rjmp post_byterot
; 0 1 2 3 4 5 6 7
; 4 5 6 7 0 1 2 3
/* four pairwise swaps: exchanges the two 32-bit halves */
byte_rotr_4: /* 13 words */
mov r0, B0
mov B0, B4
mov B4, r0
mov r0, B1
mov B1, B5
mov B5, r0
mov r0, B2
mov B2, B6
mov B6, r0
mov r0, B3
mov B3, B7
mov B7, r0
rjmp post_byterot
; 0 1 2 3 4 5 6 7
; 5 6 7 0 1 2 3 4
/* a single cycle through all eight registers */
byte_rotr_5: /* 10 words */
mov r0, B0
mov B0, B5
mov B5, B2
mov B2, B7
mov B7, B4
mov B4, B1
mov B1, B6
mov B6, B3
mov B3, r0
rjmp post_byterot
; 0 1 2 3 4 5 6 7
; 6 7 0 1 2 3 4 5
/* two independent cycles: even registers, then odd registers */
byte_rotr_6: /* 11 words */
mov r0, B0
mov B0, B6
mov B6, B4
mov B4, B2
mov B2, r0
mov r0, B1
mov B1, B7
mov B7, B5
mov B5, B3
mov B3, r0
rjmp post_byterot
; 0 1 2 3 4 5 6 7
; 7 0 1 2 3 4 5 6
/* every byte moves one position up; B7 wraps around to B0 */
byte_rotr_7: /* 10 words */
mov r0, B7
mov B7, B6
mov B6, B5
mov B5, B4
mov B4, B3
mov B3, B2
mov B2, B1
mov B1, B0
mov B0, r0
rjmp post_byterot
/*
 * bit_rotl - rotate the 64-bit value B7..B0 left by vROT bits, one bit per
 * pass, then continue at post_bitrot.  vROT is counted down to zero.
 * r0 is scratch: "rol r0" on a copy of B7 only serves to load the carry
 * flag with the top bit, which the following rol chain shifts into B0.
 */
bit_rotl:
tst vROT
brne 1f
rjmp post_bitrot
1: mov r0, B7
rol r0
rol B0
rol B1
rol B2
rol B3
rol B4
rol B5
rol B6
rol B7
dec vROT
rjmp bit_rotl
/*
 * bit_rotr - rotate the 64-bit value B7..B0 right by vROT bits, one bit
 * per pass, then continue at post_bitrot.  vROT is counted down to zero.
 * r0 is scratch: "ror r0" on a copy of B0 only serves to load the carry
 * flag with the bottom bit, which the following ror chain shifts into B7.
 */
bit_rotr:
tst vROT
brne 1f
rjmp post_bitrot
1: mov r0, B0
ror r0
ror B7
ror B6
ror B5
ror B4
ror B3
ror B2
ror B1
ror B0
dec vROT
rjmp bit_rotr

328
threefish_mix_4c.S Normal file
View File

@ -0,0 +1,328 @@
/* threefish_mix_4c.S */
/*
This file is part of the AVR-Crypto-Lib.
Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* \author Daniel Otte
* \email daniel.otte@rub.de
* \date 2009-03-16
* \license GPLv3 or later
*/
#include "avr-asm-macros.S"
/*
#define B0 (((uint64_t*)data)[0])
#define B1 (((uint64_t*)data)[1])
static
void mix(void* data, uint8_t rot){
uint64_t x;
x = B1;
B0 += x;
B1 = ((x<<rot)|(x>>(64-rot))) ^ B0;
}
*/
/*
 * Register assignment used by threefish_mix.
 * A0..A7 (r10..r17) hold the first 64-bit word, loaded from data+0..data+7.
 * r10..r17 are pushed and popped by threefish_mix around their use.
 */
A0 = 10
A1 = 11
A2 = 12
A3 = 13
A4 = 14
A5 = 15
A6 = 16
A7 = 17
/* B0..B7 (r18..r25) hold the second 64-bit word, loaded from data+8..data+15 */
B0 = 18
B1 = 19
B2 = 20
B3 = 21
B4 = 22
B5 = 23
B6 = 24
B7 = 25
/* rotation amount / bit-step counter; note that r27 is the high half of X */
vROT = 27
/*
* param data: r24:r25
* param rot: r22
*/
/*
 * void threefish_mix(void* data, uint8_t rot)
 *
 * Threefish MIX step on two 64-bit words stored at data:
 *   data[0] += data[1];
 *   data[1]  = rotl64(data[1], rot) ^ data[0];
 *
 * In:      r25:r24 = data, r22 = rot (reduced modulo 64 below)
 * Saved:   r10-r17, r28, r29
 * Scratch: r0, r18-r27, r30, r31
 * r1 is used as a constant zero.
 *
 * The rotation is split into a whole-byte part (dispatched through
 * byte_rot_jmptable, rounded to the nearest byte) and a residual
 * rotation of at most four bits left or three bits right (bit_rot_lut).
 */
.global threefish_mix
threefish_mix:
	push r28
	push r29
	push_range 10, 17
	movw r28, r24                   /* Y = data */
	mov vROT, r22
	/* keep the jump-table index below within 0..8 for any argument value */
	andi vROT, 0x3f
	ldd A0, Y+ 0
	ldd A1, Y+ 1
	ldd A2, Y+ 2
	ldd A3, Y+ 3
	ldd A4, Y+ 4
	ldd A5, Y+ 5
	ldd A6, Y+ 6
	ldd A7, Y+ 7
	ldd B0, Y+ 8
	ldd B1, Y+ 9
	ldd B2, Y+10
	ldd B3, Y+11
	ldd B4, Y+12
	ldd B5, Y+13
	ldd B6, Y+14
	ldd B7, Y+15
	/* A += B (64-bit, carry rippling from A0 to A7) */
	add A0, B0
	adc A1, B1
	adc A2, B2
	adc A3, B3
	adc A4, B4
	adc A5, B5
	adc A6, B6
	adc A7, B7
	/* table index = (rot + 3) / 8, i.e. rot rounded to the nearest byte */
	mov r26, vROT
	/* 8-bit add: adiw would operate on r27:r26 and r27 is vROT */
	subi r26, -3
	lsr r26
	lsr r26
	lsr r26
	ldi r30, pm_lo8(byte_rot_jmptable)
	ldi r31, pm_hi8(byte_rot_jmptable)
	add r30, r26
	adc r31, r1
	ijmp
post_byterot:
	/* look up direction and step count for the remaining (rot & 7) bits */
	ldi r30, lo8(bit_rot_lut)
	ldi r31, hi8(bit_rot_lut)
	andi vROT, 0x07
	add r30, vROT
	adc r31, r1
	lpm r27, Z
	bst r27, 7                      /* T = direction flag (set: rotate right) */
	andi r27, 0x07                  /* r27 = number of single-bit steps */
	brts 1f
	rjmp bit_rotl
1:	rjmp bit_rotr
post_bitrot:
	/* B = rotated B ^ (A + old B) */
	eor B0, A0
	eor B1, A1
	eor B2, A2
	eor B3, A3
	eor B4, A4
	eor B5, A5
	eor B6, A6
	eor B7, A7
	std Y+ 0, A0
	std Y+ 1, A1
	std Y+ 2, A2
	std Y+ 3, A3
	std Y+ 4, A4
	std Y+ 5, A5
	std Y+ 6, A6
	std Y+ 7, A7
	std Y+ 8, B0
	std Y+ 9, B1
	std Y+10, B2
	std Y+11, B3
	std Y+12, B4
	std Y+13, B5
	std Y+14, B6
	std Y+15, B7
exit:
	pop_range 10, 17
	pop r29
	pop r28
	ret
/*
 * Bit-rotation lookup table, indexed by (rot & 7) and read with lpm in
 * post_byterot. Bit 7 selects the direction (set: rotate right, clear:
 * rotate left), bits 2..0 give the number of single-bit steps.
 * Residues 5..7 are done as a right rotation by 3..1 because the byte
 * stage rounds (rot + 3) / 8 and has then already rotated one byte further.
 * The table is eight bytes long, so the code that follows stays word aligned.
 */
bit_rot_lut:
.byte 0x00
.byte 0x01
.byte 0x02
.byte 0x03
.byte 0x04
.byte 0x83
.byte 0x82
.byte 0x81
/*
 * Jump table for the whole-byte part of the left rotation, entered with
 * ijmp from threefish_mix using the index (rot + 3) / 8. Every entry is a
 * single rjmp. Entry k rotates left by k bytes, which is carried out as a
 * right rotation by 8 - k bytes (byte_rotr_7 down to byte_rotr_1).
 * Entries 0 and 8 need no byte movement and continue at post_byterot.
 */
byte_rot_jmptable:
rjmp post_byterot;ret; rjmp byte_rotr_0
rjmp byte_rotr_7
rjmp byte_rotr_6
rjmp byte_rotr_5
rjmp byte_rotr_4
rjmp byte_rotr_3
rjmp byte_rotr_2
rjmp byte_rotr_1
rjmp post_byterot;ret; rjmp byte_rotr_0
/*
 * Whole-byte rotations of the second word B0..B7 (r18..r25, B0 being the
 * byte loaded from the lowest address). byte_rotr_N copies the old content
 * of B((i+N) mod 8) into Bi for every i, uses r0 as the only scratch
 * register and then continues at post_byterot.
 * Each pair of comment rows below lists the destination register index
 * (first row) above the source register index (second row).
 */
; 0 1 2 3 4 5 6 7
; 1 2 3 4 5 6 7 0
byte_rotr_1: /* 10 words */
/* one cycle through all eight registers, closed via r0 */
mov r0, B0
mov B0, B1
mov B1, B2
mov B2, B3
mov B3, B4
mov B4, B5
mov B5, B6
mov B6, B7
mov B7, r0
/* byte_rotr_0: nothing to move, shares the rjmp that ends byte_rotr_1 */
byte_rotr_0:
rjmp post_byterot
; 0 1 2 3 4 5 6 7
; 2 3 4 5 6 7 0 1
byte_rotr_2: /* 11 words */
/* two independent cycles: even registers first, then odd registers */
mov r0, B0
mov B0, B2
mov B2, B4
mov B4, B6
mov B6, r0
mov r0, B1
mov B1, B3
mov B3, B5
mov B5, B7
mov B7, r0
rjmp post_byterot
; 0 1 2 3 4 5 6 7
; 3 4 5 6 7 0 1 2
byte_rotr_3: /* 10 words */
/* single cycle: 0 <- 3 <- 6 <- 1 <- 4 <- 7 <- 2 <- 5 <- (old 0) */
mov r0, B0
mov B0, B3
mov B3, B6
mov B6, B1
mov B1, B4
mov B4, B7
mov B7, B2
mov B2, B5
mov B5, r0
rjmp post_byterot
; 0 1 2 3 4 5 6 7
; 4 5 6 7 0 1 2 3
byte_rotr_4: /* 13 words */
/* four pairwise swaps: Bi <-> B(i+4) */
mov r0, B0
mov B0, B4
mov B4, r0
mov r0, B1
mov B1, B5
mov B5, r0
mov r0, B2
mov B2, B6
mov B6, r0
mov r0, B3
mov B3, B7
mov B7, r0
rjmp post_byterot
; 0 1 2 3 4 5 6 7
; 5 6 7 0 1 2 3 4
byte_rotr_5: /* 10 words */
/* single cycle: 0 <- 5 <- 2 <- 7 <- 4 <- 1 <- 6 <- 3 <- (old 0) */
mov r0, B0
mov B0, B5
mov B5, B2
mov B2, B7
mov B7, B4
mov B4, B1
mov B1, B6
mov B6, B3
mov B3, r0
rjmp post_byterot
; 0 1 2 3 4 5 6 7
; 6 7 0 1 2 3 4 5
byte_rotr_6: /* 11 words */
/* two independent cycles: even registers first, then odd registers */
mov r0, B0
mov B0, B6
mov B6, B4
mov B4, B2
mov B2, r0
mov r0, B1
mov B1, B7
mov B7, B5
mov B5, B3
mov B3, r0
rjmp post_byterot
; 0 1 2 3 4 5 6 7
; 7 0 1 2 3 4 5 6
byte_rotr_7: /* 10 words */
/* one cycle walking downwards; every byte moves up by one position */
mov r0, B7
mov B7, B6
mov B6, B5
mov B5, B4
mov B4, B3
mov B3, B2
mov B2, B1
mov B1, B0
mov B0, r0
rjmp post_byterot
/*
 * Rotate the second word B7:B0 left by r27 single-bit steps, then continue
 * at post_bitrot. r27 is zero afterwards; r0 is scratch.
 */
bit_rotl:
	tst r27
	breq 2f                         /* no steps requested */
1:	mov r0, B7
	rol r0                          /* carry = top bit of B7, B7 itself untouched */
	rol B0                          /* carry enters bit 0 and ripples upwards */
	rol B1
	rol B2
	rol B3
	rol B4
	rol B5
	rol B6
	rol B7
	dec r27
	brne 1b
2:	rjmp post_bitrot                /* rjmp: post_bitrot is beyond conditional-branch reach */
/*
 * Rotate the second word B7:B0 right by r27 single-bit steps, then continue
 * at post_bitrot. r27 is zero afterwards; r0 is scratch.
 */
bit_rotr:
	tst r27
	breq 2f                         /* no steps requested */
1:	mov r0, B0
	ror r0                          /* carry = lowest bit of B0, B0 itself untouched */
	ror B7                          /* carry enters bit 7 of B7 and ripples downwards */
	ror B6
	ror B5
	ror B4
	ror B3
	ror B2
	ror B1
	ror B0
	dec r27
	brne 1b
2:	rjmp post_bitrot                /* rjmp: post_bitrot is beyond conditional-branch reach */

38
threefish_mix_c.c Normal file
View File

@ -0,0 +1,38 @@
/* threefish_mix_c.c */
/*
This file is part of the AVR-Crypto-Lib.
Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* \author Daniel Otte
* \email daniel.otte@rub.de
* \date 2009-03-16
* \license GPLv3 or later
*
*
*
*/
#include <stdint.h>
#define X0 (((uint64_t*)data)[0])
#define X1 (((uint64_t*)data)[1])
/**
 * Threefish MIX step on two consecutive 64-bit words.
 *
 *   X0 = X0 + X1
 *   X1 = rotl64(X1, rot) ^ X0   (using the new X0)
 *
 * \param data pointer to two uint64_t words (X0 at index 0, X1 at index 1),
 *             updated in place
 * \param rot  left-rotation amount in bits; only the low six bits are used
 *
 * Both shift counts are masked so that they stay within 0..63: shifting a
 * 64-bit value by 64 (which the unmasked expression does for rot == 0) is
 * undefined behaviour in C.
 */
void threefish_mix(void* data, uint8_t rot){
	uint64_t x;
	rot &= 63;
	x = X1;
	X0 += x;
	X1 = ((x<<rot)|(x>>((64-rot)&63))) ^ X0;
}

327
ubi256_asm.S Normal file
View File

@ -0,0 +1,327 @@
/* ubi256_asm.S */
/*
This file is part of the AVR-Crypto-Lib.
Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* \author Daniel Otte
* \email daniel.otte@rub.de
* \date 2009-03-16
* \license GPLv3 or later
*/
#include "avr-asm-macros.S"
/******************************************************************************/
/*
void ubi256_init(ubi256_ctx_t* ctx, const void* g, uint8_t type){
memset(ctx->tweak, 0, 15);
ctx->tweak[15] = 0x40+type;
memcpy(ctx->g, g, 32);
}
*/
/*
* param ctx: r24:r25
* param g: r22:r23
* param type: r20
*/
/*
 * ubi256_init(ctx, g, type)
 *   r25:r24 = ctx, r23:r22 = g, r20 = type
 * Clears tweak bytes 0..14, stores type with bit 0x40 set into tweak byte 15
 * and copies the 32 bytes at g behind the tweak.
 * Scratch: r0, r20, r21, X, Z. r1 is used as a constant zero.
 */
.global ubi256_init
ubi256_init:
	movw r30, r22                   /* Z = g (source) */
	movw r26, r24                   /* X = ctx (destination) */
	ori r20, 0x40
	ldi r21, 15
1:	st X+, r1
	dec r21
	brne 1b
	st X+, r20                      /* tweak byte 15 */
	ldi r21, 32
2:	ld r0, Z+
	st X+, r0
	dec r21
	brne 2b
	ret
/******************************************************************************/
/*
void ubi256_ctx2hash(void* dest, const ubi256_ctx_t* ctx){
memcpy(dest, ctx->g, UBI256_BLOCKSIZE_B);
}
*/
/*
* param dest: r24:r25
* param ctx: r22:r23
*/
/*
 * ubi256_ctx2hash(dest, ctx)
 *   r25:r24 = dest, r23:r22 = ctx
 * Copies the 32 bytes starting at ctx+16 to dest.
 * Scratch: r0, r24, X, Z.
 */
.global ubi256_ctx2hash
ubi256_ctx2hash:
	movw r30, r22
	adiw r30, 16                    /* Z = ctx + 16 (source) */
	movw r26, r24                   /* X = dest */
	ldi r24, 32
1:	ld r0, Z+
	st X+, r0
	dec r24
	brne 1b
	ret
/******************************************************************************/
/*
void ubi256_nextBlock(ubi256_ctx_t* ctx, const void* block){
threefish256_ctx_t tfctx;
((uint64_t*)(ctx->tweak))[0] += UBI256_BLOCKSIZE_B;
threefish256_init(ctx->g, ctx->tweak, &tfctx);
memcpy(ctx->g, block, UBI256_BLOCKSIZE_B);
threefish256_enc(ctx->g, &tfctx);
memxor(ctx->g, block, UBI256_BLOCKSIZE_B);
ctx->tweak[15] &= (uint8_t)~0x40;
}
*/
/*
* param ctx: r24:r25
* param block: r22:r23
*/
/*
 * Register pairs that must survive the calls below (r2..r7 are pushed and
 * popped by this function):
 *   CTX   - ctx at first, later ctx+16 (ctx->g)
 *   BLOCK - input block
 *   TFCTX - 64-byte threefish context in the local stack frame
 */
CTX0 = 2
CTX1 = 3
BLOCK0 = 4
BLOCK1 = 5
TFCTX0 = 6
TFCTX1 = 7
/*
 * ubi256_nextBlock(ctx, block): r25:r24 = ctx, r23:r22 = block.
 * Adds 32 to the tweak counter, keys threefish256 with the current ctx->g
 * and the tweak, encrypts the block, XORs the block into the result (which
 * becomes the new ctx->g) and clears bit 0x40 of tweak byte 15.
 * r1 is used as a constant zero.
 */
.global ubi256_nextBlock
ubi256_nextBlock:
/* NOTE(review): presumably stack_alloc_large (avr-asm-macros.S) leaves the
   new stack pointer in Z; the adiw below relies on that - confirm */
stack_alloc_large 64
push_range 2, 7
adiw r30, 1 /* Z points to tfctx */
movw TFCTX0, r30
movw CTX0, r24
movw BLOCK0, r22
movw r26, r24
/* add BLOCKSIZE_B (32) to tweak */
ldi r25, 32
ld r24, X
add r24, r25
st X+, r24
/* ripple the carry through 11 more bytes, i.e. a 96-bit addition (wider
   than the 64-bit addition of the C reference above); ld, st, ldi and dec
   leave the carry flag untouched between the adc instructions */
ldi r25, 11
1: ld r24, X
adc r24, r1
st X+, r24
dec r25
brne 1b
/* call threefish256_init */
/* arguments: r25:r24 = ctx->g (key), r23:r22 = tweak, r21:r20 = tfctx */
movw r24, CTX0
adiw r24, 16
movw r22, CTX0
movw CTX0, r24 /* CTX points to ctx->g */
movw r20, TFCTX0
rcall threefish256_init
/* copy block to ctx->g */
movw r26, CTX0
movw r30, BLOCK0
ldi r25, 32
1: ld r24, Z+
st X+, r24
dec r25
brne 1b
/* call threefish256_enc */
/* arguments: r25:r24 = ctx->g (encrypted in place), r23:r22 = tfctx */
movw r24, CTX0
movw r22, TFCTX0
rcall threefish256_enc
/* xor block into ctx->g */
movw r26, BLOCK0
movw r30, CTX0
ldi r25, 32
1: ld r24, X+
ld r23, Z
eor r23, r24
st Z+, r23
dec r25
brne 1b
/* clear 'first' bit in tweak */
/* Z is at ctx->g + 32 here; 33 bytes back is tweak byte 15 */
sbiw r30, 33
ld r24, Z
andi r24, ~0x40
st Z, r24
exit:
/* undo the prologue in reverse order */
pop_range 2, 7
stack_free_large 64
ret
/******************************************************************************/
/*
void ubi256_lastBlock(ubi256_ctx_t* ctx, const void* block, uint16_t length_b){
threefish256_ctx_t tfctx;
while(length_b>UBI256_BLOCKSIZE){
ubi256_nextBlock(ctx, block);
block = (uint8_t*)block + UBI256_BLOCKSIZE_B;
length_b -= UBI256_BLOCKSIZE;
}
ctx->tweak[15] |= 0x80;
((uint64_t*)(ctx->tweak))[0] += (length_b+7)/8;
if(length_b & 0x07){
ctx->tweak[14] |= 0x80;
}
threefish256_init(ctx->g, ctx->tweak, &tfctx);
memset(ctx->g, 0, UBI256_BLOCKSIZE_B);
memcpy(ctx->g, block, (length_b+7)/8);
if(length_b & 0x07){
ctx->g[((length_b+7)/8)-1] |= 0x80>>(length_b&7);
ctx->g[((length_b+7)/8)-1] &= ~((0x80>>(length_b&7))-1);
}
threefish256_enc(ctx->g, &tfctx);
memxor(ctx->g, block, (length_b+7)/8);
if(length_b & 0x07){
ctx->g[((length_b+7)/8)-1] ^= 0x80>>(length_b&7);
}
}
*/
/*
* param ctx: r24:r25
* param block: r22:r23
* param length_b: r20:r21
*/
/*
 * Registers that must survive the calls below (r8..r17 are pushed and
 * popped by this function):
 *   MASK_B - pad bit 0x80 >> (length_b & 7), or 0 for whole-byte input
 *   LEN_B  - (length_b + 7) / 8, number of input bytes in the final block
 *   TFCTX  - 64-byte threefish context in the local stack frame
 *   CTX    - ctx at first, later ctx+16 (ctx->g)
 *   BLOCK  - current input position
 *   LENGTH - remaining length in bits
 */
MASK_B = 8
LEN_B = 9
TFCTX0 = 10
TFCTX1 = 11
CTX0 = 12
CTX1 = 13
BLOCK0 = 14
BLOCK1 = 15
LENGTH0 = 16
LENGTH1 = 17
/*
 * ubi256_lastBlock(ctx, block, length_b)
 *   r25:r24 = ctx, r23:r22 = block, r21:r20 = length_b (bits)
 * Feeds every leading full block through ubi256_nextBlock, then processes
 * the final block (0..256 bits) with the final flag set and, for a length
 * that is not a multiple of eight, with bit padding.
 * r1 is used as a constant zero.
 */
.global ubi256_lastBlock
ubi256_lastBlock:
	/* run nextBlock for preceding blocks */
	push_range 8, 17
	movw CTX0, r24
	movw BLOCK0, r22
	movw LENGTH0, r20
	/* while length_b >= 512: one block, length_b -= 256 */
1:	cpi LENGTH1, 2
	brlo 2f
	movw r24, CTX0
	movw r22, BLOCK0
	rcall ubi256_nextBlock
	ldi r25, 32
	add BLOCK0, r25
	adc BLOCK1, r1
	dec LENGTH1
	rjmp 1b
	/* 257..511 bits left: one more full block; exactly 256 stays as final block */
2:	tst LENGTH1
	breq 3f
	tst LENGTH0
	breq 3f
	movw r24, CTX0
	movw r22, BLOCK0
	rcall ubi256_nextBlock
	ldi r25, 32
	add BLOCK0, r25
	adc BLOCK1, r1
	dec LENGTH1
3:	/* now the real fun */
	/* NOTE(review): presumably stack_alloc_large (avr-asm-macros.S) leaves
	   the new stack pointer in Z; the adiw below relies on that - confirm */
	stack_alloc_large 64
	adiw r30, 1
	movw TFCTX0, r30
	/* calculate LEN_B = (length_b + 7) / 8 */
	movw r24, LENGTH0
	adiw r24, 7
	lsr r25
	ror r24
	lsr r24
	lsr r24
	mov LEN_B, r24
	/* add length to tweak (carry rippled through 12 bytes) */
	movw r30, CTX0
	ld r24, Z
	add r24, LEN_B
	st Z+, r24
	ldi r25, 11
1:	ld r24, Z
	adc r24, r1
	st Z+, r24
	dec r25
	brne 1b
	/* set 'final' bit */
	movw r30, CTX0
	ldd r24, Z+15
	ori r24, 0x80
	std Z+15, r24
	/* build MASK_B if we do bit processing and set 'BitPad' bit */
	clr MASK_B
	mov r24, LENGTH0
	andi r24, 0x07
	tst r24
	breq 4f
	ldd r25, Z+14
	ori r25, 0x80
	std Z+14, r25
	ldi r25, 0x80
	mov MASK_B, r25
1:	lsr MASK_B
	dec r24
	brne 1b
4:	/* call threefish256_init */
	movw r24, CTX0
	adiw r24, 16
	movw r22, CTX0
	movw CTX0, r24                  /* CTX points at ctx->g */
	movw r20, TFCTX0
	rcall threefish256_init
	/* copy block to ctx->g */
	movw r26, BLOCK0
	movw r30, CTX0
	mov r24, LEN_B
	ldi r25, 32
	sub r25, LEN_B                  /* r25 = number of zero bytes to append */
	tst r24
1:	breq 2f
	ld r22, X+
	st Z+, r22
	dec r24
	rjmp 1b
	/* bit padding of the last copied byte (still in r22): set the pad bit
	   and clear every bit below it, as in the C reference above */
2:	tst MASK_B
	breq 29f
	or r22, MASK_B
	mov r23, MASK_B
	neg r23                         /* -mask == ~(mask - 1): pad bit and all bits above */
	and r22, r23
	st -Z, r22
	adiw r30, 1
	/* zero-fill the rest of the block */
29:	tst r25
3:	breq 4f
	st Z+, r1
	dec r25
	rjmp 3b
4:	/* call threefish256_enc */
	movw r24, CTX0
	movw r22, TFCTX0
	rcall threefish256_enc
	/* xor block into ctx->g */
	/* NOTE(review): like the C reference, this XORs the raw input bytes and
	   then toggles the pad bit; that equals XORing the padded byte only if
	   the caller keeps the unused low bits of the last byte zero - confirm */
	movw r30, CTX0
	movw r26, BLOCK0
	tst LEN_B
5:	breq 6f
	ld r22, X+
	ld r23, Z
	eor r23, r22
	st Z+, r23
	dec LEN_B
	rjmp 5b
6:	tst MASK_B
	breq 7f
	eor r23, MASK_B
	st -Z, r23
7:	stack_free_large 64
	pop_range 8, 17
	ret