performance upgrade

This commit is contained in:
bg 2009-05-13 21:48:37 +00:00
parent 7605b81a4b
commit 2deab9197b
9 changed files with 98 additions and 248 deletions

View File

@ -32,8 +32,8 @@
#include "blake_large.h"
#include "blake_common.h"
#define BUG_3 0 /* bug compatibility with reference code */
#define BUG_4 0 /* bug compatibility with reference code */
#define BUG_3 1 /* bug compatibility with reference code */
#define BUG_4 1 /* bug compatibility with reference code */
uint64_t pgm_read_qword(void* p){
union{
@ -67,25 +67,6 @@ uint64_t blake_c[] PROGMEM = {
((0x00ff0000&(a))>>8)| \
(a)>>24 )
/*
 * One G mixing step on four 64-bit words of the state v.
 *
 * r  selects the 16-byte row of blake_sigma that is used
 * i  selects the G step: row i of blake_index_lut names the four state
 *    words, bytes 2*i and 2*i+1 of the sigma row name the message words
 * v  state words, updated in place
 * m  message words (read only)
 *
 * All lookup tables are read from program memory.
 */
void blake_large_g(uint8_t r, uint8_t i, uint64_t* v, const uint64_t* m){
	/* positions of the four state words touched by this step */
	const uint8_t ia = pgm_read_byte(&blake_index_lut[4*i+0]);
	const uint8_t ib = pgm_read_byte(&blake_index_lut[4*i+1]);
	const uint8_t ic = pgm_read_byte(&blake_index_lut[4*i+2]);
	const uint8_t id = pgm_read_byte(&blake_index_lut[4*i+3]);
	/* message word / constant selectors for this round and step */
	const uint8_t k0 = pgm_read_byte(&blake_sigma[16*r+2*i+0]);
	const uint8_t k1 = pgm_read_byte(&blake_sigma[16*r+2*i+1]);
	/* work through pointers so every update still lands directly in v */
	uint64_t* const wa = &v[ia];
	uint64_t* const wb = &v[ib];
	uint64_t* const wc = &v[ic];
	uint64_t* const wd = &v[id];

	/* first half: message word k0 is combined with constant k1 */
	*wa += *wb + (m[k0] ^ pgm_read_qword(&blake_c[k1]));
	*wd = ROTR64(*wd ^ *wa, 32);
	*wc += *wd;
	*wb = ROTR64(*wb ^ *wc, 25);

	/* second half: selectors swapped */
	*wa += *wb + (m[k1] ^ pgm_read_qword(&blake_c[k0]));
	*wd = ROTR64(*wd ^ *wa, 16);
	*wc += *wd;
	*wb = ROTR64(*wb ^ *wc, 11);
}
void blake_large_expand(uint64_t* v, const blake_large_ctx_t* ctx){
uint8_t i;
memcpy(v, ctx->h, 8*8);
@ -108,9 +89,24 @@ void blake_large_changeendian(void* dest, const void* src){
/*
 * Run the 14 rounds of the large-variant round function over the state v,
 * using the message words at m.
 *
 * Every round performs eight G steps (i = 0..7). For step i the positions
 * a,b,c,d of the four state words come from row i of blake_index_lut and
 * the selectors s0,s1 from blake_sigma; all tables are read from program
 * memory. The G step is written out inline and is applied exactly once per
 * (round, step) pair.
 *
 * v  state words, updated in place
 * m  message block, accessed as an array of uint64_t
 */
void blake_large_compress(uint64_t* v,const void* m){
	uint8_t r,i;
	uint8_t a,b,c,d, s0, s1, p;
	const uint64_t* mw = (const uint64_t*)m;
	for(r=0; r<14; ++r){
		/*
		 * The sigma row is taken modulo 10, exactly as the out-of-line
		 * helper was invoked (blake_large_g(r%10, ...)). Indexing with the
		 * raw round number would read rows 10..13, which the helper never
		 * touched -- presumably the table only holds ten rows.
		 */
		p = r%10;
		for(i=0; i<8; ++i){
			a = pgm_read_byte(blake_index_lut+4*i+0);
			b = pgm_read_byte(blake_index_lut+4*i+1);
			c = pgm_read_byte(blake_index_lut+4*i+2);
			d = pgm_read_byte(blake_index_lut+4*i+3);
			s0 = pgm_read_byte(blake_sigma+16*p+2*i+0);
			s1 = pgm_read_byte(blake_sigma+16*p+2*i+1);
			/* first half of G: message word s0, constant s1 */
			v[a] += v[b] + (mw[s0] ^ pgm_read_qword(&(blake_c[s1])));
			v[d] = ROTR64(v[d]^v[a], 32);
			v[c] += v[d];
			v[b] = ROTR64(v[b]^v[c], 25);
			/* second half of G: selectors swapped */
			v[a] += v[b] + (mw[s1] ^ pgm_read_qword(&(blake_c[s0])));
			v[d] = ROTR64(v[d]^v[a], 16);
			v[c] += v[d];
			v[b] = ROTR64(v[b]^v[c], 11);
		}
	}
}

View File

@ -32,8 +32,8 @@
#include "blake_small.h"
#include "blake_common.h"
#define BUG_1 0 /* bug compatibility for zero length message */
#define BUG_2 0 /* bug compatibility for messages of length%512=505...511 */
#define BUG_1 1 /* bug compatibility for zero length message */
#define BUG_2 1 /* bug compatibility for messages of length%512=505...511 */
uint32_t blake_c[] PROGMEM = {
@ -54,25 +54,6 @@ uint32_t blake_c[] PROGMEM = {
((0x00ff0000&(a))>>8)| \
(a)>>24 )
/*
 * One G mixing step on four 32-bit words of the state v.
 *
 * r  selects the 16-byte row of blake_sigma that is used
 * i  selects the G step: row i of blake_index_lut names the four state
 *    words, bytes 2*i and 2*i+1 of the sigma row name the message words
 * v  state words, updated in place
 * m  message words (read only)
 *
 * All lookup tables are read from program memory.
 */
void blake_small_g(uint8_t r, uint8_t i, uint32_t* v, const uint32_t* m){
	/* positions of the four state words touched by this step */
	const uint8_t ia = pgm_read_byte(&blake_index_lut[4*i+0]);
	const uint8_t ib = pgm_read_byte(&blake_index_lut[4*i+1]);
	const uint8_t ic = pgm_read_byte(&blake_index_lut[4*i+2]);
	const uint8_t id = pgm_read_byte(&blake_index_lut[4*i+3]);
	/* message word / constant selectors for this round and step */
	const uint8_t k0 = pgm_read_byte(&blake_sigma[16*r+2*i+0]);
	const uint8_t k1 = pgm_read_byte(&blake_sigma[16*r+2*i+1]);
	/* work through pointers so every update still lands directly in v */
	uint32_t* const wa = &v[ia];
	uint32_t* const wb = &v[ib];
	uint32_t* const wc = &v[ic];
	uint32_t* const wd = &v[id];

	/* first half: message word k0 is combined with constant k1 */
	*wa += *wb + (m[k0] ^ pgm_read_dword(&blake_c[k1]));
	*wd = ROTR32(*wd ^ *wa, 16);
	*wc += *wd;
	*wb = ROTR32(*wb ^ *wc, 12);

	/* second half: selectors swapped */
	*wa += *wb + (m[k1] ^ pgm_read_dword(&blake_c[k0]));
	*wd = ROTR32(*wd ^ *wa, 8);
	*wc += *wd;
	*wb = ROTR32(*wb ^ *wc, 7);
}
void blake_small_expand(uint32_t* v, const blake_small_ctx_t* ctx){
uint8_t i;
memcpy(v, ctx->h, 8*4);
@ -92,9 +73,36 @@ void blake_small_changeendian(void* dest, const void* src){
/*
 * Run the 10 rounds of the small-variant round function over the state v,
 * using the message words at m.
 *
 * Every round performs eight G steps (i = 0..7). For step i the positions
 * a,b,c,d of the four state words come from row i of blake_index_lut and
 * the selectors s0,s1 from row r of blake_sigma; all tables are read from
 * program memory. The G step is written out inline and is applied exactly
 * once per (round, step) pair -- calling blake_small_g() in addition would
 * mix every step twice.
 *
 * v  state words, updated in place
 * m  message block, accessed as an array of uint32_t
 */
void blake_small_compress(uint32_t* v,const void* m){
	uint8_t r,i;
	uint8_t a,b,c,d, s0, s1;
	uint32_t lv[4];
	const uint32_t* mw = (const uint32_t*)m;
	for(r=0; r<10; ++r){
		for(i=0; i<8; ++i){
			a = pgm_read_byte(blake_index_lut+4*i+0);
			b = pgm_read_byte(blake_index_lut+4*i+1);
			c = pgm_read_byte(blake_index_lut+4*i+2);
			d = pgm_read_byte(blake_index_lut+4*i+3);
			s0 = pgm_read_byte(blake_sigma+16*r+2*i+0);
			s1 = pgm_read_byte(blake_sigma+16*r+2*i+1);
			/*
			 * Work on local copies of the four words and write them back
			 * afterwards.
			 * NOTE(review): this is only equivalent to updating v in place
			 * if a, b, c and d are pairwise distinct -- confirm against
			 * blake_index_lut.
			 */
			lv[0] = v[a];
			lv[1] = v[b];
			lv[2] = v[c];
			lv[3] = v[d];
			/* first half of G: message word s0, constant s1 */
			lv[0] += lv[1] + (mw[s0] ^ pgm_read_dword(&(blake_c[s1])));
			lv[3] = ROTR32(lv[3]^lv[0], 16);
			lv[2] += lv[3];
			lv[1] = ROTR32(lv[1]^lv[2], 12);
			/* second half of G: selectors swapped */
			lv[0] += lv[1] + (mw[s1] ^ pgm_read_dword(&(blake_c[s0])));
			lv[3] = ROTR32(lv[3]^lv[0], 8);
			lv[2] += lv[3];
			lv[1] = ROTR32(lv[1]^lv[2], 7);
			v[a] = lv[0];
			v[b] = lv[1];
			v[c] = lv[2];
			v[d] = lv[3];
		}
	}
}

View File

@ -69,26 +69,34 @@ void hfal_performance(const hfdesc_t* hd){
cli_putstr_P(PSTR("\r\n blocksize (bits): "));
printvalue(hf.blocksize_b);
startTimer(1);
startTimer(0);
START_TIMER;
hf.init(&ctx);
STOP_TIMER;
t = stopTimer();
cli_putstr_P(PSTR("\r\n init (cycles): "));
printvalue(t);
startTimer(1);
startTimer(0);
START_TIMER;
hf.nextBlock(&ctx, data);
STOP_TIMER;
t = stopTimer();
cli_putstr_P(PSTR("\r\n nextBlock (cycles): "));
printvalue(t);
startTimer(1);
startTimer(0);
START_TIMER;
hf.lastBlock(&ctx, data, 0);
STOP_TIMER;
t = stopTimer();
cli_putstr_P(PSTR("\r\n lastBlock (cycles): "));
printvalue(t);
startTimer(1);
startTimer(0);
START_TIMER;
hf.ctx2hash(digest, &ctx);
STOP_TIMER;
t = stopTimer();
cli_putstr_P(PSTR("\r\n ctx2hash (cycles): "));
printvalue(t);

View File

@ -42,8 +42,8 @@ const hfdesc_t blake48_desc PROGMEM = {
BLAKE48_BLOCKSIZE,
384,
(hf_init_fpt)blake48_init,
(hf_nextBlock_fpt)blake48_nextBlock,
(hf_lastBlock_fpt)blake48_lastBlock,
(hf_nextBlock_fpt)blake_large_nextBlock,
(hf_lastBlock_fpt)blake_large_lastBlock,
(hf_ctx2hash_fpt)blake48_ctx2hash,
(hf_free_fpt)NULL,
(hf_mem_fpt)blake48
@ -57,8 +57,8 @@ const hfdesc_t blake64_desc PROGMEM = {
BLAKE64_BLOCKSIZE,
512,
(hf_init_fpt)blake64_init,
(hf_nextBlock_fpt)blake64_nextBlock,
(hf_lastBlock_fpt)blake64_lastBlock,
(hf_nextBlock_fpt)blake_large_nextBlock,
(hf_lastBlock_fpt)blake_large_lastBlock,
(hf_ctx2hash_fpt)blake64_ctx2hash,
(hf_free_fpt)NULL,
(hf_mem_fpt)blake64

View File

@ -42,8 +42,8 @@ const hfdesc_t blake28_desc PROGMEM = {
BLAKE28_BLOCKSIZE,
224,
(hf_init_fpt)blake28_init,
(hf_nextBlock_fpt)blake28_nextBlock,
(hf_lastBlock_fpt)blake28_lastBlock,
(hf_nextBlock_fpt)blake_small_nextBlock,
(hf_lastBlock_fpt)blake_small_lastBlock,
(hf_ctx2hash_fpt)blake28_ctx2hash,
(hf_free_fpt)NULL,
(hf_mem_fpt)blake28
@ -57,8 +57,8 @@ const hfdesc_t blake32_desc PROGMEM = {
BLAKE32_BLOCKSIZE,
256,
(hf_init_fpt)blake32_init,
(hf_nextBlock_fpt)blake32_nextBlock,
(hf_lastBlock_fpt)blake32_lastBlock,
(hf_nextBlock_fpt)blake_small_nextBlock,
(hf_lastBlock_fpt)blake_small_lastBlock,
(hf_ctx2hash_fpt)blake32_ctx2hash,
(hf_free_fpt)NULL,
(hf_mem_fpt)blake32

View File

@ -9,7 +9,8 @@ $(ALGO_NAME)_OBJ := shabal-asm.o shabal192-asm.o shabal224-asm.o \
shabal256-asm.o shabal384-asm.o shabal512-asm.o
$(ALGO_NAME)_TEST_BIN := main-shabal-test.o debug.o uart.o hexdigit_tab.o \
dbz_strings.o nessie_common.o cli.o string-extras.o performance_test.o \
nessie_hash_test.o hfal-basic.o hfal_shabal.o shavs.o
nessie_hash_test.o hfal-basic.o hfal_shabal.o shavs.o \
hfal-nessie.o hfal-performance.o hfal-test.o
$(ALGO_NAME)_NESSIE_TEST := test nessie
$(ALGO_NAME)_PERFORMANCE_TEST := performance

View File

@ -8,7 +8,8 @@ HASHES += $(ALGO_NAME)
$(ALGO_NAME)_OBJ := shabal.o shabal192.o shabal224.o shabal256.o shabal384.o shabal512.o
$(ALGO_NAME)_TEST_BIN := main-shabal-test.o debug.o uart.o hexdigit_tab.o \
dbz_strings.o nessie_common.o cli.o string-extras.o performance_test.o \
nessie_hash_test.o hfal-basic.o hfal_shabal.o shavs.o
nessie_hash_test.o hfal-basic.o hfal_shabal.o shavs.o \
hfal-nessie.o hfal-performance.o hfal-test.o
$(ALGO_NAME)_NESSIE_TEST := test nessie
$(ALGO_NAME)_PERFORMANCE_TEST := performance

View File

@ -29,6 +29,9 @@
#include "shabal.h"
#include "cli.h"
#include "hfal_shabal.h"
#include "hfal-test.h"
#include "hfal-nessie.h"
#include "hfal-performance.h"
#include "shavs.h"
#include "nessie_hash_test.h"
#include "performance_test.h"
@ -39,67 +42,37 @@
char* algo_name = "Shabal";
/*
 * NULL-terminated list of the five Shabal hash descriptors (192, 224, 256,
 * 384 and 512). The array carries the PROGMEM attribute. It is handed to
 * hfal_performance_multiple() and hfal_nessie_multiple() further down.
 */
const hfdesc_t* algolist[] PROGMEM = {
(hfdesc_t*)&shabal192_desc,
(hfdesc_t*)&shabal224_desc,
(hfdesc_t*)&shabal256_desc,
(hfdesc_t*)&shabal384_desc,
(hfdesc_t*)&shabal512_desc,
NULL
};
/*****************************************************************************
* additional validation-functions *
*****************************************************************************/
/*
 * Print one Shabal-192 test vector.
 *
 * Dumps the message, hashes it with shabal192() and dumps the 24-byte
 * digest, then hands the same message to hfal_test() together with the
 * Shabal-192 descriptor.
 *
 * msg     message to hash
 * size_b  message length in bits (rounded up to whole bytes for the dump)
 */
void testrun_stdtest_shabal192(void* msg, uint16_t size_b){
	enum { DIGEST_BYTES = 192/8 };
	uint8_t digest[DIGEST_BYTES];

	cli_putstr_P(PSTR("\r\n\r\nTest vectors for Shabal (192 bits):"));

	cli_putstr_P(PSTR("\r\nmessage:"));
	cli_hexdump_block(msg, (size_b+7)/8, 4, 16);

	shabal192(digest, msg, size_b);
	cli_putstr_P(PSTR("\r\nhash:"));
	cli_hexdump_block(digest, DIGEST_BYTES, 4, 16);

	hfal_test(&shabal192_desc, msg, size_b);
}
/*
 * Print one Shabal-224 test vector.
 *
 * Dumps the message, hashes it with shabal224() and dumps the 28-byte
 * digest, then hands the same message to hfal_test() together with the
 * Shabal-224 descriptor.
 *
 * msg     message to hash
 * size_b  message length in bits (rounded up to whole bytes for the dump)
 */
void testrun_stdtest_shabal224(void* msg, uint16_t size_b){
	enum { DIGEST_BYTES = 224/8 };
	uint8_t digest[DIGEST_BYTES];

	cli_putstr_P(PSTR("\r\n\r\nTest vectors for Shabal (224 bits):"));

	cli_putstr_P(PSTR("\r\nmessage:"));
	cli_hexdump_block(msg, (size_b+7)/8, 4, 16);

	shabal224(digest, msg, size_b);
	cli_putstr_P(PSTR("\r\nhash:"));
	cli_hexdump_block(digest, DIGEST_BYTES, 4, 16);

	hfal_test(&shabal224_desc, msg, size_b);
}
/*
 * Print one Shabal-256 test vector.
 *
 * Dumps the message, hashes it with shabal256() and dumps the 32-byte
 * digest, then hands the same message to hfal_test() together with the
 * Shabal-256 descriptor.
 *
 * msg     message to hash
 * size_b  message length in bits (rounded up to whole bytes for the dump)
 */
void testrun_stdtest_shabal256(void* msg, uint16_t size_b){
	enum { DIGEST_BYTES = 256/8 };
	uint8_t digest[DIGEST_BYTES];

	cli_putstr_P(PSTR("\r\n\r\nTest vectors for Shabal (256 bits):"));

	cli_putstr_P(PSTR("\r\nmessage:"));
	cli_hexdump_block(msg, (size_b+7)/8, 4, 16);

	shabal256(digest, msg, size_b);
	cli_putstr_P(PSTR("\r\nhash:"));
	cli_hexdump_block(digest, DIGEST_BYTES, 4, 16);

	hfal_test(&shabal256_desc, msg, size_b);
}
/*
 * Print one Shabal-384 test vector.
 *
 * Dumps the message, hashes it with shabal384() and dumps the 48-byte
 * digest, then hands the same message to hfal_test() together with the
 * Shabal-384 descriptor.
 *
 * msg     message to hash
 * size_b  message length in bits (rounded up to whole bytes for the dump)
 */
void testrun_stdtest_shabal384(void* msg, uint16_t size_b){
	enum { DIGEST_BYTES = 384/8 };
	uint8_t digest[DIGEST_BYTES];

	cli_putstr_P(PSTR("\r\n\r\nTest vectors for Shabal (384 bits):"));

	cli_putstr_P(PSTR("\r\nmessage:"));
	cli_hexdump_block(msg, (size_b+7)/8, 4, 16);

	shabal384(digest, msg, size_b);
	cli_putstr_P(PSTR("\r\nhash:"));
	cli_hexdump_block(digest, DIGEST_BYTES, 4, 16);

	hfal_test(&shabal384_desc, msg, size_b);
}
/*
 * Print one Shabal-512 test vector.
 *
 * Dumps the message, hashes it with shabal512() and dumps the 64-byte
 * digest, then hands the same message to hfal_test() together with the
 * Shabal-512 descriptor.
 *
 * msg     message to hash
 * size_b  message length in bits (rounded up to whole bytes for the dump)
 */
void testrun_stdtest_shabal512(void* msg, uint16_t size_b){
	enum { DIGEST_BYTES = 512/8 };
	uint8_t digest[DIGEST_BYTES];

	cli_putstr_P(PSTR("\r\n\r\nTest vectors for Shabal (512 bits):"));

	cli_putstr_P(PSTR("\r\nmessage:"));
	cli_hexdump_block(msg, (size_b+7)/8, 4, 16);

	shabal512(digest, msg, size_b);
	cli_putstr_P(PSTR("\r\nhash:"));
	cli_hexdump_block(digest, DIGEST_BYTES, 4, 16);

	hfal_test(&shabal512_desc, msg, size_b);
}
void testrun_stdtest_shabal(void){
@ -204,160 +177,17 @@ void testinit(void){
}
/*
 * Print a label followed by the timer reading t in decimal.
 *
 * label_P  label string as produced by PSTR(), passed on to cli_putstr_P()
 * t        value returned by stopTimer(); narrowed to unsigned long before
 *          it is formatted
 */
static void performance_shabal_report(const char* label_P, uint64_t t){
	char str[16];
	cli_putstr_P(label_P);
	ultoa((unsigned long)t, str, 10);
	cli_putstr(str);
}

/*
 * Time the individual Shabal primitives and print one line per measurement.
 *
 * After calibrating the timer and printing its overhead, each primitive is
 * bracketed by startTimer(1) / stopTimer(): the five init functions, one
 * shabal_nextBlock() and one shabal_lastBlock() on a zeroed 64-byte block,
 * and the five ctx2hash functions. The same ctx is reused throughout.
 * Finally hfal_performance_multiple() is run over algolist.
 */
void performance_shabal(void){
	uint64_t t;
	uint8_t data[64];
	uint8_t hash[512/8];
	shabal_ctx_t ctx;

	calibrateTimer();
	print_overhead();
	memset(data, 0, 64);

	/* context initialisation, one measurement per output size */
	startTimer(1);
	shabal192_init(&ctx);
	t = stopTimer();
	performance_shabal_report(PSTR("\r\n\tctx-gen time (192): "), t);

	startTimer(1);
	shabal224_init(&ctx);
	t = stopTimer();
	performance_shabal_report(PSTR("\r\n\tctx-gen time (224): "), t);

	startTimer(1);
	shabal256_init(&ctx);
	t = stopTimer();
	performance_shabal_report(PSTR("\r\n\tctx-gen time (256): "), t);

	startTimer(1);
	shabal384_init(&ctx);
	t = stopTimer();
	performance_shabal_report(PSTR("\r\n\tctx-gen time (384): "), t);

	startTimer(1);
	shabal512_init(&ctx);
	t = stopTimer();
	performance_shabal_report(PSTR("\r\n\tctx-gen time (512): "), t);

	/* block processing */
	startTimer(1);
	shabal_nextBlock(&ctx, data);
	t = stopTimer();
	performance_shabal_report(PSTR("\r\n\tone-block time: "), t);

	startTimer(1);
	shabal_lastBlock(&ctx, data, 0);
	t = stopTimer();
	performance_shabal_report(PSTR("\r\n\tlast block time: "), t);

	/* digest extraction, one measurement per output size */
	startTimer(1);
	shabal192_ctx2hash(hash, &ctx);
	t = stopTimer();
	performance_shabal_report(PSTR("\r\n\tctx2hash time (192): "), t);

	startTimer(1);
	shabal224_ctx2hash(hash, &ctx);
	t = stopTimer();
	performance_shabal_report(PSTR("\r\n\tctx2hash time (224): "), t);

	startTimer(1);
	shabal256_ctx2hash(hash, &ctx);
	t = stopTimer();
	performance_shabal_report(PSTR("\r\n\tctx2hash time (256): "), t);

	startTimer(1);
	shabal384_ctx2hash(hash, &ctx);
	t = stopTimer();
	performance_shabal_report(PSTR("\r\n\tctx2hash time (384): "), t);

	startTimer(1);
	shabal512_ctx2hash(hash, &ctx);
	t = stopTimer();
	performance_shabal_report(PSTR("\r\n\tctx2hash time (512): "), t);

	cli_putstr_P(PSTR("\r\n"));
	hfal_performance_multiple(algolist);
}
void testrun_nessie_shabal(void){
nessie_hash_ctx.hashsize_b = 192;
nessie_hash_ctx.blocksize_B = 512/8;
nessie_hash_ctx.ctx_size_B = sizeof(shabal_ctx_t);
nessie_hash_ctx.name = "Shabal-192";
nessie_hash_ctx.hash_init = (nessie_hash_init_fpt)shabal192_init;
nessie_hash_ctx.hash_next = (nessie_hash_next_fpt)shabal_nextBlock;
nessie_hash_ctx.hash_last = (nessie_hash_last_fpt)shabal_lastBlock;
nessie_hash_ctx.hash_conv = (nessie_hash_conv_fpt)shabal192_ctx2hash;
nessie_hash_run();
nessie_hash_ctx.hashsize_b = 224;
nessie_hash_ctx.name = "Shabal-224";
nessie_hash_ctx.hash_init = (nessie_hash_init_fpt)shabal224_init;
nessie_hash_ctx.hash_conv = (nessie_hash_conv_fpt)shabal224_ctx2hash;
nessie_hash_run();
nessie_hash_ctx.hashsize_b = 256;
nessie_hash_ctx.name = "Shabal-256";
nessie_hash_ctx.hash_init = (nessie_hash_init_fpt)shabal256_init;
nessie_hash_ctx.hash_conv = (nessie_hash_conv_fpt)shabal256_ctx2hash;
nessie_hash_run();
nessie_hash_ctx.hashsize_b = 384;
nessie_hash_ctx.name = "Shabal-384";
nessie_hash_ctx.hash_init = (nessie_hash_init_fpt)shabal384_init;
nessie_hash_ctx.hash_conv = (nessie_hash_conv_fpt)shabal384_ctx2hash;
nessie_hash_run();
nessie_hash_ctx.hashsize_b = 512;
nessie_hash_ctx.name = "Shabal-512";
nessie_hash_ctx.hash_init = (nessie_hash_init_fpt)shabal512_init;
nessie_hash_ctx.hash_conv = (nessie_hash_conv_fpt)shabal512_ctx2hash;
nessie_hash_run();
hfal_nessie_multiple(algolist);
}
/*****************************************************************************
* main *
*****************************************************************************/
/*
 * NULL-terminated list of the five Shabal hash descriptors, carrying the
 * PROGMEM attribute.
 * NOTE(review): an identical definition of algolist appears earlier in this
 * listing (ahead of the validation functions that use it); a compiled
 * translation unit can keep only one of the two.
 */
const hfdesc_t* algolist[] PROGMEM = {
(hfdesc_t*)&shabal192_desc,
(hfdesc_t*)&shabal224_desc,
(hfdesc_t*)&shabal256_desc,
(hfdesc_t*)&shabal384_desc,
(hfdesc_t*)&shabal512_desc,
NULL
};
const char nessie_str[] PROGMEM = "nessie";
const char test_str[] PROGMEM = "test";

View File

@ -21,6 +21,12 @@
#include <stdint.h>
#include <avr/pgmspace.h>
#include <avr/io.h>
#include <avr/interrupt.h>
/*
 * START_TIMER writes 1 to TCCR1B and STOP_TIMER writes 0 to it.
 * NOTE(review): presumably 1 selects the un-prescaled clock for timer 1 and
 * 0 stops the counter -- confirm against the target's datasheet.
 *
 * Each expansion is a single parenthesized expression so that the macro
 * keeps its meaning next to other operators; "START_TIMER;" as a statement
 * works as before.
 */
#define START_TIMER (TCCR1B = 1)
#define STOP_TIMER  (TCCR1B = 0)
void calibrateTimer(void);
void startTimer(uint8_t granularity);