From c9c11514d91b8c19f77d65ac051b998bd99048b0 Mon Sep 17 00:00:00 2001 From: bg Date: Thu, 18 Mar 2010 18:53:47 +0000 Subject: [PATCH] stack usage measurement --- Makefile | 12 +- base64/base64_dec.c | 2 +- base64/base64_dec.h | 2 +- base64/base64_enc.c | 2 +- base64/base64_enc.h | 2 +- bigint/bigint.c | 106 ++-- bigint/bigint.h | 4 +- dsa/base64_dec.c | 246 +++++++++ dsa/base64_dec.h | 29 + dsa/base64_enc.c | 117 ++++ dsa/base64_enc.h | 28 + dsa/bigint.c | 808 +++++++++++++++++++++++++++ dsa/bigint.h | 68 +++ dsa/bigint_add_u.S | 137 +++++ dsa/bigint_io.c | 135 +++++ dsa/bigint_io.h | 28 + dsa/dsa.h | 59 ++ dsa/dsa_key_blob.c | 304 ++++++++++ dsa/dsa_key_blob.h | 25 + dsa/dsa_sign.c | 73 +++ dsa/dsa_verify.c | 75 +++ dsa/memxor.S | 66 +++ dsa/memxor.h | 7 + dsa/noekeon.h | 85 +++ dsa/noekeon_asm.S | 641 +++++++++++++++++++++ dsa/noekeon_prng.c | 75 +++ dsa/noekeon_prng.h | 40 ++ dsa/sha1-asm.S | 883 +++++++++++++++++++++++++++++ dsa/sha1.h | 117 ++++ dsa/sha256-asm.S | 1040 +++++++++++++++++++++++++++++++++++ dsa/sha256.h | 122 ++++ hfal-performance.c | 85 +++ host/get_performance.rb | 29 +- host/performance2wiki.rb | 20 +- mkfiles/001_hfal_std.mk | 3 +- mkfiles/bigint.mk | 4 +- mkfiles/dsa.mk | 13 + mkfiles/shabal.mk | 4 +- mkfiles/shabal_c.mk | 4 +- stack_measuring.S | 96 ++++ stack_measuring.h | 36 ++ test_src/main-bigint-test.c | 6 +- test_src/main-bmw-test.c | 26 +- test_src/main-dsa-test.c | 191 +++++++ 44 files changed, 5765 insertions(+), 90 deletions(-) create mode 100644 dsa/base64_dec.c create mode 100644 dsa/base64_dec.h create mode 100644 dsa/base64_enc.c create mode 100644 dsa/base64_enc.h create mode 100644 dsa/bigint.c create mode 100644 dsa/bigint.h create mode 100644 dsa/bigint_add_u.S create mode 100644 dsa/bigint_io.c create mode 100644 dsa/bigint_io.h create mode 100644 dsa/dsa.h create mode 100644 dsa/dsa_key_blob.c create mode 100644 dsa/dsa_key_blob.h create mode 100644 dsa/dsa_sign.c create mode 100644 dsa/dsa_verify.c create mode 100644 
dsa/memxor.S create mode 100644 dsa/memxor.h create mode 100644 dsa/noekeon.h create mode 100644 dsa/noekeon_asm.S create mode 100644 dsa/noekeon_prng.c create mode 100644 dsa/noekeon_prng.h create mode 100644 dsa/sha1-asm.S create mode 100644 dsa/sha1.h create mode 100644 dsa/sha256-asm.S create mode 100644 dsa/sha256.h create mode 100644 mkfiles/dsa.mk create mode 100644 stack_measuring.S create mode 100644 stack_measuring.h create mode 100644 test_src/main-dsa-test.c diff --git a/Makefile b/Makefile index 561f84e..97c56d5 100644 --- a/Makefile +++ b/Makefile @@ -8,6 +8,8 @@ HASHES := MACS := PRNGS := ENCODINGS := +SIGNATURE := +PK_CIPHERS := AUX := # we use the gnu make standard library @@ -20,7 +22,7 @@ include mkfiles/*.mk #------------------------------------------------------------------------------- ALGORITHMS = $(BLOCK_CIPHERS) $(STREAM_CIPHERS) $(HASHES) $(PRNGS) $(MACS) \ - $(ENCODINGS) $(AUX) + $(ENCODINGS) $(SIGNATURE) $(PK_CIPHERS) $(AUX) ALGORITHMS_OBJ = $(patsubst %,%_OBJ, $(ALGORITHMS)) ALGORITHMS_TEST_BIN = $(patsubst %,%_TEST_BIN, $(ALGORITHMS)) @@ -250,8 +252,14 @@ info: @echo " $(MACS)" @echo " PRNG functions:" @echo " $(PRNGS)" + @echo " signature functions:" + @echo " $(SIGNATURE)" + @echo " public key ciphers:" + @echo " $(PK_CIPHERS)" @echo " encodings:" @echo " $(ENCODINGS)" + @echo " auxiliary functions:" + @echo " $(AUX)" @echo " targets:" @echo " all - all algorithm cores" @echo " cores - all algorithm cores" @@ -264,6 +272,8 @@ info: @echo " macs - all MAC cores" @echo " prngs - all PRNG cores" @echo " all_testrun - testrun all algorithms" + @echo " hash_size - measure size of all hash functions" + @echo " hash_speed - measure performance of all hash functions" @echo " docu - build doxygen documentation" @echo " clean - remove a lot of builded files" @echo " depclean - also remove dependency files" diff --git a/base64/base64_dec.c b/base64/base64_dec.c index f057f54..322cec0 100644 --- a/base64/base64_dec.c +++ b/base64/base64_dec.c @@ 
-187,7 +187,7 @@ int base64_binlength(char* str, uint8_t strict){ */ -int base64dec(void* dest, char* b64str, uint8_t strict){ +int base64dec(void* dest, const char* b64str, uint8_t strict){ uint8_t buffer[4]; uint8_t idx=0; uint8_t term=0; diff --git a/base64/base64_dec.h b/base64/base64_dec.h index 39beff8..1c9f1d9 100644 --- a/base64/base64_dec.h +++ b/base64/base64_dec.h @@ -24,6 +24,6 @@ #include int base64_binlength(char* str, uint8_t strict); -int base64dec(void* dest, char* b64str, uint8_t strict); +int base64dec(void* dest, const char* b64str, uint8_t strict); #endif /*BASE64_DEC_H_*/ diff --git a/base64/base64_enc.c b/base64/base64_enc.c index 400f25c..c588998 100644 --- a/base64/base64_enc.c +++ b/base64/base64_enc.c @@ -75,7 +75,7 @@ char bit6toAscii(uint8_t a){ #endif -void base64enc(char* dest, void* src, uint16_t length){ +void base64enc(char* dest,const void* src, uint16_t length){ uint16_t i,j; uint8_t a[4]; for(i=0; i -void base64enc(char* dest, void* src, uint16_t length); +void base64enc(char* dest, const void* src, uint16_t length); #endif /*BASE64_ENC_H_*/ diff --git a/bigint/bigint.c b/bigint/bigint.c index f57d285..7004e0d 100644 --- a/bigint/bigint.c +++ b/bigint/bigint.c @@ -26,9 +26,16 @@ */ +#define STRING2(x) #x +#define STRING(x) STRING2(x) +#define STR_LINE STRING(__LINE__) + #include "bigint.h" #include - +/* +#include "cli.h" +#include "bigint_io.h" +*/ #ifndef MAX #define MAX(a,b) (((a)>(b))?(a):(b)) #endif @@ -388,6 +395,10 @@ void bigint_set_zero(bigint_t* a){ /* using the Karatsuba-Algorithm */ /* x*y = (xh*yh)*b**2n + ((xh+xl)*(yh+yl) - xh*yh - xl*yl)*b**n + yh*yl */ void bigint_mul_u(bigint_t* dest, const bigint_t* a, const bigint_t* b){ + if(a->length_B==0 || b->length_B==0){ + bigint_set_zero(dest); + return; + } if(dest==a || dest==b){ bigint_t d; uint8_t d_b[a->length_B+b->length_B]; @@ -396,10 +407,6 @@ void bigint_mul_u(bigint_t* dest, const bigint_t* a, const bigint_t* b){ bigint_copy(dest, &d); return; } - 
if(a->length_B==0 || b->length_B==0){ - bigint_set_zero(dest); - return; - } if(a->length_B==1 || b->length_B==1){ if(a->length_B!=1){ XCHG_PTR(a,b); @@ -575,7 +582,6 @@ void bigint_sub_u_bitscale(bigint_t* a, const bigint_t* b, uint16_t bitscale){ } while(borrow){ if(i+1 > a->length_B){ - cli_hexdump_rev(&bitscale, 2); bigint_set_zero(a); return; } @@ -591,19 +597,18 @@ void bigint_sub_u_bitscale(bigint_t* a, const bigint_t* b, uint16_t bitscale){ /******************************************************************************/ void bigint_reduce(bigint_t* a, const bigint_t* r){ +// bigint_adjust(r); uint8_t rfbs = GET_FBS(r); - if(r->length_B==0){ + if(r->length_B==0 || a->length_B==0){ return; } while(a->length_B > r->length_B){ bigint_sub_u_bitscale(a, r, (a->length_B-r->length_B)*8+GET_FBS(a)-rfbs-1); } - while((GET_FBS(a) > rfbs+1) && (a->length_B == r->length_B)){ bigint_sub_u_bitscale(a, r, GET_FBS(a)-rfbs-1); } - while(bigint_cmp_u(a,r)>=0){ bigint_sub_u(a,a,r); } @@ -615,45 +620,46 @@ void bigint_reduce(bigint_t* a, const bigint_t* r){ /* calculate dest = a**exp % r */ /* using square&multiply */ void bigint_expmod_u(bigint_t* dest, const bigint_t* a, const bigint_t* exp, const bigint_t* r){ - bigint_t tmp, tmp2, x; - uint8_t x_b[MAX(r->length_B, a->length_B)], tmp_b[r->length_B*2], tmp2_b[r->length_B*2]; - int16_t i; - uint8_t j; - x.wordv = x_b; - tmp.wordv = tmp_b; - tmp2.wordv = tmp2_b; - bigint_copy(&x, a); - bigint_reduce(&x, r); - bigint_copy(&tmp, &x); - if(a->length_B==0 || exp->length_B==0 || r->length_B==0){ + if(a->length_B==0 || r->length_B==0){ return; } - i=exp->length_B-1; - if(exp->wordv[i]!=1){ - for(j=1<<(GET_FBS(exp)-1); j>0; j>>=1){ - bigint_square(&tmp2, &tmp); - bigint_reduce(&tmp2, r); - if(exp->wordv[i]&j){ - bigint_mul_u(&tmp, &tmp2, &x); - bigint_reduce(&tmp, r); - }else{ - bigint_copy(&tmp, &tmp2); + + bigint_t res, base; + uint8_t base_b[MAX(a->length_B,r->length_B*2)], res_b[r->length_B*2]; + uint16_t i; + uint8_t j, t; + 
res.wordv = res_b; + base.wordv = base_b; + bigint_copy(&base, a); + bigint_reduce(&base, r); + res.wordv[0]=1; + res.length_B=1; + res.info = 0; + bigint_adjust(&res); + for(i=0; i+1length_B; ++i){ + t=exp->wordv[i]; + for(j=0; j<8; ++j){ + if(t&1){ + bigint_mul_u(&res, &res, &base); + bigint_reduce(&res, r); } + bigint_square(&base, &base); + bigint_reduce(&base, r); + t>>=1; } } - for(--i; i>=0; --i){ - for(j=0x80; j>0; j>>=1){ - bigint_square(&tmp2, &tmp); - bigint_reduce(&tmp2, r); - if(exp->wordv[i]&j){ - bigint_mul_u(&tmp, &tmp2, &x); - bigint_reduce(&tmp, r); - }else{ - bigint_copy(&tmp, &tmp2); - } + t=exp->wordv[i]; + while(t){ + if(t&1){ + bigint_mul_u(&res, &res, &base); + bigint_reduce(&res, r); } + bigint_square(&base, &base); + bigint_reduce(&base, r); + t>>=1; } - bigint_copy(dest, &tmp); + SET_POS(&res); + bigint_copy(dest, &res); } /******************************************************************************/ @@ -755,14 +761,28 @@ void bigint_gcdext(bigint_t* gcd, bigint_t* a, bigint_t* b, const bigint_t* x, c /******************************************************************************/ -void bigint_inverse(bigint_t* dest, bigint_t* a, bigint_t* m){ +void bigint_inverse(bigint_t* dest, const bigint_t* a, const bigint_t* m){ bigint_gcdext(NULL, dest, NULL, a, m); while(dest->info&BIGINT_NEG_MASK){ bigint_add_s(dest, dest, m); } } - +/******************************************************************************/ + +void bigint_changeendianess(bigint_t* a){ + uint8_t t, *p, *q; + p = a->wordv; + q = p+a->length_B-1; + while(p. 
+ */ + + +/** + * base64 decoder (RFC3548) + * Author: Daniel Otte + * License: GPLv3 + * + * + */ + +#include +#include "base64_dec.h" + +#include "test_src/cli.h" + +/* + #define USE_GCC_EXTENSION +*/ +#if 1 + +#ifdef USE_GCC_EXTENSION + +static +int ascii2bit6(char a){ + switch(a){ + case 'A'...'Z': + return a-'A'; + case 'a'...'z': + return a-'a'+26; + case '0'...'9': + return a-'0'+52; + case '+': + case '-': + return 62; + case '/': + case '_': + return 63; + default: + return -1; + } +} + +#else + +static +uint8_t ascii2bit6(char a){ + int r; + switch(a>>4){ + case 0x5: + case 0x4: + r=a-'A'; + if(r<0 || r>25){ + return -1; + } else { + return r; + } + case 0x7: + case 0x6: + r=a-'a'; + if(r<0 || r>25){ + return -1; + } else { + return r+26; + } + break; + case 0x3: + if(a>'9') + return -1; + return a-'0'+52; + default: + break; + } + switch (a){ + case '+': + case '-': + return 62; + case '/': + case '_': + return 63; + default: + return 0xff; + } +} + +#endif + +#else + +static +uint8_t ascii2bit6(uint8_t a){ + if(a>='A' && a<='Z'){ + return a-'A'; + } else { + if(a>='a' && a<= 'z'){ + return a-'a'+26; + } else { + if(a>='0' && a<='9'){ + return a-'0'+52; + } else { + if(a=='+' || a=='-'){ + return 62; + } else { + if(a=='/' || a=='_'){ + return 63; + } else { + return 0xff; + } + } + } + } + } +} + +#endif + +int base64_binlength(char* str, uint8_t strict){ + int l=0; + uint8_t term=0; + for(;;){ + if(*str=='\0') + break; + if(*str=='\n' || *str=='\r'){ + str++; + continue; + } + if(*str=='='){ + term++; + str++; + if(term==2){ + break; + } + continue; + } + if(term) + return -1; + if(ascii2bit6(*str)==-1){ + if(strict) + return -1; + } else { + l++; + } + str++; + } + switch(term){ + case 0: + if(l%4!=0) + return -1; + return l/4*3; + case 1: + if(l%4!=3) + return -1; + return (l+1)/4*3-1; + case 2: + if(l%4!=2) + return -1; + return (l+2)/4*3-2; + default: + return -1; + } +} + +/* + |543210543210543210543210| + |765432107654321076543210| + + . . . . 
+ |54321054|32105432|10543210| + |76543210|76543210|76543210| + +*/ + +int base64dec(void* dest, const char* b64str, uint8_t strict){ + uint8_t buffer[4]; + uint8_t idx=0; + uint8_t term=0; + for(;;){ +// cli_putstr_P(PSTR("\r\n DBG: got 0x")); +// cli_hexdump(b64str, 1); + buffer[idx]= ascii2bit6(*b64str); +// cli_putstr_P(PSTR(" --> 0x")); +// cli_hexdump(buffer+idx, 1); + + if(buffer[idx]==0xFF){ + if(*b64str=='='){ + term++; + b64str++; + if(term==2) + goto finalize; /* definitly the end */ + }else{ + if(*b64str == '\0'){ + goto finalize; /* definitly the end */ + }else{ + if(*b64str == '\r' || *b64str == '\n' || !(strict)){ + b64str++; /* charcters that we simply ignore */ + }else{ + return -1; + } + } + } + }else{ + if(term) + return -1; /* this happens if we get a '=' in the stream */ + idx++; + b64str++; + } + if(idx==4){ + ((uint8_t*)dest)[0] = buffer[0]<<2 | buffer[1]>>4; + ((uint8_t*)dest)[1] = buffer[1]<<4 | buffer[2]>>2; + ((uint8_t*)dest)[2] = buffer[2]<<6 | buffer[3]; + dest = (uint8_t*)dest +3; + idx=0; + } + } + finalize: + /* the final touch */ + if(idx==0) + return 0; + if(term==1){ + ((uint8_t*)dest)[0] = buffer[0]<<2 | buffer[1]>>4; + ((uint8_t*)dest)[1] = buffer[1]<<4 | buffer[2]>>2; + return 0; + } + if(term==2){ + ((uint8_t*)dest)[0] = buffer[0]<<2 | buffer[1]>>4; + return 0; + } + return -1; +} diff --git a/dsa/base64_dec.h b/dsa/base64_dec.h new file mode 100644 index 0000000..1c9f1d9 --- /dev/null +++ b/dsa/base64_dec.h @@ -0,0 +1,29 @@ +/* base64_dec.h */ +/* + * This file is part of the AVR-Crypto-Lib. + * Copyright (C) 2006, 2007, 2008 Daniel Otte (daniel.otte@rub.de) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + + +#ifndef BASE64_DEC_H_ +#define BASE64_DEC_H_ + +#include + +int base64_binlength(char* str, uint8_t strict); +int base64dec(void* dest, const char* b64str, uint8_t strict); + +#endif /*BASE64_DEC_H_*/ diff --git a/dsa/base64_enc.c b/dsa/base64_enc.c new file mode 100644 index 0000000..c588998 --- /dev/null +++ b/dsa/base64_enc.c @@ -0,0 +1,117 @@ +/* base64_enc.c */ +/* + * This file is part of the AVR-Crypto-Lib. + * Copyright (C) 2006, 2007, 2008 Daniel Otte (daniel.otte@rub.de) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + + +/** + * base64 encoder (RFC3548) + * Author: Daniel Otte + * License: GPLv3 + * + * + */ + +#include +#include "base64_enc.h" + +#if 1 +#include + +char base64_alphabet[64] PROGMEM = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', + 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', + 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', + 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', + 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', + 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', + 'w', 'x', 'y', 'z', '0', '1', '2', '3', + '4', '5', '6', '7', '8', '9', '+', '/' }; + +static +char bit6toAscii(uint8_t a){ + a &= (uint8_t)0x3F; + return pgm_read_byte(base64_alphabet+a); +} + +#else + +static +char bit6toAscii(uint8_t a){ + a &= (uint8_t)0x3F; + + if(a<=25){ + return a+'A'; + } else { + if(a<=51){ + return a-26+'a'; + } else { + if(a<=61){ + return a-52+'0'; + } else { + if(a==62){ + return '+'; + } else { + return '/'; /* a == 63 */ + } + } + } + } +} + +#endif + +void base64enc(char* dest,const void* src, uint16_t length){ + uint16_t i,j; + uint8_t a[4]; + for(i=0; i>2; + a[1]= (((((uint8_t*)src)[i*3+0])<<4) | ((((uint8_t*)src)[i*3+1])>>4)) & 0x3F; + a[2]= (((((uint8_t*)src)[i*3+1])<<2) | ((((uint8_t*)src)[i*3+2])>>6)) & 0x3F; + a[3]= (((uint8_t*)src)[i*3+2]) & 0x3F; + for(j=0; j<4; ++j){ + *dest++=bit6toAscii(a[j]); + } + } + /* now we do the rest */ + switch(length%3){ + case 0: + break; + case 1: + a[0]=(((uint8_t*)src)[i*3+0])>>2; + a[1]=((((uint8_t*)src)[i*3+0])<<4)&0x3F; + *dest++ = bit6toAscii(a[0]); + *dest++ = bit6toAscii(a[1]); + *dest++ = '='; + *dest++ = '='; + break; + case 2: + a[0]= (((uint8_t*)src)[i*3+0])>>2; + a[1]= (((((uint8_t*)src)[i*3+0])<<4) | ((((uint8_t*)src)[i*3+1])>>4)) & 0x3F; + a[2]= ((((uint8_t*)src)[i*3+1])<<2) & 0x3F; + *dest++ = bit6toAscii(a[0]); + *dest++ = bit6toAscii(a[1]); + *dest++ = bit6toAscii(a[2]); + *dest++ = '='; + break; + default: /* this will not happen! 
*/ + break; + } +/* finalize: */ + *dest='\0'; +} + diff --git a/dsa/base64_enc.h b/dsa/base64_enc.h new file mode 100644 index 0000000..89a5f86 --- /dev/null +++ b/dsa/base64_enc.h @@ -0,0 +1,28 @@ +/* base64_enc.h */ +/* + * This file is part of the AVR-Crypto-Lib. + * Copyright (C) 2006, 2007, 2008 Daniel Otte (daniel.otte@rub.de) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + + +#ifndef BASE64_ENC_H_ +#define BASE64_ENC_H_ + +#include + +void base64enc(char* dest, const void* src, uint16_t length); + +#endif /*BASE64_ENC_H_*/ diff --git a/dsa/bigint.c b/dsa/bigint.c new file mode 100644 index 0000000..7004e0d --- /dev/null +++ b/dsa/bigint.c @@ -0,0 +1,808 @@ +/* bigint.c */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ +/** + * \file bigint.c + * \author Daniel Otte + * \date 2010-02-22 + * + * \license GPLv3 or later + * + */ + + +#define STRING2(x) #x +#define STRING(x) STRING2(x) +#define STR_LINE STRING(__LINE__) + +#include "bigint.h" +#include +/* +#include "cli.h" +#include "bigint_io.h" +*/ +#ifndef MAX + #define MAX(a,b) (((a)>(b))?(a):(b)) +#endif + +#ifndef MIN + #define MIN(a,b) (((a)<(b))?(a):(b)) +#endif + +#define SET_FBS(a, v) do{(a)->info &=0xF8; (a)->info |= (v);}while(0) +#define GET_FBS(a) ((a)->info&BIGINT_FBS_MASK) +#define SET_NEG(a) (a)->info |= BIGINT_NEG_MASK +#define SET_POS(a) (a)->info &= ~BIGINT_NEG_MASK +#define XCHG(a,b) do{(a)^=(b); (b)^=(a); (a)^=(b);}while(0) +#define XCHG_PTR(a,b) do{ a = (void*)(((uint16_t)(a)) ^ ((uint16_t)(b))); \ + b = (void*)(((uint16_t)(a)) ^ ((uint16_t)(b))); \ + a = (void*)(((uint16_t)(a)) ^ ((uint16_t)(b)));}while(0) + +#define GET_SIGN(a) ((a)->info&BIGINT_NEG_MASK) + +/******************************************************************************/ +void bigint_adjust(bigint_t* a){ + while(a->length_B!=0 && a->wordv[a->length_B-1]==0){ + a->length_B--; + } + if(a->length_B==0){ + a->info=0; + return; + } + uint8_t t; + uint8_t i = 0x07; + t = a->wordv[a->length_B-1]; + while((t&0x80)==0 && i){ + t<<=1; + i--; + } + SET_FBS(a, i); +} + +/******************************************************************************/ + +void bigint_copy(bigint_t* dest, const bigint_t* src){ + memcpy(dest->wordv, src->wordv, src->length_B); + dest->length_B = src->length_B; + dest->info = src->info; +} + +/******************************************************************************/ + +/* this should be implemented in assembly */ +/* +void bigint_add_u(bigint_t* dest, const bigint_t* a, const bigint_t* b){ + uint16_t t=0, i; + if(a->length_B < b->length_B){ + XCHG_PTR(a,b); + } + for(i=0; ilength_B; ++i){ + t = a->wordv[i] + b->wordv[i] + t; + dest->wordv[i] = (uint8_t)t; + t>>=8; + } + for(; ilength_B; ++i){ + t = a->wordv[i] + t; 
+ dest->wordv[i] = (uint8_t)t; + t>>=8; + } + dest->wordv[i++] = t; + dest->length_B = i; + bigint_adjust(dest); +} +*/ +/******************************************************************************/ + +/* this should be implemented in assembly */ +void bigint_add_scale_u(bigint_t* dest, const bigint_t* a, uint16_t scale){ + uint16_t i,j=0; + uint16_t t=0; + if(scale>dest->length_B) + memset(dest->wordv+dest->length_B, 0, scale-dest->length_B); + for(i=scale; ilength_B+scale; ++i,++j){ + t = a->wordv[j] + t; + if(dest->length_B>i){ + t += dest->wordv[i]; + } + dest->wordv[i] = (uint8_t)t; + t>>=8; + } + while(t){ + if(dest->length_B>i){ + t = dest->wordv[i] + t; + } + dest->wordv[i] = (uint8_t)t; + t>>=8; + ++i; + } + if(dest->length_B < i){ + dest->length_B = i; + } + bigint_adjust(dest); +} + +/******************************************************************************/ + +/* this should be implemented in assembly */ +void bigint_sub_u(bigint_t* dest, const bigint_t* a, const bigint_t* b){ + int8_t borrow=0; + int8_t r; + int16_t t; + uint16_t i, min, max; + min = MIN(a->length_B, b->length_B); + max = MAX(a->length_B, b->length_B); + r = bigint_cmp_u(a,b); + if(r==0){ + dest->length_B = 0; + dest->wordv[0] = 0; + bigint_adjust(dest); + return; + } + if(b->length_B==0){ + dest->length_B = a->length_B; + memcpy(dest->wordv, a->wordv, a->length_B); + dest->info = a->info; + SET_POS(dest); + return; + } + if(a->length_B==0){ + dest->length_B = b->length_B; + memcpy(dest->wordv, b->wordv, b->length_B); + dest->info = b->info; + SET_NEG(dest); + return; + } + if(r<0){ + bigint_sub_u(dest, b, a); + SET_NEG(dest); + }else{ + for(i=0; iwordv[i] - b->wordv[i] - borrow; + if(t<0){ + borrow = 1; + dest->wordv[i]=(uint8_t)t; + }else{ + borrow = 0; + dest->wordv[i]=(uint8_t)t; + } + } + for(;iwordv[i] - borrow; + if(t<0){ + borrow = 1; + dest->wordv[i]=(uint8_t)t; + }else{ + borrow = 0; + dest->wordv[i]=(uint8_t)t; + } + + } + SET_POS(dest); + dest->length_B = i; + 
bigint_adjust(dest); + } +} + +/******************************************************************************/ + +int8_t bigint_cmp_u(const bigint_t* a, const bigint_t* b){ + if(a->length_B > b->length_B){ + return 1; + } + if(a->length_B < b->length_B){ + return -1; + } + if(a->length_B==0){ + return 0; + } + uint16_t i; + i = a->length_B-1; + do{ + if(a->wordv[i]!=b->wordv[i]){ + if(a->wordv[i]>b->wordv[i]){ + return 1; + }else{ + return -1; + } + } + }while(i--); + return 0; +} + +/******************************************************************************/ + +void bigint_add_s(bigint_t* dest, const bigint_t* a, const bigint_t* b){ + uint8_t s; + s = GET_SIGN(a)?2:0; + s |= GET_SIGN(b)?1:0; + switch(s){ + case 0: /* both positive */ + bigint_add_u(dest, a,b); + SET_POS(dest); + break; + case 1: /* a positive, b negative */ + bigint_sub_u(dest, a, b); + break; + case 2: /* a negative, b positive */ + bigint_sub_u(dest, b, a); + break; + case 3: /* both negative */ + bigint_add_u(dest, a, b); + SET_NEG(dest); + break; + default: /* how can this happen?*/ + break; + } +} + +/******************************************************************************/ + +void bigint_sub_s(bigint_t* dest, const bigint_t* a, const bigint_t* b){ + uint8_t s; + s = GET_SIGN(a)?2:0; + s |= GET_SIGN(b)?1:0; + switch(s){ + case 0: /* both positive */ + bigint_sub_u(dest, a,b); + break; + case 1: /* a positive, b negative */ + bigint_add_u(dest, a, b); + SET_POS(dest); + break; + case 2: /* a negative, b positive */ + bigint_add_u(dest, a, b); + SET_NEG(dest); + break; + case 3: /* both negative */ + bigint_sub_u(dest, b, a); + break; + default: /* how can this happen?*/ + break; + } + +} + +/******************************************************************************/ + +int8_t bigint_cmp_s(const bigint_t* a, const bigint_t* b){ + uint8_t s; + if(a->length_B==0 && b->length_B==0){ + return 0; + } + s = GET_SIGN(a)?2:0; + s |= GET_SIGN(b)?1:0; + switch(s){ + case 0: /* both 
positive */ + return bigint_cmp_u(a, b); + break; + case 1: /* a positive, b negative */ + return 1; + break; + case 2: /* a negative, b positive */ + return -1; + break; + case 3: /* both negative */ + return bigint_cmp_u(b, a); + break; + default: /* how can this happen?*/ + break; + } + return 0; /* just to satisfy the compiler */ +} + +/******************************************************************************/ + +void bigint_shiftleft(bigint_t* a, uint16_t shift){ + uint16_t byteshift; + uint16_t i; + uint8_t bitshift; + uint16_t t=0; + byteshift = (shift+3)/8; + bitshift = shift&7; + memmove(a->wordv+byteshift, a->wordv, a->length_B); + memset(a->wordv, 0, byteshift); + if(bitshift!=0){ + if(bitshift<=4){ /* shift to the left */ + for(i=byteshift; ilength_B+byteshift; ++i){ + t |= (a->wordv[i])<wordv[i] = (uint8_t)t; + t >>= 8; + } + a->wordv[i] = (uint8_t)t; + byteshift++; + }else{ /* shift to the right */ + for(i=a->length_B+byteshift-1; i>byteshift-1; --i){ + t |= (a->wordv[i])<<(bitshift); + a->wordv[i] = (uint8_t)(t>>8); + t <<= 8; + } + t |= (a->wordv[i])<<(bitshift); + a->wordv[i] = (uint8_t)(t>>8); + } + } + a->length_B += byteshift; + bigint_adjust(a); +} + +/******************************************************************************/ + +void bigint_shiftright(bigint_t* a, uint16_t shift){ + uint16_t byteshift; + uint16_t i; + uint8_t bitshift; + uint16_t t=0; + byteshift = shift/8; + bitshift = shift&7; + if(byteshift >= a->length_B){ /* we would shift out more than we have */ + bigint_set_zero(a); + return; + } + if(byteshift == a->length_B-1 && bitshift>GET_FBS(a)){ + bigint_set_zero(a); + return; + } + if(byteshift){ + memmove(a->wordv, a->wordv+byteshift, a->length_B-byteshift); + memset(a->wordv+a->length_B-byteshift, 0, byteshift); + } + if(bitshift!=0){ + /* shift to the right */ + for(i=a->length_B-byteshift-1; i>0; --i){ + t |= (a->wordv[i])<<(8-bitshift); + a->wordv[i] = (uint8_t)(t>>8); + t <<= 8; + } + t |= 
(a->wordv[0])<<(8-bitshift); + a->wordv[0] = (uint8_t)(t>>8); + } + a->length_B -= byteshift; + bigint_adjust(a); +} + +/******************************************************************************/ + +void bigint_xor(bigint_t* dest, const bigint_t* a){ + uint16_t i; + for(i=0; ilength_B; ++i){ + dest->wordv[i] ^= a->wordv[i]; + } + bigint_adjust(dest); +} + +/******************************************************************************/ + +void bigint_set_zero(bigint_t* a){ + a->length_B=0; +} + +/******************************************************************************/ + +/* using the Karatsuba-Algorithm */ +/* x*y = (xh*yh)*b**2n + ((xh+xl)*(yh+yl) - xh*yh - xl*yl)*b**n + yh*yl */ +void bigint_mul_u(bigint_t* dest, const bigint_t* a, const bigint_t* b){ + if(a->length_B==0 || b->length_B==0){ + bigint_set_zero(dest); + return; + } + if(dest==a || dest==b){ + bigint_t d; + uint8_t d_b[a->length_B+b->length_B]; + d.wordv = d_b; + bigint_mul_u(&d, a, b); + bigint_copy(dest, &d); + return; + } + if(a->length_B==1 || b->length_B==1){ + if(a->length_B!=1){ + XCHG_PTR(a,b); + } + uint16_t i, t=0; + uint8_t x = a->wordv[0]; + for(i=0; ilength_B; ++i){ + t += b->wordv[i]*x; + dest->wordv[i] = (uint8_t)t; + t>>=8; + } + dest->wordv[i] = (uint8_t)t; + dest->length_B=i+1; + bigint_adjust(dest); + return; + } + if(a->length_B<=4 && b->length_B<=4){ + uint32_t p=0, q=0; + uint64_t r; + memcpy(&p, a->wordv, a->length_B); + memcpy(&q, b->wordv, b->length_B); + r = (uint64_t)p*(uint64_t)q; + memcpy(dest->wordv, &r, a->length_B+b->length_B); + dest->length_B = a->length_B+b->length_B; + bigint_adjust(dest); + return; + } + bigint_set_zero(dest); + /* split a in xh & xl; split b in yh & yl */ + uint16_t n; + n=(MAX(a->length_B, b->length_B)+1)/2; + bigint_t xl, xh, yl, yh; + xl.wordv = a->wordv; + yl.wordv = b->wordv; + if(a->length_B<=n){ + xh.info=0; + xh.length_B = 0; + xl.length_B = a->length_B; + xl.info = 0; + }else{ + xl.length_B=n; + xl.info = 0; + 
bigint_adjust(&xl); + xh.wordv = a->wordv+n; + xh.length_B = a->length_B-n; + xh.info = 0; + } + if(b->length_B<=n){ + yh.info=0; + yh.length_B = 0; + yl.length_B = b->length_B; + yl.info = b->info; + }else{ + yl.length_B=n; + yl.info = 0; + bigint_adjust(&yl); + yh.wordv = b->wordv+n; + yh.length_B = b->length_B-n; + yh.info = 0; + } + /* now we have split up a and b */ + uint8_t tmp_b[2*n+2], m_b[2*(n+1)]; + bigint_t tmp, tmp2, m; + tmp.wordv = tmp_b; + tmp2.wordv = tmp_b+n+1; + m.wordv = m_b; + + bigint_mul_u(dest, &xl, &yl); /* dest <= xl*yl */ + bigint_add_u(&tmp2, &xh, &xl); /* tmp2 <= xh+xl */ + bigint_add_u(&tmp, &yh, &yl); /* tmp <= yh+yl */ + bigint_mul_u(&m, &tmp2, &tmp); /* m <= tmp2*tmp */ + bigint_mul_u(&tmp, &xh, &yh); /* h <= xh*yh */ + bigint_sub_u(&m, &m, dest); /* m <= m-dest */ + bigint_sub_u(&m, &m, &tmp); /* m <= m-h */ + bigint_add_scale_u(dest, &m, n); + bigint_add_scale_u(dest, &tmp, 2*n); +} + +/******************************************************************************/ + +void bigint_mul_s(bigint_t* dest, const bigint_t* a, const bigint_t* b){ + uint8_t s; + s = GET_SIGN(a)?2:0; + s |= GET_SIGN(b)?1:0; + switch(s){ + case 0: /* both positive */ + bigint_mul_u(dest, a,b); + SET_POS(dest); + break; + case 1: /* a positive, b negative */ + bigint_mul_u(dest, a,b); + SET_NEG(dest); + break; + case 2: /* a negative, b positive */ + bigint_mul_u(dest, a,b); + SET_NEG(dest); + break; + case 3: /* both negative */ + bigint_mul_u(dest, a,b); + SET_POS(dest); + break; + default: /* how can this happen?*/ + break; + } +} + +/******************************************************************************/ + +/* square */ +/* (xh*b^n+xl)^2 = xh^2*b^2n + 2*xh*xl*b^n + xl^2 */ +void bigint_square(bigint_t* dest, const bigint_t* a){ + if(a->length_B<=4){ + uint64_t r=0; + memcpy(&r, a->wordv, a->length_B); + r = r*r; + memcpy(dest->wordv, &r, 2*a->length_B); + SET_POS(dest); + dest->length_B=2*a->length_B; + bigint_adjust(dest); + return; + } + 
if(dest==a){ + bigint_t d; + uint8_t d_b[a->length_B*2]; + d.wordv = d_b; + bigint_square(&d, a); + bigint_copy(dest, &d); + return; + } + uint16_t n; + n=(a->length_B+1)/2; + bigint_t xh, xl, tmp; /* x-high, x-low, temp */ + uint8_t buffer[2*n+1]; + xl.wordv = a->wordv; + xl.length_B = n; + xh.wordv = a->wordv+n; + xh.length_B = a->length_B-n; + tmp.wordv = buffer; + bigint_square(dest, &xl); + bigint_square(&tmp, &xh); + bigint_add_scale_u(dest, &tmp, 2*n); + bigint_mul_u(&tmp, &xl, &xh); + bigint_shiftleft(&tmp, 1); + bigint_add_scale_u(dest, &tmp, n); +} + +/******************************************************************************/ + +void bigint_sub_u_bitscale(bigint_t* a, const bigint_t* b, uint16_t bitscale){ + bigint_t tmp; + uint8_t tmp_b[b->length_B+1]; + uint16_t i,j,byteshift=bitscale/8; + uint8_t borrow=0; + int16_t t; + + if(a->length_B < b->length_B+byteshift){ + bigint_set_zero(a); + return; + } + + tmp.wordv = tmp_b; + bigint_copy(&tmp, b); + bigint_shiftleft(&tmp, bitscale&7); + + for(j=0,i=byteshift; iwordv[i] - tmp.wordv[j] - borrow; + a->wordv[i] = (uint8_t)t; + if(t<0){ + borrow = 1; + }else{ + borrow = 0; + } + } + while(borrow){ + if(i+1 > a->length_B){ + bigint_set_zero(a); + return; + } + a->wordv[i] -= borrow; + if(a->wordv[i]!=0xff){ + borrow=0; + } + ++i; + } + bigint_adjust(a); +} + +/******************************************************************************/ + +void bigint_reduce(bigint_t* a, const bigint_t* r){ +// bigint_adjust(r); + uint8_t rfbs = GET_FBS(r); + + if(r->length_B==0 || a->length_B==0){ + return; + } + while(a->length_B > r->length_B){ + bigint_sub_u_bitscale(a, r, (a->length_B-r->length_B)*8+GET_FBS(a)-rfbs-1); + } + while((GET_FBS(a) > rfbs+1) && (a->length_B == r->length_B)){ + bigint_sub_u_bitscale(a, r, GET_FBS(a)-rfbs-1); + } + while(bigint_cmp_u(a,r)>=0){ + bigint_sub_u(a,a,r); + } + bigint_adjust(a); +} + +/******************************************************************************/ + +/* calculate 
dest = a**exp % r */ +/* using square&multiply */ +void bigint_expmod_u(bigint_t* dest, const bigint_t* a, const bigint_t* exp, const bigint_t* r){ + if(a->length_B==0 || r->length_B==0){ + return; + } + + bigint_t res, base; + uint8_t base_b[MAX(a->length_B,r->length_B*2)], res_b[r->length_B*2]; + uint16_t i; + uint8_t j, t; + res.wordv = res_b; + base.wordv = base_b; + bigint_copy(&base, a); + bigint_reduce(&base, r); + res.wordv[0]=1; + res.length_B=1; + res.info = 0; + bigint_adjust(&res); + for(i=0; i+1length_B; ++i){ + t=exp->wordv[i]; + for(j=0; j<8; ++j){ + if(t&1){ + bigint_mul_u(&res, &res, &base); + bigint_reduce(&res, r); + } + bigint_square(&base, &base); + bigint_reduce(&base, r); + t>>=1; + } + } + t=exp->wordv[i]; + while(t){ + if(t&1){ + bigint_mul_u(&res, &res, &base); + bigint_reduce(&res, r); + } + bigint_square(&base, &base); + bigint_reduce(&base, r); + t>>=1; + } + SET_POS(&res); + bigint_copy(dest, &res); +} + +/******************************************************************************/ +/* gcd <-- gcd(x,y) a*x+b*y=gcd */ +void bigint_gcdext(bigint_t* gcd, bigint_t* a, bigint_t* b, const bigint_t* x, const bigint_t* y){ + bigint_t g, x_, y_, u, v, a_, b_, c_, d_; + volatile uint16_t i=0; + if(x->length_B==0 || y->length_B==0){ + return; + } + while(x->wordv[i]==0 && y->wordv[i]==0){ + ++i; + } + uint8_t g_b[i+2], x_b[x->length_B-i], y_b[y->length_B-i]; + uint8_t u_b[x->length_B-i], v_b[y->length_B-i]; + uint8_t a_b[y->length_B+2], c_b[y->length_B+2]; + uint8_t b_b[x->length_B+2], d_b[x->length_B+2]; + + g.wordv = g_b; + x_.wordv = x_b; + y_.wordv = y_b; + memset(g_b, 0, i); + g_b[i]=1; + g.length_B = i+1; + g.info=0; + x_.info = y_.info = 0; + x_.length_B = x->length_B-i; + y_.length_B = y->length_B-i; + memcpy(x_.wordv, x->wordv+i, x_.length_B); + memcpy(y_.wordv, y->wordv+i, y_.length_B); + for(i=0; (x_.wordv[0]&(1<=0){ + bigint_sub_u(&u, &u, &v); + bigint_sub_s(&a_, &a_, &c_); + bigint_sub_s(&b_, &b_, &d_); + }else{ + bigint_sub_u(&v, 
&v, &u); + bigint_sub_s(&c_, &c_, &a_); + bigint_sub_s(&d_, &d_, &b_); + } + }while(u.length_B); + if(gcd){ + bigint_mul_s(gcd, &v, &g); + } + if(a){ + bigint_copy(a, &c_); + } + if(b){ + bigint_copy(b, &d_); + } +} + +/******************************************************************************/ + +void bigint_inverse(bigint_t* dest, const bigint_t* a, const bigint_t* m){ + bigint_gcdext(NULL, dest, NULL, a, m); + while(dest->info&BIGINT_NEG_MASK){ + bigint_add_s(dest, dest, m); + } +} + +/******************************************************************************/ + +void bigint_changeendianess(bigint_t* a){ + uint8_t t, *p, *q; + p = a->wordv; + q = p+a->length_B-1; + while(p. +*/ +/** + * \file bigint.h + * \author Daniel Otte + * \date 2010-02-22 + * + * \license GPLv3 or later + * + */ + +#ifndef BIGINT_H_ +#define BIGINT_H_ + +#include + +#define BIGINT_FBS_MASK 0x07 /* the last three bits indicate which is the first bit set */ +#define BIGINT_NEG_MASK 0x80 /* this bit indicates a negative value */ +typedef struct{ + uint16_t length_B; + uint8_t info; + uint8_t *wordv; /* word vector, pointing to the LSB */ +}bigint_t; + + +/******************************************************************************/ + +void bigint_adjust(bigint_t* a); +void bigint_copy(bigint_t* dest, const bigint_t* src); +void bigint_add_u(bigint_t* dest, const bigint_t* a, const bigint_t* b); +void bigint_add_scale_u(bigint_t* dest, const bigint_t* a, uint16_t scale); +void bigint_sub_u(bigint_t* dest, const bigint_t* a, const bigint_t* b); +int8_t bigint_cmp_u(const bigint_t * a, const bigint_t * b); +void bigint_add_s(bigint_t* dest, const bigint_t* a, const bigint_t* b); +void bigint_sub_s(bigint_t* dest, const bigint_t* a, const bigint_t* b); +int8_t bigint_cmp_s(const bigint_t* a, const bigint_t* b); +void bigint_shiftleft(bigint_t* a, uint16_t shift); +void bigint_shiftright(bigint_t* a, uint16_t shift); +void bigint_xor(bigint_t* dest, const bigint_t* a); +void 
bigint_set_zero(bigint_t* a); +void bigint_mul_u(bigint_t* dest, const bigint_t* a, const bigint_t* b); +void bigint_mul_s(bigint_t* dest, const bigint_t* a, const bigint_t* b); +void bigint_square(bigint_t* dest, const bigint_t* a); +void bigint_sub_u_bitscale(bigint_t* a, const bigint_t* b, uint16_t bitscale); +void bigint_reduce(bigint_t* a, const bigint_t* r); +void bigint_expmod_u(bigint_t* dest, const bigint_t* a, const bigint_t* exp, const bigint_t* r); +void bigint_gcdext(bigint_t* gcd, bigint_t* a, bigint_t* b, const bigint_t* x, const bigint_t* y); +void bigint_inverse(bigint_t* dest, const bigint_t* a, const bigint_t* m); +void bigint_changeendianess(bigint_t* a); +/******************************************************************************/ + +#endif /*BIGINT_H_*/ diff --git a/dsa/bigint_add_u.S b/dsa/bigint_add_u.S new file mode 100644 index 0000000..7c34f1a --- /dev/null +++ b/dsa/bigint_add_u.S @@ -0,0 +1,137 @@ +/* bigint_add_u.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2010 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +/** + * \file bigint_add_u.S + * \email daniel.otte@rub.de + * \author Daniel Otte + * \date 2010-03-01 + * \license GPLv3 or later + * + */ + +#include "avr-asm-macros.S" + +/* + param dest: r24:r25 + param a: r22:r23 + param b: r20:r21 +*/ +LEN_A_0 = 22 +LEN_A_1 = 23 +LEN_B_0 = 20 +LEN_B_1 = 21 + + +.global bigint_add_u +bigint_add_u: + push_range 28, 29 + push_range 24, 25 + movw r26, r24 ; X is our destination pointer + movw r30, r22 ; Z = a + movw r28, r20 ; Y = b + ldd LEN_A_0, Z+0 + ldd LEN_A_1, Z+1 + ldd LEN_B_0, Y+0 + ldd LEN_B_1, Y+1 + cp LEN_A_0, LEN_B_0 + cpc LEN_A_1, LEN_B_1 + brsh 3f + movw r18, LEN_A_0 ; swap length values + movw LEN_A_0, LEN_B_0 + movw LEN_B_0, r18 + movw r18, r30 ; swap pointers + movw r30, r28 + movw r28, r18 +3: ; now a is the longer integer + movw r24, LEN_A_0 + adiw r24, 0 + brne 4f + st X+, r1 ; store length + st X+, r1 + st X+, r1 ; store 0 in info field + rjmp 9f +4: + adiw r24, 1 + st X+, r24 ; store length + st X+, r25 + st X+, r1 ; store 0 in info field + ld r18, X+ + ld r19, X+ + movw r26, r18 + adiw r30, 3 ; adjust pointers to point at wordv + ld r18, Z+ + ld r19, Z+ + movw r30, r18 + adiw r28, 3 + ld r18, Y+ + ld r19, Y+ + movw r28, r18 + + sub LEN_A_0, LEN_B_0 + sbc LEN_A_1, LEN_B_1 + movw r24, LEN_B_0 + clr r0 + adiw r24, 0 + breq 6f + clc +5: + ld r0, Z+ + ld r1, Y+ + adc r0, r1 + st X+, r0 + dec r24 + brne 5b + rol r0 ; store carry bit + tst r25 + breq 6f + dec r25 + dec r24 + ror r0 ; write carry back + rjmp 5b +6: /* the main part is done */ + movw r24, LEN_A_0 + clr r1 + adiw r24, 0 + breq 8f +62: + ror r0 ; write carry back +7: + ld r0, Z+ + adc r0, r1 + st X+, r0 + dec r24 + brne 7b + rol r0 ; store carry bit + tst r25 + breq 8f + dec r25 + dec r24 + rjmp 62b +8: + ror r0 + clr r0 + rol r0 + st X+, r0 +9: + pop_range 24, 25 + pop_range 28, 29 + jmp bigint_adjust + + diff --git a/dsa/bigint_io.c b/dsa/bigint_io.c new file mode 100644 index 0000000..221c61c --- /dev/null +++ b/dsa/bigint_io.c @@ -0,0 
+1,135 @@ +/* bigint_io.c */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2010 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "cli.h" +#include "hexdigit_tab.h" +#include "bigint.h" +#include +#include +#include + +void bigint_print_hex(const bigint_t* a){ + if(a->length_B==0){ + cli_putc('0'); + return; + } + if(a->info&BIGINT_NEG_MASK){ + cli_putc('-'); + } +// cli_putc((a->info&BIGINT_NEG_MASK)?'-':'+'); /* print sign */ + if(a->wordv[a->length_B-1]<0x10){ + cli_putc(pgm_read_byte(hexdigit_tab_uc_P+a->wordv[a->length_B-1])); + cli_hexdump_rev(a->wordv, a->length_B-1); + } else { + cli_hexdump_rev(a->wordv, a->length_B); + } +} + +#define BLOCKSIZE 20 + +static uint8_t char2nibble(char c){ + if(c>='0' && c <='9'){ + return c-'0'; + } + c |= 'A'^'a'; /* to lower case */ + if(c>='a' && c <='f'){ + return c-'a'+10; + } + return 0xff; +} + +static uint16_t read_byte(void){ + uint8_t t1, t2; + char c; + c = cli_getc_cecho(); + if(c=='-'){ + return 0x0500; + } + t1 = char2nibble(c); + if(t1 == 0xff){ + return 0x0100; + } + c = cli_getc_cecho(); + t2 = char2nibble(c); + if(t2 == 0xff){ + return 0x0200|t1; + } + return (t1<<4)|t2; +} + +uint8_t bigint_read_hex_echo(bigint_t* a){ + uint16_t allocated=0; + uint8_t shift4=0; + uint16_t t; + a->length_B = 0; + a->wordv = NULL; + a->info = 0; + for(;;){ + if(allocated-a->length_B < 1){ + uint8_t *p; + p = 
realloc(a->wordv, allocated+=BLOCKSIZE); + if(p==NULL){ + cli_putstr_P(PSTR("\r\nERROR: Out of memory!")); + free(a->wordv); + return 0xff; + } + a->wordv=p; + } + t = read_byte(); + if(a->length_B==0){ + if(t&0x0400){ + /* got minus */ + a->info |= BIGINT_NEG_MASK; + continue; + }else{ + if(t==0x0100){ + free(a->wordv); + a->wordv=NULL; + return 1; + } + } + } + if(t<=0x00ff){ + a->wordv[a->length_B++] = (uint8_t)t; + }else{ + if(t&0x0200){ + shift4 = 1; + a->wordv[a->length_B++] = (uint8_t)((t&0x0f)<<4); + } + break; + } + } + /* we have to reverse the byte array */ + uint8_t tmp; + uint8_t *p, *q; + p = a->wordv; + q = a->wordv+a->length_B-1; + while(q>p){ + tmp = *p; + *p = *q; + *q = tmp; + p++; q--; + } + if(shift4){ + bigint_adjust(a); + bigint_shiftright(a, 4); + } + bigint_adjust(a); + return 0; +} diff --git a/dsa/bigint_io.h b/dsa/bigint_io.h new file mode 100644 index 0000000..e47391d --- /dev/null +++ b/dsa/bigint_io.h @@ -0,0 +1,28 @@ +/* bigint_io.h */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2010 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#ifndef BIGINT_IO_H_ +#define BIGINT_IO_H_ + +#include "bigint.h" + +void bigint_print_hex(const bigint_t* a); +uint8_t bigint_read_hex_echo(bigint_t* a); + +#endif /* BIGINT_IO_H_ */ diff --git a/dsa/dsa.h b/dsa/dsa.h new file mode 100644 index 0000000..6a894f6 --- /dev/null +++ b/dsa/dsa.h @@ -0,0 +1,59 @@ +/* dsa.h */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2010 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef DSA_H_ +#define DSA_H_ + +#include +#include "hfal-basic.h" +#include "bigint.h" + +typedef struct{ + bigint_t p; + bigint_t q; + bigint_t g; +} dsa_domainparameters_t; + +typedef bigint_t dsa_pubkey_t; +typedef bigint_t dsa_privkey_t; +typedef struct{ + bigint_t r; + bigint_t s; +} dsa_signature_t; + +typedef struct{ + dsa_privkey_t priv; + dsa_pubkey_t pub; + dsa_domainparameters_t domain; +} dsa_ctx_t; + +#define DSA_SIGNATURE_OK 1 +#define DSA_SIGNATURE_FAIL 0 + +uint8_t dsa_sign_bigint(dsa_signature_t* s, const bigint_t* m, + const dsa_ctx_t* ctx, const bigint_t* k); +uint8_t dsa_sign_message(dsa_signature_t* s, const void* m, uint16_t m_len_b, + const hfdesc_t* hash_desc, const dsa_ctx_t* ctx, + const uint8_t(*rand_in)(void)); +uint8_t dsa_verify_bigint(const dsa_signature_t* s, const bigint_t* m, + const dsa_ctx_t* ctx); +uint8_t dsa_verify_message(const dsa_signature_t* s, const void* m, uint16_t m_len_b, + const hfdesc_t* hash_desc, const dsa_ctx_t* ctx); + +#endif /* DSA_H_ */ diff --git a/dsa/dsa_key_blob.c b/dsa/dsa_key_blob.c new file mode 100644 index 0000000..fe31e53 --- /dev/null +++ b/dsa/dsa_key_blob.c @@ -0,0 +1,304 @@ +/* dsa_key_blob.c */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2010 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include +#include +#include +#include "cli.h" +#include "dsa.h" +#include "bigint.h" + + +#define DSA_KEY_BLOB_SIZE 1024 + +#if DSA_KEY_BLOB_SIZE == 512 + +#define PRIV_LEN_B (20) +#define PUB_LEN_B (8*8) +#define P_LEN_B (8*8) +#define Q_LEN_B (20) +#define G_LEN_B (8*8) +#define ALL_LEN_B (PRIV_LEN_B+PUB_LEN_B+P_LEN_B+Q_LEN_B+G_LEN_B) + + +static uint8_t dsa_key_blob[] PROGMEM = { + +// priv: + 0xac, 0xe6, 0xef, 0x99, 0x08, 0xe8, 0x5f, 0xc8, + 0xc7, 0x51, 0x97, 0x99, 0xf4, 0xd3, 0x00, 0x0f, + 0x49, 0x72, 0xee, 0x46, +// pub: + 0x18, 0x02, 0x53, 0x09, 0x61, 0xad, 0x0b, 0x9d, + 0x44, 0x80, 0x8d, 0xb4, 0x52, 0xdc, 0xb5, 0xf2, + 0x11, 0x20, 0x41, 0xc7, 0xd9, 0x7c, 0x7c, 0x6c, + 0xa0, 0x9e, 0xca, 0x0d, 0xff, 0x28, 0x3c, 0x64, + 0xfa, 0x92, 0xbb, 0x2c, 0xe9, 0x9b, 0x10, 0x94, + 0xa5, 0x8d, 0x03, 0x50, 0xa5, 0x59, 0xd4, 0x3f, + 0x57, 0x78, 0x8d, 0xcf, 0x0a, 0x99, 0x5d, 0xa3, + 0x2e, 0x80, 0xfa, 0x99, 0x44, 0x73, 0x6d, 0x9a, +// P: + 0x9f, 0x2d, 0xc8, 0x3c, 0x34, 0xf9, 0xa1, 0xbc, + 0x6a, 0xa7, 0x49, 0x17, 0xd3, 0x82, 0xa2, 0xe2, + 0x25, 0x31, 0xc4, 0x3d, 0x1a, 0x3f, 0x0f, 0x8a, + 0x8b, 0x84, 0x3c, 0x6c, 0x9c, 0xdd, 0x42, 0xd9, + 0x1a, 0x22, 0xf2, 0x55, 0x98, 0xee, 0x7d, 0x98, + 0x9c, 0x9a, 0x91, 0x42, 0x5f, 0x4f, 0xa8, 0x32, + 0xa0, 0xb0, 0x0f, 0x79, 0xe2, 0x76, 0x08, 0x78, + 0x6e, 0xba, 0xf7, 0x74, 0x43, 0x4a, 0xf2, 0xdf, +// Q: + 0xdb, 0x30, 0x69, 0xe6, 0x59, 0x77, 0xee, 0x38, + 0xea, 0xf7, 0xcc, 0x18, 0x83, 0xcf, 0xb4, 0x21, + 0xbc, 0xcf, 0x9a, 0x77, +// G: + 0x73, 0x90, 0x27, 0x68, 0xe7, 0xe9, 0x3a, 0x45, + 0x6f, 0x7f, 0x95, 0xca, 0x9b, 0xfd, 0x33, 0x75, + 0x75, 0xff, 0x0f, 0xe7, 0x69, 0xfd, 0xb7, 0x07, + 0x0f, 0x6c, 0x3a, 0xec, 0x47, 0x82, 0x78, 0xb2, + 0xb3, 0x0b, 0x7f, 0x11, 0x9d, 0x34, 0x3e, 0xff, + 0xb8, 0x09, 0x42, 0x82, 0x81, 0x21, 0xad, 0x2b, + 0x51, 0x20, 0xec, 0x9e, 0xf8, 0x15, 0xaa, 0x3d, + 0x5f, 0x29, 0x2d, 0xb5, 0xc5, 0x64, 0x53, 0x2d +}; + +#endif + +#if DSA_KEY_BLOB_SIZE == 1024 + +#define PRIV_LEN_B (20) +#define PUB_LEN_B (16*8) +#define 
P_LEN_B (16*8) +#define Q_LEN_B (20) +#define G_LEN_B (16*8) +#define ALL_LEN_B (PRIV_LEN_B+PUB_LEN_B+P_LEN_B+Q_LEN_B+G_LEN_B) + +static uint8_t dsa_key_blob[] PROGMEM = { + // priv: + 0x03, 0xad, 0x17, 0x81, 0x0f, 0x70, 0x7f, 0x89, + 0xa2, 0x0a, 0x70, 0x1c, 0x3b, 0x24, 0xff, 0xd2, + 0x39, 0x93, 0xd7, 0x8d, + // pub: + 0x42, 0x1c, 0xb2, 0x03, 0xe5, 0xc6, 0x69, 0x81, + 0x1e, 0x35, 0x85, 0x86, 0xd7, 0x94, 0xd2, 0x1f, + 0x77, 0x05, 0x2f, 0xcc, 0xa5, 0x69, 0x46, 0x8f, + 0xe1, 0x9f, 0x82, 0xf6, 0x24, 0x2c, 0x64, 0x1b, + 0x29, 0x63, 0xd5, 0xb3, 0x32, 0xdc, 0xd9, 0x5a, + 0x4e, 0x92, 0xd9, 0x69, 0xcc, 0x51, 0x81, 0xc2, + 0xa3, 0x7e, 0xd7, 0xf8, 0x72, 0x1f, 0x8d, 0xd4, + 0xe8, 0x59, 0xb0, 0xaa, 0xdd, 0xa0, 0x73, 0xe6, + 0xc4, 0x50, 0x7f, 0x4c, 0x7c, 0xde, 0x35, 0x27, + 0x49, 0x36, 0x23, 0x36, 0xe4, 0x90, 0x54, 0x24, + 0x45, 0x99, 0xa3, 0x10, 0xc3, 0x59, 0x2f, 0x61, + 0xff, 0x75, 0xf0, 0x51, 0x1d, 0xa0, 0x8f, 0x69, + 0xc1, 0x1e, 0x3e, 0x65, 0xaf, 0x82, 0x9e, 0xa9, + 0x91, 0x17, 0x04, 0x7c, 0x56, 0xd1, 0x68, 0x8a, + 0x4b, 0xc9, 0x48, 0x92, 0xaf, 0x72, 0xca, 0xbf, + 0xf2, 0x2b, 0x9e, 0x42, 0x92, 0x46, 0x19, 0x64, + // P: + 0x97, 0x40, 0xda, 0x05, 0x19, 0x77, 0xb7, 0x17, + 0x4b, 0x7d, 0xc0, 0x5b, 0x81, 0xdd, 0xcc, 0x0b, + 0x86, 0xe0, 0x3c, 0x4d, 0xab, 0x3d, 0x43, 0xe4, + 0xe3, 0x5f, 0xf3, 0x56, 0xcd, 0x5c, 0xf2, 0x85, + 0x00, 0x45, 0x3c, 0xba, 0xf0, 0x56, 0xb3, 0x8b, + 0x29, 0xc3, 0x55, 0x7b, 0xb6, 0xfb, 0x68, 0xca, + 0x35, 0xe5, 0x0e, 0x46, 0xd6, 0xff, 0xc9, 0xbd, + 0x08, 0x71, 0x65, 0x3b, 0xf7, 0xab, 0xb1, 0x96, + 0x9b, 0x70, 0xdc, 0x8e, 0xf3, 0x02, 0xa4, 0x0f, + 0xc6, 0xcd, 0x70, 0xe5, 0xeb, 0xd3, 0x07, 0xb5, + 0x7d, 0x40, 0x8c, 0xfd, 0x33, 0x45, 0x8f, 0x9c, + 0x7f, 0xa1, 0x69, 0xcb, 0xe6, 0x73, 0x1d, 0x37, + 0xc7, 0x5f, 0x18, 0x57, 0x38, 0x96, 0x46, 0x24, + 0xad, 0xa6, 0x59, 0x3d, 0x7a, 0x74, 0x6e, 0x88, + 0x57, 0x18, 0x86, 0x7b, 0x07, 0x79, 0x52, 0xdd, + 0xbc, 0xa7, 0x40, 0x88, 0xa6, 0x66, 0x50, 0x49, + // Q: + 0xb4, 0x6d, 0x89, 0x7a, 0x72, 0xdb, 0x8c, 0x92, + 0x60, 0xf9, 
0x95, 0x47, 0x81, 0x57, 0xe8, 0x6b, + 0xb4, 0xf9, 0xde, 0x51, + // G: + 0x76, 0x1e, 0x1b, 0xd2, 0x5c, 0x5f, 0x92, 0x96, + 0x42, 0x18, 0xba, 0x8d, 0xe1, 0x24, 0x12, 0x24, + 0x6f, 0x3f, 0xb8, 0x05, 0xf9, 0x72, 0x74, 0xfa, + 0xef, 0xc3, 0x1e, 0xd5, 0xa5, 0x93, 0x28, 0x07, + 0xc0, 0x7b, 0x47, 0xef, 0x15, 0x13, 0x68, 0x18, + 0xfb, 0x0d, 0x69, 0xea, 0xcc, 0x5a, 0x43, 0x08, + 0x75, 0xec, 0xe4, 0x5e, 0x8e, 0xa9, 0x61, 0xe1, + 0xcd, 0x27, 0x8c, 0x55, 0xc9, 0x42, 0x11, 0x11, + 0x7f, 0x20, 0x4d, 0x70, 0x34, 0x49, 0x00, 0x8c, + 0x79, 0x95, 0x79, 0x0b, 0xfd, 0x8d, 0xda, 0xe3, + 0x0c, 0x27, 0x7a, 0x35, 0xe5, 0x35, 0xc9, 0x73, + 0x31, 0xaa, 0xed, 0xbe, 0x81, 0x89, 0x67, 0x06, + 0xf6, 0x97, 0x0d, 0x44, 0x07, 0xac, 0x09, 0xac, + 0x44, 0xf3, 0xc6, 0x8b, 0x30, 0x4c, 0x76, 0x0b, + 0x55, 0x74, 0x10, 0x06, 0xda, 0xd4, 0x3d, 0x96, + 0x7e, 0xc3, 0xf8, 0x22, 0x9c, 0x71, 0x1d, 0x9c +}; +#endif + +#if DSA_KEY_BLOB_2048 + +#define PRIV_LEN_B (20) +#define PUB_LEN_B (32*8) +#define P_LEN_B (32*8) +#define Q_LEN_B (20) +#define G_LEN_B (32*8) +#define ALL_LEN_B (PRIV_LEN_B+PUB_LEN_B+P_LEN_B+Q_LEN_B+G_LEN_B) + +static uint8_t dsa_key_blob[] PROGMEM = { +/* priv: */ + 0x1d, 0xe4, 0x81, 0x02, 0x52, 0x6b, 0x2b, 0x0e, + 0x98, 0x08, 0xc8, 0xb9, 0x81, 0x40, 0xd1, 0x1e, + 0x86, 0x69, 0x0d, 0xa9, +/* pub: */ + 0x70, 0xc4, 0x44, 0x28, 0x91, 0x77, 0x2b, 0x09, + 0xde, 0xe8, 0x66, 0x0b, 0xa5, 0xc8, 0x05, 0xb4, + 0x0a, 0x2d, 0x4f, 0x45, 0x8e, 0x0c, 0x8c, 0x38, + 0x61, 0xf3, 0x77, 0x05, 0x64, 0xf7, 0xe6, 0xe9, + 0x0b, 0x1f, 0x9b, 0x9f, 0x1f, 0xa1, 0x7e, 0x8f, + 0x5b, 0x14, 0x70, 0x1d, 0x4d, 0x1c, 0xdc, 0x9d, + 0xe0, 0x0a, 0xc4, 0x7b, 0x70, 0xfd, 0xef, 0xe6, + 0x20, 0x2d, 0x17, 0x13, 0xd7, 0x1c, 0xc0, 0xbb, + 0x5b, 0xce, 0x84, 0x6a, 0xa5, 0x4e, 0x27, 0x1c, + 0x9e, 0xaa, 0xb2, 0xdc, 0xc1, 0xec, 0x74, 0x93, + 0x67, 0xdb, 0xe1, 0xaa, 0x5a, 0x86, 0x1d, 0x8a, + 0xa9, 0x28, 0x7e, 0xfc, 0xd5, 0x72, 0x94, 0x6c, + 0x1d, 0x71, 0x85, 0x92, 0xa7, 0x6e, 0x84, 0x4f, + 0x27, 0xf3, 0x7e, 0x04, 0x7d, 0xf2, 0x7c, 0x07, + 0xa0, 
0x7d, 0x02, 0x7c, 0x30, 0x70, 0xb5, 0x87, + 0xc3, 0xf0, 0xc2, 0x0c, 0xdb, 0x26, 0x72, 0x33, + 0x20, 0xca, 0xf0, 0x8b, 0x05, 0x20, 0x70, 0x98, + 0x65, 0x03, 0xd7, 0xd4, 0x47, 0xf0, 0xb2, 0x6e, + 0x2a, 0xbe, 0xcc, 0x83, 0x0d, 0xab, 0x60, 0x61, + 0x26, 0x7b, 0xaf, 0xae, 0x18, 0x9e, 0x20, 0xeb, + 0x12, 0x31, 0x18, 0x2e, 0x73, 0xca, 0xd4, 0x5e, + 0x66, 0x74, 0x61, 0x07, 0x9b, 0x20, 0x68, 0x12, + 0x88, 0xb1, 0xc5, 0x0f, 0x85, 0x9b, 0x45, 0x40, + 0x7d, 0x76, 0x62, 0x73, 0xba, 0x41, 0x7b, 0xaf, + 0xc7, 0xb9, 0x19, 0x7a, 0xd0, 0x55, 0xe6, 0xfd, + 0xb5, 0xb9, 0xc4, 0x1b, 0x22, 0x47, 0x8f, 0x7b, + 0xd7, 0x75, 0xe8, 0x7f, 0x01, 0xa2, 0x9b, 0x79, + 0xde, 0xea, 0x55, 0x3c, 0x61, 0x4d, 0xcd, 0xce, + 0x89, 0x8c, 0x76, 0x62, 0x12, 0x4d, 0xd4, 0x47, + 0x03, 0x0e, 0xe8, 0xe2, 0xb8, 0xda, 0xca, 0x20, + 0xb3, 0x64, 0xb6, 0x07, 0x06, 0x1b, 0xcb, 0x91, + 0x51, 0x2c, 0x2e, 0xfa, 0xe1, 0xee, 0x1e, 0x78, +/* P: */ + 0x8d, 0x09, 0x00, 0x56, 0x63, 0x39, 0x42, 0x8d, + 0x15, 0xd5, 0x1d, 0x86, 0x10, 0xde, 0xc7, 0xf4, + 0x07, 0xe5, 0x81, 0xbe, 0x67, 0xee, 0xc5, 0x33, + 0xd3, 0x41, 0x1b, 0xba, 0xd8, 0xa6, 0x61, 0x49, + 0x2d, 0x66, 0xcf, 0x60, 0x9f, 0x52, 0x60, 0x6e, + 0x0a, 0x16, 0xdc, 0x0b, 0x24, 0x1b, 0x62, 0x32, + 0xc4, 0xab, 0x52, 0x17, 0xbf, 0xc5, 0xa2, 0x2a, + 0xa4, 0x5e, 0x8c, 0xff, 0x97, 0x45, 0x51, 0xd9, + 0xc3, 0xf2, 0x32, 0x4a, 0xb9, 0x08, 0xc1, 0x6a, + 0x7b, 0x82, 0x93, 0x2a, 0x60, 0x29, 0x55, 0x1a, + 0x36, 0x1f, 0x05, 0x4f, 0xf1, 0x43, 0x12, 0xb2, + 0x73, 0x4e, 0xf6, 0x37, 0x65, 0x3d, 0x0b, 0x70, + 0x08, 0xc7, 0x34, 0x0b, 0x4d, 0xc9, 0x08, 0x70, + 0xaf, 0x4b, 0x95, 0x0b, 0x7c, 0x9f, 0xcf, 0xfc, + 0x57, 0x94, 0x47, 0x6d, 0xd1, 0xaf, 0xc6, 0x52, + 0xd9, 0xe2, 0x05, 0xce, 0xb2, 0xb8, 0x91, 0x6f, + 0x5a, 0x77, 0x6b, 0x1b, 0xff, 0x97, 0x8c, 0x5e, + 0x33, 0xfc, 0x80, 0x29, 0xdf, 0x83, 0x91, 0x0c, + 0x28, 0x1b, 0x00, 0xb4, 0xc9, 0x3e, 0xb7, 0x67, + 0xca, 0xab, 0x63, 0xd4, 0x48, 0xfe, 0xd2, 0xfd, + 0x65, 0x57, 0x33, 0x25, 0xbd, 0xf1, 0xa5, 0x51, + 0x51, 0x50, 0xf6, 0xcf, 0xfa, 0x0d, 0x67, 
0x4e, + 0x90, 0x08, 0x87, 0x34, 0xf6, 0x33, 0xc9, 0x58, + 0xb1, 0x87, 0xf8, 0x5d, 0x73, 0x80, 0xde, 0x51, + 0xcd, 0x17, 0x70, 0x3e, 0xa4, 0xa8, 0x4f, 0xda, + 0xcd, 0xa2, 0x66, 0x0f, 0x95, 0xa7, 0xc6, 0xf7, + 0x12, 0x2e, 0x27, 0x94, 0xa9, 0x26, 0x1b, 0x25, + 0x16, 0x18, 0x99, 0x3b, 0x32, 0xaf, 0x71, 0x13, + 0x35, 0xda, 0xb8, 0x71, 0x5b, 0x50, 0x7c, 0x7a, + 0x9d, 0xcc, 0x0d, 0x95, 0xef, 0x6f, 0x64, 0x3c, + 0x28, 0x4b, 0x15, 0xe9, 0xd4, 0xad, 0xcc, 0x56, + 0xcb, 0x24, 0xf9, 0x61, 0x79, 0xd7, 0x56, 0xd3, +/* Q: */ + 0xf7, 0xdf, 0x85, 0xf5, 0x63, 0x36, 0x63, 0x71, + 0x74, 0x34, 0x98, 0x19, 0xff, 0x79, 0xf2, 0xe2, + 0x15, 0x75, 0x3c, 0x95, +/* G: */ + 0x0c, 0xf6, 0x8b, 0x1a, 0xbe, 0x66, 0x84, 0x98, + 0xae, 0xcb, 0xb0, 0xd9, 0x75, 0x75, 0x32, 0x4b, + 0xa3, 0xf2, 0x28, 0xa6, 0x6d, 0x13, 0xf2, 0xf3, + 0xfd, 0x93, 0x91, 0xb1, 0x21, 0x1e, 0xcc, 0x08, + 0x87, 0xce, 0x74, 0xb1, 0xd0, 0x19, 0x50, 0xff, + 0xac, 0xef, 0x9f, 0x82, 0xda, 0x75, 0xda, 0x6d, + 0x89, 0xf3, 0x0b, 0xdc, 0x27, 0x98, 0x85, 0x01, + 0x68, 0xb7, 0xbd, 0x98, 0x83, 0xb1, 0xb0, 0x65, + 0x31, 0x71, 0x43, 0x05, 0xa7, 0x76, 0x63, 0xe4, + 0x7d, 0x61, 0x53, 0xc7, 0x3e, 0x3b, 0x82, 0x28, + 0x65, 0x07, 0xfe, 0x9e, 0xa3, 0x35, 0x2c, 0xdc, + 0x9e, 0x05, 0x7c, 0x9a, 0x69, 0xc6, 0x9f, 0xc2, + 0x3f, 0x94, 0x6b, 0xad, 0xa4, 0x2b, 0x5d, 0x48, + 0x9e, 0x2c, 0xad, 0xd2, 0x89, 0x49, 0xdc, 0xdb, + 0x55, 0x49, 0x56, 0xaf, 0xe9, 0x0e, 0x37, 0xe7, + 0x1f, 0x42, 0x6a, 0x7c, 0xac, 0xe8, 0x1b, 0xbb, + 0x21, 0x82, 0x14, 0x72, 0x17, 0x64, 0xf0, 0x3c, + 0x3d, 0xc1, 0x43, 0x27, 0x27, 0x9f, 0xe9, 0x21, + 0xf2, 0x2f, 0xf7, 0xfa, 0x3c, 0xed, 0xbf, 0xab, + 0xab, 0xb7, 0x3c, 0x6d, 0x1e, 0x85, 0x9f, 0x77, + 0x4f, 0x69, 0x09, 0x4e, 0xed, 0x13, 0x84, 0x40, + 0x1a, 0xc6, 0xa1, 0xd9, 0x68, 0xb6, 0x18, 0x32, + 0x79, 0x25, 0x9e, 0xa6, 0x41, 0x30, 0xd1, 0xc2, + 0x7a, 0x8f, 0x0d, 0x46, 0xee, 0xda, 0xb0, 0xbf, + 0x64, 0x42, 0x59, 0x7e, 0x22, 0x88, 0xd6, 0x52, + 0xec, 0xed, 0xc4, 0x13, 0xb1, 0x7f, 0x5c, 0x77, + 0x4c, 0xfd, 0x22, 0x90, 0xd3, 0xe3, 0xa9, 
0xc1, + 0x0f, 0x25, 0xac, 0xd5, 0x04, 0x84, 0xe6, 0xa8, + 0xc7, 0xb4, 0x4f, 0xa2, 0x67, 0xae, 0xaa, 0x92, + 0xe9, 0x0a, 0xed, 0x45, 0x5b, 0xf0, 0x1b, 0x69, + 0xec, 0xaf, 0x7d, 0xf2, 0x71, 0x25, 0xbf, 0x92, + 0xd4, 0xd0, 0x5b, 0xde, 0x5a, 0x2d, 0x18, 0x8e +}; +#endif + +void load_dsa_key_blob(dsa_ctx_t* ctx){ + if(ctx->priv.wordv){ + free(ctx->priv.wordv); + } + ctx->priv.wordv = malloc(ALL_LEN_B); + if(ctx->priv.wordv==NULL){ + cli_putstr_P(PSTR("\r\nERROR: OUT OF MEMORY!!!")); + return; + } + memcpy_P(ctx->priv.wordv, dsa_key_blob, ALL_LEN_B); + ctx->priv.length_B=PRIV_LEN_B; + ctx->pub.wordv = ctx->priv.wordv+PRIV_LEN_B; + ctx->pub.length_B = PUB_LEN_B; + ctx->domain.p.wordv = ctx->priv.wordv+PRIV_LEN_B+PUB_LEN_B; + ctx->domain.p.length_B = P_LEN_B; + ctx->domain.q.wordv = ctx->priv.wordv+PRIV_LEN_B+PUB_LEN_B+P_LEN_B; + ctx->domain.q.length_B = Q_LEN_B; + ctx->domain.g.wordv = ctx->priv.wordv+PRIV_LEN_B+PUB_LEN_B+P_LEN_B+Q_LEN_B; + ctx->domain.g.length_B = G_LEN_B; + + bigint_changeendianess(&(ctx->priv)); + bigint_changeendianess(&(ctx->pub)); + bigint_changeendianess(&(ctx->domain.p)); + bigint_changeendianess(&(ctx->domain.q)); + bigint_changeendianess(&(ctx->domain.g)); + + bigint_adjust(&(ctx->priv)); + bigint_adjust(&(ctx->pub)); + bigint_adjust(&(ctx->domain.p)); + bigint_adjust(&(ctx->domain.q)); + bigint_adjust(&(ctx->domain.g)); +} diff --git a/dsa/dsa_key_blob.h b/dsa/dsa_key_blob.h new file mode 100644 index 0000000..8099a1e --- /dev/null +++ b/dsa/dsa_key_blob.h @@ -0,0 +1,25 @@ +/* dsa_key_blob.h */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2010 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef DSA_KEY_BLOB_H_ +#define DSA_KEY_BLOB_H_ + +void load_dsa_key_blob(dsa_ctx_t* ctx); + +#endif /* DSA_KEY_BLOB_H_ */ diff --git a/dsa/dsa_sign.c b/dsa/dsa_sign.c new file mode 100644 index 0000000..e9def2b --- /dev/null +++ b/dsa/dsa_sign.c @@ -0,0 +1,73 @@ +/* dsa_sign.c */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2010 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include +#include "cli.h" +#include "bigint.h" +#include "dsa.h" +#include "hashfunction_descriptor.h" +#include "hfal-basic.h" + +uint8_t dsa_sign_bigint(dsa_signature_t* s, const bigint_t* m, + const dsa_ctx_t* ctx, const bigint_t* k){ + bigint_t tmp, tmp2; + uint8_t tmp_b[ctx->domain.p.length_B+5], tmp2_b[ctx->domain.q.length_B+5]; + tmp.wordv= tmp_b; + tmp2.wordv = tmp2_b; + bigint_expmod_u(&tmp, &(ctx->domain.g), k, &(ctx->domain.p)); + bigint_reduce(&tmp, &(ctx->domain.q)); + bigint_copy(&(s->r), &tmp); + bigint_mul_u(&tmp, &tmp, &(ctx->priv)); + bigint_add_u(&tmp, &tmp, m); + bigint_inverse(&tmp2, k, &(ctx->domain.q)); + bigint_mul_u(&tmp, &tmp, &tmp2); + bigint_reduce(&tmp, &(ctx->domain.q)); + bigint_copy(&(s->s), &tmp); + + if(s->s.length_B==0 || s->r.length_B==0){ + return 1; + } + + return 0; +} + +uint8_t dsa_sign_message(dsa_signature_t* s, const void* m, uint16_t m_len_b, + const hfdesc_t* hash_desc, const dsa_ctx_t* ctx, + const uint8_t(*rand_in)(void)){ + bigint_t z, k; + uint8_t i, n_B = ctx->domain.q.length_B; + uint8_t hash_value[(n_B>(hfal_hash_getHashsize(hash_desc)+7)/8)?n_B:(hfal_hash_getHashsize(hash_desc)+7)/8]; + uint8_t k_b[n_B]; + hfal_hash_mem(hash_desc, hash_value, m, m_len_b); + z.wordv = hash_value; + z.length_B = n_B; + bigint_changeendianess(&z); + k.wordv = k_b; + k.length_B = n_B; + do{ + for(i=0; i. 
+*/ + +#include +#include "bigint.h" +#include "dsa.h" +#include "hfal-basic.h" + +uint8_t dsa_verify_bigint(const dsa_signature_t* s, const bigint_t* m, + const dsa_ctx_t* ctx){ + if(s->r.length_B==0 || s->s.length_B==0){ + return DSA_SIGNATURE_FAIL; + } + if(bigint_cmp_u(&(s->r), &(ctx->domain.q))>=0 || bigint_cmp_u(&(s->s), &(ctx->domain.q))>=0){ + return DSA_SIGNATURE_FAIL; + } + bigint_t w, u1, u2, v1, v2; + uint8_t w_b[ctx->domain.q.length_B], u1_b[ctx->domain.q.length_B*2], u2_b[ctx->domain.q.length_B*2]; + uint8_t v1_b[ctx->domain.p.length_B*2], v2_b[ctx->domain.p.length_B]; + w.wordv = w_b; + u1.wordv = u1_b; + u2.wordv = u2_b; + v1.wordv = v1_b; + v2.wordv = v2_b; + bigint_inverse(&w, &(s->s), &(ctx->domain.q)); + bigint_mul_u(&u1, &w, m); + bigint_reduce(&u1, &(ctx->domain.q)); + bigint_mul_u(&u2, &w, &(s->r)); + bigint_reduce(&u2, &(ctx->domain.q)); + bigint_expmod_u(&v1, &(ctx->domain.g), &u1, &(ctx->domain.p)); + bigint_expmod_u(&v2, &(ctx->pub), &u2, &(ctx->domain.p)); + bigint_mul_u(&v1, &v1, &v2); + bigint_reduce(&v1, &(ctx->domain.p)); + bigint_reduce(&v1, &(ctx->domain.q)); + if(bigint_cmp_u(&v1, &(s->r))==0){ + return DSA_SIGNATURE_OK; + } + return DSA_SIGNATURE_FAIL; +} + +uint8_t dsa_verify_message(const dsa_signature_t* s, const void* m, uint16_t m_len_b, + const hfdesc_t* hash_desc, const dsa_ctx_t* ctx){ + bigint_t z; + uint8_t n_B = ctx->domain.q.length_B; + uint8_t hash_value[(hfal_hash_getHashsize(hash_desc)+7)/8]; + hfal_hash_mem(hash_desc, hash_value, m, m_len_b); + z.wordv=hash_value; + z.length_B=n_B; + bigint_changeendianess(&z); + bigint_adjust(&z); + return dsa_verify_bigint(s, &z, ctx); +} + + + + + + + diff --git a/dsa/memxor.S b/dsa/memxor.S new file mode 100644 index 0000000..a32058b --- /dev/null +++ b/dsa/memxor.S @@ -0,0 +1,66 @@ +/* memxor.S */ +/* + This file is part of the AVR-Crypto-Lib. 
+ Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/* + * File: memxor.S + * Author: Daniel Otte + * Date: 2008-08-07 + * License: GPLv3 or later + * Description: memxor, XORing one block into another + * + */ + +/* + * void memxor(void* dest, const void* src, uint16_t n); + */ + /* + * param dest is passed in r24:r25 + * param src is passed in r22:r23 + * param n is passed in r20:r21 + */ +.global memxor +memxor: + movw r30, r24 + movw r26, r22 + movw r24, r20 + adiw r24, 0 + breq 2f +1: + ld r20, X+ + ld r21, Z + eor r20, r21 + st Z+, r20 + sbiw r24, 1 + brne 1b +2: + ret + + + + + + + + + + + + + + diff --git a/dsa/memxor.h b/dsa/memxor.h new file mode 100644 index 0000000..a62a616 --- /dev/null +++ b/dsa/memxor.h @@ -0,0 +1,7 @@ +#ifndef MEMXOR_H_ +#define MEMXOR_H_ +#include + +void memxor(void* dest, const void* src, uint16_t n); + +#endif diff --git a/dsa/noekeon.h b/dsa/noekeon.h new file mode 100644 index 0000000..9c046f3 --- /dev/null +++ b/dsa/noekeon.h @@ -0,0 +1,85 @@ +/* noekeon.h */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ +#ifndef NOEKEON_H_ +#define NOEKEON_H_ + +/** + * \file noekeon.h + * \author Daniel Otte + * \email daniel.otte@rub.de + * \date 2008-04-11 + * \license GPLv3 or later + * \brief Implementation of the Noekeon block cipher + * \ingroup Noekeon + * This is an implementation of the Noekeon block cipher. + * For more details on Noekeon see http://gro.noekeon.org/ + */ + +#include + +/** \typedef noekeon_ctx_t + * \brief holds key data for indirect mode + * + * A variable of this type may hold the key data for the indirect mode. + * For direct mode simply pass the key directly to the encryption or + * decryption function. + */ +typedef uint8_t noekeon_ctx_t[16]; + +/** \fn void noekeon_enc(void* buffer, const void* key) + * \brief noekeon encrytion funtion + * + * This function encrypts a block (64 bit = 8 byte) with the noekeon encrytion + * algorithm. Due to the two modes of noekeon (direct mode and indirect mode) + * the second parameter either points directly to the key (direct mode) or to a + * context generated by the noekeon_init() function (indirect mode). + * \param buffer pointer to the 64 bit (8 byte) block to encrypt + * \param key pointer to either the key (128 bit = 16 byte; direct mode) or + * to the context (indirect mode) + */ +void noekeon_enc(void* buffer, const void* key); + +/** \fn void noekeon_dec(void* buffer, const void* key) + * \brief noekeon encrytion funtion + * + * This function decrypts a block (64 bit = 8 byte) encrypted with the noekeon + * encrytion algorithm. 
Due to the two modes of noekeon (direct mode and + * indirect mode) the second parameter either points directly to the key + * (direct mode) or to a context generated by the noekeon_init() function + * (indirect mode). + * \param buffer pointer to the 64 bit (8 byte) block to decrypt + * \param key pointer to either the key (128 bit = 16 byte; direct mode) or + * to the context (indirect mode) + */ +void noekeon_dec(void* buffer, const void* key); + + +/** \fn void noekeon_init(const void* key, noekeon_ctx_t* ctx) + * \brief noekeon context generation function for indirect mode + * + * This function generates a context from the supplied key for using + * noekeon in indirect mode. For using noekeon in direct mode supply the key + * direct to the noekeon_enc() and noekeon_dec() functions. + * \param key pointer to the key (128 bit = 16 byte) + * \param ctx pointer to the context to fill with key material + * to the context (indirect mode) + */ +void noekeon_init(const void* key, noekeon_ctx_t* ctx); + +#endif /*NOEKEON_H_*/ diff --git a/dsa/noekeon_asm.S b/dsa/noekeon_asm.S new file mode 100644 index 0000000..b0a2a16 --- /dev/null +++ b/dsa/noekeon_asm.S @@ -0,0 +1,641 @@ +/* noekeon_asm.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/
+/*
+ * noekeon assembler implementation for avr
+ * author: Daniel Otte
+ * email: daniel.otte@rub.de
+ * license: GPLv3
+ */
+
+#include <avr/io.h>
+
+.macro push_all
+	push r2
+	push r3
+	push r4
+	push r5
+	push r6
+	push r7
+	push r8
+	push r9
+	push r10
+	push r11
+	push r12
+	push r13
+	push r14
+	push r15
+	push r16
+	push r17
+	push r28
+	push r29
+.endm
+
+.macro pop_all
+	pop r29
+	pop r28
+	pop r17
+	pop r16
+	pop r15
+	pop r14
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	pop r9
+	pop r8
+	pop r7
+	pop r6
+	pop r5
+	pop r4
+	pop r3
+	pop r2
+	clr r1
+.endm
+
+push_all_func:
+	pop r31		/* take our own return address off the stack into Z ... */
+	pop r30
+	push_all	/* ... save r2-r17, r28, r29 ... */
+	ijmp		/* ... and return through Z */
+
+pop_all_func:
+	pop r31		/* same scheme as push_all_func, restoring the registers */
+	pop r30
+	pop_all
+	ijmp
+
+.macro xchg a b
+	eor \a, \b
+	eor \b, \a
+	eor \a, \b
+.endm
+
+.macro op32 op a b
+	\op \a\()_0, \b\()_0
+	\op \a\()_1, \b\()_1
+	\op \a\()_2, \b\()_2
+	\op \a\()_3, \b\()_3
+.endm
+
+
+.macro op32_4t op a b c d w x y z
+	\op \a, \w
+	\op \b, \x
+	\op \c, \y
+	\op \d, \z
+.endm
+
+
+.macro op32_prefix op p q a b c d w x y z
+	\op \p\()\a, \q\()\w
+	\op \p\()\b, \q\()\x
+	\op \p\()\c, \q\()\y
+	\op \p\()\d, \q\()\z
+.endm
+
+; === bigendian_rotl32 ===
+; this function rotates a 32bit bigendian word n bits to the left
+;  param1: the 32-bit value
+;	given in r25,r24,r23,r22 (r22 is most significant)
+;  param2: the 8-bit parameter giving the number of bits to rotate
+;	given in r20
+;  return: the rotated 32-bit word
+;	given in r25,r24,r23,r22
+
+bigendian_rotl32:
+	/* copy high bit of r22 to carry */
+	mov r1, r22
+2:
+	rol r1
+
+	rol r25
+	rol r24
+	rol r23
+	rol r22
+
+	dec r20
+	brne 2b
+bigendian_rotl32_exit:
+	clr r1
+	ret
+
+
+/******************************************************************************/
+
+; === bigendian_rotr32 ===
+; this function rotates a 32bit bigendian word n bits to the right
+;  param1: the 32-bit value
+;	given in r25,r24,r23,r22 (r22 is most significant)
+;  param2: the 8-bit parameter giving the number of bits to rotate
+;	given in r20
+;  return: the rotated 32-bit word
+;	given in r25,r24,r23,r22
+
+bigendian_rotr32:
+	/* copy low bit of r25 (the least significant byte) to carry */
+
+	mov r1, r25
+2:
+	ror r1
+
+	ror r22
+	ror r23
+	ror r24
+	ror r25
+	dec r20
+	brne 2b
+bigendian_rotr32_exit:
+	clr r1
+	ret
+
+/******************************************************************************/
+/*
+void theta(uint32_t* k, uint32_t* a){
+	uint32_t temp;
+	temp = a[0] ^ a[2]; temp ^= ROTR32(temp, 8) ^ ROTL32(temp, 8);
+	a[1] ^= temp;
+	a[3] ^= temp;
+
+	a[0] ^= k[0];
+	a[1] ^= k[1];
+	a[2] ^= k[2];
+	a[3] ^= k[3];
+
+	temp = a[1] ^ a[3]; temp ^= ROTR32(temp, 8) ^ ROTL32(temp, 8);
+	a[0] ^= temp;
+	a[2] ^= temp;
+}
+*/
+
+round_const: .byte 0x1B, 0x36, 0x6C, 0xD8, 0xAB, 0x4D, 0x9A, \
+	0x2F, 0x5E, 0xBC, 0x63, 0xC6, 0x97, 0x35, 0x6A, \
+	0xD4
+
+;-- a[0]
+state0_0 = 2
+state0_1 = 3
+state0_2 = 4
+state0_3 = 5
+;-- a[1]
+state1_0 = 6
+state1_1 = 7
+state1_2 = 8
+state1_3 = 9
+;-- a[2]
+state2_0 = 10
+state2_1 = 11
+state2_2 = 12
+state2_3 = 13
+;-- a[3]
+state3_0 = 14
+state3_1 = 15
+state3_2 = 16
+state3_3 = 17
+
+; === theta ===
+;
+;  param1: the state in r2-r17
+;  param2: pointer to k in X (r26,r27)
+;
+temp_a = 18
+temp_b = 19
+temp_c = 20
+temp_d = 21
+
+theta:
+	/* temp = a[0] ^ a[2]; temp ^= temp>>>8 ^ temp<<<8 */
+	op32_prefix mov, temp_, state0_, a,b,c,d, 0,1,2,3
+	op32_prefix eor, temp_, state2_, a,b,c,d, 0,1,2,3
+
+	mov r1, temp_a
+	eor r1, temp_b
+	eor r1, temp_c
+	eor r1, temp_d
+
+	op32_prefix eor, temp_, r, a,b,c,d, 1,1,1,1
+
+	/* temp is now a little bit mixed: c,d,a,b (if abcd is normal order) */
+	/* a[1] ^= temp */
+	eor state1_0, temp_c
+	eor state1_1, temp_d
+	eor state1_2, temp_a
+	eor state1_3, temp_b
+	/* a[3] ^= temp */
+	eor state3_0, temp_c
+	eor state3_1, temp_d
+	eor state3_2, temp_a
+	eor state3_3, temp_b
+
+	/* state ^ k (X points to K) */
+	ldi r28, 2
+	clr r29 /* Y points to r2 aka state0_0 */
+	ldi temp_a, 16
+1:
+	ld r1, X+
+	ld r0, Y
+	eor r1, r0
+	st Y+, r1
+	dec temp_a
+	brne 1b
+	sbiw r26, 16 /* set X back to key */
+
+	mov temp_a, state1_0
+	mov temp_b, state1_1
+	mov temp_c, state1_2
+	mov temp_d, state1_3
+	eor temp_a, state3_0
+	eor temp_b, state3_1
+	eor temp_c, state3_2
+	eor temp_d, state3_3
+	mov r1, temp_a
+	eor r1, temp_b
+	eor r1, temp_c
+	eor r1, temp_d
+	eor temp_a, r1
+	eor temp_b, r1
+	eor temp_c, r1
+	eor temp_d, r1
+	/* temp is now a little bit mixed: c,d,a,b (if abcd is normal order) */
+	/* a[0] ^= temp */
+	eor state0_0, temp_c
+	eor state0_1, temp_d
+	eor state0_2, temp_a
+	eor state0_3, temp_b
+	/* a[2] ^= temp */
+	eor state2_0, temp_c
+	eor state2_1, temp_d
+	eor state2_2, temp_a
+	eor state2_3, temp_b
+
+	clr r1
+	ret
+
+/******************************************************************************/
+#ifndef NOEKEON_NO_ENC
+; === noekeon_enc ===
+;
+;  param1: pointer to buffer (r24,r25)
+;  param2: pointer to k (r22,r23)
+;
+.global noekeon_enc
+noekeon_enc:
+	rcall push_all_func
+	/* load state */
+	movw r26, r22
+	ldi r28, 2
+	clr r29 /* Y points at r2 aka state0_0 */
+	movw r30, r24 /* Z points at state */
+	push r30
+	push r31
+	ldi r22, 16
+	push r22 /* 16 is also the number of rounds and gets pushed here */
+1:
+	ld r0, Z+
+	st Y+, r0
+	dec r22
+	brne 1b
+	/* state loaded */
+	push r1 /* push round constant 2 (0x00) */
+	ldi r20, 0x80
+	push r20 /* push round constant 1 (0x80) */
+	rjmp 3f
+2:
+	ldi r30, lo8(round_const+15)
+	ldi r31, hi8(round_const+15)
+	sub r30, r22
+	sbci r31, 0
+	clr r1
+	push r1		/* round constant 2 (0x00) */
+	lpm r0, Z
+	push r0		/* round constant 1 from round_const[15-r22] */
+3:
+	rcall round /* pops rc2 & rc1 */
+	pop r22
+	dec r22
+	push r22
+	brne 2b
+
+	pop r22
+
+	ldi r22, 0xD4
+	eor state0_3, r22
+	rcall theta
+
+	pop r31
+	pop r30
+	clr r29
+	ldi r28, 2
+	ldi r22, 16
+1:
+	ld r0, Y+
+	st Z+, r0
+	dec r22
+	brne 1b
+
+	rcall pop_all_func
+	ret
+#endif
+/******************************************************************************/
+/******************************************************************************/
+#ifndef NOEKEON_NO_DEC
+
+; === noekeon_dec ===
+;
+;  param1: pointer to buffer/state (r24,r25)
+;  param2: pointer to k (r22,r23)
+;
+.global noekeon_dec
+noekeon_dec:
+	rcall push_all_func
+	/* allocate 16 bytes on the stack */
+	in r30, _SFR_IO_ADDR(SPL)
+	in r31, _SFR_IO_ADDR(SPH)
+	sbiw r30, 16
+	out _SFR_IO_ADDR(SPH), r31 /* NOTE(review): SPH/SPL are written without masking interrupts - confirm an ISR cannot run in between */
+	out _SFR_IO_ADDR(SPL), r30
+
+	adiw r30, 1
+	/* push state pointer */
+	push r24
+	push r25
+	movw r26, r22 /* move key ptr to X */
+
+	/* set stackkey to zero */
+	ldi r22, 16
+1:	st Z+, r1
+	dec r22
+	brne 1b
+
+	/* copy key to state */
+	clr r29
+	ldi r28, 2
+	ldi r22, 16
+1:	ld r0, X+
+	st Y+, r0
+	dec r22
+	brne 1b
+
+	movw r26, r30
+	sbiw r26, 16 /* set X back to beginning of stack key */
+	rcall theta
+
+	/* mov state to stackkey */
+	clr r29
+	ldi r28, 2
+	ldi r22, 16
+1:	ld r0, Y+
+	st X+, r0
+	dec r22
+	brne 1b
+	sbiw r26, 16 /* set X back to beginning of stack key */
+
+	/* move data from stateptr to state */
+	pop r31
+	pop r30
+	push r30
+	push r31
+	clr r29
+	ldi r28, 2
+	ldi r22, 16
+	push r22
+1:	ld r0, Z+
+	st Y+, r0
+	dec r22
+	brne 1b
+
+;--- snip 8< ----
+
+	ldi r20, 0xD4
+	push r20 /* push round constant 2 (0xD4) */
+	push r22 /* push round constant 1 (0x00) */
+	rjmp 3f
+2:
+	ldi r30, lo8(round_const-1)
+	ldi r31, hi8(round_const-1)
+	clr r1
+	add r30, r22
+	adc r31, r1
+	lpm r0, Z
+	push r0		/* round constant 2 from round_const[r22-1] */
+	push r1		/* round constant 1 (0x00) */
+3:
+	rcall round /* pops rc2 & rc1 */
+	pop r22
+	dec r22
+	push r22
+	brne 2b
+;----
+	pop r22
+
+	rcall theta
+	ldi r22, 0x80
+	eor state0_3, r22
+
+write_state_back:
+	/* write state back */
+	pop r31 /* pop state pointer */
+	pop r30
+	clr r29
+	ldi r28, 2
+	ldi r22, 16
+1:
+	ld r0, Y+
+	st Z+, r0
+	dec r22
+	brne 1b
+
+	/* remove key from stack */
+	in r30, _SFR_IO_ADDR(SPL)
+	in r31, _SFR_IO_ADDR(SPH)
+	adiw r30, 16
+	out _SFR_IO_ADDR(SPH), r31
+	out _SFR_IO_ADDR(SPL), r30
+	rcall pop_all_func
+	ret
+#endif
+/******************************************************************************/
+
+
+round:
+	pop r24		/* r24, r25 keep our return address while the constants are popped */
+	pop r25
+	pop r1		/* round constant 1 (pushed last by the caller) */
+	eor state0_3, r1
+	rcall theta
+	pop r1		/* round constant 2 */
+	eor state0_3, r1
+	push r25	/* put the return address back */
+	push r24
+pi_gamma_pi:
+	ldi r30, pm_lo8(bigendian_rotl32)
+	ldi r31, pm_hi8(bigendian_rotl32)
+	rcall pi
+	/* pi1 done; now gamma */
+	rcall gamma_1
+	/* a[0] <-> a[3] */
+	xchg state0_0, state3_0
+	xchg state0_1, state3_1
+	xchg state0_2, state3_2
+	xchg state0_3, state3_3
+	/* a[2] ^= a[0] ^ a[1] ^ a[3] */
+	op32 eor, state2, state0
+	op32 eor, state2, state1
+	op32 eor, state2, state3
+
+	rcall gamma_1
+	ldi r30, pm_lo8(bigendian_rotr32)
+	ldi r31, pm_hi8(bigendian_rotr32)
+	rcall pi
+	ret
+
+gamma_1:
+	/* a[1] ^= ~(a[3]|a[2])*/
+	mov r1, state3_0
+	or r1, state2_0
+	com r1
+	eor state1_0, r1
+
+	mov r1, state3_1
+	or r1, state2_1
+	com r1
+	eor state1_1, r1
+
+	mov r1, state3_2
+	or r1, state2_2
+	com r1
+	eor state1_2, r1
+
+	mov r1, state3_3
+	or r1, state2_3
+	com r1
+	eor state1_3, r1
+
+	/* a[0] ^= a[2]&a[1] */
+	mov r1, state2_0
+	and r1, state1_0
+	eor state0_0, r1
+
+	mov r1, state2_1
+	and r1, state1_1
+	eor state0_1, r1
+
+	mov r1, state2_2
+	and r1, state1_2
+	eor state0_2, r1
+
+	mov r1, state2_3
+	and r1, state1_3
+	eor state0_3, r1
+	ret
+
+pi:	/* rotates a[1], a[2], a[3] by 1, 5, 2 bits with the routine Z points to (rotl or rotr) */
+	/* a[1] <<<= 1*/
+	mov r22, state1_0
+	mov r23, state1_1
+	mov r24, state1_2
+	mov r25, state1_3
+	ldi r20, 1
+	icall
+	mov state1_0, r22
+	mov state1_1, r23
+	mov state1_2, r24
+	mov state1_3, r25
+	/* a[2] <<<= 5*/
+	mov r22, state2_0
+	mov r23, state2_1
+	mov r24, state2_2
+	mov r25, state2_3
+	ldi r20, 5
+	icall
+	mov state2_0, r22
+	mov state2_1, r23
+	mov state2_2, r24
+	mov state2_3, r25
+	/* a[3] <<<= 2*/
+	mov r22, state3_0
+	mov r23, state3_1
+	mov r24, state3_2
+	mov r25, state3_3
+	ldi r20, 2
+	icall
+	mov state3_0, r22
+	mov state3_1, r23
+	mov state3_2, r24
+	mov state3_3, r25
+	ret
+
+/******************************************************************************/
+
+/*
+void noekeon_init(void* key, noekeon_ctx_t* ctx){
+	uint8_t nullv[16];
+
+	memset(nullv, 0, 16);
+	memcpy(ctx, key, 16);
+	noekeon_enc(ctx, nullv);
+}
+*/
+
+#ifndef NOEKEON_NO_INIT
+
+.global noekeon_init
+noekeon_init:
+; === noekeon_init ===
+;
+;  param1: pointer to key (r24,r25)
+;  param2: pointer to context (r22,r23)
+;
+	in r30, _SFR_IO_ADDR(SPL)
+	in r31, _SFR_IO_ADDR(SPH)
+	sbiw r30, 16
+	out _SFR_IO_ADDR(SPH), r31
+	out _SFR_IO_ADDR(SPL), r30
+
+	movw r26, r22
+	adiw r30, 1
+	movw r22, r30
+	/* set nullv(stack) to zero */
+	ldi r20, 16
+1:	st Z+, r1
+	dec r20
+	brne 1b
+
+	/* copy key data to ctx */
+	movw r30, r24
+	ldi r20, 16
+1:	ld r1, Z+
+	st X+, r1
+	dec r20
+	brne 1b
+	clr r1
+
+	sbiw r26, 16
+	movw r24, r26
+	rcall noekeon_enc
+
+	in r30, _SFR_IO_ADDR(SPL)
+	in r31, _SFR_IO_ADDR(SPH)
+	adiw r30, 16
+	out _SFR_IO_ADDR(SPH), r31
+	out _SFR_IO_ADDR(SPL), r30
+	ret
+
+#endif
+
+
diff --git a/dsa/noekeon_prng.c b/dsa/noekeon_prng.c
new file mode 100644
index 0000000..5b5bc29
--- /dev/null
+++ b/dsa/noekeon_prng.c
@@ -0,0 +1,75 @@
+/* noekeon_prng.c */
+/*
+ * This file is part of the AVR-Crypto-Lib.
+ * Copyright (C) 2006, 2007, 2008 Daniel Otte (daniel.otte@rub.de)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ +/** + * \author Daniel Otte + * \date 2008-08-24 + * \license GPLv3 or later + * \brief random number generator based on noekeon running in CFB-mode + * + */ + +#include "noekeon.h" +#include "memxor.h" +#include +#include + +static uint8_t random_state[16]; +static uint8_t random_key[16]; +static uint8_t i=0; + +uint8_t random8(void){ + static uint8_t sr[16]; + + if(i==0){ + noekeon_enc(random_state, random_key); + memcpy(sr, random_state, 16); + i=15; + return sr[15]; + } + --i; + return sr[i]; +} + +void random_block(void* dest){ + i=0; + noekeon_enc(random_state, random_key); + memcpy(dest, random_state, 16); +} + +void srandom32(uint32_t seed){ + memcpy(random_key, &seed, 4); + memset(random_key+4, 0, 12); + memset(random_state, 0, 16); + i=0; +} + +void random_seed(const void* buffer){ + memcpy(random_key, buffer, 16); + memset(random_state, 0, 16); + i=0; +} + +void random_add(const void* buffer){ + i=0; + noekeon_enc(random_state, random_key); + memxor(random_key, random_state, 16); + memxor(random_key, buffer, 16); +} + + diff --git a/dsa/noekeon_prng.h b/dsa/noekeon_prng.h new file mode 100644 index 0000000..ff9ca03 --- /dev/null +++ b/dsa/noekeon_prng.h @@ -0,0 +1,40 @@ +/* noekeon_prng.h */ +/* + * This file is part of the AVR-Crypto-Lib. + * Copyright (C) 2006, 2007, 2008 Daniel Otte (daniel.otte@rub.de) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +/** + * \author Daniel Otte + * \date 2008-08-24 + * \license GPLv3 or later + * \brief random number generator based on noekeon running in CFB-mode + * + */ + +#ifndef PRNG_H_ +#define PRNG_H_ + +#include + +uint8_t random8(void); +void random_block(void* dest); +void srandom32(uint32_t seed); +void random_seed(const void* buffer); +void random_add(const void* buffer); + +#endif /* PRNG_H_*/ + + diff --git a/dsa/sha1-asm.S b/dsa/sha1-asm.S new file mode 100644 index 0000000..4d185f9 --- /dev/null +++ b/dsa/sha1-asm.S @@ -0,0 +1,883 @@ +/* sha1-asm.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ +/* + * Author: Daniel Otte + * + * License: GPLv3 or later +*/ +; SHA1 implementation in assembler for AVR +SHA1_BLOCK_BITS = 512 +SHA1_HASH_BITS = 160 + +.macro precall + /* push r18 - r27, r30 - r31*/ + push r0 + push r1 + push r18 + push r19 + push r20 + push r21 + push r22 + push r23 + push r24 + push r25 + push r26 + push r27 + push r30 + push r31 + clr r1 +.endm + +.macro postcall + pop r31 + pop r30 + pop r27 + pop r26 + pop r25 + pop r24 + pop r23 + pop r22 + pop r21 + pop r20 + pop r19 + pop r18 + pop r1 + pop r0 +.endm + + +.macro hexdump length + push r27 + push r26 + ldi r25, '\r' + mov r24, r25 + call uart_putc + ldi r25, '\n' + mov r24, r25 + call uart_putc + pop r26 + pop r27 + movw r24, r26 +.if \length > 16 + ldi r22, lo8(16) + ldi r23, hi8(16) + push r27 + push r26 + call uart_hexdump + pop r26 + pop r27 + adiw r26, 16 + hexdump \length-16 +.else + ldi r22, lo8(\length) + ldi r23, hi8(\length) + call uart_hexdump +.endif +.endm + +.macro delay +/* + push r0 + push r1 + clr r0 +1: clr r1 +2: dec r1 + brne 2b + dec r0 + brne 1b + pop r1 + pop r0 // */ +.endm + +/* X points to Block */ +.macro dbg_hexdump length +/* + precall + hexdump \length + postcall + // */ +.endm + + + +.section .text + +SPL = 0x3D +SPH = 0x3E +SREG = 0x3F + + +; +;sha1_ctx_t is: +; +; [h0][h1][h2][h3][h4][length] +; hn is 32 bit large, length is 64 bit large + +;########################################################### + +.global sha1_ctx2hash +; === sha1_ctx2hash === +; this function converts a state into a normal hash (bytestring) +; param1: the 16-bit destination pointer +; given in r25,r24 (r25 is most significant) +; param2: the 16-bit pointer to sha1_ctx structure +; given in r23,r22 +sha1_ctx2hash: + movw r26, r22 + movw r30, r24 + ldi r21, 5 + sbiw r26, 4 +1: + ldi r20, 4 + adiw r26, 8 +2: + ld r0, -X + st Z+, r0 + dec r20 + brne 2b + + dec r21 + brne 1b + + ret + +;########################################################### + +.global sha1 +; === sha1 === +; 
this function calculates SHA-1 hashes from messages in RAM +; param1: the 16-bit hash destination pointer +; given in r25,r24 (r25 is most significant) +; param2: the 16-bit pointer to message +; given in r23,r22 +; param3: 32-bit length value (length of message in bits) +; given in r21,r20,r19,r18 +sha1: +sha1_prolog: + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r16 + push r17 + in r30, SPL + in r31, SPH + sbiw r30, 5*4+8 + in r0, SREG + cli + out SPL, r30 + out SREG, r0 + out SPH, r31 + + push r25 + push r24 + adiw r30, 1 + movw r16, r30 + + movw r8, r18 /* backup of length*/ + movw r10, r20 + + movw r12, r22 /* backup pf msg-ptr */ + + movw r24, r16 + rcall sha1_init + /* if length >= 512 */ +1: + tst r11 + brne 2f + tst r10 + breq 4f +2: + movw r24, r16 + movw r22, r12 + rcall sha1_nextBlock + ldi r19, 64 + add r12, r19 + adc r13, r1 + /* length -= 512 */ + ldi r19, 0x02 + sub r9, r19 + sbc r10, r1 + sbc r11, r1 + rjmp 1b + +4: + movw r24, r16 + movw r22, r12 + movw r20, r8 + rcall sha1_lastBlock + + pop r24 + pop r25 + movw r22, r16 + rcall sha1_ctx2hash + +sha1_epilog: + in r30, SPL + in r31, SPH + adiw r30, 5*4+8 + in r0, SREG + cli + out SPL, r30 + out SREG, r0 + out SPH, r31 + pop r17 + pop r16 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + ret + +;########################################################### + + +; block MUST NOT be larger than 64 bytes + +.global sha1_lastBlock +; === sha1_lastBlock === +; this function does padding & Co. 
for calculating SHA-1 hashes +; param1: the 16-bit pointer to sha1_ctx structure +; given in r25,r24 (r25 is most significant) +; param2: an 16-bit pointer to 64 byte block to hash +; given in r23,r22 +; param3: an 16-bit integer specifing length of block in bits +; given in r21,r20 +sha1_lastBlock_localSpace = (SHA1_BLOCK_BITS/8+1) + + +sha1_lastBlock: + cpi r21, 0x02 + brlo sha1_lastBlock_prolog + push r25 + push r24 + push r23 + push r22 + push r21 + push r20 + rcall sha1_nextBlock + pop r20 + pop r21 + pop r22 + pop r23 + pop r24 + pop r25 + subi r21, 2 + ldi r19, 64 + sub r22, r19 + sbc r23, r1 + rjmp sha1_lastBlock +sha1_lastBlock_prolog: + /* allocate space on stack */ + in r30, SPL + in r31, SPH + in r0, SREG + subi r30, lo8(64) + sbci r31, hi8(64) /* ??? */ + cli + out SPL, r30 + out SREG, r0 + out SPH, r31 + + adiw r30, 1 /* SP points to next free byte on stack */ + mov r18, r20 /* r20 = LSB(length) */ + lsr r18 + lsr r18 + lsr r18 + bst r21, 0 /* may be we should explain this ... */ + bld r18, 5 /* now: r18 == length/8 (aka. 
length in bytes) */ + + + movw r26, r22 /* X points to begin of msg */ + tst r18 + breq sha1_lastBlock_post_copy + mov r1, r18 +sha1_lastBlock_copy_loop: + ld r0, X+ + st Z+, r0 + dec r1 + brne sha1_lastBlock_copy_loop +sha1_lastBlock_post_copy: +sha1_lastBlock_insert_stuffing_bit: + ldi r19, 0x80 + mov r0,r19 + ldi r19, 0x07 + and r19, r20 /* if we are in bitmode */ + breq 2f /* no bitmode */ +1: + lsr r0 + dec r19 + brne 1b + ld r19, X +/* maybe we should do some ANDing here, just for safety */ + or r0, r19 +2: + st Z+, r0 + inc r18 + +/* checking stuff here */ + cpi r18, 64-8+1 + brsh 0f + rjmp sha1_lastBlock_insert_zeros +0: + /* oh shit, we landed here */ + /* first we have to fill it up with zeros */ + ldi r19, 64 + sub r19, r18 + breq 2f +1: + st Z+, r1 + dec r19 + brne 1b +2: + sbiw r30, 63 + sbiw r30, 1 + movw r22, r30 + + push r31 + push r30 + push r25 + push r24 + push r21 + push r20 + rcall sha1_nextBlock + pop r20 + pop r21 + pop r24 + pop r25 + pop r30 + pop r31 + + /* now we should subtract 512 from length */ + movw r26, r24 + adiw r26, 4*5+1 /* we can skip the lowest byte */ + ld r19, X + subi r19, hi8(512) + st X+, r19 + ldi r18, 6 +1: + ld r19, X + sbci r19, 0 + st X+, r19 + dec r18 + brne 1b + +; clr r18 /* not neccessary ;-) */ + /* reset Z pointer to begin of block */ + +sha1_lastBlock_insert_zeros: + ldi r19, 64-8 + sub r19, r18 + breq sha1_lastBlock_insert_length + clr r1 +1: + st Z+, r1 /* r1 is still zero */ + dec r19 + brne 1b + +; rjmp sha1_lastBlock_epilog +sha1_lastBlock_insert_length: + movw r26, r24 /* X points to state */ + adiw r26, 5*4 /* X points to (state.length) */ + adiw r30, 8 /* Z points one after the last byte of block */ + ld r0, X+ + add r0, r20 + st -Z, r0 + ld r0, X+ + adc r0, r21 + st -Z, r0 + ldi r19, 6 +1: + ld r0, X+ + adc r0, r1 + st -Z, r0 + dec r19 + brne 1b + + sbiw r30, 64-8 + movw r22, r30 + rcall sha1_nextBlock + +sha1_lastBlock_epilog: + in r30, SPL + in r31, SPH + in r0, SREG + adiw r30, 63 ; lo8(64) + adiw 
r30, 1 ; hi8(64) + cli + out SPL, r30 + out SREG, r0 + out SPH, r31 + clr r1 + ret + +/**/ +;########################################################### + +.global sha1_nextBlock +; === sha1_nextBlock === +; this is the core function for calculating SHA-1 hashes +; param1: the 16-bit pointer to sha1_ctx structure +; given in r25,r24 (r25 is most significant) +; param2: an 16-bit pointer to 64 byte block to hash +; given in r23,r22 +sha1_nextBlock_localSpace = (16+5+1)*4 ; 16 32-bit values for w array and 5 32-bit values for a array (total 84 byte) + +xtmp = 0 +xNULL = 1 +W1 = 10 +W2 = 11 +T1 = 12 +T2 = 13 +T3 = 14 +T4 = 15 +LoopC = 16 +S = 17 +tmp1 = 18 +tmp2 = 19 +tmp3 = 20 +tmp4 = 21 +F1 = 22 +F2 = 23 +F3 = 24 +F4 = 25 + +/* byteorder: high number <--> high significance */ +sha1_nextBlock: + ; initial, let's make some space ready for local vars + /* replace push & pop by mem ops? */ + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r28 + push r29 + in r20, SPL + in r21, SPH + movw r18, r20 ;backup SP +; movw r26, r20 ; X points to free space on stack /* maybe removeable? 
*/ + movw r30, r22 ; Z points to message + subi r20, lo8(sha1_nextBlock_localSpace) ;sbiw can do only up to 63 + sbci r21, hi8(sha1_nextBlock_localSpace) + movw r26, r20 ; X points to free space on stack + in r0, SREG + cli ; we want to be uninterrupted while updating SP + out SPL, r20 + out SREG, r0 + out SPH, r21 + + push r18 + push r19 /* push old SP on new stack */ + push r24 + push r25 /* param1 will be needed later */ + + /* load a[] with state */ + movw 28, r24 /* load pointer to state in Y */ + adiw r26, 1 ; X++ + + ldi LoopC, 5*4 +1: ld tmp1, Y+ + st X+, tmp1 + dec LoopC + brne 1b + + movw W1, r26 /* save pointer to w[0] */ + /* load w[] with endian fixed message */ + /* we might also use the changeendian32() function at bottom */ + movw r30, r22 /* mv param2 (ponter to msg) to Z */ + ldi LoopC, 16 +1: + ldd tmp1, Z+3 + st X+, tmp1 + ldd tmp1, Z+2 + st X+, tmp1 + ldd tmp1, Z+1 + st X+, tmp1 + ld tmp1, Z + st X+, tmp1 + adiw r30, 4 + dec LoopC + brne 1b + + ;clr LoopC /* LoopC is named t in FIPS 180-2 */ + clr xtmp +sha1_nextBlock_mainloop: + mov S, LoopC + lsl S + lsl S + andi S, 0x3C /* S is a bytepointer so *4 */ + /* load w[s] */ + movw r26, W1 + add r26, S /* X points at w[s] */ + adc r27, xNULL + ld T1, X+ + ld T2, X+ + ld T3, X+ + ld T4, X+ + +/* + push r26 + push r27 + push T4 + push T3 + push T2 + push T1 + in r26, SPL + in r27, SPH + adiw r26, 1 + dbg_hexdump 4 + pop T1 + pop T2 + pop T3 + pop T4 + pop r27 + pop r26 +*/ + + cpi LoopC, 16 + brlt sha1_nextBlock_mainloop_core + /* update w[s] */ + ldi tmp1, 2*4 + rcall 1f + ldi tmp1, 8*4 + rcall 1f + ldi tmp1, 13*4 + rcall 1f + rjmp 2f +1: /* this might be "outsourced" to save the jump above */ + add tmp1, S + andi tmp1, 0x3f + movw r26, W1 + add r26, tmp1 + adc r27, xNULL + ld tmp2, X+ + eor T1, tmp2 + ld tmp2, X+ + eor T2, tmp2 + ld tmp2, X+ + eor T3, tmp2 + ld tmp2, X+ + eor T4, tmp2 + ret +2: /* now we just hav to do a ROTL(T) and save T back */ + mov tmp2, T4 + rol tmp2 + rol T1 + rol T2 + rol 
T3 + rol T4 + movw r26, W1 + add r26, S + adc r27, xNULL + st X+, T1 + st X+, T2 + st X+, T3 + st X+, T4 + +sha1_nextBlock_mainloop_core: /* ther core function; T=ROTL5(a) ....*/ + /* T already contains w[s] */ + movw r26, W1 + sbiw r26, 4*1 /* X points at a[4] aka e */ + ld tmp1, X+ + add T1, tmp1 + ld tmp1, X+ + adc T2, tmp1 + ld tmp1, X+ + adc T3, tmp1 + ld tmp1, X+ + adc T4, tmp1 /* T = w[s]+e */ + sbiw r26, 4*5 /* X points at a[0] aka a */ + ld F1, X+ + ld F2, X+ + ld F3, X+ + ld F4, X+ + mov tmp1, F4 /* X points at a[1] aka b */ + ldi tmp2, 5 +1: + rol tmp1 + rol F1 + rol F2 + rol F3 + rol F4 + dec tmp2 + brne 1b + + add T1, F1 + adc T2, F2 + adc T3, F3 + adc T4, F4 /* T = ROTL(a,5) + e + w[s] */ + + /* now we have to do this fucking conditional stuff */ + ldi r30, lo8(sha1_nextBlock_xTable) + ldi r31, hi8(sha1_nextBlock_xTable) + add r30, xtmp + adc r31, xNULL + lpm tmp1, Z + cp tmp1, LoopC + brne 1f + inc xtmp +1: ldi r30, lo8(sha1_nextBlock_KTable) + ldi r31, hi8(sha1_nextBlock_KTable) + lsl xtmp + lsl xtmp + add r30, xtmp + adc r31, xNULL + lsr xtmp + lsr xtmp + + lpm tmp1, Z+ + add T1, tmp1 + lpm tmp1, Z+ + adc T2, tmp1 + lpm tmp1, Z+ + adc T3, tmp1 + lpm tmp1, Z+ + adc T4, tmp1 + /* T = ROTL(a,5) + e + kt + w[s] */ + + /* Z-4 is just pointing to kt ... 
*/ + movw r28, r26 /* copy X in Y */ + adiw r30, 3*4 /* now Z points to the rigth locatin in our jump-vector-table */ + lsr r31 + ror r30 + + icall + mov F1, tmp1 + icall + mov F2, tmp1 + icall + mov F3, tmp1 + icall + + add T1, F1 + adc T2, F2 + adc T3, F3 + adc T4, tmp1 /* T = ROTL5(a) + f_t(b,c,d) + e + k_t + w[s] */ + /* X points still at a[1] aka b, Y points at a[2] aka c */ + /* update a[] */ +sha1_nextBlock_update_a: + /*first we move all vars in a[] "one up" e=d, d=c, c=b, b=a*/ + //adiw r28, 3*4 /* Y should point at a[4] aka e */ + movw r28, W1 + sbiw r28, 4 + + ldi tmp2, 4*4 +1: + ld tmp1, -Y + std Y+4, tmp1 + dec tmp2 + brne 1b + /* Y points at a[0] aka a*/ + + movw r28, W1 + sbiw r28, 5*4 + /* store T in a[0] aka a */ + st Y+, T1 + st Y+, T2 + st Y+, T3 + st Y+, T4 + /* Y points at a[1] aka b*/ + + /* rotate c */ + ldd T1, Y+1*4 + ldd T2, Y+1*4+1 + ldd T3, Y+1*4+2 + ldd T4, Y+1*4+3 + mov tmp1, T1 + ldi tmp2, 2 +1: ror tmp1 + ror T4 + ror T3 + ror T2 + ror T1 + dec tmp2 + brne 1b + std Y+1*4+0, T1 + std Y+1*4+1, T2 + std Y+1*4+2, T3 + std Y+1*4+3, T4 +/* + push r27 + push r26 + movw r26, W1 + sbiw r26, 4*5 + dbg_hexdump 4*5 + pop r26 + pop r27 +*/ + inc LoopC + cpi LoopC, 80 + brge 1f + rjmp sha1_nextBlock_mainloop +/**************************************/ +1: + /* littel patch */ + sbiw r28, 4 + +/* add a[] to state and inc length */ + pop r27 + pop r26 /* now X points to state (and Y still at a[0]) */ + ldi tmp4, 5 +1: clc + ldi tmp3, 4 +2: ld tmp1, X + ld tmp2, Y+ + adc tmp1, tmp2 + st X+, tmp1 + dec tmp3 + brne 2b + dec tmp4 + brne 1b + + /* now length += 512 */ + adiw r26, 1 /* we skip the least significant byte */ + ld tmp1, X + ldi tmp2, hi8(512) /* 2 */ + add tmp1, tmp2 + st X+, tmp1 + ldi tmp2, 6 +1: + ld tmp1, X + adc tmp1, xNULL + st X+, tmp1 + dec tmp2 + brne 1b + +; EPILOG +sha1_nextBlock_epilog: +/* now we should clean up the stack */ + pop r21 + pop r20 + in r0, SREG + cli ; we want to be uninterrupted while updating SP + out SPL, r20 + 
out SREG, r0 + out SPH, r21 + + clr r1 + pop r29 + pop r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + ret + +sha1_nextBlock_xTable: +.byte 20,40,60,0 +sha1_nextBlock_KTable: +.int 0x5a827999 +.int 0x6ed9eba1 +.int 0x8f1bbcdc +.int 0xca62c1d6 +sha1_nextBlock_JumpTable: +rjmp sha1_nextBlock_Ch + nop +rjmp sha1_nextBlock_Parity + nop +rjmp sha1_nextBlock_Maj + nop +rjmp sha1_nextBlock_Parity + + /* X and Y still point at a[1] aka b ; return value in tmp1 */ +sha1_nextBlock_Ch: + ld tmp1, Y+ + mov tmp2, tmp1 + com tmp2 + ldd tmp3, Y+3 /* load from c */ + and tmp1, tmp3 + ldd tmp3, Y+7 /* load from d */ + and tmp2, tmp3 + eor tmp1, tmp2 + ret + +sha1_nextBlock_Maj: + ld tmp1, Y+ + mov tmp2, tmp1 + ldd tmp3, Y+3 /* load from c */ + and tmp1, tmp3 + ldd tmp4, Y+7 /* load from d */ + and tmp2, tmp4 + eor tmp1, tmp2 + and tmp3, tmp4 + eor tmp1, tmp3 + ret + +sha1_nextBlock_Parity: + ld tmp1, Y+ + ldd tmp2, Y+3 /* load from c */ + eor tmp1, tmp2 + ldd tmp2, Y+7 /* load from d */ + eor tmp1, tmp2 + ret +/* +ch_str: .asciz "\r\nCh" +maj_str: .asciz "\r\nMaj" +parity_str: .asciz "\r\nParity" +*/ +;########################################################### + +.global sha1_init +;void sha1_init(sha1_ctx_t *state){ +; DEBUG_S("\r\nSHA1_INIT"); +; state->h[0] = 0x67452301; +; state->h[1] = 0xefcdab89; +; state->h[2] = 0x98badcfe; +; state->h[3] = 0x10325476; +; state->h[4] = 0xc3d2e1f0; +; state->length = 0; +;} +; param1: (Func3,r24) 16-bit pointer to sha1_ctx_t struct in ram +; modifys: Z(r30,r31), Func1, r22 +sha1_init: + movw r26, r24 ; (24,25) --> (26,27) load X with param1 + ldi r30, lo8((sha1_init_vector)) + ldi r31, hi8((sha1_init_vector)) + ldi r22, 5*4 /* bytes to copy */ +sha1_init_vloop: + lpm r23, Z+ + st X+, r23 + dec r22 + brne sha1_init_vloop + ldi r22, 8 +sha1_init_lloop: + st X+, r1 + dec r22 + brne sha1_init_lloop + ret + +sha1_init_vector: +.int 0x67452301; +.int 0xefcdab89; +.int 0x98badcfe; +.int 0x10325476; +.int 
0xc3d2e1f0; + diff --git a/dsa/sha1.h b/dsa/sha1.h new file mode 100644 index 0000000..6675d20 --- /dev/null +++ b/dsa/sha1.h @@ -0,0 +1,117 @@ +/* sha1.h */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * \file sha1.h + * \author Daniel Otte + * \email daniel.otte@rub.de + * \date 2006-10-08 + * \license GPLv3 or later + * \brief SHA-1 declaration.
+ * \ingroup SHA-1 + * + */ + +#ifndef SHA1_H_ +#define SHA1_H_ + +#include <stdint.h> +/** \def SHA1_HASH_BITS + * defines the size of a SHA-1 hash in bits + */ + +/** \def SHA1_HASH_BYTES + * defines the size of a SHA-1 hash in bytes + */ + +/** \def SHA1_BLOCK_BITS + * defines the size of a SHA-1 input block in bits + */ + +/** \def SHA1_BLOCK_BYTES + * defines the size of a SHA-1 input block in bytes + */ +#define SHA1_HASH_BITS 160 +#define SHA1_HASH_BYTES (SHA1_HASH_BITS/8) +#define SHA1_BLOCK_BITS 512 +#define SHA1_BLOCK_BYTES (SHA1_BLOCK_BITS/8) + +/** \typedef sha1_ctx_t + * \brief SHA-1 context type + * + * A variable of this type may hold the state of a SHA-1 hashing process + */ +typedef struct { + uint32_t h[5]; + uint64_t length; +} sha1_ctx_t; + +/** \typedef sha1_hash_t + * \brief hash value type + * A variable of this type may hold a SHA-1 hash value + */ +typedef uint8_t sha1_hash_t[SHA1_HASH_BITS/8]; + +/** \fn sha1_init(sha1_ctx_t *state) + * \brief initializes a SHA-1 context + * This function sets a ::sha1_ctx_t variable to the initialization vector + * for SHA-1 hashing. + * \param state pointer to the SHA-1 context variable + */ +void sha1_init(sha1_ctx_t *state); + +/** \fn sha1_nextBlock(sha1_ctx_t *state, const void* block) + * \brief process one input block + * This function processes one input block and updates the hash context + * accordingly + * \param state pointer to the state variable to update + * \param block pointer to the message block to process + */ +void sha1_nextBlock (sha1_ctx_t *state, const void* block); + +/** \fn sha1_lastBlock(sha1_ctx_t *state, const void* block, uint16_t length_b) + * \brief processes the given block and finalizes the context + * This function processes the last block in a SHA-1 hashing process. + * The block should have a maximum length of a single input block.
+ * \param state pointer to the state variable to update and finalize + * \param block pointer to the message block to process + * \param length_b length of the message block in bits + */ +void sha1_lastBlock (sha1_ctx_t *state, const void* block, uint16_t length_b); + +/** \fn sha1_ctx2hash(sha1_hash_t *dest, sha1_ctx_t *state) + * \brief convert a state variable into an actual hash value + * Writes the hash value corresponding to the state to the memory pointed to by dest. + * \param dest pointer to the hash value destination + * \param state pointer to the hash context + */ +void sha1_ctx2hash (sha1_hash_t *dest, sha1_ctx_t *state); + +/** \fn sha1(sha1_hash_t *dest, const void* msg, uint32_t length_b) + * \brief hashing a message which is located entirely in RAM + * This function automatically hashes a message which is entirely in RAM with + * the SHA-1 hashing algorithm. + * \param dest pointer to the hash value destination + * \param msg pointer to the message which should be hashed + * \param length_b length of the message in bits + */ +void sha1(sha1_hash_t *dest, const void* msg, uint32_t length_b); + + + +#endif /*SHA1_H_*/ diff --git a/dsa/sha256-asm.S b/dsa/sha256-asm.S new file mode 100644 index 0000000..e9729a1 --- /dev/null +++ b/dsa/sha256-asm.S @@ -0,0 +1,1040 @@ +/* sha256-asm.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ +/* + * Author: Daniel Otte + * + * License: GPLv3 or later +*/ +; sha-256 implementation in assembler +SHA256_BLOCK_BITS = 512 +SHA256_HASH_BITS = 256 + + +.macro precall + /* push r18 - r27, r30 - r31*/ + push r0 + push r1 + push r18 + push r19 + push r20 + push r21 + push r22 + push r23 + push r24 + push r25 + push r26 + push r27 + push r30 + push r31 + clr r1 +.endm + +.macro postcall + pop r31 + pop r30 + pop r27 + pop r26 + pop r25 + pop r24 + pop r23 + pop r22 + pop r21 + pop r20 + pop r19 + pop r18 + pop r1 + pop r0 +.endm + + +.macro hexdump length + push r27 + push r26 + ldi r25, '\r' + mov r24, r25 + call uart_putc + ldi r25, '\n' + mov r24, r25 + call uart_putc + pop r26 + pop r27 + movw r24, r26 +.if \length > 16 + ldi r22, lo8(16) + ldi r23, hi8(16) + push r27 + push r26 + call uart_hexdump + pop r26 + pop r27 + adiw r26, 16 + hexdump \length-16 +.else + ldi r22, lo8(\length) + ldi r23, hi8(\length) + call uart_hexdump +.endif +.endm + +/* X points to Block */ +.macro dbg_hexdump length + precall + hexdump \length + postcall +.endm + +.section .text + +SPL = 0x3D +SPH = 0x3E +SREG = 0x3F + + +; +;sha256_ctx_t is: +; +; [h0][h1][h2][h3][h4][h5][h6][h7][length] +; hn is 32 bit large, length is 64 bit large + +;########################################################### + +.global sha256_ctx2hash +; === sha256_ctx2hash === +; this function converts a state into a normal hash (bytestring) +; param1: the 16-bit destination pointer +; given in r25,r24 (r25 is most significant) +; param2: the 16-bit pointer to sha256_ctx structure +; given in r23,r22 +sha256_ctx2hash: + movw r26, r22 + movw r30, r24 + ldi r21, 8 + sbiw r26, 4 +1: + ldi r20, 4 + adiw r26, 8 +2: + ld r0, -X + st Z+, r0 + dec r20 + brne 2b + + dec r21 + brne 1b + + ret + +;########################################################### + +.global sha256 +; === sha256 === +; this 
function calculates SHA-256 hashes from messages in RAM +; param1: the 16-bit hash destination pointer +; given in r25,r24 (r25 is most significant) +; param2: the 16-bit pointer to message +; given in r23,r22 +; param3: 32-bit length value (length of message in bits) +; given in r21,r20,r19,r18 +sha256: +sha256_prolog: + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r16 + push r17 + in r30, SPL + in r31, SPH + sbiw r30, 8*4+8 + in r0, SREG + cli + out SPL, r30 + out SREG, r0 + out SPH, r31 + + push r25 + push r24 + adiw r30, 1 + movw r16, r30 + movw r8, r18 /* backup of length*/ + movw r10, r20 + + movw r12, r22 /* backup pf msg-ptr */ + + movw r24, r16 + rcall sha256_init + /* if length > 0xffff */ +1: + tst r11 + brne 2f + tst r10 + breq 4f +2: + movw r24, r16 + movw r22, r12 + rcall sha256_nextBlock + ldi r19, 64 + add r12, r19 + adc r13, r1 + /* length -= 512 */ + ldi r19, 0x02 + sub r9, r19 + sbc r10, r1 + sbc r11, r1 + rjmp 1b + +4: + movw r24, r16 + movw r22, r12 + movw r20, r8 + rcall sha256_lastBlock + + pop r24 + pop r25 + movw r22, r16 + rcall sha256_ctx2hash + +sha256_epilog: + in r30, SPL + in r31, SPH + adiw r30, 8*4+8 + in r0, SREG + cli + out SPL, r30 + out SREG, r0 + out SPH, r31 + pop r17 + pop r16 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + ret + +;########################################################### + + +; block MUST NOT be larger than 64 bytes + +.global sha256_lastBlock +; === sha256_lastBlock === +; this function does padding & Co. 
for calculating SHA-256 hashes +; param1: the 16-bit pointer to sha256_ctx structure +; given in r25,r24 (r25 is most significant) +; param2: an 16-bit pointer to 64 byte block to hash +; given in r23,r22 +; param3: an 16-bit integer specifing length of block in bits +; given in r21,r20 +sha256_lastBlock_localSpace = (SHA256_BLOCK_BITS/8+1) + + +sha256_lastBlock: + cpi r21, 0x02 + brlo sha256_lastBlock_prolog + push r25 + push r24 + push r23 + push r22 + push r21 + push r20 + rcall sha256_nextBlock + pop r20 + pop r21 + pop r22 + pop r23 + pop r24 + pop r25 + subi r21, 0x02 + ldi r19, 64 + add r22, r19 + adc r23, r1 + rjmp sha256_lastBlock +sha256_lastBlock_prolog: + /* allocate space on stack */ + in r30, SPL + in r31, SPH + in r0, SREG + subi r30, lo8(64) + sbci r31, hi8(64) + cli + out SPL, r30 + out SREG,r0 + out SPH, r31 + + adiw r30, 1 /* SP points to next free byte on stack */ + mov r18, r20 /* r20 = LSB(length) */ + lsr r18 + lsr r18 + lsr r18 + bst r21, 0 /* may be we should explain this ... */ + bld r18, 5 /* now: r18 == length/8 (aka. 
length in bytes) */ + + + movw r26, r22 /* X points to begin of msg */ + tst r18 + breq sha256_lastBlock_post_copy + mov r1, r18 +sha256_lastBlock_copy_loop: + ld r0, X+ + st Z+, r0 + dec r1 + brne sha256_lastBlock_copy_loop +sha256_lastBlock_post_copy: +sha256_lastBlock_insert_stuffing_bit: + ldi r19, 0x80 + mov r0,r19 + ldi r19, 0x07 + and r19, r20 /* if we are in bitmode */ + breq 2f /* no bitmode */ +1: + lsr r0 + dec r19 + brne 1b + ld r19, X +/* maybe we should do some ANDing here, just for safety */ + or r0, r19 +2: + st Z+, r0 + inc r18 + +/* checking stuff here */ + cpi r18, 64-8+1 + brsh 0f + rjmp sha256_lastBlock_insert_zeros +0: + /* oh shit, we landed here */ + /* first we have to fill it up with zeros */ + ldi r19, 64 + sub r19, r18 + breq 2f +1: + st Z+, r1 + dec r19 + brne 1b +2: + sbiw r30, 63 + sbiw r30, 1 + movw r22, r30 + + push r31 + push r30 + push r25 + push r24 + push r21 + push r20 + rcall sha256_nextBlock + pop r20 + pop r21 + pop r24 + pop r25 + pop r30 + pop r31 + + /* now we should subtract 512 from length */ + movw r26, r24 + adiw r26, 4*8+1 /* we can skip the lowest byte */ + ld r19, X + subi r19, hi8(512) + st X+, r19 + ldi r18, 6 +1: + ld r19, X + sbci r19, 0 + st X+, r19 + dec r18 + brne 1b + +; clr r18 /* not neccessary ;-) */ + /* reset Z pointer to begin of block */ + +sha256_lastBlock_insert_zeros: + ldi r19, 64-8 + sub r19, r18 + breq sha256_lastBlock_insert_length + clr r1 +1: + st Z+, r1 /* r1 is still zero */ + dec r19 + brne 1b + +; rjmp sha256_lastBlock_epilog +sha256_lastBlock_insert_length: + movw r26, r24 /* X points to state */ + adiw r26, 8*4 /* X points to (state.length) */ + adiw r30, 8 /* Z points one after the last byte of block */ + ld r0, X+ + add r0, r20 + st -Z, r0 + ld r0, X+ + adc r0, r21 + st -Z, r0 + ldi r19, 6 +1: + ld r0, X+ + adc r0, r1 + st -Z, r0 + dec r19 + brne 1b + + sbiw r30, 64-8 + movw r22, r30 + rcall sha256_nextBlock + +sha256_lastBlock_epilog: + in r30, SPL + in r31, SPH + in r0, SREG + adiw 
r30, 63 ; lo8(64) + adiw r30, 1 ; hi8(64) + cli + out SPL, r30 + out SREG,r0 + out SPH, r31 + clr r1 + ret + +/**/ +;########################################################### + +.global sha256_nextBlock +; === sha256_nextBlock === +; this is the core function for calculating SHA-256 hashes +; param1: the 16-bit pointer to sha256_ctx structure +; given in r25,r24 (r25 is most significant) +; param2: an 16-bit pointer to 64 byte block to hash +; given in r23,r22 +sha256_nextBlock_localSpace = (64+8)*4 ; 64 32-bit values for w array and 8 32-bit values for a array (total 288 byte) + +Bck1 = 12 +Bck2 = 13 +Bck3 = 14 +Bck4 = 15 +Func1 = 22 +Func2 = 23 +Func3 = 24 +Func4 = 25 +Accu1 = 16 +Accu2 = 17 +Accu3 = 18 +Accu4 = 19 +XAccu1 = 8 +XAccu2 = 9 +XAccu3 = 10 +XAccu4 = 11 +T1 = 4 +T2 = 5 +T3 = 6 +T4 = 7 +LoopC = 1 +/* byteorder: high number <--> high significance */ +sha256_nextBlock: + ; initial, let's make some space ready for local vars + push r4 /* replace push & pop by mem ops? */ + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r28 + push r29 + in r20, SPL + in r21, SPH + movw r18, r20 ;backup SP +; movw r26, r20 ; X points to free space on stack + movw r30, r22 ; Z points to message + subi r20, lo8(sha256_nextBlock_localSpace) ;sbiw can do only up to 63 + sbci r21, hi8(sha256_nextBlock_localSpace) + movw r26, r20 ; X points to free space on stack + in r0, SREG + cli ; we want to be uninterrupted while updating SP + out SPL, r20 + out SREG, r0 + out SPH, r21 + push r18 + push r19 + push r24 + push r25 /* param1 will be needed later */ + ; now we fill the w array with message (think about endianess) + adiw r26, 1 ; X++ + ldi r20, 16 +sha256_nextBlock_wcpyloop: + ld r23, Z+ + ld r22, Z+ + ld r19, Z+ + ld r18, Z+ + st X+, r18 + st X+, r19 + st X+, r22 + st X+, r23 + dec r20 + brne sha256_nextBlock_wcpyloop +/* for (i=16; i<64; ++i){ + w[i] = SIGMA_b(w[i-2]) + w[i-7] + 
SIGMA_a(w[i-15]) + w[i-16]; + } */ + /* r25,r24,r23,r24 (r21,r20) are function values + r19,r18,r17,r16 are the accumulator + r15,r14,r13,rBck1 are backup1 + r11,r10,r9 ,r8 are xor accu + r1 is round counter */ + + ldi r20, 64-16 + mov LoopC, r20 +sha256_nextBlock_wcalcloop: + movw r30, r26 ; cp X to Z + sbiw r30, 63 + sbiw r30, 1 ; substract 64 = 16*4 + ld Accu1, Z+ + ld Accu2, Z+ + ld Accu3, Z+ + ld Accu4, Z+ /* w[i] = w[i-16] */ + ld Bck1, Z+ + ld Bck2, Z+ + ld Bck3, Z+ + ld Bck4, Z+ /* backup = w[i-15] */ + /* now sigma 0 */ + mov Func1, Bck2 + mov Func2, Bck3 + mov Func3, Bck4 + mov Func4, Bck1 /* prerotated by 8 */ + ldi r20, 1 + rcall bitrotl + movw XAccu1, Func1 + movw XAccu3, Func3 /* store ROTR(w[i-15],7) in xor accu */ + movw Func1, Bck3 + movw Func3, Bck1 /* prerotated by 16 */ + ldi r20, 2 + rcall bitrotr + eor XAccu1, Func1 /* xor ROTR(w[i-15], 18)*/ + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 + ldi Func2, 3 /* now shr3 */ /*we can destroy backup now*/ +sigma0_shr: + lsr Bck4 + ror Bck3 + ror Bck2 + ror Bck1 + dec Func2 + brne sigma0_shr + eor XAccu1, Bck1 + eor XAccu2, Bck2 + eor XAccu3, Bck3 + eor XAccu4, Bck4 /* xor SHR(w[i-15], 3)*/ /* xor accu == sigma1(w[i-15]) */ + add Accu1, XAccu1 + adc Accu2, XAccu2 + adc Accu3, XAccu3 + adc Accu4, XAccu4 /* finished with sigma0 */ + ldd Func1, Z+7*4 /* now accu += w[i-7] */ + ldd Func2, Z+7*4+1 + ldd Func3, Z+7*4+2 + ldd Func4, Z+7*4+3 + add Accu1, Func1 + adc Accu2, Func2 + adc Accu3, Func3 + adc Accu4, Func4 + ldd Bck1, Z+12*4 /* now backup = w[i-2]*/ + ldd Bck2, Z+12*4+1 + ldd Bck3, Z+12*4+2 + ldd Bck4, Z+12*4+3 + /* now sigma 1 */ + movw Func1, Bck3 + movw Func3, Bck1 /* prerotated by 16 */ + ldi r20, 1 + rcall bitrotr + movw XAccu3, Func3 + movw XAccu1, Func1 /* store in ROTR(w[i-2], 17) xor accu */ +; movw Func1, Bck3 +; movw Func3, Bck1 /* prerotated by 16 */ + ldi r20, 2 + rcall bitrotr + eor XAccu1, Func1 /* xor ROTR(w[i-2], 19)*/ + eor XAccu2, Func2 + eor XAccu3, Func3 + eor 
XAccu4, Func4 + ldi Func2, 2 /* now shr10 (dirty trick, skipping a byte) */ /*we can destroy backup now*/ +sigma1_shr: + lsr Bck4 + ror Bck3 + ror Bck2 + dec Func2 + brne sigma1_shr + eor XAccu1, Bck2 + eor XAccu2, Bck3 + eor XAccu3, Bck4 /* xor SHR(w[i-2], 10)*/ /* xor accu == sigma1(w[i-15]) */ + add Accu1, XAccu1 + adc Accu2, XAccu2 + adc Accu3, XAccu3 + adc Accu4, XAccu4 /* finished with sigma0 */ + /* now let's store the shit */ + st X+, Accu1 + st X+, Accu2 + st X+, Accu3 + st X+, Accu4 + dec LoopC + breq 3f ; skip if zero + rjmp sha256_nextBlock_wcalcloop +3: + /* we are finished with w array X points one byte post w */ +/* init a array */ + pop r31 + pop r30 + push r30 + push r31 + ldi r25, 8*4 /* 8 32-bit values to copy from ctx to a array */ +init_a_array: + ld r1, Z+ + st X+, r1 + dec r25 + brne init_a_array + +/* now the real fun begins */ +/* for (i=0; i<64; ++i){ + t1 = a[7] + SIGMA1(a[4]) + CH(a[4],a[5],a[6]) + k[i] + w[i]; + t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]); + memmove(&(a[1]), &(a[0]), 7*4); // a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; + a[4] += t1; + a[0] = t1 + t2; + } */ + /* Y points to a[0], Z ('cause lpm wants it) points to k[i], X points to w[i] */ + sbiw r26, 8*4 /* X still points at a[7]+1*/ + movw r28, r26 + ldi r30, lo8(sha256_kv) + ldi r31, hi8(sha256_kv) + dec r27 /* X - (64*4 == 256) */ + ldi r25, 64 + mov LoopC, r25 +sha256_main_loop: + /* now calculate t1 */ + /*CH(x,y,z) = (x&y)^((~x)&z)*/ + ldd T1, Y+5*4 + ldd T2, Y+5*4+1 + ldd T3, Y+5*4+2 + ldd T4, Y+5*4+3 /* y in T */ + ldd Func1, Y+4*4 + ldd Func2, Y+4*4+1 + ldd Func3, Y+4*4+2 + ldd Func4, Y+4*4+3 /* x in Func */ + ldd Bck1, Y+6*4 + ldd Bck2, Y+6*4+1 + ldd Bck3, Y+6*4+2 + ldd Bck4, Y+6*4+3 /* z in Bck */ + and T1, Func1 + and T2, Func2 + and T3, Func3 + and T4, Func4 + com Func1 + com Func2 + com Func3 + com Func4 + and Bck1, Func1 + and Bck2, Func2 + and Bck3, Func3 + and Bck4, Func4 + eor T1, Bck1 + eor T2, Bck2 + eor T3, Bck3 + eor 
T4, Bck4 /* done, CH(x,y,z) is in T */ + /* now SIGMA1(a[4]) */ + ldd Bck4, Y+4*4 /* think about using it from Func reg above*/ + ldd Bck1, Y+4*4+1 + ldd Bck2, Y+4*4+2 + ldd Bck3, Y+4*4+3 /* load prerotate by 8-bit */ + movw Func1, Bck1 + movw Func3, Bck3 + ldi r20, 2 + rcall bitrotl /* rotr(x,6) */ + movw XAccu1, Func1 + movw XAccu3, Func3 + movw Func1, Bck1 + movw Func3, Bck3 + ldi r20, 3 + rcall bitrotr /* rotr(x,11) */ + eor XAccu1, Func1 + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 + movw Func1, Bck3 /* this prerotates furteh 16 bits*/ + movw Func3, Bck1 /* so we have now prerotated by 24 bits*/ + ldi r20, 1 + rcall bitrotr /* rotr(x,11) */ + eor XAccu1, Func1 + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 /* finished with SIGMA1, add it to T */ + add T1, XAccu1 + adc T2, XAccu2 + adc T3, XAccu3 + adc T4, XAccu4 + /* now we've to add a[7], w[i] and k[i] */ + ldd XAccu1, Y+4*7 + ldd XAccu2, Y+4*7+1 + ldd XAccu3, Y+4*7+2 + ldd XAccu4, Y+4*7+3 + add T1, XAccu1 + adc T2, XAccu2 + adc T3, XAccu3 + adc T4, XAccu4 /* add a[7] */ + ld XAccu1, X+ + ld XAccu2, X+ + ld XAccu3, X+ + ld XAccu4, X+ + add T1, XAccu1 + adc T2, XAccu2 + adc T3, XAccu3 + adc T4, XAccu4 /* add w[i] */ + lpm XAccu1, Z+ + lpm XAccu2, Z+ + lpm XAccu3, Z+ + lpm XAccu4, Z+ + add T1, XAccu1 + adc T2, XAccu2 + adc T3, XAccu3 + adc T4, XAccu4 /* add k[i] */ /* finished with t1 */ + /*now t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]) */ /*i did to much x86 asm, i always see 4 32bit regs*/ + /* starting with MAJ(x,y,z) */ + ldd Func1, Y+4*0+0 + ldd Func2, Y+4*0+1 + ldd Func3, Y+4*0+2 + ldd Func4, Y+4*0+3 /* load x=a[0] */ + ldd XAccu1, Y+4*1+0 + ldd XAccu2, Y+4*1+1 + ldd XAccu3, Y+4*1+2 + ldd XAccu4, Y+4*1+3 /* load y=a[1] */ + and XAccu1, Func1 + and XAccu2, Func2 + and XAccu3, Func3 + and XAccu4, Func4 /* XAccu == (x & y) */ + ldd Bck1, Y+4*2+0 + ldd Bck2, Y+4*2+1 + ldd Bck3, Y+4*2+2 + ldd Bck4, Y+4*2+3 /* load z=a[2] */ + and Func1, Bck1 + and Func2, Bck2 + and Func3, Bck3 + and 
Func4, Bck4 + eor XAccu1, Func1 + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 /* XAccu == (x & y) ^ (x & z) */ + ldd Func1, Y+4*1+0 + ldd Func2, Y+4*1+1 + ldd Func3, Y+4*1+2 + ldd Func4, Y+4*1+3 /* load y=a[1] */ + and Func1, Bck1 + and Func2, Bck2 + and Func3, Bck3 + and Func4, Bck4 + eor XAccu1, Func1 + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 /* XAccu == Maj(x,y,z) == (x & y) ^ (x & z) ^ (y & z) */ + /* SIGMA0(a[0]) */ + ldd Bck1, Y+4*0+0 /* we should combine this with above */ + ldd Bck2, Y+4*0+1 + ldd Bck3, Y+4*0+2 + ldd Bck4, Y+4*0+3 + movw Func1, Bck1 + movw Func3, Bck3 + ldi r20, 2 + rcall bitrotr + movw Accu1, Func1 + movw Accu3, Func3 /* Accu = shr(a[0], 2) */ + movw Func1, Bck3 + movw Func3, Bck1 /* prerotate by 16 bits */ + ldi r20, 3 + rcall bitrotl + eor Accu1, Func1 + eor Accu2, Func2 + eor Accu3, Func3 + eor Accu4, Func4 /* Accu ^= shr(a[0], 13) */ + mov Func1, Bck4 + mov Func2, Bck1 + mov Func3, Bck2 + mov Func4, Bck3 /* prerotate by 24 bits */ + ldi r20, 2 + rcall bitrotl + eor Accu1, Func1 + eor Accu2, Func2 + eor Accu3, Func3 + eor Accu4, Func4 /* Accu ^= shr(a[0], 22) */ + add Accu1, XAccu1 /* add previous result (MAJ)*/ + adc Accu2, XAccu2 + adc Accu3, XAccu3 + adc Accu4, XAccu4 + /* now we are finished with the computing stuff (t1 in T, t2 in Accu)*/ + /* a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; */ + + ldi r21, 7*4 + adiw r28, 7*4 +a_shift_loop: + ld r25, -Y /* warning: this is PREdecrement */ + std Y+4, r25 + dec r21 + brne a_shift_loop + + ldd Bck1, Y+4*4+0 + ldd Bck2, Y+4*4+1 + ldd Bck3, Y+4*4+2 + ldd Bck4, Y+4*4+3 + add Bck1, T1 + adc Bck2, T2 + adc Bck3, T3 + adc Bck4, T4 + std Y+4*4+0, Bck1 + std Y+4*4+1, Bck2 + std Y+4*4+2, Bck3 + std Y+4*4+3, Bck4 + add Accu1, T1 + adc Accu2, T2 + adc Accu3, T3 + adc Accu4, T4 + std Y+4*0+0, Accu1 + std Y+4*0+1, Accu2 + std Y+4*0+2, Accu3 + std Y+4*0+3, Accu4 /* a array updated */ + + + dec LoopC + breq update_state + rjmp 
sha256_main_loop ;brne sha256_main_loop +update_state: + /* update state */ + /* pointers to state should still exist on the stack ;-) */ + pop r31 + pop r30 + ldi r21, 8 +update_state_loop: + ldd Accu1, Z+0 + ldd Accu2, Z+1 + ldd Accu3, Z+2 + ldd Accu4, Z+3 + ld Func1, Y+ + ld Func2, Y+ + ld Func3, Y+ + ld Func4, Y+ + add Accu1, Func1 + adc Accu2, Func2 + adc Accu3, Func3 + adc Accu4, Func4 + st Z+, Accu1 + st Z+, Accu2 + st Z+, Accu3 + st Z+, Accu4 + dec r21 + brne update_state_loop + /* now we just have to update the length */ + adiw r30, 1 /* since we add 512, we can simply skip the LSB */ + ldi r21, 2 + ldi r22, 6 + ld r20, Z + add r20, r21 + st Z+, r20 + clr r21 +sha256_nextBlock_fix_length: + brcc sha256_nextBlock_epilog + ld r20, Z + adc r20, r21 + st Z+, r20 + dec r22 + brne sha256_nextBlock_fix_length + +; EPILOG +sha256_nextBlock_epilog: +/* now we should clean up the stack */ + + pop r21 + pop r20 + in r0, SREG + cli ; we want to be uninterrupted while updating SP + out SPL, r20 + out SREG, r0 + out SPH, r21 + clr r1 + pop r29 + pop r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + ret + +sha256_kv: ; round-key-vector stored in ProgMem +.word 0x2f98, 0x428a, 0x4491, 0x7137, 0xfbcf, 0xb5c0, 0xdba5, 0xe9b5, 0xc25b, 0x3956, 0x11f1, 0x59f1, 0x82a4, 0x923f, 0x5ed5, 0xab1c +.word 0xaa98, 0xd807, 0x5b01, 0x1283, 0x85be, 0x2431, 0x7dc3, 0x550c, 0x5d74, 0x72be, 0xb1fe, 0x80de, 0x06a7, 0x9bdc, 0xf174, 0xc19b +.word 0x69c1, 0xe49b, 0x4786, 0xefbe, 0x9dc6, 0x0fc1, 0xa1cc, 0x240c, 0x2c6f, 0x2de9, 0x84aa, 0x4a74, 0xa9dc, 0x5cb0, 0x88da, 0x76f9 +.word 0x5152, 0x983e, 0xc66d, 0xa831, 0x27c8, 0xb003, 0x7fc7, 0xbf59, 0x0bf3, 0xc6e0, 0x9147, 0xd5a7, 0x6351, 0x06ca, 0x2967, 0x1429 +.word 0x0a85, 0x27b7, 0x2138, 0x2e1b, 0x6dfc, 0x4d2c, 0x0d13, 0x5338, 0x7354, 0x650a, 0x0abb, 0x766a, 0xc92e, 0x81c2, 0x2c85, 0x9272 +.word 0xe8a1, 0xa2bf, 0x664b, 0xa81a, 0x8b70, 0xc24b, 0x51a3, 0xc76c, 
0xe819, 0xd192, 0x0624, 0xd699, 0x3585, 0xf40e, 0xa070, 0x106a +.word 0xc116, 0x19a4, 0x6c08, 0x1e37, 0x774c, 0x2748, 0xbcb5, 0x34b0, 0x0cb3, 0x391c, 0xaa4a, 0x4ed8, 0xca4f, 0x5b9c, 0x6ff3, 0x682e +.word 0x82ee, 0x748f, 0x636f, 0x78a5, 0x7814, 0x84c8, 0x0208, 0x8cc7, 0xfffa, 0x90be, 0x6ceb, 0xa450, 0xa3f7, 0xbef9, 0x78f2, 0xc671 + + +;########################################################### + +.global sha256_init +;uint32_t sha256_init_vector[]={ +; 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, +; 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 }; +; +;void sha256_init(sha256_ctx_t *state){ +; state->length=0; +; memcpy(state->h, sha256_init_vector, 8*4); +;} +; param1: (r23,r24) 16-bit pointer to sha256_ctx_t struct in ram +; modifys: Z(r30,r31), Func1, r22 +sha256_init: + movw r26, r24 ; (24,25) --> (26,27) load X with param1 + ldi r30, lo8((sha256_init_vector)) + ldi r31, hi8((sha256_init_vector)) + ldi r22, 32+8 +sha256_init_vloop: + lpm r23, Z+ + st X+, r23 + dec r22 + brne sha256_init_vloop + ret + +sha256_init_vector: +.word 0xE667, 0x6A09 +.word 0xAE85, 0xBB67 +.word 0xF372, 0x3C6E +.word 0xF53A, 0xA54F +.word 0x527F, 0x510E +.word 0x688C, 0x9B05 +.word 0xD9AB, 0x1F83 +.word 0xCD19, 0x5BE0 +.word 0x0000, 0x0000 +.word 0x0000, 0x0000 + +;########################################################### + +.global rotl32 +; === ROTL32 === +; function that rotates a 32 bit word to the left +; param1: the 32-bit word to rotate +; given in r25,r24,r23,r22 (r25 is most significant) +; param2: an 8-bit value telling how often to rotate +; given in r20 +; modifys: r21, r22 +rotl32: + cpi r20, 8 + brlo bitrotl + mov r21, r25 + mov r25, r24 + mov r24, r23 + mov r23, r22 + mov r22, r21 + subi r20, 8 + rjmp rotl32 +bitrotl: + clr r21 + clc +bitrotl_loop: + tst r20 + breq fixrotl +2: + rol r22 + rol r23 + rol r24 + rol r25 + rol r21 + dec r20 + brne 2b +fixrotl: + or r22, r21 + ret + + +;########################################################### + +.global rotr32 +; === 
ROTR32 === +; function that rotates a 32 bit word to the right +; param1: the 32-bit word to rotate +; given in r25,r24,r23,22 (r25 is most significant) +; param2: an 8-bit value telling how often to rotate +; given in r20 +; modifys: r21, r22 +rotr32: + cpi r20, 8 + brlo bitrotr + mov r21, r22 + mov r22, r23 + mov r23, r24 + mov r24, r25 + mov r25, r21 + subi r20, 8 + rjmp rotr32 +bitrotr: + clr r21 + clc +bitrotr_loop: + tst r20 + breq fixrotr +2: + ror r25 + ror r24 + ror r23 + ror r22 + ror r21 + dec r20 + brne 2b +fixrotr: + or r25, r21 + ret + + +;########################################################### + +.global change_endian32 +; === change_endian32 === +; function that changes the endianess of a 32-bit word +; param1: the 32-bit word +; given in r25,r24,r23,22 (r25 is most significant) +; modifys: r21, r22 +change_endian32: + movw r20, r22 ; (r22,r23) --> (r20,r21) + mov r22, r25 + mov r23, r24 + mov r24, r21 + mov r25, r20 + ret + diff --git a/dsa/sha256.h b/dsa/sha256.h new file mode 100644 index 0000000..24960a3 --- /dev/null +++ b/dsa/sha256.h @@ -0,0 +1,122 @@ +/* sha256.h */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ +/** + * \file sha256.h + * \author Daniel Otte + * \date 2006-05-16 + * \license GPLv3 or later + * + */ + +#ifndef SHA256_H_ +#define SHA256_H_ + +#define __LITTLE_ENDIAN__ + + +#include <stdint.h> + +/** \def SHA256_HASH_BITS + * defines the size of a SHA-256 hash value in bits + */ + +/** \def SHA256_HASH_BYTES + * defines the size of a SHA-256 hash value in bytes + */ + +/** \def SHA256_BLOCK_BITS + * defines the size of a SHA-256 input block in bits + */ + +/** \def SHA256_BLOCK_BYTES + * defines the size of a SHA-256 input block in bytes + */ + +#define SHA256_HASH_BITS 256 +#define SHA256_HASH_BYTES (SHA256_HASH_BITS/8) +#define SHA256_BLOCK_BITS 512 +#define SHA256_BLOCK_BYTES (SHA256_BLOCK_BITS/8) + +/** \typedef sha256_ctx_t + * \brief SHA-256 context type + * + * A variable of this type may hold the state of a SHA-256 hashing process + */ +typedef struct { + uint32_t h[8]; + uint64_t length; +} sha256_ctx_t; + +/** \typedef sha256_hash_t + * \brief SHA-256 hash value type + * + * A variable of this type may hold the hash value produced by the + * sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state) function. + */ +typedef uint8_t sha256_hash_t[SHA256_HASH_BYTES]; + +/** \fn void sha256_init(sha256_ctx_t *state) + * \brief initialise a SHA-256 context + * + * This function sets a ::sha256_ctx_t to the initial values for hashing. + * \param state pointer to the SHA-256 hashing context + */ +void sha256_init(sha256_ctx_t *state); + +/** \fn void sha256_nextBlock (sha256_ctx_t* state, const void* block) + * \brief update the context with a given block + * + * This function updates the SHA-256 hash context by processing the given block + * of fixed length.
+ * \param state pointer to the SHA-256 hash context + * \param block pointer to the block of fixed length (512 bit = 64 byte) + */ +void sha256_nextBlock (sha256_ctx_t* state, const void* block); + +/** \fn void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b) + * \brief finalize the context with the given block + * + * This function finalizes the SHA-256 hash context by processing the given block + * of variable length. + * \param state pointer to the SHA-256 hash context + * \param block pointer to the block of variable length (its length is given by length_b) + * \param length_b the length of the block in bits + */ +void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b); + +/** \fn void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state) + * \brief convert the hash state into the hash value + * This function reads the context and writes the hash value to the destination + * \param dest pointer to the location where the hash value should be written + * \param state pointer to the SHA-256 hash context + */ +void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state); + +/** \fn void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b) + * \brief simple SHA-256 hashing function for direct hashing + * + * This function automatically hashes a given message of arbitrary length with + * the SHA-256 hashing algorithm.
+ * \param dest pointer to the location where the hash value is going to be written to + * \param msg pointer to the message thats going to be hashed + * \param length_b length of the message in bits + */ +void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b); + +#endif /*SHA256_H_*/ diff --git a/hfal-performance.c b/hfal-performance.c index 9bfaa85..47fccf8 100644 --- a/hfal-performance.c +++ b/hfal-performance.c @@ -27,6 +27,7 @@ #include "hfal-performance.h" #include "hashfunction_descriptor.h" +#include "stack_measuring.h" #include "cli.h" #include "performance_test.h" #include @@ -34,6 +35,8 @@ #include #include +#define PATTERN_A 0xAA +#define PATTERN_B 0x55 static void printvalue(unsigned long v){ @@ -128,6 +131,87 @@ void hfal_performance(const hfdesc_t* hd){ } } +void hfal_stacksize(const hfdesc_t* hd){ /* prints the stack usage in bytes of init, nextBlock, lastBlock and ctx2hash of the hash function described by hd */ + hfdesc_t hf; /* RAM copy of the descriptor, filled by memcpy_P below */ + stack_measuring_ctx_t smctx; + uint16_t t1, t2; /* results of the PATTERN_A pass and of the PATTERN_B pass */ + + memcpy_P(&hf, hd, sizeof(hfdesc_t)); + if(hf.type!=HFDESC_TYPE_HASHFUNCTION) /* checked before the buffers below are sized from hf */ + return; + uint8_t ctx[hf.ctxsize_B]; + uint8_t data[(hf.blocksize_b+7)/8]; /* never initialised, only serves as input block for the measured calls */ + uint8_t digest[(hf.hashsize_b+7)/8]; + + cli_putstr_P(PSTR("\r\n\r\n === ")); + cli_putstr_P(hf.name); + cli_putstr_P(PSTR(" stack-usage === " + "\r\n type: hashfunction" + "\r\n hashsize (bits): ")); + printvalue(hf.hashsize_b); + + cli_putstr_P(PSTR("\r\n ctxsize (bytes): ")); + printvalue(hf.ctxsize_B); + + cli_putstr_P(PSTR("\r\n blocksize (bits): ")); + printvalue(hf.blocksize_b); + + cli(); /* every call is measured twice with a different pattern and the larger value is printed; presumably this avoids missing stack bytes that happen to equal the pattern - confirm in stack_measuring.S */ + stack_measure_init(&smctx, PATTERN_A); + hf.init(&ctx); + t1 = stack_measure_final(&smctx); + stack_measure_init(&smctx, PATTERN_B); + hf.init(&ctx); + t2 = stack_measure_final(&smctx); + sei(); /* note: enables interrupts even if they were disabled on entry */ + + t1 = (t1>t2)?t1:t2; + cli_putstr_P(PSTR("\r\n init (bytes): ")); + printvalue((unsigned long)t1); + + cli(); + stack_measure_init(&smctx, PATTERN_A); + hf.nextBlock(&ctx, data); + t1 = stack_measure_final(&smctx); + stack_measure_init(&smctx, PATTERN_B); + hf.nextBlock(&ctx, data); + t2 = stack_measure_final(&smctx); +
sei(); + + t1 = (t1>t2)?t1:t2; + cli_putstr_P(PSTR("\r\n nextBlock (bytes): ")); + printvalue((unsigned long)t1); + + cli(); + stack_measure_init(&smctx, PATTERN_A); + hf.lastBlock(&ctx, data, 0); + t1 = stack_measure_final(&smctx); + stack_measure_init(&smctx, PATTERN_B); + hf.lastBlock(&ctx, data, 0); + t2 = stack_measure_final(&smctx); + sei(); + + t1 = (t1>t2)?t1:t2; + cli_putstr_P(PSTR("\r\n lastBlock (bytes): ")); + printvalue((unsigned long)t1); + + cli(); + stack_measure_init(&smctx, PATTERN_A); + hf.ctx2hash(digest, &ctx); + t1 = stack_measure_final(&smctx); + stack_measure_init(&smctx, PATTERN_B); + hf.ctx2hash(digest, &ctx); + t2 = stack_measure_final(&smctx); + sei(); + + t1 = (t1>t2)?t1:t2; + cli_putstr_P(PSTR("\r\n ctx2hash (bytes): ")); + printvalue((unsigned long)t1); + + if(hf.free){ + hf.free(&ctx); + } +} void hfal_performance_multiple(const hfdesc_t** hd_list){ const hfdesc_t* hd; @@ -138,6 +222,7 @@ void hfal_performance_multiple(const hfdesc_t** hd_list){ return; } hfal_performance(hd); + hfal_stacksize(hd); hd_list = (void*)((uint8_t*)hd_list + 2); } } diff --git a/host/get_performance.rb b/host/get_performance.rb index 43096f2..b78ba99 100644 --- a/host/get_performance.rb +++ b/host/get_performance.rb @@ -79,27 +79,26 @@ end def readPerformanceVector(param) lb="" - buffer="" fname="" fout=0 begin lb = read_line() if lb.match(/End of performance figures/) return false - end - if m=lb.match(/=== (.*) performance ===/) - fout.close if fout!=0 - fname=$dir+m[1] - fname+="."+param if param != "" - fname+=".txt" - fout = File.open(fname, "w+") - printf("> %s \n", fname) - fout.write(lb) - else - if fout!=0 && lb!="" - fout.write(lb) - end - end + end + if m=lb.match(/=== (.*) performance ===/) + fout.close if fout!=0 + fname=$dir+m[1] + fname+="."+param if param != "" + fname+=".txt" + fout = File.open(fname, "w+") + printf("> %s \n", fname) + fout.write(lb) + else + if fout!=0 && lb!="" + fout.write(lb) + end + end end while true end diff --git
a/host/performance2wiki.rb b/host/performance2wiki.rb index a60cde1..cfdc3ce 100644 --- a/host/performance2wiki.rb +++ b/host/performance2wiki.rb @@ -56,10 +56,26 @@ def process_hashfunction(fin, name) lb = fin.readline() m = lb.match(/ctx2hash \(cycles\):[\s]*([\d]*)/) convtime = m[1].to_i() + begin + lb = fin.readline() + end until m = lb.match(/init \(bytes\):[\s]*([\d]*)/) + initstack = m[1].to_i() + lb = fin.readline() + m = lb.match(/nextBlock \(bytes\):[\s]*([\d]*)/) + nextblockstack = m[1].to_i() + lb = fin.readline() + m = lb.match(/lastBlock \(bytes\):[\s]*([\d]*)/) + lastblockstack = m[1].to_i() + lb = fin.readline() + m = lb.match(/ctx2hash \(bytes\):[\s]*([\d]*)/) + convstack = m[1].to_i() + s1 = (initstack>nextblockstack)?initstack:nextblockstack + s2 = (lastblockstack>convstack)?lastblockstack:convstack + stack = (s1>s2)?s1:s2 - printf("| %20s || %3s || %3s || || %4d || || %4d || %4d ||" + + printf("| %20s || %3s || %3s || || %4d || %4d || %4d || %4d ||" + " %6d || %6d || %7.2f || %6d || || || \n|-\n" , - name, $lang, $lang ,ctxsize, hashsize, blocksize, + name, $lang, $lang ,ctxsize, stack, hashsize, blocksize, inittime, nextblocktime, nextblocktime.to_f/(blocksize/8), lastblocktime+convtime) end diff --git a/mkfiles/001_hfal_std.mk b/mkfiles/001_hfal_std.mk index 0404803..eed080c 100644 --- a/mkfiles/001_hfal_std.mk +++ b/mkfiles/001_hfal_std.mk @@ -1,2 +1,3 @@ HFAL_STD = nessie_common.o nessie_hash_test.o performance_test.o \ - hfal-basic.o hfal-performance.o hfal-nessie.o hfal-test.o shavs.o + hfal-basic.o hfal-performance.o hfal-nessie.o hfal-test.o shavs.o \ + stack_measuring.o diff --git a/mkfiles/bigint.mk b/mkfiles/bigint.mk index de7100e..7e3a4b7 100644 --- a/mkfiles/bigint.mk +++ b/mkfiles/bigint.mk @@ -1,8 +1,8 @@ # Makefile for BigInt ALGO_NAME := BIGINT -# comment out the following line for removement of base64 from the build process -ENCODINGS += $(ALGO_NAME) +# comment out the following line for removement of BigInt from the build 
process +AUX += $(ALGO_NAME) $(ALGO_NAME)_DIR := bigint/ $(ALGO_NAME)_OBJ := bigint.o bigint_io.o bigint_add_u.o diff --git a/mkfiles/dsa.mk b/mkfiles/dsa.mk new file mode 100644 index 0000000..b3b64c1 --- /dev/null +++ b/mkfiles/dsa.mk @@ -0,0 +1,13 @@ +# Makefile for DSA +ALGO_NAME := DSA + +# comment out the following line for removement of DSA from the build process +SIGNATURE += $(ALGO_NAME) + +$(ALGO_NAME)_DIR := dsa/ +$(ALGO_NAME)_OBJ := bigint.o bigint_io.o bigint_add_u.o sha1-asm.o dsa_sign.o dsa_verify.o dsa_key_blob.o base64_enc.o +$(ALGO_NAME)_TEST_BIN := main-dsa-test.o $(CLI_STD) hfal_sha1.o $(HFAL_STD) \ + noekeon_asm.o noekeon_prng.o memxor.o + +$(ALGO_NAME)_PERFORMANCE_TEST := performance + diff --git a/mkfiles/shabal.mk b/mkfiles/shabal.mk index e2600d4..853b88b 100644 --- a/mkfiles/shabal.mk +++ b/mkfiles/shabal.mk @@ -1,7 +1,7 @@ -# Makefile for Skein +# Makefile for Shabal ALGO_NAME := SHABAL -# comment out the following line for removement of Skein from the build process +# comment out the following line for removement of Shabal from the build process HASHES += $(ALGO_NAME) $(ALGO_NAME)_DIR := shabal/ diff --git a/mkfiles/shabal_c.mk b/mkfiles/shabal_c.mk index 5e8082c..d55c033 100644 --- a/mkfiles/shabal_c.mk +++ b/mkfiles/shabal_c.mk @@ -1,7 +1,7 @@ -# Makefile for Skein +# Makefile for Shabal ALGO_NAME := SHABAL_C -# comment out the following line for removement of Skein from the build process +# comment out the following line for removement of Shabal from the build process HASHES += $(ALGO_NAME) $(ALGO_NAME)_DIR := shabal/ diff --git a/stack_measuring.S b/stack_measuring.S new file mode 100644 index 0000000..a70f32d --- /dev/null +++ b/stack_measuring.S @@ -0,0 +1,96 @@ +/* stack_measuring.S */ +/* + This file is part of the AVR-Crypto-Lib. 
/*
 * void stack_measure_init(stack_measuring_ctx_t* ctx, uint8_t pattern)
 *
 * Stores the current value of __brkval, the current stack pointer and the
 * pattern byte in *ctx (in this order) and then writes the pattern to every
 * address from the stack pointer down to __brkval+1.
 * Arguments as passed by avr-gcc: ctx in r25:r24, pattern in r22.
 *
 * NOTE(review): avr-libc appears to leave __brkval at 0 until the first
 * malloc(); in that case the fill would run down to address 1 - confirm
 * that a heap allocation always happened before this is called.
 */
.global stack_measure_init
stack_measure_init:
	movw r30, r24                  /* Z = ctx */
	lds r20, __brkval              /* r21:r20 = __brkval */
	lds r21, __brkval+1
	in r0, _SFR_IO_ADDR(SREG)      /* save SREG, then read SPL with */
	cli                            /* interrupts disabled */
	in r26, _SFR_IO_ADDR(SPL)
	out _SFR_IO_ADDR(SREG), r0     /* restore SREG */
	in r27, _SFR_IO_ADDR(SPH)      /* X = SP */
	st Z+, r20                     /* ctx->heap_top = __brkval */
	st Z+, r21
	st Z+, r26                     /* ctx->stack_bottom = SP */
	st Z+, r27
	st Z, r22                      /* ctx->pattern = pattern */
	movw r24, r26
	sub r24, r20
	sbc r25, r21
	sbiw r24, 1                    /* r25:r24 = SP - __brkval - 1 */
	st X, r22                      /* pattern at SP itself ... */
1:	st -X, r22                     /* ... and at every address below it */
	sbiw r24, 1
	brne 1b                        /* until the counter reaches zero */
	ret

/*
 * uint16_t stack_measure_final(const stack_measuring_ctx_t* ctx)
 *
 * Scans upwards, starting one byte above the larger of ctx->heap_top and
 * the current __brkval, until a byte differs from ctx->pattern.
 * Returns (in r25:r24) the number of bytes from that first differing byte
 * up to and including ctx->stack_bottom, or 0 if no byte differs.
 * Argument as passed by avr-gcc: ctx in r25:r24.
 */
.global stack_measure_final
stack_measure_final:
	movw r30, r24                  /* Z = ctx */
	lds r20, __brkval              /* r21:r20 = current __brkval */
	lds r21, __brkval+1
	ld r26, Z+                     /* X = ctx->heap_top */
	ld r27, Z+
	cp r20, r26
	cpc r21, r27
	brlo 1f                        /* current __brkval is lower: keep X */
	movw r26, r20                  /* otherwise start from current __brkval */
1:
	adiw r26, 1                    /* first address that was filled */
	ld r24, Z+                     /* r25:r24 = ctx->stack_bottom */
	ld r25, Z+
	ld r22, Z                      /* r22 = ctx->pattern */
	sub r24, r26
	sbc r25, r27
	adiw r24, 2                    /* bytes to check, plus one */
1:
	sbiw r24, 1
	breq 2f                        /* everything checked: return 0 */
	ld r20, X+
	cpse r20, r22                  /* byte equals pattern: skip the ret */
2:	ret                            /* result is the remaining count */
	rjmp 1b
/**
 * \brief state shared between stack_measure_init() and stack_measure_final()
 */
typedef struct {
	uint16_t heap_top;     /**< value of __brkval recorded by stack_measure_init() */
	uint16_t stack_bottom; /**< stack pointer recorded by stack_measure_init() */
	uint8_t pattern;       /**< byte value the memory in between was filled with */
} stack_measuring_ctx_t;

/**
 * \brief start a stack usage measurement
 *
 * Records the current heap top and stack pointer in the context and fills
 * the memory between them with the given pattern.
 * \param ctx     pointer to the context to initialise
 * \param pattern byte value used for filling
 */
void stack_measure_init(stack_measuring_ctx_t* ctx, uint8_t pattern);

/**
 * \brief finish a stack usage measurement
 *
 * Scans the filled area from the heap side for the first byte which no
 * longer holds the pattern.
 * \param ctx pointer to the context set up by stack_measure_init()
 * \return number of bytes from the first modified byte up to the recorded
 *         stack pointer; 0 if no byte was modified
 */
uint16_t stack_measure_final(const stack_measuring_ctx_t* ctx);
/* for testing only
 void stack_measure_usestack(uint16_t size, uint8_t value); */
*/ /* - * base64 test-suit + * bigint test-suit * */ diff --git a/test_src/main-bmw-test.c b/test_src/main-bmw-test.c index 1aadbf1..2da30a0 100644 --- a/test_src/main-bmw-test.c +++ b/test_src/main-bmw-test.c @@ -157,19 +157,19 @@ const char shavs_test1_str[] PROGMEM = "shavs_test1"; const char shavs_test3_str[] PROGMEM = "shavs_test3"; cmdlist_entry_t cmdlist[] PROGMEM = { - { nessie_str, NULL, testrun_nessie_bmw}, - { test_str, NULL, testrun_stdtest_bmw}, - { testshort_str, NULL, testshort}, - { testlshort_str, NULL, testlshort}, - { test506_str, NULL, test506}, - { test507_str, NULL, test507}, - { performance_str, NULL, performance_bmw}, - { shavs_list_str, NULL, shavs_listalgos}, - { shavs_set_str, (void*)1, (void_fpt)shavs_setalgo}, - { shavs_test1_str, NULL, shavs_test1}, - { shavs_test3_str, NULL, shavs_test3}, - { echo_str, (void*)1, (void_fpt)echo_ctrl}, - { NULL, NULL, NULL} + { nessie_str, NULL, testrun_nessie_bmw }, + { test_str, NULL, testrun_stdtest_bmw }, + { testshort_str, NULL, testshort }, + { testlshort_str, NULL, testlshort }, + { test506_str, NULL, test506 }, + { test507_str, NULL, test507 }, + { performance_str, NULL, performance_bmw }, + { shavs_list_str, NULL, shavs_listalgos }, + { shavs_set_str, (void*)1, (void_fpt)shavs_setalgo }, + { shavs_test1_str, NULL, shavs_test1 }, + { shavs_test3_str, NULL, shavs_test3 }, + { echo_str, (void*)1, (void_fpt)echo_ctrl }, + { NULL, NULL, NULL } }; int main (void){ diff --git a/test_src/main-dsa-test.c b/test_src/main-dsa-test.c new file mode 100644 index 0000000..acbffa9 --- /dev/null +++ b/test_src/main-dsa-test.c @@ -0,0 +1,191 @@ +/* main-dsa-test.c */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2010 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ +/* + * DSA test-suit + * +*/ + +#include "config.h" + +#include "uart_i.h" +#include "debug.h" + +#include "noekeon.h" +#include "noekeon_prng.h" +#include "bigint.h" +#include "bigint_io.h" +#include "dsa.h" +#include "dsa_key_blob.h" + +#include "cli.h" +#include "performance_test.h" +#include "hfal_sha1.h" +#include "base64_enc.h" +#include "base64_dec.h" +#include +#include +#include + +char* algo_name = "DSA"; + +/***************************************************************************** + * additional validation-functions * + *****************************************************************************/ + +dsa_ctx_t dsa_ctx; + +void load_fix_dsa(void){ + load_dsa_key_blob(&dsa_ctx); +} + +void dsa_print_item(bigint_t* a, PGM_P pstr){ + uint8_t *p; + cli_putstr_P(PSTR("\r\n")); + cli_putstr_P(pstr); + cli_putstr_P(PSTR(": ")); + uint16_t i; + p = a->wordv + a->length_B -1; + for(i=0; ilength_B-1; ++i){ + if(i%16==0){ + cli_putstr_P(PSTR("\r\n ")); + } + cli_hexdump(p, 1); + cli_putc(':'); + --p; + } + if(i%16==0){ + cli_putstr_P(PSTR("\r\n ")); + } + cli_hexdump(p, 1); +} + +void dsa_print_signature_b64(dsa_signature_t* s){ + uint16_t size_r, size_s, size_o, i,j; + size_r = s->r.length_B +2; + size_s = s->s.length_B +2; + size_o = size_r + size_s +2; + uint8_t bin_b[size_o]; + bin_b[0] = 0x30; + bin_b[1] = size_o -2; + bin_b[2] = 0x02; + bin_b[3] = size_r-2; + j=4; + for(i=s->r.length_B; i>0; --i){ + bin_b[j++] = s->r.wordv[i-1]; + } + bin_b[j++] = 0x02; + bin_b[j++] = size_s -2; + for(i=s->s.length_B; i>0; --i){ + bin_b[j++] = s->s.wordv[i-1]; + } + char b64_b[size_o*4/3+5]; + base64enc(b64_b, bin_b, 
size_o); + cli_putstr(b64_b); +} + +void dsa_print_ctx(dsa_ctx_t* ctx){ + dsa_print_item(&(ctx->priv), PSTR("private")); + dsa_print_item(&(ctx->pub), PSTR("public")); + dsa_print_item(&(ctx->domain.p), PSTR("P")); + dsa_print_item(&(ctx->domain.q), PSTR("Q")); + dsa_print_item(&(ctx->domain.g), PSTR("G")); +} + +void dsa_print_signature(const dsa_signature_t* sig){ + cli_putstr_P(PSTR("\r\nDSA-Signature:\r\n r:")); + bigint_print_hex(&(sig->r)); + cli_putstr_P(PSTR("\r\n s:")); + bigint_print_hex(&(sig->s)); +} + +void quick_test(void){ + dsa_signature_t dsa_sig; + uint8_t i, t=0, message[] = {0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef}; + load_fix_dsa(); + uint8_t dsa_sig_s_b[dsa_ctx.domain.q.length_B], + dsa_sig_r_b[dsa_ctx.domain.q.length_B]; + dsa_print_ctx(&dsa_ctx); + dsa_sig.r.wordv = dsa_sig_r_b; + dsa_sig.s.wordv = dsa_sig_s_b; + cli_putstr_P(PSTR("\r\n\r\n=== DSA QUICK TEST ===")); + for(i=0; i<8; ++i){ + cli_putstr_P(PSTR("\r\n")); + cli_putc('1'+i); + cli_putstr_P(PSTR(": message: ")); + if (i){ + cli_hexdump(message, i); + }else{ + cli_putstr_P(PSTR("")); + } + cli_putstr_P(PSTR("\r\n computing signature ... ")); + dsa_sign_message(&dsa_sig, message, i*8, &sha1_desc, &dsa_ctx, random8); + dsa_print_signature(&dsa_sig); + cli_putstr_P(PSTR("\r\n base64:\r\n--- SIGNATURE ---\r\n ")); + dsa_print_signature_b64(&dsa_sig); + cli_putstr_P(PSTR("\r\n verifying signature ... 
")); + t = dsa_verify_message(&dsa_sig, message, i*8, &sha1_desc, &dsa_ctx); + cli_putstr_P(PSTR("\r\n verification: ")); + if(t==DSA_SIGNATURE_OK){ + cli_putstr_P(PSTR("[PASS]")); + }else{ + cli_putstr_P(PSTR("[FAIL]")); + } + } +} + +void reset_prng(void){ + uint8_t buf[16]; + memset(buf, 0, 16); + random_seed(buf); + cli_putstr_P(PSTR("\r\nPRNG reset")); +} + +void testrun_performance_bigint(void){ + +} +/***************************************************************************** + * main * + *****************************************************************************/ + +const char echo_test_str[] PROGMEM = "echo-test"; +const char reset_prng_str[] PROGMEM = "reset-prng"; +const char quick_test_str[] PROGMEM = "quick-test"; +const char performance_str[] PROGMEM = "performance"; +const char echo_str[] PROGMEM = "echo"; + +cmdlist_entry_t cmdlist[] PROGMEM = { + { reset_prng_str, NULL, reset_prng }, + { quick_test_str, NULL, quick_test }, + { performance_str, NULL, testrun_performance_bigint }, + { echo_str, (void*)1, (void_fpt)echo_ctrl }, + { NULL, NULL, NULL } +}; + +int main (void){ + DEBUG_INIT(); + + cli_rx = (cli_rx_fpt)uart0_getc; + cli_tx = (cli_tx_fpt)uart0_putc; + for(;;){ + cli_putstr_P(PSTR("\r\n\r\nCrypto-VS (")); + cli_putstr(algo_name); + cli_putstr_P(PSTR(")\r\nloaded and running\r\n")); + cmd_interface(cmdlist); + } +}