From c9c11514d91b8c19f77d65ac051b998bd99048b0 Mon Sep 17 00:00:00 2001
From: bg <bg@b1d182e4-1ff8-0310-901f-bddb46175740>
Date: Thu, 18 Mar 2010 18:53:47 +0000
Subject: [PATCH] stack usage measurement

---
 Makefile                    |   12 +-
 base64/base64_dec.c         |    2 +-
 base64/base64_dec.h         |    2 +-
 base64/base64_enc.c         |    2 +-
 base64/base64_enc.h         |    2 +-
 bigint/bigint.c             |  106 ++--
 bigint/bigint.h             |    4 +-
 dsa/base64_dec.c            |  246 +++++++++
 dsa/base64_dec.h            |   29 +
 dsa/base64_enc.c            |  117 ++++
 dsa/base64_enc.h            |   28 +
 dsa/bigint.c                |  808 +++++++++++++++++++++++++++
 dsa/bigint.h                |   68 +++
 dsa/bigint_add_u.S          |  137 +++++
 dsa/bigint_io.c             |  135 +++++
 dsa/bigint_io.h             |   28 +
 dsa/dsa.h                   |   59 ++
 dsa/dsa_key_blob.c          |  304 ++++++++++
 dsa/dsa_key_blob.h          |   25 +
 dsa/dsa_sign.c              |   73 +++
 dsa/dsa_verify.c            |   75 +++
 dsa/memxor.S                |   66 +++
 dsa/memxor.h                |    7 +
 dsa/noekeon.h               |   85 +++
 dsa/noekeon_asm.S           |  641 +++++++++++++++++++++
 dsa/noekeon_prng.c          |   75 +++
 dsa/noekeon_prng.h          |   40 ++
 dsa/sha1-asm.S              |  883 +++++++++++++++++++++++++++++
 dsa/sha1.h                  |  117 ++++
 dsa/sha256-asm.S            | 1040 +++++++++++++++++++++++++++++++++++
 dsa/sha256.h                |  122 ++++
 hfal-performance.c          |   85 +++
 host/get_performance.rb     |   29 +-
 host/performance2wiki.rb    |   20 +-
 mkfiles/001_hfal_std.mk     |    3 +-
 mkfiles/bigint.mk           |    4 +-
 mkfiles/dsa.mk              |   13 +
 mkfiles/shabal.mk           |    4 +-
 mkfiles/shabal_c.mk         |    4 +-
 stack_measuring.S           |   96 ++++
 stack_measuring.h           |   36 ++
 test_src/main-bigint-test.c |    6 +-
 test_src/main-bmw-test.c    |   26 +-
 test_src/main-dsa-test.c    |  191 +++++++
 44 files changed, 5765 insertions(+), 90 deletions(-)
 create mode 100644 dsa/base64_dec.c
 create mode 100644 dsa/base64_dec.h
 create mode 100644 dsa/base64_enc.c
 create mode 100644 dsa/base64_enc.h
 create mode 100644 dsa/bigint.c
 create mode 100644 dsa/bigint.h
 create mode 100644 dsa/bigint_add_u.S
 create mode 100644 dsa/bigint_io.c
 create mode 100644 dsa/bigint_io.h
 create mode 100644 dsa/dsa.h
 create mode 100644 dsa/dsa_key_blob.c
 create mode 100644 dsa/dsa_key_blob.h
 create mode 100644 dsa/dsa_sign.c
 create mode 100644 dsa/dsa_verify.c
 create mode 100644 dsa/memxor.S
 create mode 100644 dsa/memxor.h
 create mode 100644 dsa/noekeon.h
 create mode 100644 dsa/noekeon_asm.S
 create mode 100644 dsa/noekeon_prng.c
 create mode 100644 dsa/noekeon_prng.h
 create mode 100644 dsa/sha1-asm.S
 create mode 100644 dsa/sha1.h
 create mode 100644 dsa/sha256-asm.S
 create mode 100644 dsa/sha256.h
 create mode 100644 mkfiles/dsa.mk
 create mode 100644 stack_measuring.S
 create mode 100644 stack_measuring.h
 create mode 100644 test_src/main-dsa-test.c

diff --git a/Makefile b/Makefile
index 561f84e..97c56d5 100644
--- a/Makefile
+++ b/Makefile
@@ -8,6 +8,8 @@ HASHES         :=
 MACS           :=
 PRNGS          :=
 ENCODINGS      :=
+SIGNATURE      :=
+PK_CIPHERS     :=
 AUX            :=
 
 # we use the gnu make standard library
@@ -20,7 +22,7 @@ include mkfiles/*.mk
 
 #-------------------------------------------------------------------------------
 ALGORITHMS = $(BLOCK_CIPHERS) $(STREAM_CIPHERS) $(HASHES) $(PRNGS) $(MACS) \
-			 $(ENCODINGS) $(AUX)
+			 $(ENCODINGS) $(SIGNATURE) $(PK_CIPHERS) $(AUX)
 ALGORITHMS_OBJ = $(patsubst %,%_OBJ, $(ALGORITHMS))
 ALGORITHMS_TEST_BIN = $(patsubst %,%_TEST_BIN, $(ALGORITHMS))
 
@@ -250,8 +252,14 @@ info:
 	@echo "    $(MACS)"
 	@echo "  PRNG functions:"
 	@echo "    $(PRNGS)"
+	@echo "  signature functions:"
+	@echo "    $(SIGNATURE)"
+	@echo "  public key ciphers:"
+	@echo "    $(PK_CIPHERS)"
 	@echo "  encodings:"
 	@echo "    $(ENCODINGS)"
+	@echo "  auxiliary functions:"
+	@echo "    $(AUX)"
 	@echo " targets:"
 	@echo "  all           - all algorithm cores"
 	@echo "  cores         - all algorithm cores"
@@ -264,6 +272,8 @@ info:
 	@echo "  macs          - all MAC cores"
 	@echo "  prngs         - all PRNG cores"
 	@echo "  all_testrun   - testrun all algorithms"
+	@echo "  hash_size     - measure size of all hash functions"
+	@echo "  hash_speed    - measure performance of all hash functions"
 	@echo "  docu          - build doxygen documentation"
 	@echo "  clean         - remove a lot of builded files"
 	@echo "  depclean      - also remove dependency files"
diff --git a/base64/base64_dec.c b/base64/base64_dec.c
index f057f54..322cec0 100644
--- a/base64/base64_dec.c
+++ b/base64/base64_dec.c
@@ -187,7 +187,7 @@ int base64_binlength(char* str, uint8_t strict){
 
 */
 
-int base64dec(void* dest, char* b64str, uint8_t strict){
+int base64dec(void* dest, const char* b64str, uint8_t strict){
 	uint8_t buffer[4];
 	uint8_t idx=0;
 	uint8_t term=0;
diff --git a/base64/base64_dec.h b/base64/base64_dec.h
index 39beff8..1c9f1d9 100644
--- a/base64/base64_dec.h
+++ b/base64/base64_dec.h
@@ -24,6 +24,6 @@
 #include <stdint.h>
 
 int base64_binlength(char* str, uint8_t strict);
-int base64dec(void* dest, char* b64str, uint8_t strict);
+int base64dec(void* dest, const char* b64str, uint8_t strict);
 
 #endif /*BASE64_DEC_H_*/
diff --git a/base64/base64_enc.c b/base64/base64_enc.c
index 400f25c..c588998 100644
--- a/base64/base64_enc.c
+++ b/base64/base64_enc.c
@@ -75,7 +75,7 @@ char bit6toAscii(uint8_t a){
 
 #endif
 
-void base64enc(char* dest, void* src, uint16_t length){
+void base64enc(char* dest,const void* src, uint16_t length){
 	uint16_t i,j;
 	uint8_t a[4];
 	for(i=0; i<length/3; ++i){
diff --git a/base64/base64_enc.h b/base64/base64_enc.h
index 9065132..89a5f86 100644
--- a/base64/base64_enc.h
+++ b/base64/base64_enc.h
@@ -23,6 +23,6 @@
 
 #include <stdint.h>
 
-void base64enc(char* dest, void* src, uint16_t length);
+void base64enc(char* dest, const void* src, uint16_t length);
 
 #endif /*BASE64_ENC_H_*/
diff --git a/bigint/bigint.c b/bigint/bigint.c
index f57d285..7004e0d 100644
--- a/bigint/bigint.c
+++ b/bigint/bigint.c
@@ -26,9 +26,16 @@
  */
  
 
+#define STRING2(x) #x
+#define STRING(x) STRING2(x)
+#define STR_LINE STRING(__LINE__)
+
 #include "bigint.h"
 #include <string.h>
-
+/*
+#include "cli.h"
+#include "bigint_io.h"
+*/
 #ifndef MAX
  #define MAX(a,b) (((a)>(b))?(a):(b))
 #endif
@@ -388,6 +395,10 @@ void bigint_set_zero(bigint_t* a){
 /* using the Karatsuba-Algorithm */
 /* x*y = (xh*yh)*b**2n + ((xh+xl)*(yh+yl) - xh*yh - xl*yl)*b**n + yh*yl */
 void bigint_mul_u(bigint_t* dest, const bigint_t* a, const bigint_t* b){
+	if(a->length_B==0 || b->length_B==0){
+		bigint_set_zero(dest);
+		return;
+	}
 	if(dest==a || dest==b){
 		bigint_t d;
 		uint8_t d_b[a->length_B+b->length_B];
@@ -396,10 +407,6 @@ void bigint_mul_u(bigint_t* dest, const bigint_t* a, const bigint_t* b){
 		bigint_copy(dest, &d);
 		return;
 	}
-	if(a->length_B==0 || b->length_B==0){
-		bigint_set_zero(dest);
-		return;
-	}
 	if(a->length_B==1 || b->length_B==1){
 		if(a->length_B!=1){
 			XCHG_PTR(a,b);
@@ -575,7 +582,6 @@ void bigint_sub_u_bitscale(bigint_t* a, const bigint_t* b, uint16_t bitscale){
 	}
 	while(borrow){
 		if(i+1 > a->length_B){
-			cli_hexdump_rev(&bitscale, 2);
 			bigint_set_zero(a);
 			return;
 		}
@@ -591,19 +597,18 @@ void bigint_sub_u_bitscale(bigint_t* a, const bigint_t* b, uint16_t bitscale){
 /******************************************************************************/
 
 void bigint_reduce(bigint_t* a, const bigint_t* r){
+//	bigint_adjust(r);
 	uint8_t rfbs = GET_FBS(r);
 
-	if(r->length_B==0){
+	if(r->length_B==0 || a->length_B==0){
 		return;
 	}
 	while(a->length_B > r->length_B){
 		bigint_sub_u_bitscale(a, r, (a->length_B-r->length_B)*8+GET_FBS(a)-rfbs-1);
 	}
-
 	while((GET_FBS(a) > rfbs+1) && (a->length_B == r->length_B)){
 		bigint_sub_u_bitscale(a, r, GET_FBS(a)-rfbs-1);
 	}
-
 	while(bigint_cmp_u(a,r)>=0){
 		bigint_sub_u(a,a,r);
 	}
@@ -615,45 +620,46 @@ void bigint_reduce(bigint_t* a, const bigint_t* r){
 /* calculate dest = a**exp % r */
 /* using square&multiply */
 void bigint_expmod_u(bigint_t* dest, const bigint_t* a, const bigint_t* exp, const bigint_t* r){
-	bigint_t tmp, tmp2, x;
-	uint8_t x_b[MAX(r->length_B, a->length_B)], tmp_b[r->length_B*2], tmp2_b[r->length_B*2];
-	int16_t i;
-	uint8_t j;
-	x.wordv = x_b;
-	tmp.wordv = tmp_b;
-	tmp2.wordv = tmp2_b;
-	bigint_copy(&x, a);
-	bigint_reduce(&x, r);
-	bigint_copy(&tmp, &x);
-	if(a->length_B==0 || exp->length_B==0 || r->length_B==0){
+	if(a->length_B==0 || r->length_B==0){
 		return;
 	}
-	i=exp->length_B-1;
-	if(exp->wordv[i]!=1){
-		for(j=1<<(GET_FBS(exp)-1); j>0; j>>=1){
-			bigint_square(&tmp2, &tmp);
-			bigint_reduce(&tmp2, r);
-			if(exp->wordv[i]&j){
-				bigint_mul_u(&tmp, &tmp2, &x);
-				bigint_reduce(&tmp, r);
-			}else{
-				bigint_copy(&tmp, &tmp2);
+
+	bigint_t res, base;
+	uint8_t base_b[MAX(a->length_B,r->length_B*2)], res_b[r->length_B*2];
+	uint16_t i;
+	uint8_t j, t;
+	res.wordv = res_b;
+	base.wordv = base_b;
+	bigint_copy(&base, a);
+	bigint_reduce(&base, r);
+	res.wordv[0]=1;
+	res.length_B=1;
+	res.info = 0;
+	bigint_adjust(&res);
+	for(i=0; i+1<exp->length_B; ++i){
+		t=exp->wordv[i];
+		for(j=0; j<8; ++j){
+			if(t&1){
+				bigint_mul_u(&res, &res, &base);
+				bigint_reduce(&res, r);
 			}
+			bigint_square(&base, &base);
+			bigint_reduce(&base, r);
+			t>>=1;
 		}
 	}
-	for(--i; i>=0; --i){
-		for(j=0x80; j>0; j>>=1){
-			bigint_square(&tmp2, &tmp);
-			bigint_reduce(&tmp2, r);
-			if(exp->wordv[i]&j){
-				bigint_mul_u(&tmp, &tmp2, &x);
-				bigint_reduce(&tmp, r);
-			}else{
-				bigint_copy(&tmp, &tmp2);
-			}
+	t=exp->wordv[i];
+	while(t){
+		if(t&1){
+			bigint_mul_u(&res, &res, &base);
+			bigint_reduce(&res, r);
 		}
+		bigint_square(&base, &base);
+		bigint_reduce(&base, r);
+		t>>=1;
 	}
-	bigint_copy(dest, &tmp);
+	SET_POS(&res);
+	bigint_copy(dest, &res);
 }
 
 /******************************************************************************/
@@ -755,14 +761,28 @@ void bigint_gcdext(bigint_t* gcd, bigint_t* a, bigint_t* b, const bigint_t* x, c
 
 /******************************************************************************/
 
-void bigint_inverse(bigint_t* dest, bigint_t* a, bigint_t* m){
+void bigint_inverse(bigint_t* dest, const bigint_t* a, const bigint_t* m){
 	bigint_gcdext(NULL, dest, NULL, a, m);
 	while(dest->info&BIGINT_NEG_MASK){
 		bigint_add_s(dest, dest, m);
 	}
 }
 
-
+/******************************************************************************/
+
+void bigint_changeendianess(bigint_t* a){
+	uint8_t t, *p, *q;
+	p = a->wordv;
+	q = p+a->length_B-1;
+	while(p<q){
+		t = *p;
+		*p = *q;
+		*q = t;
+		++p; --q;
+	}
+}
+
+/******************************************************************************/
 
 
 
diff --git a/bigint/bigint.h b/bigint/bigint.h
index 7a7702f..642b4e0 100644
--- a/bigint/bigint.h
+++ b/bigint/bigint.h
@@ -61,8 +61,8 @@ void   bigint_sub_u_bitscale(bigint_t* a, const bigint_t* b, uint16_t bitscale);
 void   bigint_reduce(bigint_t* a, const bigint_t* r);
 void   bigint_expmod_u(bigint_t* dest, const bigint_t* a, const bigint_t* exp, const bigint_t* r);
 void   bigint_gcdext(bigint_t* gcd, bigint_t* a, bigint_t* b, const bigint_t* x, const bigint_t* y);
-void   bigint_inverse(bigint_t* dest, bigint_t* a, bigint_t* m);
-
+void   bigint_inverse(bigint_t* dest, const bigint_t* a, const bigint_t* m);
+void   bigint_changeendianess(bigint_t* a);
 /******************************************************************************/
 
 #endif /*BIGINT_H_*/
diff --git a/dsa/base64_dec.c b/dsa/base64_dec.c
new file mode 100644
index 0000000..322cec0
--- /dev/null
+++ b/dsa/base64_dec.c
@@ -0,0 +1,246 @@
+/* base64_dec.c */
+/*
+ *   This file is part of the AVR-Crypto-Lib.
+ *   Copyright (C) 2006, 2007, 2008  Daniel Otte (daniel.otte@rub.de)
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+/**
+ * base64 decoder (RFC3548)
+ * Author: Daniel Otte
+ * License: GPLv3
+ * 
+ * 
+ */
+
+#include <stdint.h>
+#include "base64_dec.h"
+
+#include "test_src/cli.h"
+
+/*
+ #define USE_GCC_EXTENSION
+*/
+#if 1
+
+#ifdef USE_GCC_EXTENSION
+
+static
+int ascii2bit6(char a){
+	switch(a){
+		case 'A'...'Z':
+			return a-'A';
+		case 'a'...'z':
+			return a-'a'+26;
+		case '0'...'9':
+			return a-'0'+52;
+		case '+':
+		case '-':
+			return 62;
+		case '/':
+		case '_':
+			return 63;
+		default:
+			return -1;
+	}
+}
+
+#else
+
+static
+uint8_t ascii2bit6(char a){
+	int r;
+	switch(a>>4){
+		case 0x5:
+		case 0x4: 
+			r=a-'A';
+			if(r<0 || r>25){
+				return -1;
+			} else {
+				return r;
+			}
+		case 0x7:
+		case 0x6: 
+			r=a-'a';
+			if(r<0 || r>25){
+				return -1;
+			} else {
+				return r+26;
+			}
+			break;
+		case 0x3:
+			if(a>'9')
+				return -1;
+			return a-'0'+52;
+		default:
+			break;	
+	}
+	switch (a){
+		case '+':
+		case '-':
+			return 62;
+		case '/':
+		case '_':
+			return 63;
+		default:
+			return 0xff;
+	}
+}
+
+#endif
+
+#else
+
+static 
+uint8_t ascii2bit6(uint8_t a){
+	if(a>='A' && a<='Z'){
+		return a-'A';
+	} else {
+		if(a>='a' && a<= 'z'){
+			return a-'a'+26;
+		} else {
+			if(a>='0' && a<='9'){
+				return a-'0'+52;
+			} else {
+				if(a=='+' || a=='-'){
+					return 62;
+				} else {
+					if(a=='/' || a=='_'){
+						return 63;
+					} else {
+						return 0xff;
+					}
+				}
+			}
+		}
+	}
+}
+
+#endif
+
+/**
+ * Compute the decoded (binary) length of a NUL-terminated base64 string.
+ * CR and LF are always skipped; scanning stops after the second '='.
+ * \param str    NUL-terminated base64 text
+ * \param strict if non-zero, any character outside the alphabet is an error;
+ *               otherwise such characters are skipped and not counted
+ * \return decoded length in bytes, or -1 if the string is malformed
+ */
+int base64_binlength(char* str, uint8_t strict){
+	int l=0;          /* number of valid base64 characters seen */
+	uint8_t term=0;   /* number of '=' padding characters seen */
+	for(;;){
+		if(*str=='\0')
+			break;
+		if(*str=='\n' || *str=='\r'){
+			str++;
+			continue;
+		}
+		if(*str=='='){
+			term++;
+			str++;
+			if(term==2){
+				break;
+			}
+			continue;
+		}
+		if(term)
+			return -1; /* data after padding */
+		/* ascii2bit6() may return uint8_t, where its "-1" becomes 0xFF and
+		 * never compares equal to the int -1; compare on the byte value */
+		if((uint8_t)ascii2bit6(*str)==0xFF){
+			if(strict)
+				return -1;
+		} else {
+			l++;
+		}
+		str++;
+	}
+	/* data plus padding characters must add up to complete groups of four;
+	 * this accepts l%4==0 / 3 / 2 for zero / one / two '=' respectively */
+	if((l+term)%4!=0)
+		return -1;
+	/* every group of four yields three bytes, minus one byte per '=' */
+	return (l+term)/4*3-term;
+}
+
+/*
+  |543210543210543210543210|
+  |765432107654321076543210|
+
+        .      .      .     .
+  |54321054|32105432|10543210|
+  |76543210|76543210|76543210|
+
+*/
+
+/**
+ * Decode a NUL-terminated base64 string (optionally padded with '=') to dest.
+ * CR and LF are always skipped; other characters outside the alphabet are
+ * skipped too unless strict is non-zero, in which case they are an error.
+ * Three bytes are written to dest per complete group of four characters,
+ * plus two or one byte(s) for a tail padded with '=' or '=='.
+ * \return 0 on success, -1 on malformed input
+ */
+int base64dec(void* dest, const char* b64str, uint8_t strict){
+	uint8_t buffer[4]; /* 6-bit values of the group being collected */
+	uint8_t idx=0;     /* number of entries of buffer filled for this group */
+	uint8_t term=0;    /* number of '=' seen so far */
+	for(;;){
+		buffer[idx]= ascii2bit6(*b64str);
+		if(buffer[idx]==0xFF){
+			if(*b64str=='='){
+				term++;
+				b64str++;
+				if(term==2)
+					goto finalize; /* definitely the end */
+			}else if(*b64str == '\0'){
+				goto finalize; /* definitely the end */
+			}else if(*b64str == '\r' || *b64str == '\n' || !(strict)){
+				b64str++; /* characters that we simply ignore */
+			}else{
+				return -1;
+			}
+		}else{
+			if(term)
+				return -1; /* data character after a '=' */
+			idx++;
+			b64str++;
+		}
+		if(idx==4){
+			((uint8_t*)dest)[0] = buffer[0]<<2 | buffer[1]>>4;
+			((uint8_t*)dest)[1] = buffer[1]<<4 | buffer[2]>>2;
+			((uint8_t*)dest)[2] = buffer[2]<<6 | buffer[3];
+			dest = (uint8_t*)dest +3;
+			idx=0;
+		}
+	}
+  finalize:
+	/* the final touch: flush a partial group only if the padding matches the
+	 * characters collected, so no entry unfilled for this group is decoded */
+	if(idx==0)
+		return 0;
+	if(term==1 && idx==3){
+		((uint8_t*)dest)[0] = buffer[0]<<2 | buffer[1]>>4;
+		((uint8_t*)dest)[1] = buffer[1]<<4 | buffer[2]>>2;
+		return 0;
+	}
+	if(term==2 && idx==2){
+		((uint8_t*)dest)[0] = buffer[0]<<2 | buffer[1]>>4;
+		return 0;
+	}
+	return -1;
+}
diff --git a/dsa/base64_dec.h b/dsa/base64_dec.h
new file mode 100644
index 0000000..1c9f1d9
--- /dev/null
+++ b/dsa/base64_dec.h
@@ -0,0 +1,29 @@
+/* base64_dec.h */
+/*
+ *   This file is part of the AVR-Crypto-Lib.
+ *   Copyright (C) 2006, 2007, 2008  Daniel Otte (daniel.otte@rub.de)
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#ifndef BASE64_DEC_H_
+#define BASE64_DEC_H_
+
+#include <stdint.h>
+
+int base64_binlength(char* str, uint8_t strict);
+int base64dec(void* dest, const char* b64str, uint8_t strict);
+
+#endif /*BASE64_DEC_H_*/
diff --git a/dsa/base64_enc.c b/dsa/base64_enc.c
new file mode 100644
index 0000000..c588998
--- /dev/null
+++ b/dsa/base64_enc.c
@@ -0,0 +1,117 @@
+/* base64_enc.c */
+/*
+ *   This file is part of the AVR-Crypto-Lib.
+ *   Copyright (C) 2006, 2007, 2008  Daniel Otte (daniel.otte@rub.de)
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+/**
+ * base64 encoder (RFC3548)
+ * Author: Daniel Otte
+ * License: GPLv3
+ * 
+ * 
+ */
+
+#include <stdint.h>
+#include "base64_enc.h"
+
+#if 1
+#include <avr/pgmspace.h>
+
+char base64_alphabet[64] PROGMEM = {
+	'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 
+	'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 
+	'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 
+	'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 
+	'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 
+	'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 
+	'w', 'x', 'y', 'z', '0', '1', '2', '3', 
+	'4', '5', '6', '7', '8', '9', '+', '/' }; 
+
+static 
+char bit6toAscii(uint8_t a){
+	a &= (uint8_t)0x3F;
+	return pgm_read_byte(base64_alphabet+a);
+}
+
+#else
+
+static 
+char bit6toAscii(uint8_t a){
+	a &= (uint8_t)0x3F;
+	
+	if(a<=25){
+		return a+'A';
+	} else {
+		if(a<=51){
+			return a-26+'a';
+		} else {
+			if(a<=61){
+				return a-52+'0';
+			} else {
+				if(a==62){
+					return '+';
+				} else {
+					return '/'; /* a == 63 */
+				}
+			}
+		}
+	}
+}
+
+#endif
+
+void base64enc(char* dest,const void* src, uint16_t length){
+	uint16_t i,j;
+	uint8_t a[4];
+	for(i=0; i<length/3; ++i){
+		a[0]= (((uint8_t*)src)[i*3+0])>>2;
+		a[1]= (((((uint8_t*)src)[i*3+0])<<4) | ((((uint8_t*)src)[i*3+1])>>4)) & 0x3F;
+		a[2]= (((((uint8_t*)src)[i*3+1])<<2) | ((((uint8_t*)src)[i*3+2])>>6)) & 0x3F;
+		a[3]= (((uint8_t*)src)[i*3+2]) & 0x3F;
+		for(j=0; j<4; ++j){
+			*dest++=bit6toAscii(a[j]);
+		}
+	}
+	/* now we do the rest */
+	switch(length%3){
+		case 0: 
+			break;
+		case 1:
+			a[0]=(((uint8_t*)src)[i*3+0])>>2;
+			a[1]=((((uint8_t*)src)[i*3+0])<<4)&0x3F;
+			*dest++ = bit6toAscii(a[0]);
+			*dest++ = bit6toAscii(a[1]);
+			*dest++ = '=';
+			*dest++ = '=';
+			break;
+		case 2:		
+			a[0]= (((uint8_t*)src)[i*3+0])>>2;
+			a[1]= (((((uint8_t*)src)[i*3+0])<<4) | ((((uint8_t*)src)[i*3+1])>>4)) & 0x3F;
+			a[2]= ((((uint8_t*)src)[i*3+1])<<2) & 0x3F;
+			*dest++ = bit6toAscii(a[0]);
+			*dest++ = bit6toAscii(a[1]);
+			*dest++ = bit6toAscii(a[2]);
+			*dest++ = '=';
+			break;
+		default: /* this will not happen! */
+			break;	
+	}
+/*  finalize: */
+  	*dest='\0';
+}
+
diff --git a/dsa/base64_enc.h b/dsa/base64_enc.h
new file mode 100644
index 0000000..89a5f86
--- /dev/null
+++ b/dsa/base64_enc.h
@@ -0,0 +1,28 @@
+/* base64_enc.h */
+/*
+ *   This file is part of the AVR-Crypto-Lib.
+ *   Copyright (C) 2006, 2007, 2008  Daniel Otte (daniel.otte@rub.de)
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#ifndef BASE64_ENC_H_
+#define BASE64_ENC_H_
+
+#include <stdint.h>
+
+void base64enc(char* dest, const void* src, uint16_t length);
+
+#endif /*BASE64_ENC_H_*/
diff --git a/dsa/bigint.c b/dsa/bigint.c
new file mode 100644
index 0000000..7004e0d
--- /dev/null
+++ b/dsa/bigint.c
@@ -0,0 +1,808 @@
+/* bigint.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file		bigint.c
+ * \author		Daniel Otte
+ * \date		2010-02-22
+ * 
+ * \license	    GPLv3 or later
+ * 
+ */
+ 
+
+#define STRING2(x) #x
+#define STRING(x) STRING2(x)
+#define STR_LINE STRING(__LINE__)
+
+#include "bigint.h"
+#include <string.h>
+/*
+#include "cli.h"
+#include "bigint_io.h"
+*/
+#ifndef MAX
+ #define MAX(a,b) (((a)>(b))?(a):(b))
+#endif
+
+#ifndef MIN
+ #define MIN(a,b) (((a)<(b))?(a):(b))
+#endif
+
+#define SET_FBS(a, v) do{(a)->info &=0xF8; (a)->info |= (v);}while(0)
+#define GET_FBS(a)   ((a)->info&BIGINT_FBS_MASK)
+#define SET_NEG(a)   (a)->info |= BIGINT_NEG_MASK
+#define SET_POS(a)   (a)->info &= ~BIGINT_NEG_MASK
+#define XCHG(a,b)    do{(a)^=(b); (b)^=(a); (a)^=(b);}while(0)
+#define XCHG_PTR(a,b)    do{ a = (void*)(((uint16_t)(a)) ^ ((uint16_t)(b))); \
+	                         b = (void*)(((uint16_t)(a)) ^ ((uint16_t)(b))); \
+	                         a = (void*)(((uint16_t)(a)) ^ ((uint16_t)(b)));}while(0)
+
+#define GET_SIGN(a) ((a)->info&BIGINT_NEG_MASK)
+
+/******************************************************************************/
+void bigint_adjust(bigint_t* a){
+	while(a->length_B!=0 && a->wordv[a->length_B-1]==0){
+		a->length_B--;
+	}
+	if(a->length_B==0){
+		a->info=0;
+		return;
+	}
+	uint8_t t;
+	uint8_t i = 0x07;
+	t = a->wordv[a->length_B-1];
+	while((t&0x80)==0 && i){
+		t<<=1;
+		i--;
+	}
+	SET_FBS(a, i);
+}
+
+/******************************************************************************/
+
+void bigint_copy(bigint_t* dest, const bigint_t* src){
+	memcpy(dest->wordv, src->wordv, src->length_B);
+	dest->length_B = src->length_B;
+	dest->info = src->info;
+}
+
+/******************************************************************************/
+
+/* this should be implemented in assembly */
+/*
+void bigint_add_u(bigint_t* dest, const bigint_t* a, const bigint_t* b){
+	uint16_t t=0, i;
+	if(a->length_B < b->length_B){
+		XCHG_PTR(a,b);
+	}
+	for(i=0; i<b->length_B; ++i){
+		t = a->wordv[i] + b->wordv[i] + t;
+		dest->wordv[i] = (uint8_t)t;
+		t>>=8;
+	}
+	for(; i<a->length_B; ++i){
+		t = a->wordv[i] + t;
+		dest->wordv[i] = (uint8_t)t;
+		t>>=8;
+	}
+	dest->wordv[i++] = t;
+	dest->length_B = i;
+	bigint_adjust(dest);
+}
+*/
+/******************************************************************************/
+
+/* this should be implemented in assembly */
+void bigint_add_scale_u(bigint_t* dest, const bigint_t* a, uint16_t scale){
+	uint16_t i,j=0;
+	uint16_t t=0;
+	if(scale>dest->length_B)
+		memset(dest->wordv+dest->length_B, 0, scale-dest->length_B);
+	for(i=scale; i<a->length_B+scale; ++i,++j){
+		t = a->wordv[j] + t;
+		if(dest->length_B>i){
+			t += dest->wordv[i];
+		}
+		dest->wordv[i] = (uint8_t)t;
+		t>>=8;
+	}
+	while(t){
+		if(dest->length_B>i){
+			t = dest->wordv[i] + t;
+		}
+		dest->wordv[i] = (uint8_t)t;
+		t>>=8;
+		++i;
+	}
+	if(dest->length_B < i){
+		dest->length_B = i;
+	}
+	bigint_adjust(dest);
+}
+
+/******************************************************************************/
+
+/* this should be implemented in assembly */
+void bigint_sub_u(bigint_t* dest, const bigint_t* a, const bigint_t* b){
+	int8_t borrow=0;
+	int8_t  r;
+	int16_t t;
+	uint16_t i, min, max;
+	min = MIN(a->length_B, b->length_B);
+	max = MAX(a->length_B, b->length_B);
+	r = bigint_cmp_u(a,b);
+	if(r==0){
+		dest->length_B = 0;
+		dest->wordv[0] = 0;
+		bigint_adjust(dest);
+		return;
+	}
+	if(b->length_B==0){
+		dest->length_B = a->length_B;
+		memcpy(dest->wordv, a->wordv, a->length_B);
+		dest->info = a->info;
+		SET_POS(dest);
+		return;
+	}
+	if(a->length_B==0){
+			dest->length_B = b->length_B;
+			memcpy(dest->wordv, b->wordv, b->length_B);
+			dest->info = b->info;
+			SET_NEG(dest);
+			return;
+	}
+	if(r<0){
+		bigint_sub_u(dest, b, a);
+		SET_NEG(dest);
+	}else{
+		for(i=0; i<min; ++i){
+			t = a->wordv[i] - b->wordv[i] - borrow;
+			if(t<0){
+				borrow = 1;
+				dest->wordv[i]=(uint8_t)t;
+			}else{
+				borrow = 0;
+				dest->wordv[i]=(uint8_t)t;
+			}
+		}
+		for(;i<max; ++i){
+			t = a->wordv[i] - borrow;
+			if(t<0){
+				borrow = 1;
+				dest->wordv[i]=(uint8_t)t;
+			}else{
+				borrow = 0;
+				dest->wordv[i]=(uint8_t)t;
+			}
+
+		}
+		SET_POS(dest);
+		dest->length_B = i;
+		bigint_adjust(dest);
+	}
+}
+
+/******************************************************************************/
+
+int8_t bigint_cmp_u(const bigint_t* a, const bigint_t* b){
+	if(a->length_B > b->length_B){
+		return 1;
+	}
+	if(a->length_B < b->length_B){
+		return -1;
+	}
+	if(a->length_B==0){
+		return 0;
+	}
+	uint16_t i;
+	i = a->length_B-1;
+	do{
+		if(a->wordv[i]!=b->wordv[i]){
+			if(a->wordv[i]>b->wordv[i]){
+				return 1;
+			}else{
+				return -1;
+			}
+		}
+	}while(i--);
+	return 0;
+}
+
+/******************************************************************************/
+
+void bigint_add_s(bigint_t* dest, const bigint_t* a, const bigint_t* b){
+	uint8_t s;
+	s  = GET_SIGN(a)?2:0;
+	s |= GET_SIGN(b)?1:0;
+	switch(s){
+		case 0: /* both positive */
+			bigint_add_u(dest, a,b);
+			SET_POS(dest);
+			break;
+		case 1: /* a positive, b negative */
+			bigint_sub_u(dest, a, b);
+			break;
+		case 2: /* a negative, b positive */
+			bigint_sub_u(dest, b, a);
+			break;
+		case 3: /* both negative */
+			bigint_add_u(dest, a, b);
+			SET_NEG(dest);
+			break;
+		default: /* how can this happen?*/
+			break;
+	}
+}
+
+/******************************************************************************/
+
+void bigint_sub_s(bigint_t* dest, const bigint_t* a, const bigint_t* b){
+	uint8_t s;
+	s  = GET_SIGN(a)?2:0;
+	s |= GET_SIGN(b)?1:0;
+	switch(s){
+		case 0: /* both positive */
+			bigint_sub_u(dest, a,b);
+			break;
+		case 1: /* a positive, b negative */
+			bigint_add_u(dest, a, b);
+			SET_POS(dest);
+			break;
+		case 2: /* a negative, b positive */
+			bigint_add_u(dest, a, b);
+			SET_NEG(dest);
+			break;
+		case 3: /* both negative */
+			bigint_sub_u(dest, b, a);
+			break;
+		default: /* how can this happen?*/
+					break;
+	}
+
+}
+
+/******************************************************************************/
+
+int8_t bigint_cmp_s(const bigint_t* a, const bigint_t* b){
+	uint8_t s;
+	if(a->length_B==0 && b->length_B==0){
+		return 0;
+	}
+	s  = GET_SIGN(a)?2:0;
+	s |= GET_SIGN(b)?1:0;
+	switch(s){
+		case 0: /* both positive */
+			return bigint_cmp_u(a, b);
+			break;
+		case 1: /* a positive, b negative */
+			return 1;
+			break;
+		case 2: /* a negative, b positive */
+			return -1;
+			break;
+		case 3: /* both negative */
+			return bigint_cmp_u(b, a);
+			break;
+		default: /* how can this happen?*/
+					break;
+	}
+	return 0; /* just to satisfy the compiler */
+}
+
+/******************************************************************************/
+
+void bigint_shiftleft(bigint_t* a, uint16_t shift){
+	uint16_t byteshift;
+	uint16_t i;
+	uint8_t bitshift;
+	uint16_t t=0;
+	byteshift = (shift+3)/8;
+	bitshift = shift&7;
+	memmove(a->wordv+byteshift, a->wordv, a->length_B);
+	memset(a->wordv, 0, byteshift);
+	if(bitshift!=0){
+		if(bitshift<=4){ /* shift to the left */
+			for(i=byteshift; i<a->length_B+byteshift; ++i){
+				t |= (a->wordv[i])<<bitshift;
+				a->wordv[i] = (uint8_t)t;
+				t >>= 8;
+			}
+			a->wordv[i] = (uint8_t)t;
+			byteshift++;
+		}else{ /* shift to the right */
+			for(i=a->length_B+byteshift-1; i>byteshift-1; --i){
+				t |= (a->wordv[i])<<(bitshift);
+				a->wordv[i] = (uint8_t)(t>>8);
+				t <<= 8;
+			}
+			t |= (a->wordv[i])<<(bitshift);
+			a->wordv[i] = (uint8_t)(t>>8);
+		}
+	}
+	a->length_B += byteshift;
+	bigint_adjust(a);
+}
+
+/******************************************************************************/
+
+void bigint_shiftright(bigint_t* a, uint16_t shift){
+	uint16_t byteshift;
+	uint16_t i;
+	uint8_t bitshift;
+	uint16_t t=0;
+	byteshift = shift/8;
+	bitshift = shift&7;
+	if(byteshift >= a->length_B){ /* we would shift out more than we have */
+		bigint_set_zero(a);
+		return;
+	}
+	if(byteshift == a->length_B-1 && bitshift>GET_FBS(a)){
+		bigint_set_zero(a);
+		return;
+	}
+	if(byteshift){
+		memmove(a->wordv, a->wordv+byteshift, a->length_B-byteshift);
+		memset(a->wordv+a->length_B-byteshift, 0,  byteshift);
+	}
+	if(bitshift!=0){
+	 /* shift to the right */
+		for(i=a->length_B-byteshift-1; i>0; --i){
+			t |= (a->wordv[i])<<(8-bitshift);
+			a->wordv[i] = (uint8_t)(t>>8);
+			t <<= 8;
+		}
+		t |= (a->wordv[0])<<(8-bitshift);
+		a->wordv[0] = (uint8_t)(t>>8);
+	}
+	a->length_B -= byteshift;
+	bigint_adjust(a);
+}
+
+/******************************************************************************/
+
+void bigint_xor(bigint_t* dest, const bigint_t* a){
+	uint16_t i;
+	for(i=0; i<a->length_B; ++i){
+		dest->wordv[i] ^= a->wordv[i];
+	}
+	bigint_adjust(dest);
+}
+
+/******************************************************************************/
+
+void bigint_set_zero(bigint_t* a){
+	a->length_B=0;
+}
+
+/******************************************************************************/
+
+/* using the Karatsuba-Algorithm */
+/* x*y = (xh*yh)*b**(2n) + ((xh+xl)*(yh+yl) - xh*yh - xl*yl)*b**n + xl*yl */
+void bigint_mul_u(bigint_t* dest, const bigint_t* a, const bigint_t* b){
+	if(a->length_B==0 || b->length_B==0){
+		bigint_set_zero(dest);
+		return;
+	}
+	if(dest==a || dest==b){
+		bigint_t d;
+		uint8_t d_b[a->length_B+b->length_B];
+		d.wordv = d_b;
+		bigint_mul_u(&d, a, b);
+		bigint_copy(dest, &d);
+		return;
+	}
+	if(a->length_B==1 || b->length_B==1){
+		if(a->length_B!=1){
+			XCHG_PTR(a,b);
+		}
+		uint16_t i, t=0;
+		uint8_t x = a->wordv[0];
+		for(i=0; i<b->length_B; ++i){
+			t += b->wordv[i]*x;
+			dest->wordv[i] = (uint8_t)t;
+			t>>=8;
+		}
+		dest->wordv[i] = (uint8_t)t;
+		dest->length_B=i+1;
+		bigint_adjust(dest);
+		return;
+	}
+	if(a->length_B<=4 && b->length_B<=4){
+		uint32_t p=0, q=0;
+		uint64_t r;
+		memcpy(&p, a->wordv, a->length_B);
+		memcpy(&q, b->wordv, b->length_B);
+		r = (uint64_t)p*(uint64_t)q;
+		memcpy(dest->wordv, &r, a->length_B+b->length_B);
+		dest->length_B =  a->length_B+b->length_B;
+		bigint_adjust(dest);
+		return;
+	}
+	bigint_set_zero(dest);
+	/* split a in xh & xl; split b in yh & yl */
+	uint16_t n;
+	n=(MAX(a->length_B, b->length_B)+1)/2;
+	bigint_t xl, xh, yl, yh;
+	xl.wordv = a->wordv;
+	yl.wordv = b->wordv;
+	if(a->length_B<=n){
+		xh.info=0;
+		xh.length_B = 0;
+		xl.length_B = a->length_B;
+		xl.info = 0;
+	}else{
+		xl.length_B=n;
+		xl.info = 0;
+		bigint_adjust(&xl);
+		xh.wordv = a->wordv+n;
+		xh.length_B = a->length_B-n;
+		xh.info = 0;
+	}
+	if(b->length_B<=n){
+		yh.info=0;
+		yh.length_B = 0;
+		yl.length_B = b->length_B;
+		yl.info = b->info;
+	}else{
+		yl.length_B=n;
+		yl.info = 0;
+		bigint_adjust(&yl);
+		yh.wordv = b->wordv+n;
+		yh.length_B = b->length_B-n;
+		yh.info = 0;
+	}
+	/* now we have split up a and b */
+	uint8_t  tmp_b[2*n+2], m_b[2*(n+1)];
+	bigint_t tmp, tmp2, m;
+	tmp.wordv = tmp_b;
+	tmp2.wordv = tmp_b+n+1;
+	m.wordv = m_b;
+
+	bigint_mul_u(dest, &xl, &yl);  /* dest <= xl*yl     */
+	bigint_add_u(&tmp2, &xh, &xl); /* tmp2 <= xh+xl     */
+	bigint_add_u(&tmp, &yh, &yl);  /* tmp  <= yh+yl     */
+	bigint_mul_u(&m, &tmp2, &tmp); /* m    <= tmp2*tmp  */
+	bigint_mul_u(&tmp, &xh, &yh);  /* h    <= xh*yh     */
+	bigint_sub_u(&m, &m, dest);    /* m    <= m-dest    */
+    bigint_sub_u(&m, &m, &tmp);    /* m    <= m-h       */
+	bigint_add_scale_u(dest, &m, n);
+	bigint_add_scale_u(dest, &tmp, 2*n);
+}
+
+/******************************************************************************/
+
+/**
+ * \brief signed multiplication, dest = a * b
+ *
+ * The magnitude is computed by bigint_mul_u(); the sign flag of dest is then
+ * set from the sign flags of the operands: negative if exactly one of them is
+ * flagged negative, positive otherwise.
+ *
+ * \param dest  receives the product; bigint_mul_u() accepts dest aliasing a
+ *              or b, so the operand signs are sampled before it is called
+ * \param a     first factor
+ * \param b     second factor
+ *
+ */
+void bigint_mul_s(bigint_t* dest, const bigint_t* a, const bigint_t* b){
+	uint8_t neg_a, neg_b;
+	/* sample both signs before dest is written */
+	neg_a = GET_SIGN(a)?1:0;
+	neg_b = GET_SIGN(b)?1:0;
+	bigint_mul_u(dest, a, b);
+	if(neg_a != neg_b){
+		SET_NEG(dest);
+	}else{
+		SET_POS(dest);
+	}
+}
+
+/******************************************************************************/
+
+/* square: dest = a*a; dest may be the same object as a */
+/* (xh*b^n+xl)^2 = xh^2*b^2n + 2*xh*xl*b^n + xl^2 */
+void bigint_square(bigint_t* dest, const bigint_t* a){
+	if(a->length_B<=4){ /* small operand: the square fits into 64 bit */
+		uint64_t r=0;
+		memcpy(&r, a->wordv, a->length_B); /* wordv is LSB first, so this relies on a little-endian CPU */
+		r = r*r;
+		memcpy(dest->wordv, &r, 2*a->length_B);
+		SET_POS(dest);
+		dest->length_B=2*a->length_B;
+		bigint_adjust(dest);
+		return;
+	}
+	if(dest==a){ /* in-place request: square into a stack buffer, then copy back */
+		bigint_t d;
+		uint8_t d_b[a->length_B*2];
+		d.wordv = d_b;
+		bigint_square(&d, a);
+		bigint_copy(dest, &d);
+		return;
+	}
+	uint16_t n;
+	n=(a->length_B+1)/2; /* bytes in the low half */
+	bigint_t xh, xl, tmp; /* x-high, x-low, temp */
+	uint8_t buffer[2*n+1]; /* holds xh^2 first, later 2*xh*xl */
+	xl.wordv = a->wordv; /* xl and xh are views into a, nothing is copied */
+	xl.length_B = n;
+	xh.wordv = a->wordv+n;
+	xh.length_B = a->length_B-n;
+	tmp.wordv = buffer;
+	bigint_square(dest, &xl); /* dest <= xl^2 */
+	bigint_square(&tmp, &xh); /* tmp  <= xh^2 */
+	bigint_add_scale_u(dest, &tmp, 2*n); /* add the xh^2*b^2n term of the formula above */
+	bigint_mul_u(&tmp, &xl, &xh); /* tmp  <= xl*xh */
+	bigint_shiftleft(&tmp, 1); /* tmp  <= 2*xl*xh */
+	bigint_add_scale_u(dest, &tmp, n); /* add the 2*xh*xl*b^n term */
+}
+
+/******************************************************************************/
+
+void bigint_sub_u_bitscale(bigint_t* a, const bigint_t* b, uint16_t bitscale){ /* a -= b<<bitscale, unsigned, in place */
+	bigint_t tmp;
+	uint8_t tmp_b[b->length_B+1]; /* one spare byte for bits shifted out of b's top byte */
+	uint16_t i,j,byteshift=bitscale/8;
+	uint8_t borrow=0;
+	int16_t t;
+	/* a has fewer bytes than the shifted b needs: the result is forced to zero */
+	if(a->length_B < b->length_B+byteshift){
+		bigint_set_zero(a);
+		return;
+	}
+	/* whole bytes are handled by the index offset below; only bitscale%8 is really shifted */
+	tmp.wordv = tmp_b;
+	bigint_copy(&tmp, b);
+	bigint_shiftleft(&tmp, bitscale&7);
+	/* bytewise subtraction of tmp from a, starting at byte byteshift of a */
+	for(j=0,i=byteshift; i<tmp.length_B+byteshift; ++i, ++j){ /* NOTE(review): if the shift lengthened tmp by a byte, i can reach a->length_B - confirm */
+		t = a->wordv[i] - tmp.wordv[j] - borrow;
+		a->wordv[i] = (uint8_t)t;
+		if(t<0){
+			borrow = 1;
+		}else{
+			borrow = 0;
+		}
+	}
+	while(borrow){ /* ripple the remaining borrow through the higher bytes of a */
+		if(i+1 > a->length_B){ /* borrow runs off the top: the result is forced to zero */
+			bigint_set_zero(a);
+			return;
+		}
+		a->wordv[i] -= borrow;
+		if(a->wordv[i]!=0xff){ /* 0xff means the byte was 0 and the borrow continues */
+			borrow=0;
+		}
+		++i;
+	}
+	bigint_adjust(a);
+}
+
+/******************************************************************************/
+
+void bigint_reduce(bigint_t* a, const bigint_t* r){ /* a <= a mod r, in place */
+	const uint8_t r_fbs = GET_FBS(r);
+	if(a->length_B==0 || r->length_B==0){
+		return; /* a == 0 needs no work; r == 0 is silently ignored */
+	}
+	/* coarse steps: subtract r scaled up by the bit distance between a and r, less one */
+	while(a->length_B > r->length_B){
+		bigint_sub_u_bitscale(a, r, (a->length_B-r->length_B)*8+GET_FBS(a)-r_fbs-1);
+	}
+	while((a->length_B == r->length_B) && (GET_FBS(a) > r_fbs+1)){
+		bigint_sub_u_bitscale(a, r, GET_FBS(a)-r_fbs-1);
+	}
+	/* fine steps: plain compare-and-subtract until a < r */
+	while(bigint_cmp_u(a,r)>=0){
+		bigint_sub_u(a,a,r);
+	}
+	bigint_adjust(a);
+}
+
+/******************************************************************************/
+
+/* calculate dest = a**exp % r using square&multiply; dest is left untouched if a or r is zero */
+/* exp is scanned from its least significant bit upwards; exp == 0 yields dest = 1 */
+void bigint_expmod_u(bigint_t* dest, const bigint_t* a, const bigint_t* exp, const bigint_t* r){
+	if(a->length_B==0 || r->length_B==0){
+		return;
+	}
+	/* both buffers must hold an unreduced product of two values below r */
+	bigint_t res, base;
+	uint8_t base_b[MAX(a->length_B,r->length_B*2)], res_b[r->length_B*2];
+	uint16_t i;
+	uint8_t j, t;
+	res.wordv = res_b;
+	base.wordv = base_b;
+	bigint_copy(&base, a);
+	bigint_reduce(&base, r);
+	res.wordv[0]=1;
+	res.length_B=1;
+	res.info = 0;
+	bigint_adjust(&res);
+	for(i=0; i+1<exp->length_B; ++i){ /* all exponent bytes except the top one: always 8 bits each */
+		t=exp->wordv[i];
+		for(j=0; j<8; ++j){
+			if(t&1){
+				bigint_mul_u(&res, &res, &base);
+				bigint_reduce(&res, r);
+			}
+			bigint_square(&base, &base);
+			bigint_reduce(&base, r);
+			t>>=1;
+		}
+	}
+	t = exp->length_B ? exp->wordv[i] : 0; /* top byte; a zero exponent has no byte that may be read */
+	while(t){ /* stop after the highest set bit, no squaring beyond it */
+		if(t&1){
+			bigint_mul_u(&res, &res, &base);
+			bigint_reduce(&res, r);
+		}
+		bigint_square(&base, &base);
+		bigint_reduce(&base, r);
+		t>>=1;
+	}
+	SET_POS(&res);
+	bigint_copy(dest, &res);
+}
+
+/******************************************************************************/
+/* gcd <-- gcd(x,y) a*x+b*y=gcd; each of gcd, a, b may be NULL when not wanted; returns without writing anything if x or y is zero */
+void bigint_gcdext(bigint_t* gcd, bigint_t* a, bigint_t* b, const bigint_t* x, const bigint_t* y){
+	 bigint_t g, x_, y_, u, v, a_, b_, c_, d_;
+	 volatile uint16_t i=0;
+	 if(x->length_B==0 || y->length_B==0){
+		 return;
+	 }
+	 while(x->wordv[i]==0 && y->wordv[i]==0){ /* skip the zero bytes x and y share at the low end */
+		 ++i;
+	 }
+	 uint8_t g_b[i+2], x_b[x->length_B-i], y_b[y->length_B-i];
+	 uint8_t u_b[x->length_B-i], v_b[y->length_B-i];
+	 uint8_t a_b[y->length_B+2], c_b[y->length_B+2];
+	 uint8_t b_b[x->length_B+2], d_b[x->length_B+2];
+	 /* g = 2**(8*i) for now; x_ and y_ are copies of x and y without those i low bytes */
+	 g.wordv = g_b;
+	 x_.wordv = x_b;
+	 y_.wordv = y_b;
+	 memset(g_b, 0, i);
+	 g_b[i]=1;
+	 g.length_B = i+1;
+	 g.info=0;
+	 x_.info = y_.info = 0;
+	 x_.length_B = x->length_B-i;
+	 y_.length_B = y->length_B-i;
+	 memcpy(x_.wordv, x->wordv+i, x_.length_B);
+	 memcpy(y_.wordv, y->wordv+i, y_.length_B);
+	 for(i=0; (x_.wordv[0]&(1<<i))==0 && (y_.wordv[0]&(1<<i))==0; ++i){
+	 } /* i = number of low zero bits x_ and y_ still have in common */
+
+	 bigint_adjust(&x_);
+	 bigint_adjust(&y_);
+
+	 if(i){ /* move the common factor 2**i from x_ and y_ into g */
+		 bigint_shiftleft(&g, i);
+		 bigint_shiftright(&x_, i);
+		 bigint_shiftright(&y_, i);
+	 }
+	 u.wordv = u_b;
+	 v.wordv = v_b;
+	 a_.wordv = a_b;
+	 b_.wordv = b_b;
+	 c_.wordv = c_b;
+	 d_.wordv = d_b;
+	 /* start values of the binary extended gcd: u = x_, v = y_ */
+	 bigint_copy(&u, &x_);
+	 bigint_copy(&v, &y_);
+	 a_.wordv[0] = d_.wordv[0] = 1; /* (a_, b_) = (1, 0) */
+	 a_.length_B = d_.length_B = 1; /* (c_, d_) = (0, 1) */
+	 a_.info = d_.info = 0;
+	 bigint_set_zero(&b_);
+	 bigint_set_zero(&c_);
+	 /* set_zero only clears the length: give both zeros a defined sign flag and a */
+	 /* defined low byte, the parity tests and the signed add/sub below read them  */
+	 b_.info = c_.info = b_.wordv[0] = c_.wordv[0] = 0;
+	 do{
+		 while((u.wordv[0]&1)==0){ /* halve u; make a_, b_ even first so they can be halved too */
+			 bigint_shiftright(&u, 1);
+			 if((a_.wordv[0]&1) || (b_.wordv[0]&1)){
+				 bigint_add_s(&a_, &a_, &y_);
+				 bigint_sub_s(&b_, &b_, &x_);
+			 }
+			 bigint_shiftright(&a_, 1);
+			 bigint_shiftright(&b_, 1);
+		 }
+		 while((v.wordv[0]&1)==0){ /* same for v with its coefficients c_, d_ */
+			 bigint_shiftright(&v, 1);
+			 if((c_.wordv[0]&1) || (d_.wordv[0]&1)){
+				 bigint_add_s(&c_, &c_, &y_);
+				 bigint_sub_s(&d_, &d_, &x_);
+			 }
+			 bigint_shiftright(&c_, 1);
+			 bigint_shiftright(&d_, 1);
+
+		 }
+		 if(bigint_cmp_u(&u, &v)>=0){ /* subtract the smaller of u, v from the larger, coefficients follow */
+			bigint_sub_u(&u, &u, &v);
+			bigint_sub_s(&a_, &a_, &c_);
+			bigint_sub_s(&b_, &b_, &d_);
+		 }else{
+			bigint_sub_u(&v, &v, &u);
+			bigint_sub_s(&c_, &c_, &a_);
+			bigint_sub_s(&d_, &d_, &b_);
+		 }
+	 }while(u.length_B);
+	 if(gcd){
+		 bigint_mul_s(gcd, &v, &g); /* put the common power of two back */
+	 }
+	 if(a){
+		bigint_copy(a, &c_);
+	 }
+	 if(b){
+		 bigint_copy(b, &d_);
+	 }
+}
+
+/******************************************************************************/
+
+void bigint_inverse(bigint_t* dest, const bigint_t* a, const bigint_t* m){
+	bigint_gcdext(NULL, dest, NULL, a, m); /* keep only the coefficient that belongs to a */
+	while((dest->info&BIGINT_NEG_MASK) != 0){
+		bigint_add_s(dest, dest, m); /* add m until the coefficient is no longer flagged negative */
+	}
+}
+
+/******************************************************************************/
+
+void bigint_changeendianess(bigint_t* a){ /* reverse the byte order of a->wordv in place */
+	uint8_t t, *p, *q;
+	if(a->length_B==0){ /* nothing to swap; also avoids forming the pointer wordv-1 below */
+		return;
+	}
+	p = a->wordv;
+	q = p+a->length_B-1;
+	while(p<q){ /* swap the outermost pair and move both ends inwards */
+		t = *p; *p++ = *q; *q-- = t;
+	}
+}
+
+/******************************************************************************/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/dsa/bigint.h b/dsa/bigint.h
new file mode 100644
index 0000000..642b4e0
--- /dev/null
+++ b/dsa/bigint.h
@@ -0,0 +1,68 @@
+/* bigint.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file		bigint.h
+ * \author		Daniel Otte
+ * \date		2010-02-22
+ * 
+ * \license	    GPLv3 or later
+ * 
+ */
+
+#ifndef BIGINT_H_
+#define BIGINT_H_
+
+#include <stdint.h>
+
+#define BIGINT_FBS_MASK 0x07 /* the last three bits indicate which is the first bit set */
+#define BIGINT_NEG_MASK 0x80 /* this bit indicates a negative value */
+typedef struct{
+	uint16_t length_B; /* number of bytes of wordv in use; 0 is used for the value zero */
+	uint8_t info; /* flag byte: BIGINT_NEG_MASK sign bit plus the BIGINT_FBS_MASK bits */
+	uint8_t *wordv; /* word vector, pointing to the LSB */
+}bigint_t;
+
+
+/******************************************************************************/
+
+void   bigint_adjust(bigint_t* a);
+void   bigint_copy(bigint_t* dest, const bigint_t* src);
+void   bigint_add_u(bigint_t* dest, const bigint_t* a, const bigint_t* b);
+void   bigint_add_scale_u(bigint_t* dest, const bigint_t* a, uint16_t scale);
+void   bigint_sub_u(bigint_t* dest, const bigint_t* a, const bigint_t* b);
+int8_t bigint_cmp_u(const bigint_t * a, const bigint_t * b);
+void   bigint_add_s(bigint_t* dest, const bigint_t* a, const bigint_t* b);
+void   bigint_sub_s(bigint_t* dest, const bigint_t* a, const bigint_t* b);
+int8_t bigint_cmp_s(const bigint_t* a, const bigint_t* b);
+void   bigint_shiftleft(bigint_t* a, uint16_t shift);
+void   bigint_shiftright(bigint_t* a, uint16_t shift);
+void   bigint_xor(bigint_t* dest, const bigint_t* a);
+void   bigint_set_zero(bigint_t* a);
+void   bigint_mul_u(bigint_t* dest, const bigint_t* a, const bigint_t* b);
+void   bigint_mul_s(bigint_t* dest, const bigint_t* a, const bigint_t* b);
+void   bigint_square(bigint_t* dest, const bigint_t* a);
+void   bigint_sub_u_bitscale(bigint_t* a, const bigint_t* b, uint16_t bitscale);
+void   bigint_reduce(bigint_t* a, const bigint_t* r);
+void   bigint_expmod_u(bigint_t* dest, const bigint_t* a, const bigint_t* exp, const bigint_t* r);
+void   bigint_gcdext(bigint_t* gcd, bigint_t* a, bigint_t* b, const bigint_t* x, const bigint_t* y);
+void   bigint_inverse(bigint_t* dest, const bigint_t* a, const bigint_t* m);
+void   bigint_changeendianess(bigint_t* a);
+/******************************************************************************/
+
+#endif /*BIGINT_H_*/
diff --git a/dsa/bigint_add_u.S b/dsa/bigint_add_u.S
new file mode 100644
index 0000000..7c34f1a
--- /dev/null
+++ b/dsa/bigint_add_u.S
@@ -0,0 +1,137 @@
+/* bigint_add_u.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2010 Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * \file     bigint_add_u.S
+ * \email    daniel.otte@rub.de
+ * \author   Daniel Otte
+ * \date     2010-03-01
+ * \license  GPLv3 or later
+ *
+ */
+
+#include "avr-asm-macros.S"
+
+/*
+ param dest: r24:r25
+ param a:    r22:r23
+ param b:    r20:r21
+*/
+LEN_A_0 = 22
+LEN_A_1 = 23
+LEN_B_0 = 20
+LEN_B_1 = 21
+
+
+.global bigint_add_u
+bigint_add_u: ; dest = a + b on the unsigned byte vectors, then tail-calls bigint_adjust
+	push_range 28, 29 ; macro from avr-asm-macros.S, presumably saves Y
+	push_range 24, 25 ; keep dest for the tail call at the end
+	movw r26, r24 ; X is our destination pointer
+	movw r30, r22 ; Z = a
+	movw r28, r20 ; Y = b
+	ldd LEN_A_0, Z+0 ; the first two bytes of a bigint_t are length_B
+	ldd LEN_A_1, Z+1
+	ldd LEN_B_0, Y+0
+	ldd LEN_B_1, Y+1
+	cp LEN_A_0, LEN_B_0
+	cpc LEN_A_1, LEN_B_1
+	brsh 3f
+	movw r18, LEN_A_0    ; swap length values
+	movw LEN_A_0, LEN_B_0
+	movw LEN_B_0, r18
+	movw r18, r30        ; swap pointers
+	movw r30, r28
+	movw r28, r18
+3:	; now a is the longer integer
+    movw r24, LEN_A_0
+    adiw r24, 0 ; only sets the flags: is the longer length zero?
+    brne 4f
+    st X+, r1 ; store length
+    st X+, r1
+    st X+, r1 ; store 0 in info field
+    rjmp 9f
+4:
+    adiw r24, 1 ; the result gets one byte more than the longer operand, for the final carry
+    st X+, r24 ; store length
+    st X+, r25
+    st X+, r1 ; store 0 in info field
+    ld r18, X+ ; X is now at the wordv field of dest: load that pointer
+    ld r19, X+
+    movw r26, r18
+    adiw r30, 3 ; adjust pointers to point at wordv
+    ld r18, Z+
+    ld r19, Z+
+    movw r30, r18
+    adiw r28, 3
+    ld r18, Y+
+    ld r19, Y+
+    movw r28, r18
+
+    sub LEN_A_0, LEN_B_0 ; LEN_A = number of bytes the longer operand has on top
+    sbc LEN_A_1, LEN_B_1
+    movw r24, LEN_B_0 ; r25:r24 = byte count of the common part
+    clr r0
+    adiw r24, 0
+    breq 6f
+    clc
+5: ; common part: add with carry, byte by byte
+    ld r0, Z+
+    ld r1, Y+ ; r1 is used as scratch here and cleared again at 6:
+    adc r0, r1
+    st X+, r0
+    dec r24 ; dec does not touch the carry flag
+	brne 5b
+	rol r0 ; store carry bit
+	tst r25
+	breq 6f
+	dec r25 ; NOTE(review): r24 wraps to 0xff below, which gives 255 further passes per high count, and a
+	dec r24 ; start value with r24 == 0 runs 256 passes first - counts >= 256 look wrong, confirm
+	ror r0 ; write carry back
+	rjmp 5b
+6: /* the main part is done */
+	movw r24, LEN_A_0 ; r25:r24 = remaining bytes of the longer operand
+	clr r1 ; r1 is zero again, used as the second addend below
+	adiw r24, 0
+	breq 8f
+62:
+	ror r0 ; write carry back
+7: ; remaining part: only the carry is propagated
+    ld r0, Z+
+    adc r0, r1
+    st X+, r0
+    dec r24
+	brne 7b
+	rol r0 ; store carry bit
+	tst r25
+	breq 8f
+	dec r25 ; same 16 bit count scheme as in the loop at 5:
+	dec r24
+	rjmp 62b
+8:
+	ror r0 ; carry flag <- saved carry bit
+	clr r0
+	rol r0 ; r0 = final carry, 0 or 1
+	st X+, r0 ; becomes the extra top byte of the result
+9:
+	pop_range 24, 25 ; r25:r24 = dest again
+	pop_range 28, 29
+	jmp bigint_adjust ; tail call, bigint_adjust returns to our caller
+
+
diff --git a/dsa/bigint_io.c b/dsa/bigint_io.c
new file mode 100644
index 0000000..221c61c
--- /dev/null
+++ b/dsa/bigint_io.c
@@ -0,0 +1,135 @@
+/* bigint_io.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2010 Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "cli.h"
+#include "hexdigit_tab.h"
+#include "bigint.h"
+#include <avr/pgmspace.h>
+#include <stdlib.h>
+#include <string.h>
+
+void bigint_print_hex(const bigint_t* a){
+	uint16_t rest = a->length_B; /* bytes still to be handed to the hex dump */
+	if(rest==0){
+		cli_putc('0');
+		return;
+	}
+	if(a->info&BIGINT_NEG_MASK){
+		cli_putc('-');
+	}
+	if(a->wordv[rest-1]<0x10){
+		/* top byte has an empty high nibble: print its single digit by hand */
+		cli_putc(pgm_read_byte(hexdigit_tab_uc_P+a->wordv[rest-1]));
+		--rest;
+	}
+	cli_hexdump_rev(a->wordv, rest);
+}
+
+#define BLOCKSIZE 20
+
+static uint8_t char2nibble(char c){
+	uint8_t nibble = 0xff; /* 0xff is returned for anything that is not a hex digit */
+	if(c>='0' && c<='9'){
+		nibble = c-'0';
+	}else{
+		c |= 'A'^'a'; /* to lower case */
+		if(c>='a' && c<='f'){ nibble = c-'a'+10; }
+	}
+	return nibble;
+}
+
+static uint16_t read_byte(void){
+	/* the high byte of the result is a status: 0x00 full byte in the low byte, 0x05 '-' read, */
+	/* 0x01 first character is no hex digit, 0x02 only one digit read (kept in the low nibble) */
+	uint8_t hi, lo;
+	char ch = cli_getc_cecho();
+	if(ch=='-'){
+		return 0x0500;
+	}
+	hi = char2nibble(ch);
+	if(hi == 0xff){
+		return 0x0100;
+	}
+	lo = char2nibble(cli_getc_cecho());
+	if(lo == 0xff){
+		return 0x0200|hi;
+	}
+	return (hi<<4)|lo;
+}
+
+/*
+ * Read a hexadecimal number (optional leading '-', most significant digit
+ * first) from the CLI into a; input ends at the first non-hex character.
+ * The word vector is allocated here with realloc(), the caller has to free it.
+ * returns 0 on success, 1 if no hex digit was entered, 0xff if out of memory
+ * (in the last two cases a->wordv is NULL) */
+uint8_t bigint_read_hex_echo(bigint_t* a){
+	uint16_t allocated=0;
+	uint8_t  shift4=0;
+	uint16_t  t;
+	a->length_B = 0;
+	a->wordv = NULL;
+	a->info = 0;
+	for(;;){
+		if(allocated-a->length_B < 1){
+			/* buffer is full: grow it by one block */
+			uint8_t *p = realloc(a->wordv, allocated+=BLOCKSIZE);
+			if(p==NULL){
+				cli_putstr_P(PSTR("\r\nERROR: Out of memory!"));
+				free(a->wordv);
+				a->wordv = NULL; /* do not hand a dangling pointer back to the caller */
+				a->length_B = 0;
+				return 0xff;
+			}
+			a->wordv=p;
+		}
+		t = read_byte();
+		if(a->length_B==0){
+			if(t&0x0400){
+				/* got minus */
+				a->info |= BIGINT_NEG_MASK;
+				continue;
+			}else{
+				if(t==0x0100){
+					free(a->wordv);
+					a->wordv=NULL;
+					return 1;
+				}
+			}
+		}
+		if(t<=0x00ff){
+			a->wordv[a->length_B++] = (uint8_t)t;
+		}else{
+			if(t&0x0200){
+				shift4 = 1;
+				a->wordv[a->length_B++] = (uint8_t)((t&0x0f)<<4);
+			}
+			break;
+		}
+	}
+	/* the bytes arrived most significant first, wordv is LSB first (length is >= 1 here) */
+	bigint_changeendianess(a);
+	if(shift4){
+		/* odd number of digits: the last one sits in a high nibble, realign by 4 bits */
+		bigint_adjust(a);
+		bigint_shiftright(a, 4);
+	}
+	bigint_adjust(a);
+	return 0;
+}
diff --git a/dsa/bigint_io.h b/dsa/bigint_io.h
new file mode 100644
index 0000000..e47391d
--- /dev/null
+++ b/dsa/bigint_io.h
@@ -0,0 +1,28 @@
+/* bigint_io.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2010 Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef BIGINT_IO_H_
+#define BIGINT_IO_H_
+
+#include "bigint.h"
+
+void    bigint_print_hex(const bigint_t* a);
+uint8_t bigint_read_hex_echo(bigint_t* a);
+
+#endif /* BIGINT_IO_H_ */
diff --git a/dsa/dsa.h b/dsa/dsa.h
new file mode 100644
index 0000000..6a894f6
--- /dev/null
+++ b/dsa/dsa.h
@@ -0,0 +1,59 @@
+/* dsa.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2010 Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef DSA_H_
+#define DSA_H_
+
+#include <stdint.h>
+#include "hfal-basic.h"
+#include "bigint.h"
+
+typedef struct{ /* domain parameters shared by a group of keys */
+	bigint_t p; /* presumably the prime modulus, going by the usual DSA naming - confirm */
+	bigint_t q; /* presumably the prime order of the subgroup - confirm */
+	bigint_t g; /* presumably the generator of that subgroup - confirm */
+} dsa_domainparameters_t;
+
+typedef bigint_t dsa_pubkey_t; /* public key: a single bigint */
+typedef bigint_t dsa_privkey_t; /* private key: a single bigint */
+typedef struct{ /* a signature is the pair (r, s) */
+	bigint_t r;
+	bigint_t s;
+} dsa_signature_t;
+
+typedef struct{ /* everything the sign and verify functions take as key material */
+	dsa_privkey_t priv;
+	dsa_pubkey_t  pub;
+	dsa_domainparameters_t domain;
+} dsa_ctx_t;
+
+#define DSA_SIGNATURE_OK 1
+#define DSA_SIGNATURE_FAIL 0
+
+uint8_t dsa_sign_bigint(dsa_signature_t* s, const bigint_t* m,
+		                const dsa_ctx_t* ctx, const bigint_t* k);
+uint8_t dsa_sign_message(dsa_signature_t* s, const void* m, uint16_t m_len_b,
+		                const hfdesc_t* hash_desc, const dsa_ctx_t* ctx,
+		                const uint8_t(*rand_in)(void));
+uint8_t dsa_verify_bigint(const dsa_signature_t* s, const bigint_t* m,
+		                  const dsa_ctx_t* ctx);
+uint8_t dsa_verify_message(const dsa_signature_t* s, const void* m, uint16_t m_len_b,
+						  const hfdesc_t* hash_desc, const dsa_ctx_t* ctx);
+
+#endif /* DSA_H_ */
diff --git a/dsa/dsa_key_blob.c b/dsa/dsa_key_blob.c
new file mode 100644
index 0000000..fe31e53
--- /dev/null
+++ b/dsa/dsa_key_blob.c
@@ -0,0 +1,304 @@
+/* dsa_key_blob.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2010 Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <avr/pgmspace.h>
+#include "cli.h"
+#include "dsa.h"
+#include "bigint.h"
+
+
+#define DSA_KEY_BLOB_SIZE 1024
+
+#if DSA_KEY_BLOB_SIZE == 512
+
+#define PRIV_LEN_B (20)
+#define PUB_LEN_B  (8*8)
+#define P_LEN_B (8*8)
+#define Q_LEN_B (20)
+#define G_LEN_B (8*8)
+#define ALL_LEN_B (PRIV_LEN_B+PUB_LEN_B+P_LEN_B+Q_LEN_B+G_LEN_B)
+
+
+static uint8_t dsa_key_blob[] PROGMEM = {
+
+//	priv:
+	0xac, 0xe6, 0xef, 0x99, 0x08, 0xe8, 0x5f, 0xc8,
+	0xc7, 0x51, 0x97, 0x99, 0xf4, 0xd3, 0x00, 0x0f,
+	0x49, 0x72, 0xee, 0x46,
+//	pub:
+	0x18, 0x02, 0x53, 0x09, 0x61, 0xad, 0x0b, 0x9d,
+	0x44, 0x80, 0x8d, 0xb4, 0x52, 0xdc, 0xb5, 0xf2,
+	0x11, 0x20, 0x41, 0xc7, 0xd9, 0x7c, 0x7c, 0x6c,
+	0xa0, 0x9e, 0xca, 0x0d, 0xff, 0x28, 0x3c, 0x64,
+	0xfa, 0x92, 0xbb, 0x2c, 0xe9, 0x9b, 0x10, 0x94,
+	0xa5, 0x8d, 0x03, 0x50, 0xa5, 0x59, 0xd4, 0x3f,
+	0x57, 0x78, 0x8d, 0xcf, 0x0a, 0x99, 0x5d, 0xa3,
+	0x2e, 0x80, 0xfa, 0x99, 0x44, 0x73, 0x6d, 0x9a,
+//	P:
+	0x9f, 0x2d, 0xc8, 0x3c, 0x34, 0xf9, 0xa1, 0xbc,
+	0x6a, 0xa7, 0x49, 0x17, 0xd3, 0x82, 0xa2, 0xe2,
+	0x25, 0x31, 0xc4, 0x3d, 0x1a, 0x3f, 0x0f, 0x8a,
+	0x8b, 0x84, 0x3c, 0x6c, 0x9c, 0xdd, 0x42, 0xd9,
+	0x1a, 0x22, 0xf2, 0x55, 0x98, 0xee, 0x7d, 0x98,
+	0x9c, 0x9a, 0x91, 0x42, 0x5f, 0x4f, 0xa8, 0x32,
+	0xa0, 0xb0, 0x0f, 0x79, 0xe2, 0x76, 0x08, 0x78,
+	0x6e, 0xba, 0xf7, 0x74, 0x43, 0x4a, 0xf2, 0xdf,
+//	Q:
+	0xdb, 0x30, 0x69, 0xe6, 0x59, 0x77, 0xee, 0x38,
+	0xea, 0xf7, 0xcc, 0x18, 0x83, 0xcf, 0xb4, 0x21,
+	0xbc, 0xcf, 0x9a, 0x77,
+//	G:
+	0x73, 0x90, 0x27, 0x68, 0xe7, 0xe9, 0x3a, 0x45,
+	0x6f, 0x7f, 0x95, 0xca, 0x9b, 0xfd, 0x33, 0x75,
+	0x75, 0xff, 0x0f, 0xe7, 0x69, 0xfd, 0xb7, 0x07,
+	0x0f, 0x6c, 0x3a, 0xec, 0x47, 0x82, 0x78, 0xb2,
+	0xb3, 0x0b, 0x7f, 0x11, 0x9d, 0x34, 0x3e, 0xff,
+	0xb8, 0x09, 0x42, 0x82, 0x81, 0x21, 0xad, 0x2b,
+	0x51, 0x20, 0xec, 0x9e, 0xf8, 0x15, 0xaa, 0x3d,
+	0x5f, 0x29, 0x2d, 0xb5, 0xc5, 0x64, 0x53, 0x2d
+};
+
+#endif
+
+#if DSA_KEY_BLOB_SIZE == 1024
+
+#define PRIV_LEN_B (20)
+#define PUB_LEN_B  (16*8)
+#define P_LEN_B (16*8)
+#define Q_LEN_B (20)
+#define G_LEN_B (16*8)
+#define ALL_LEN_B (PRIV_LEN_B+PUB_LEN_B+P_LEN_B+Q_LEN_B+G_LEN_B)
+
+static uint8_t dsa_key_blob[] PROGMEM = {
+	// priv:
+	0x03, 0xad, 0x17, 0x81, 0x0f, 0x70, 0x7f, 0x89,
+	0xa2, 0x0a, 0x70, 0x1c, 0x3b, 0x24, 0xff, 0xd2,
+	0x39, 0x93, 0xd7, 0x8d,
+	// pub:
+	0x42, 0x1c, 0xb2, 0x03, 0xe5, 0xc6, 0x69, 0x81,
+	0x1e, 0x35, 0x85, 0x86, 0xd7, 0x94, 0xd2, 0x1f,
+	0x77, 0x05, 0x2f, 0xcc, 0xa5, 0x69, 0x46, 0x8f,
+	0xe1, 0x9f, 0x82, 0xf6, 0x24, 0x2c, 0x64, 0x1b,
+	0x29, 0x63, 0xd5, 0xb3, 0x32, 0xdc, 0xd9, 0x5a,
+	0x4e, 0x92, 0xd9, 0x69, 0xcc, 0x51, 0x81, 0xc2,
+	0xa3, 0x7e, 0xd7, 0xf8, 0x72, 0x1f, 0x8d, 0xd4,
+	0xe8, 0x59, 0xb0, 0xaa, 0xdd, 0xa0, 0x73, 0xe6,
+	0xc4, 0x50, 0x7f, 0x4c, 0x7c, 0xde, 0x35, 0x27,
+	0x49, 0x36, 0x23, 0x36, 0xe4, 0x90, 0x54, 0x24,
+	0x45, 0x99, 0xa3, 0x10, 0xc3, 0x59, 0x2f, 0x61,
+	0xff, 0x75, 0xf0, 0x51, 0x1d, 0xa0, 0x8f, 0x69,
+	0xc1, 0x1e, 0x3e, 0x65, 0xaf, 0x82, 0x9e, 0xa9,
+	0x91, 0x17, 0x04, 0x7c, 0x56, 0xd1, 0x68, 0x8a,
+	0x4b, 0xc9, 0x48, 0x92, 0xaf, 0x72, 0xca, 0xbf,
+	0xf2, 0x2b, 0x9e, 0x42, 0x92, 0x46, 0x19, 0x64,
+	// P:
+	0x97, 0x40, 0xda, 0x05, 0x19, 0x77, 0xb7, 0x17,
+	0x4b, 0x7d, 0xc0, 0x5b, 0x81, 0xdd, 0xcc, 0x0b,
+	0x86, 0xe0, 0x3c, 0x4d, 0xab, 0x3d, 0x43, 0xe4,
+	0xe3, 0x5f, 0xf3, 0x56, 0xcd, 0x5c, 0xf2, 0x85,
+	0x00, 0x45, 0x3c, 0xba, 0xf0, 0x56, 0xb3, 0x8b,
+	0x29, 0xc3, 0x55, 0x7b, 0xb6, 0xfb, 0x68, 0xca,
+	0x35, 0xe5, 0x0e, 0x46, 0xd6, 0xff, 0xc9, 0xbd,
+	0x08, 0x71, 0x65, 0x3b, 0xf7, 0xab, 0xb1, 0x96,
+	0x9b, 0x70, 0xdc, 0x8e, 0xf3, 0x02, 0xa4, 0x0f,
+	0xc6, 0xcd, 0x70, 0xe5, 0xeb, 0xd3, 0x07, 0xb5,
+	0x7d, 0x40, 0x8c, 0xfd, 0x33, 0x45, 0x8f, 0x9c,
+	0x7f, 0xa1, 0x69, 0xcb, 0xe6, 0x73, 0x1d, 0x37,
+	0xc7, 0x5f, 0x18, 0x57, 0x38, 0x96, 0x46, 0x24,
+	0xad, 0xa6, 0x59, 0x3d, 0x7a, 0x74, 0x6e, 0x88,
+	0x57, 0x18, 0x86, 0x7b, 0x07, 0x79, 0x52, 0xdd,
+	0xbc, 0xa7, 0x40, 0x88, 0xa6, 0x66, 0x50, 0x49,
+	// Q:
+	0xb4, 0x6d, 0x89, 0x7a, 0x72, 0xdb, 0x8c, 0x92,
+	0x60, 0xf9, 0x95, 0x47, 0x81, 0x57, 0xe8, 0x6b,
+	0xb4, 0xf9, 0xde, 0x51,
+	// G:
+	0x76, 0x1e, 0x1b, 0xd2, 0x5c, 0x5f, 0x92, 0x96,
+	0x42, 0x18, 0xba, 0x8d, 0xe1, 0x24, 0x12, 0x24,
+	0x6f, 0x3f, 0xb8, 0x05, 0xf9, 0x72, 0x74, 0xfa,
+	0xef, 0xc3, 0x1e, 0xd5, 0xa5, 0x93, 0x28, 0x07,
+	0xc0, 0x7b, 0x47, 0xef, 0x15, 0x13, 0x68, 0x18,
+	0xfb, 0x0d, 0x69, 0xea, 0xcc, 0x5a, 0x43, 0x08,
+	0x75, 0xec, 0xe4, 0x5e, 0x8e, 0xa9, 0x61, 0xe1,
+	0xcd, 0x27, 0x8c, 0x55, 0xc9, 0x42, 0x11, 0x11,
+	0x7f, 0x20, 0x4d, 0x70, 0x34, 0x49, 0x00, 0x8c,
+	0x79, 0x95, 0x79, 0x0b, 0xfd, 0x8d, 0xda, 0xe3,
+	0x0c, 0x27, 0x7a, 0x35, 0xe5, 0x35, 0xc9, 0x73,
+	0x31, 0xaa, 0xed, 0xbe, 0x81, 0x89, 0x67, 0x06,
+	0xf6, 0x97, 0x0d, 0x44, 0x07, 0xac, 0x09, 0xac,
+	0x44, 0xf3, 0xc6, 0x8b, 0x30, 0x4c, 0x76, 0x0b,
+	0x55, 0x74, 0x10, 0x06, 0xda, 0xd4, 0x3d, 0x96,
+	0x7e, 0xc3, 0xf8, 0x22, 0x9c, 0x71, 0x1d, 0x9c
+};
+#endif
+
+#if DSA_KEY_BLOB_SIZE == 2048 /* same selector as the 512 and 1024 bit blobs */
+
+#define PRIV_LEN_B (20)
+#define PUB_LEN_B  (32*8)
+#define P_LEN_B (32*8)
+#define Q_LEN_B (20)
+#define G_LEN_B (32*8)
+#define ALL_LEN_B (PRIV_LEN_B+PUB_LEN_B+P_LEN_B+Q_LEN_B+G_LEN_B)
+
+static uint8_t dsa_key_blob[] PROGMEM = {
+/* priv: */
+	0x1d, 0xe4, 0x81, 0x02, 0x52, 0x6b, 0x2b, 0x0e,
+	0x98, 0x08, 0xc8, 0xb9, 0x81, 0x40, 0xd1, 0x1e,
+	0x86, 0x69, 0x0d, 0xa9,
+/* pub: */
+	0x70, 0xc4, 0x44, 0x28, 0x91, 0x77, 0x2b, 0x09,
+	0xde, 0xe8, 0x66, 0x0b, 0xa5, 0xc8, 0x05, 0xb4,
+	0x0a, 0x2d, 0x4f, 0x45, 0x8e, 0x0c, 0x8c, 0x38,
+	0x61, 0xf3, 0x77, 0x05, 0x64, 0xf7, 0xe6, 0xe9,
+	0x0b, 0x1f, 0x9b, 0x9f, 0x1f, 0xa1, 0x7e, 0x8f,
+	0x5b, 0x14, 0x70, 0x1d, 0x4d, 0x1c, 0xdc, 0x9d,
+	0xe0, 0x0a, 0xc4, 0x7b, 0x70, 0xfd, 0xef, 0xe6,
+	0x20, 0x2d, 0x17, 0x13, 0xd7, 0x1c, 0xc0, 0xbb,
+	0x5b, 0xce, 0x84, 0x6a, 0xa5, 0x4e, 0x27, 0x1c,
+	0x9e, 0xaa, 0xb2, 0xdc, 0xc1, 0xec, 0x74, 0x93,
+	0x67, 0xdb, 0xe1, 0xaa, 0x5a, 0x86, 0x1d, 0x8a,
+	0xa9, 0x28, 0x7e, 0xfc, 0xd5, 0x72, 0x94, 0x6c,
+	0x1d, 0x71, 0x85, 0x92, 0xa7, 0x6e, 0x84, 0x4f,
+	0x27, 0xf3, 0x7e, 0x04, 0x7d, 0xf2, 0x7c, 0x07,
+	0xa0, 0x7d, 0x02, 0x7c, 0x30, 0x70, 0xb5, 0x87,
+	0xc3, 0xf0, 0xc2, 0x0c, 0xdb, 0x26, 0x72, 0x33,
+	0x20, 0xca, 0xf0, 0x8b, 0x05, 0x20, 0x70, 0x98,
+	0x65, 0x03, 0xd7, 0xd4, 0x47, 0xf0, 0xb2, 0x6e,
+	0x2a, 0xbe, 0xcc, 0x83, 0x0d, 0xab, 0x60, 0x61,
+	0x26, 0x7b, 0xaf, 0xae, 0x18, 0x9e, 0x20, 0xeb,
+	0x12, 0x31, 0x18, 0x2e, 0x73, 0xca, 0xd4, 0x5e,
+	0x66, 0x74, 0x61, 0x07, 0x9b, 0x20, 0x68, 0x12,
+	0x88, 0xb1, 0xc5, 0x0f, 0x85, 0x9b, 0x45, 0x40,
+	0x7d, 0x76, 0x62, 0x73, 0xba, 0x41, 0x7b, 0xaf,
+	0xc7, 0xb9, 0x19, 0x7a, 0xd0, 0x55, 0xe6, 0xfd,
+	0xb5, 0xb9, 0xc4, 0x1b, 0x22, 0x47, 0x8f, 0x7b,
+	0xd7, 0x75, 0xe8, 0x7f, 0x01, 0xa2, 0x9b, 0x79,
+	0xde, 0xea, 0x55, 0x3c, 0x61, 0x4d, 0xcd, 0xce,
+	0x89, 0x8c, 0x76, 0x62, 0x12, 0x4d, 0xd4, 0x47,
+	0x03, 0x0e, 0xe8, 0xe2, 0xb8, 0xda, 0xca, 0x20,
+	0xb3, 0x64, 0xb6, 0x07, 0x06, 0x1b, 0xcb, 0x91,
+	0x51, 0x2c, 0x2e, 0xfa, 0xe1, 0xee, 0x1e, 0x78,
+/* P: */
+	0x8d, 0x09, 0x00, 0x56, 0x63, 0x39, 0x42, 0x8d,
+	0x15, 0xd5, 0x1d, 0x86, 0x10, 0xde, 0xc7, 0xf4,
+	0x07, 0xe5, 0x81, 0xbe, 0x67, 0xee, 0xc5, 0x33,
+	0xd3, 0x41, 0x1b, 0xba, 0xd8, 0xa6, 0x61, 0x49,
+	0x2d, 0x66, 0xcf, 0x60, 0x9f, 0x52, 0x60, 0x6e,
+	0x0a, 0x16, 0xdc, 0x0b, 0x24, 0x1b, 0x62, 0x32,
+	0xc4, 0xab, 0x52, 0x17, 0xbf, 0xc5, 0xa2, 0x2a,
+	0xa4, 0x5e, 0x8c, 0xff, 0x97, 0x45, 0x51, 0xd9,
+	0xc3, 0xf2, 0x32, 0x4a, 0xb9, 0x08, 0xc1, 0x6a,
+	0x7b, 0x82, 0x93, 0x2a, 0x60, 0x29, 0x55, 0x1a,
+	0x36, 0x1f, 0x05, 0x4f, 0xf1, 0x43, 0x12, 0xb2,
+	0x73, 0x4e, 0xf6, 0x37, 0x65, 0x3d, 0x0b, 0x70,
+	0x08, 0xc7, 0x34, 0x0b, 0x4d, 0xc9, 0x08, 0x70,
+	0xaf, 0x4b, 0x95, 0x0b, 0x7c, 0x9f, 0xcf, 0xfc,
+	0x57, 0x94, 0x47, 0x6d, 0xd1, 0xaf, 0xc6, 0x52,
+	0xd9, 0xe2, 0x05, 0xce, 0xb2, 0xb8, 0x91, 0x6f,
+	0x5a, 0x77, 0x6b, 0x1b, 0xff, 0x97, 0x8c, 0x5e,
+	0x33, 0xfc, 0x80, 0x29, 0xdf, 0x83, 0x91, 0x0c,
+	0x28, 0x1b, 0x00, 0xb4, 0xc9, 0x3e, 0xb7, 0x67,
+	0xca, 0xab, 0x63, 0xd4, 0x48, 0xfe, 0xd2, 0xfd,
+	0x65, 0x57, 0x33, 0x25, 0xbd, 0xf1, 0xa5, 0x51,
+	0x51, 0x50, 0xf6, 0xcf, 0xfa, 0x0d, 0x67, 0x4e,
+	0x90, 0x08, 0x87, 0x34, 0xf6, 0x33, 0xc9, 0x58,
+	0xb1, 0x87, 0xf8, 0x5d, 0x73, 0x80, 0xde, 0x51,
+	0xcd, 0x17, 0x70, 0x3e, 0xa4, 0xa8, 0x4f, 0xda,
+	0xcd, 0xa2, 0x66, 0x0f, 0x95, 0xa7, 0xc6, 0xf7,
+	0x12, 0x2e, 0x27, 0x94, 0xa9, 0x26, 0x1b, 0x25,
+	0x16, 0x18, 0x99, 0x3b, 0x32, 0xaf, 0x71, 0x13,
+	0x35, 0xda, 0xb8, 0x71, 0x5b, 0x50, 0x7c, 0x7a,
+	0x9d, 0xcc, 0x0d, 0x95, 0xef, 0x6f, 0x64, 0x3c,
+	0x28, 0x4b, 0x15, 0xe9, 0xd4, 0xad, 0xcc, 0x56,
+	0xcb, 0x24, 0xf9, 0x61, 0x79, 0xd7, 0x56, 0xd3,
+/* Q: */
+	0xf7, 0xdf, 0x85, 0xf5, 0x63, 0x36, 0x63, 0x71,
+	0x74, 0x34, 0x98, 0x19, 0xff, 0x79, 0xf2, 0xe2,
+	0x15, 0x75, 0x3c, 0x95,
+/* G: */
+	0x0c, 0xf6, 0x8b, 0x1a, 0xbe, 0x66, 0x84, 0x98,
+	0xae, 0xcb, 0xb0, 0xd9, 0x75, 0x75, 0x32, 0x4b,
+	0xa3, 0xf2, 0x28, 0xa6, 0x6d, 0x13, 0xf2, 0xf3,
+	0xfd, 0x93, 0x91, 0xb1, 0x21, 0x1e, 0xcc, 0x08,
+	0x87, 0xce, 0x74, 0xb1, 0xd0, 0x19, 0x50, 0xff,
+	0xac, 0xef, 0x9f, 0x82, 0xda, 0x75, 0xda, 0x6d,
+	0x89, 0xf3, 0x0b, 0xdc, 0x27, 0x98, 0x85, 0x01,
+	0x68, 0xb7, 0xbd, 0x98, 0x83, 0xb1, 0xb0, 0x65,
+	0x31, 0x71, 0x43, 0x05, 0xa7, 0x76, 0x63, 0xe4,
+	0x7d, 0x61, 0x53, 0xc7, 0x3e, 0x3b, 0x82, 0x28,
+	0x65, 0x07, 0xfe, 0x9e, 0xa3, 0x35, 0x2c, 0xdc,
+	0x9e, 0x05, 0x7c, 0x9a, 0x69, 0xc6, 0x9f, 0xc2,
+	0x3f, 0x94, 0x6b, 0xad, 0xa4, 0x2b, 0x5d, 0x48,
+	0x9e, 0x2c, 0xad, 0xd2, 0x89, 0x49, 0xdc, 0xdb,
+	0x55, 0x49, 0x56, 0xaf, 0xe9, 0x0e, 0x37, 0xe7,
+	0x1f, 0x42, 0x6a, 0x7c, 0xac, 0xe8, 0x1b, 0xbb,
+	0x21, 0x82, 0x14, 0x72, 0x17, 0x64, 0xf0, 0x3c,
+	0x3d, 0xc1, 0x43, 0x27, 0x27, 0x9f, 0xe9, 0x21,
+	0xf2, 0x2f, 0xf7, 0xfa, 0x3c, 0xed, 0xbf, 0xab,
+	0xab, 0xb7, 0x3c, 0x6d, 0x1e, 0x85, 0x9f, 0x77,
+	0x4f, 0x69, 0x09, 0x4e, 0xed, 0x13, 0x84, 0x40,
+	0x1a, 0xc6, 0xa1, 0xd9, 0x68, 0xb6, 0x18, 0x32,
+	0x79, 0x25, 0x9e, 0xa6, 0x41, 0x30, 0xd1, 0xc2,
+	0x7a, 0x8f, 0x0d, 0x46, 0xee, 0xda, 0xb0, 0xbf,
+	0x64, 0x42, 0x59, 0x7e, 0x22, 0x88, 0xd6, 0x52,
+	0xec, 0xed, 0xc4, 0x13, 0xb1, 0x7f, 0x5c, 0x77,
+	0x4c, 0xfd, 0x22, 0x90, 0xd3, 0xe3, 0xa9, 0xc1,
+	0x0f, 0x25, 0xac, 0xd5, 0x04, 0x84, 0xe6, 0xa8,
+	0xc7, 0xb4, 0x4f, 0xa2, 0x67, 0xae, 0xaa, 0x92,
+	0xe9, 0x0a, 0xed, 0x45, 0x5b, 0xf0, 0x1b, 0x69,
+	0xec, 0xaf, 0x7d, 0xf2, 0x71, 0x25, 0xbf, 0x92,
+	0xd4, 0xd0, 0x5b, 0xde, 0x5a, 0x2d, 0x18, 0x8e
+};
+#endif
+
+void load_dsa_key_blob(dsa_ctx_t* ctx){ /* fill ctx from the built-in blob; ctx->priv.wordv must be NULL or heap memory on entry */
+	free(ctx->priv.wordv); /* free(NULL) is a no-op, no guard needed */
+	ctx->priv.wordv = malloc(ALL_LEN_B);
+	if(ctx->priv.wordv==NULL){
+		cli_putstr_P(PSTR("\r\nERROR: OUT OF MEMORY!!!"));
+		return;
+	}
+	memcpy_P(ctx->priv.wordv, dsa_key_blob, ALL_LEN_B); /* one heap block holds priv|pub|p|q|g back to back */
+	ctx->priv.length_B=PRIV_LEN_B;
+	ctx->pub.wordv = ctx->priv.wordv+PRIV_LEN_B;
+	ctx->pub.length_B = PUB_LEN_B;
+	ctx->domain.p.wordv = ctx->priv.wordv+PRIV_LEN_B+PUB_LEN_B;
+	ctx->domain.p.length_B = P_LEN_B;
+	ctx->domain.q.wordv = ctx->priv.wordv+PRIV_LEN_B+PUB_LEN_B+P_LEN_B;
+	ctx->domain.q.length_B = Q_LEN_B;
+	ctx->domain.g.wordv = ctx->priv.wordv+PRIV_LEN_B+PUB_LEN_B+P_LEN_B+Q_LEN_B;
+	ctx->domain.g.length_B = G_LEN_B;
+	/* none of the values is negative: start from clean flag bytes instead of whatever ctx held */
+	ctx->priv.info = ctx->pub.info = 0;
+	ctx->domain.p.info = ctx->domain.q.info = ctx->domain.g.info = 0;
+	/* the blob stores every number most significant byte first, wordv wants the LSB first */
+	bigint_changeendianess(&(ctx->priv));
+	bigint_changeendianess(&(ctx->pub));
+	bigint_changeendianess(&(ctx->domain.p));
+	bigint_changeendianess(&(ctx->domain.q));
+	bigint_changeendianess(&(ctx->domain.g));
+	bigint_adjust(&(ctx->priv));
+	bigint_adjust(&(ctx->pub));
+	bigint_adjust(&(ctx->domain.p));
+	bigint_adjust(&(ctx->domain.q));
+	bigint_adjust(&(ctx->domain.g));
+}
diff --git a/dsa/dsa_key_blob.h b/dsa/dsa_key_blob.h
new file mode 100644
index 0000000..8099a1e
--- /dev/null
+++ b/dsa/dsa_key_blob.h
@@ -0,0 +1,25 @@
+/* dsa_key_blob.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2010 Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef DSA_KEY_BLOB_H_
+#define DSA_KEY_BLOB_H_
+
+void load_dsa_key_blob(dsa_ctx_t* ctx);
+
+#endif /* DSA_KEY_BLOB_H_ */
diff --git a/dsa/dsa_sign.c b/dsa/dsa_sign.c
new file mode 100644
index 0000000..e9def2b
--- /dev/null
+++ b/dsa/dsa_sign.c
@@ -0,0 +1,73 @@
+/* dsa_sign.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2010 Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <stdint.h>
+#include "cli.h"
+#include "bigint.h"
+#include "dsa.h"
+#include "hashfunction_descriptor.h"
+#include "hfal-basic.h"
+
+uint8_t dsa_sign_bigint(dsa_signature_t* s, const bigint_t* m,
+		                const dsa_ctx_t* ctx, const bigint_t* k){
+	/* Signs the integer m with nonce k: r = (g^k mod p) mod q and
+	 * s = k^-1 * (m + priv*r) mod q, as the bigint_* call names indicate.
+	 * Returns 1 if r or s ended up with length 0 (caller retries), else 0. */
+	uint8_t acc_b[ctx->domain.p.length_B+5], kinv_b[ctx->domain.q.length_B+5];
+	bigint_t acc, kinv;
+	acc.wordv = acc_b;
+	kinv.wordv = kinv_b;
+
+	bigint_expmod_u(&acc, &(ctx->domain.g), k, &(ctx->domain.p));
+	bigint_reduce(&acc, &(ctx->domain.q));
+	bigint_copy(&(s->r), &acc);                 /* r */
+	bigint_mul_u(&acc, &acc, &(ctx->priv));     /* priv*r */
+	bigint_add_u(&acc, &acc, m);                /* m + priv*r */
+	bigint_inverse(&kinv, k, &(ctx->domain.q)); /* k^-1 mod q */
+	bigint_mul_u(&acc, &acc, &kinv);
+	bigint_reduce(&acc, &(ctx->domain.q));
+	bigint_copy(&(s->s), &acc);                 /* s */
+
+	return (s->s.length_B==0 || s->r.length_B==0) ? 1 : 0;
+}
+
+uint8_t dsa_sign_message(dsa_signature_t* s, const void* m, uint16_t m_len_b,
+		                const hfdesc_t* hash_desc, const dsa_ctx_t* ctx,
+		                const uint8_t(*rand_in)(void)){
+	/* Hashes m, signs z = leftmost min(len(q), len(hash)) bytes; retries until 0 < k < q. Returns 0. */
+	bigint_t z, k;
+	uint8_t i, n_B = ctx->domain.q.length_B;
+	uint16_t hash_B = (hfal_hash_getHashsize(hash_desc)+7)/8; /* digest size in bytes */
+	uint8_t hash_value[hash_B], k_b[n_B];
+	hfal_hash_mem(hash_desc, hash_value, m, m_len_b);
+	z.wordv = hash_value;
+	z.length_B = (hash_B<n_B)?hash_B:n_B; /* never use bytes beyond the digest */
+	bigint_changeendianess(&z);
+	bigint_adjust(&z); /* same normalisation as dsa_verify_message() applies */
+	k.wordv = k_b;
+	do{
+		for(i=0; i<n_B; ++i){
+			k_b[i] = rand_in();
+		}
+		k.length_B = n_B;
+		bigint_adjust(&k);
+	}while(k.length_B==0 || bigint_cmp_u(&k, &(ctx->domain.q))>=0 || dsa_sign_bigint(s, &z, ctx, &k));
+	cli_putstr_P(PSTR("\r\nsignature computed"));
+	return 0;
+}
diff --git a/dsa/dsa_verify.c b/dsa/dsa_verify.c
new file mode 100644
index 0000000..1faa68e
--- /dev/null
+++ b/dsa/dsa_verify.c
@@ -0,0 +1,75 @@
+/* dsa_verify.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2010 Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <stdint.h>
+#include "bigint.h"
+#include "dsa.h"
+#include "hfal-basic.h"
+
+uint8_t dsa_verify_bigint(const dsa_signature_t* s, const bigint_t* m,
+		                  const dsa_ctx_t* ctx){
+	/* Rejects r or s of length 0 or not below q; otherwise computes, as the
+	 * bigint_* call names indicate, w = s^-1 mod q, u1 = m*w mod q,
+	 * u2 = r*w mod q and v = ((g^u1 * pub^u2) mod p) mod q, and returns
+	 * DSA_SIGNATURE_OK iff v compares equal to r. */
+	const bigint_t *p = &(ctx->domain.p), *q = &(ctx->domain.q);
+	if(s->r.length_B==0 || s->s.length_B==0
+	   || bigint_cmp_u(&(s->r), q)>=0 || bigint_cmp_u(&(s->s), q)>=0){
+		return DSA_SIGNATURE_FAIL;
+	}
+	uint8_t inv_b[q->length_B], u1_b[q->length_B*2], u2_b[q->length_B*2];
+	uint8_t v_b[p->length_B*2], y_b[p->length_B];
+	bigint_t inv, u1, u2, v, y;
+	inv.wordv = inv_b;
+	u1.wordv = u1_b;
+	u2.wordv = u2_b;
+	v.wordv = v_b;
+	y.wordv = y_b;
+	bigint_inverse(&inv, &(s->s), q);
+	bigint_mul_u(&u1, &inv, m);
+	bigint_reduce(&u1, q);
+	bigint_mul_u(&u2, &inv, &(s->r));
+	bigint_reduce(&u2, q);
+	bigint_expmod_u(&v, &(ctx->domain.g), &u1, p);
+	bigint_expmod_u(&y, &(ctx->pub), &u2, p);
+	bigint_mul_u(&v, &v, &y);
+	bigint_reduce(&v, p);
+	bigint_reduce(&v, q);
+	return (bigint_cmp_u(&v, &(s->r))==0) ? DSA_SIGNATURE_OK : DSA_SIGNATURE_FAIL;
+}
+
+uint8_t dsa_verify_message(const dsa_signature_t* s, const void* m, uint16_t m_len_b,
+						  const hfdesc_t* hash_desc, const dsa_ctx_t* ctx){
+	/* Hashes the m_len_b-bit message m and verifies s against
+	 * z = leftmost min(len(q), len(hash)) bytes of the digest.
+	 * Returns whatever dsa_verify_bigint() returns. */
+	bigint_t z;
+	uint8_t n_B = ctx->domain.q.length_B;
+	uint16_t hash_B = (hfal_hash_getHashsize(hash_desc)+7)/8; /* digest size in bytes */
+	uint8_t hash_value[hash_B];
+	hfal_hash_mem(hash_desc, hash_value, m, m_len_b);
+	z.wordv=hash_value;
+	z.length_B=(hash_B<n_B)?hash_B:n_B; /* clamp: z must not extend past hash_value */
+	bigint_changeendianess(&z); bigint_adjust(&z);
+	return dsa_verify_bigint(s, &z, ctx); }
+
+
+
+
+
diff --git a/dsa/memxor.S b/dsa/memxor.S
new file mode 100644
index 0000000..a32058b
--- /dev/null
+++ b/dsa/memxor.S
@@ -0,0 +1,66 @@
+/* memxor.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+ * File:        memxor.S
+ * Author:      Daniel Otte
+ * Date:        2008-08-07
+ * License:     GPLv3 or later
+ * Description: memxor, XORing one block into another
+ *
+ */
+
+/*
+ * void memxor(void* dest, const void* src, uint16_t n);
+ *
+ * XORs the n bytes at src into the n bytes at dest (dest[i] ^= src[i]).
+ * param dest is passed in r24:r25
+ * param src  is passed in r22:r23
+ * param n    is passed in r20:r21
+ */
+.global memxor
+memxor:
+	movw r30, r24	/* Z = dest */
+	movw r26, r22	/* X = src */
+	movw r24, r20	/* r25:r24 = n, the remaining byte count */
+	adiw r24, 0	/* adding 0 only updates the flags (zero flag if n == 0) */
+	breq 2f		/* nothing to do for n == 0 */
+1:
+	ld r20, X+	/* next source byte */
+	ld r21, Z	/* current destination byte */
+	eor r20, r21
+	st Z+, r20	/* store the XOR and advance dest */
+	sbiw r24, 1
+	brne 1b		/* loop until the count reaches zero */
+2:
+	ret
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/dsa/memxor.h b/dsa/memxor.h
new file mode 100644
index 0000000..a62a616
--- /dev/null
+++ b/dsa/memxor.h
@@ -0,0 +1,7 @@
+#ifndef MEMXOR_H_
+#define MEMXOR_H_
+#include <stdint.h>
+
+void memxor(void* dest, const void* src, uint16_t n); /* dest[i] ^= src[i] for all i < n */
+
+#endif
diff --git a/dsa/noekeon.h b/dsa/noekeon.h
new file mode 100644
index 0000000..9c046f3
--- /dev/null
+++ b/dsa/noekeon.h
@@ -0,0 +1,85 @@
+/* noekeon.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef NOEKEON_H_
+#define NOEKEON_H_
+
+/**
+ * \file    noekeon.h
+ * \author  Daniel Otte
+ * \email   daniel.otte@rub.de
+ * \date    2008-04-11
+ * \license GPLv3 or later
+ * \brief Implementation of the Noekeon block cipher
+ * \ingroup Noekeon
+ * This is an implementation of the Noekeon block cipher.
+ * For more details on Noekeon see http://gro.noekeon.org/
+ */
+
+#include <stdint.h>
+
+/** \typedef noekeon_ctx_t
+ * \brief holds key data for indirect mode
+ *
+ * A variable of this type may hold the key data for the indirect mode.
+ * For direct mode simply pass the key directly to the encryption or
+ * decryption function.
+ */
+typedef uint8_t noekeon_ctx_t[16];
+
+/** \fn void noekeon_enc(void* buffer, const void* key)
+ * \brief noekeon encryption function
+ *
+ * This function encrypts a block (128 bit = 16 byte) in place with the noekeon
+ * algorithm. Due to the two modes of noekeon (direct mode and indirect mode)
+ * the second parameter either points directly to the key (direct mode) or to a
+ * context generated by the noekeon_init() function (indirect mode).
+ * \param buffer pointer to the 128 bit (16 byte) block to encrypt
+ * \param key    pointer to either the key (128 bit = 16 byte; direct mode) or
+ * to the context (indirect mode)
+ */
+void noekeon_enc(void* buffer, const void* key);
+
+/** \fn void noekeon_dec(void* buffer, const void* key)
+ * \brief noekeon decryption function
+ *
+ * This function decrypts, in place, a block (128 bit = 16 byte) encrypted with
+ * the noekeon encryption algorithm. Due to the two modes of noekeon (direct
+ * mode and indirect mode) the second parameter either points directly to the
+ * key (direct mode) or to a context generated by the noekeon_init() function
+ * (indirect mode).
+ * \param buffer pointer to the 128 bit (16 byte) block to decrypt
+ * \param key    pointer to either the key (128 bit = 16 byte; direct mode) or
+ * to the context (indirect mode)
+ */
+void noekeon_dec(void* buffer, const void* key);
+
+
+/** \fn void noekeon_init(const void* key, noekeon_ctx_t* ctx)
+ * \brief noekeon context generation function for indirect mode
+ *
+ * This function generates a context from the supplied key for using
+ * noekeon in indirect mode. For using noekeon in direct mode supply the key
+ * directly to the noekeon_enc() and noekeon_dec() functions.
+ * \param key pointer to the key (128 bit = 16 byte)
+ * \param ctx pointer to the context to fill with key material
+ * (16 byte; afterwards usable as the key argument in indirect mode)
+ */
+void noekeon_init(const void* key, noekeon_ctx_t* ctx);
+
+#endif /*NOEKEON_H_*/
diff --git a/dsa/noekeon_asm.S b/dsa/noekeon_asm.S
new file mode 100644
index 0000000..b0a2a16
--- /dev/null
+++ b/dsa/noekeon_asm.S
@@ -0,0 +1,641 @@
+/* noekeon_asm.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/* 
+ * noekeon assembler implementation for avr
+ * author: Daniel Otte
+ * email:  daniel.otte@rub.de
+ * license: GPLv3
+ */
+
+#include <avr/io.h>
+
+.macro push_all /* saves r2-r17 and Y (r28:r29) on the stack */
+	push r2
+	push r3
+	push r4
+	push r5
+	push r6
+	push r7
+	push r8
+	push r9
+	push r10
+	push r11
+	push r12
+	push r13
+	push r14
+	push r15
+	push r16
+	push r17
+	push r28
+	push r29
+.endm
+
+.macro pop_all /* restores what push_all saved, in reverse order */
+	pop r29
+	pop r28
+	pop r17
+	pop r16
+	pop r15
+	pop r14
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	pop r9
+	pop r8
+	pop r7
+	pop r6
+	pop r5
+	pop r4
+	pop r3
+	pop r2
+	clr r1 /* r1 is used as scratch in this file; presumably restores the avr-gcc zero register */
+.endm
+
+push_all_func: /* callable form of push_all: the saved registers stay on the stack of the caller */
+	pop r31 /* take our own return address off the stack into Z ... */
+	pop r30
+	push_all
+	ijmp /* ... and return through Z instead of ret */
+
+pop_all_func: /* callable form of pop_all, must be paired with push_all_func */
+    pop r31 /* return address into Z so the saved registers are on top */
+    pop r30
+    pop_all
+    ijmp
+
+.macro xchg a b /* swaps two registers with three XORs, no scratch register */
+	eor \a, \b
+	eor \b, \a
+	eor \a, \b
+.endm
+
+.macro op32 op a b /* applies \op bytewise to the symbols a_0..a_3 and b_0..b_3 */
+	\op \a\()_0, \b\()_0
+	\op \a\()_1, \b\()_1
+	\op \a\()_2, \b\()_2
+	\op \a\()_3, \b\()_3
+.endm
+
+
+.macro op32_4t op a b c d w x y z /* \op on the four explicit pairs (a,w) (b,x) (c,y) (d,z) */
+	\op \a, \w
+	\op \b, \x
+	\op \c, \y
+	\op \d, \z
+.endm
+
+
+.macro op32_prefix op p q a b c d w x y z /* like op32_4t, operands are prefix p + a..d and prefix q + w..z */
+	\op \p\()\a, \q\()\w
+	\op \p\()\b, \q\()\x
+	\op \p\()\c, \q\()\y
+	\op \p\()\d, \q\()\z
+.endm
+
+; === bigendian_rotl32 ===
+; this function rotates a 32bit bigendian word n bits to the left
+;  param1: the 32-bit value
+;	given in r25,r24,r23,r22 (r22 is most significant)
+;  param2: the 8-bit parameter giving the number of bits to rotate
+;	given in r20 (the loop decrements before testing, so 0 is not a valid count)
+;  return: the rotated 32-bit word
+;   given in r25,r24,r23,r22   
+
+bigendian_rotl32:
+	/* copy high bit of r22 to carry */
+	mov r1, r22 /* r1 is scratch here and is cleared again on exit */
+2:
+	rol r1 /* carry = current top bit of the word */
+
+	rol r25 /* shift the carry in at the least significant byte */
+	rol r24
+	rol r23
+	rol r22
+	
+	dec r20 /* dec leaves the carry flag untouched */
+	brne 2b
+bigendian_rotl32_exit:
+	clr r1
+	ret
+	
+	
+/******************************************************************************/
+
+; === bigendian_rotr32 ===
+; this function rotates a 32bit bigendian word n bits to the right
+;  param1: the 32-bit value
+;	given in r25,r24,r23,r22 (r22 is most significant)
+;  param2: the 8-bit parameter giving the number of bits to rotate
+;	given in r20 (the loop decrements before testing, so 0 is not a valid count)
+;  return: the rotated 32-bit word
+;   given in r25,r24,r23,r22   
+
+bigendian_rotr32:
+	/* copy low bit of r25 to carry */
+
+	mov r1, r25 /* r1 is scratch here and is cleared again on exit */
+2:
+	ror r1 /* carry = current bottom bit of the word */
+	
+	ror r22 /* shift the carry in at the most significant byte */
+	ror r23
+	ror r24
+	ror r25
+	dec r20 /* dec leaves the carry flag untouched */
+	brne 2b
+bigendian_rotr32_exit:
+	clr r1
+	ret
+
+/******************************************************************************/
+/* C reference for the theta routine below:
+void theta(uint32_t* k, uint32_t* a){
+	uint32_t temp;
+	temp = a[0] ^ a[2]; temp ^= ROTR32(temp, 8) ^ ROTL32(temp, 8);
+	a[1] ^= temp;
+	a[3] ^= temp;
+	
+	a[0] ^= k[0];
+	a[1] ^= k[1];
+	a[2] ^= k[2];
+	a[3] ^= k[3];
+
+	temp = a[1] ^ a[3]; temp ^= ROTR32(temp, 8) ^ ROTL32(temp, 8);
+	a[0] ^= temp;
+	a[2] ^= temp;	
+}
+*/
+
+round_const: .byte    0x1B, 0x36, 0x6C, 0xD8, 0xAB, 0x4D, 0x9A, \
+	            0x2F, 0x5E, 0xBC, 0x63, 0xC6, 0x97, 0x35, 0x6A, \
+                0xD4
+
+;-- a[0] (the 16 state bytes live in registers r2-r17)
+state0_0 =  2
+state0_1 =  3
+state0_2 =  4
+state0_3 =  5
+;-- a[1]
+state1_0 =  6
+state1_1 =  7
+state1_2 =  8
+state1_3 =  9
+;-- a[2]
+state2_0 = 10
+state2_1 = 11
+state2_2 = 12
+state2_3 = 13
+;-- a[3]
+state3_0 = 14
+state3_1 = 15
+state3_2 = 16
+state3_3 = 17
+
+; === theta ===
+;
+;  param1: the state in r2-r17
+;  param2: pointer to k in X (r26,r27); X is unchanged on return
+;  uses r0, r18-r21 and Y as scratch; r1 is scratch and cleared on return
+temp_a = 18
+temp_b = 19
+temp_c = 20
+temp_d = 21
+
+theta:
+	/* temp = a[0] ^ a[2]; temp ^= temp>>>8 ^ temp<<<8 */
+	op32_prefix mov, temp_, state0_, a,b,c,d, 0,1,2,3
+	op32_prefix eor, temp_, state2_, a,b,c,d, 0,1,2,3
+
+	mov r1, temp_a /* r1 = XOR of all four temp bytes */
+	eor r1, temp_b
+	eor r1, temp_c
+	eor r1, temp_d
+	
+	op32_prefix eor, temp_, r, a,b,c,d, 1,1,1,1 /* each temp byte ^= r1 */
+
+	/* temp is now a little bit mixed: c,d,a,b (if abcd is normal order) */
+	/* a[1] ^= temp */
+	eor state1_0, temp_c 
+	eor state1_1, temp_d 
+	eor state1_2, temp_a 
+	eor state1_3, temp_b
+	/* a[3] ^= temp */
+	eor state3_0, temp_c 
+	eor state3_1, temp_d 
+	eor state3_2, temp_a 
+	eor state3_3, temp_b
+	
+	/* state ^ k (X points to K) */
+	ldi r28, 2
+	clr r29 /* Y points to r2 aka state0_0 */
+	ldi temp_a, 16 /* byte counter for the 16 state bytes */
+1:	
+	ld r1, X+
+	ld r0, Y
+	eor r1, r0
+	st Y+, r1
+	dec temp_a
+	brne 1b
+	sbiw r26, 16 /* set X back to key */ 
+	
+	mov temp_a, state1_0 /* temp = a[1] ^ a[3] */
+	mov temp_b, state1_1
+	mov temp_c, state1_2
+	mov temp_d, state1_3
+	eor temp_a, state3_0
+	eor temp_b, state3_1
+	eor temp_c, state3_2
+	eor temp_d, state3_3
+	mov r1, temp_a /* same byte-rotation trick as above */
+	eor r1, temp_b
+	eor r1, temp_c
+	eor r1, temp_d
+	eor temp_a, r1
+	eor temp_b, r1
+	eor temp_c, r1
+	eor temp_d, r1
+	/* temp is now a little bit mixed: c,d,a,b (if abcd is normal order) */
+	/* a[0] ^= temp */
+	eor state0_0, temp_c 
+	eor state0_1, temp_d 
+	eor state0_2, temp_a 
+	eor state0_3, temp_b
+	/* a[2] ^= temp */
+	eor state2_0, temp_c 
+	eor state2_1, temp_d 
+	eor state2_2, temp_a 
+	eor state2_3, temp_b
+	
+	clr r1
+	ret
+
+/******************************************************************************/
+#ifndef NOEKEON_NO_ENC
+; === noekeon_enc ===
+;  encrypts the 16 byte block at param1 in place
+;  param1: pointer to buffer (r24,r25)
+;  param2: pointer to k (r22,r23) 
+;
+.global noekeon_enc
+noekeon_enc:
+	rcall push_all_func
+	/* load state */
+	movw r26, r22 /* X = key pointer, as theta expects */
+	ldi r28, 2
+	clr r29	/* Y points at r2 aka state0_0 */
+	movw r30, r24 /* Z points at state */
+	push r30 /* keep the buffer pointer for the write-back */
+	push r31
+	ldi r22, 16
+	push r22 /* 16 is also the number of rounds and gets pushed here */
+1:	
+	ld r0, Z+
+	st Y+, r0
+	dec r22
+	brne 1b
+	/* state loaded */
+	push r1 /* push round constant2 (0x00) */
+	ldi r20, 0x80
+	push r20 /* push round constant1 (0x80) */
+	rjmp 3f
+2:
+	ldi r30, lo8(round_const+15) /* Z = round_const + 15 - (rounds left) */
+	ldi r31, hi8(round_const+15)
+	sub r30, r22
+	sbci r31, 0
+	clr r1
+	push r1 /* round constant2 = 0x00 */
+	lpm r0, Z
+	push r0 /* round constant1 from the table in flash */
+3:
+	rcall round /* pops rc2 & rc1 */
+	pop r22 /* round counter */
+	dec r22
+	push r22
+	brne 2b
+
+	pop r22 /* drop the round counter */
+
+	ldi r22, 0xD4 /* last round constant, followed by a final theta */
+	eor state0_3, r22
+	rcall theta
+
+	pop r31 /* buffer pointer saved above */
+	pop r30
+	clr r29
+	ldi r28, 2
+	ldi r22, 16
+1:	
+	ld r0, Y+ /* copy r2-r17 back into the buffer */
+	st Z+, r0
+	dec r22
+	brne 1b
+	
+	rcall pop_all_func
+	ret
+#endif
+/******************************************************************************/
+/******************************************************************************/
+#ifndef NOEKEON_NO_DEC
+
+; === noekeon_dec ===
+;  decrypts the 16 byte block at param1 in place
+;  param1: pointer to buffer/state (r24,r25)
+;  param2: pointer to k  (r22,r23) 
+;
+.global noekeon_dec
+noekeon_dec:
+	rcall push_all_func
+	/* allocate 16 bytes on the stack */
+	in r30, _SFR_IO_ADDR(SPL)
+	in r31, _SFR_IO_ADDR(SPH)
+	sbiw r30, 16 
+	out  _SFR_IO_ADDR(SPH), r31 /* NOTE(review): SP is rewritten without cli (sha1-asm.S masks interrupts for this) - confirm */
+	out  _SFR_IO_ADDR(SPL), r30
+
+	adiw r30, 1 /* Z = first byte of the 16 byte stack key */
+	/* push state pointer */
+	push r24
+	push r25
+	movw r26, r22 /* move key ptr to X */
+
+	/* set stackkey to zero */
+	ldi r22, 16
+1:	st Z+, r1
+	dec r22
+	brne 1b
+		
+	/* copy key to state */
+	clr r29
+	ldi r28, 2
+	ldi r22, 16
+1:  ld r0, X+
+	st Y+, r0
+	dec r22
+	brne 1b	
+
+	movw r26, r30
+	sbiw r26, 16 /* set X back to beginning of stack key */
+	rcall theta /* theta on the key with an all-zero key gives the working key */
+	
+	/* mov state to stackkey */
+	clr r29
+	ldi r28,  2
+	ldi r22, 16
+1:	ld r0, Y+
+	st X+, r0	
+	dec r22
+	brne 1b
+	sbiw r26, 16 /* set X back to beginning of stack key */
+	
+	/* move data from stateptr to state */
+	pop r31
+	pop r30
+	push r30 /* keep the state pointer on the stack for the write-back */
+	push r31
+	clr r29
+	ldi r28,  2
+	ldi r22, 16
+	push r22 /* 16 is also the round counter */
+1:	ld r0, Z+
+	st Y+, r0
+	dec r22
+	brne 1b	
+	
+;--- snip 8< ----
+	
+	ldi r20, 0xD4
+	push r20 /* push round constant2 (0xD4) */
+	push r22 /* push round constant1 (0x00, r22 is zero after the loop) */
+	rjmp 3f
+2:
+	ldi r30, lo8(round_const-1) /* Z = round_const - 1 + (rounds left) */
+	ldi r31, hi8(round_const-1)
+	clr r1
+	add r30, r22
+	adc r31, r1
+	lpm r0, Z
+	push r0 /* round constant2 from the table in flash */
+	push r1 /* round constant1 = 0x00 */
+3:
+	rcall round /* pops rc2 & rc1 */
+	pop r22 /* round counter */
+	dec r22
+	push r22
+	brne 2b
+;----
+	pop r22 /* drop the round counter */
+
+	rcall theta /* final theta, then the last round constant */
+	ldi r22, 0x80
+	eor state0_3, r22
+	
+write_state_back:	
+	/* write state back */
+	pop r31 /* pop state pointer */
+	pop r30
+	clr r29
+	ldi r28, 2
+	ldi r22, 16
+1:	
+	ld r0, Y+
+	st Z+, r0
+	dec r22
+	brne 1b
+	
+	/* remove key from stack */
+	in r30, _SFR_IO_ADDR(SPL)
+	in r31, _SFR_IO_ADDR(SPH)
+	adiw r30, 16 
+	out  _SFR_IO_ADDR(SPH), r31 /* NOTE(review): again no cli around the SP update - confirm */
+	out  _SFR_IO_ADDR(SPL), r30
+	rcall pop_all_func
+	ret
+#endif
+/******************************************************************************/
+	
+	
+round:	
+	pop r24 /* return address is parked in r25:r24 to reach the two constants below it */
+	pop r25
+	pop r1 /* round constant1: applied before theta */
+	eor state0_3, r1
+	rcall theta
+	pop r1 /* round constant2: applied after theta */
+	eor state0_3, r1
+	push r25 /* put the return address back for the final ret */
+	push r24
+pi_gamma_pi:
+	ldi r30, pm_lo8(bigendian_rotl32) /* Z selects the rotate routine that pi calls */
+	ldi r31, pm_hi8(bigendian_rotl32)
+	rcall pi
+	/* pi1 done; now gamma */
+	rcall gamma_1
+	/* a[0] <-> a[3] */
+	xchg state0_0, state3_0
+	xchg state0_1, state3_1
+	xchg state0_2, state3_2
+	xchg state0_3, state3_3
+	/* a[2] ^= a[0] ^ a[1] ^ a[3] */
+	op32 eor, state2, state0
+	op32 eor, state2, state1
+	op32 eor, state2, state3
+
+	rcall gamma_1
+	ldi r30, pm_lo8(bigendian_rotr32) /* second pi rotates to the right */
+	ldi r31, pm_hi8(bigendian_rotr32)
+	rcall pi
+	ret
+	
+gamma_1: /* nonlinear step on the state in r2-r17; r1 is scratch and left non-zero */
+	/* a[1] ^= ~(a[3]|a[2])*/
+	mov r1, state3_0
+	or  r1, state2_0
+	com r1
+	eor state1_0, r1
+
+	mov r1, state3_1
+	or  r1, state2_1
+	com r1
+	eor state1_1, r1
+
+	mov r1, state3_2
+	or  r1, state2_2
+	com r1
+	eor state1_2, r1
+	
+	mov r1, state3_3
+	or  r1, state2_3
+	com r1
+	eor state1_3, r1
+	
+	/* a[0] ^= a[2]&a[1] */
+	mov r1, state2_0
+	and r1, state1_0
+	eor state0_0, r1
+	
+	mov r1, state2_1
+	and r1, state1_1
+	eor state0_1, r1
+	
+	mov r1, state2_2
+	and r1, state1_2
+	eor state0_2, r1
+	
+	mov r1, state2_3
+	and r1, state1_3
+	eor state0_3, r1
+	ret
+	
+pi:	/* rotates a[1], a[2], a[3] by 1, 5, 2 bits; direction is set by the routine Z points to */
+	/* a[1] rotated by 1 */
+	mov r22, state1_0
+	mov r23, state1_1
+	mov r24, state1_2
+	mov r25, state1_3
+	ldi r20, 1
+	icall /* bigendian_rotl32 or bigendian_rotr32, chosen by the caller */
+	mov state1_0, r22
+	mov state1_1, r23
+	mov state1_2, r24
+	mov state1_3, r25
+	/* a[2] rotated by 5 */
+	mov r22, state2_0
+	mov r23, state2_1
+	mov r24, state2_2
+	mov r25, state2_3
+	ldi r20, 5
+	icall
+	mov state2_0, r22
+	mov state2_1, r23
+	mov state2_2, r24
+	mov state2_3, r25
+	/* a[3] rotated by 2 */
+	mov r22, state3_0
+	mov r23, state3_1
+	mov r24, state3_2
+	mov r25, state3_3
+	ldi r20, 2
+	icall
+	mov state3_0, r22
+	mov state3_1, r23
+	mov state3_2, r24
+	mov state3_3, r25
+	ret
+
+/******************************************************************************/
+	
+/* C reference for noekeon_init below:
+void noekeon_init(void* key, noekeon_ctx_t* ctx){
+	uint8_t nullv[16];
+	
+	memset(nullv, 0, 16);
+	memcpy(ctx, key, 16);
+	noekeon_enc(ctx, nullv);
+}
+*/
+
+#ifndef NOEKEON_NO_INIT
+
+.global noekeon_init
+noekeon_init:
+; === noekeon_init ===
+;  copies the key into ctx and encrypts ctx in place with an all-zero key
+;  param1: pointer to key (r24,r25)
+;  param2: pointer to context  (r22,r23) 
+;
+	in r30, _SFR_IO_ADDR(SPL)
+	in r31, _SFR_IO_ADDR(SPH)
+	sbiw r30, 16 /* reserve 16 bytes for nullv */
+	out  _SFR_IO_ADDR(SPH), r31 /* NOTE(review): SP is rewritten without cli - confirm this is safe */
+	out  _SFR_IO_ADDR(SPL), r30
+
+	movw r26, r22 /* X = ctx */
+	adiw r30, 1
+	movw r22, r30 /* second argument for noekeon_enc: pointer to nullv */
+	/* set nullv(stack) to zero */
+	ldi r20, 16
+1:	st Z+, r1
+	dec r20
+	brne 1b
+
+	/* copy key data to ctx */
+	movw r30, r24
+	ldi r20, 16
+1:	ld r1, Z+ /* r1 is used as scratch and cleared after the loop */
+	st X+, r1
+	dec r20
+	brne 1b
+	clr r1
+	
+	sbiw r26, 16
+	movw r24, r26 /* first argument for noekeon_enc: ctx */
+	rcall noekeon_enc
+	
+	in r30, _SFR_IO_ADDR(SPL)
+	in r31, _SFR_IO_ADDR(SPH)
+	adiw r30, 16 /* release nullv */
+	out  _SFR_IO_ADDR(SPH), r31
+	out  _SFR_IO_ADDR(SPL), r30	
+	ret
+	
+#endif
+
+
diff --git a/dsa/noekeon_prng.c b/dsa/noekeon_prng.c
new file mode 100644
index 0000000..5b5bc29
--- /dev/null
+++ b/dsa/noekeon_prng.c
@@ -0,0 +1,75 @@
+/* noekeon_prng.c */
+/*
+ *   This file is part of the AVR-Crypto-Lib.
+ *   Copyright (C) 2006, 2007, 2008  Daniel Otte (daniel.otte@rub.de)
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/**
+ * \author	Daniel Otte
+ * \date	2008-08-24
+ * \license GPLv3 or later
+ * \brief   random number generator based on noekeon running in CFB-mode
+ * 
+ */
+
+#include "noekeon.h"
+#include "memxor.h"
+#include <stdint.h>
+#include <string.h>
+
+static uint8_t random_state[16]; /* block that is re-encrypted in place to produce output */
+static uint8_t random_key[16];   /* noekeon key used for that encryption */
+static uint8_t i=0;              /* bytes of sr[] in random8() not yet handed out; 0 = refill */
+
+uint8_t random8(void){
+	/* Hands out the buffered block from sr[15] down to sr[0], one byte per call. */
+	static uint8_t sr[16];
+
+	if(i!=0){
+		return sr[--i];
+	}
+	noekeon_enc(random_state, random_key); /* buffer used up: generate the next block */
+	memcpy(sr, random_state, sizeof sr);
+	i=15;
+	return sr[i];
+}
+
+void random_block(void* dest){
+	noekeon_enc(random_state, random_key); /* advance the generator by one block */
+	memcpy(dest, random_state, sizeof random_state); /* dest receives all 16 bytes */
+	i=0; /* drop whatever random8() still had buffered */
+}
+
+void srandom32(uint32_t seed){
+	memset(random_state, 0, sizeof random_state); /* restart from the all-zero state */
+	memset(random_key, 0, sizeof random_key);
+	memcpy(random_key, &seed, sizeof seed); /* key = 4 seed bytes followed by 12 zero bytes */
+	i=0; /* drop whatever random8() still had buffered */
+}
+
+void random_seed(const void* buffer){
+	memset(random_state, 0, sizeof random_state); /* restart from the all-zero state */
+	memcpy(random_key, buffer, sizeof random_key); /* buffer supplies the 16 byte key */
+	i=0; /* drop whatever random8() still had buffered */
+}
+
+void random_add(const void* buffer){
+	noekeon_enc(random_state, random_key); /* step the state with the old key first */
+	memxor(random_key, buffer, sizeof random_key);       /* key ^= 16 bytes from buffer */
+	memxor(random_key, random_state, sizeof random_key); /* key ^= new state */
+	i=0; /* drop whatever random8() still had buffered */
+}
+
+
diff --git a/dsa/noekeon_prng.h b/dsa/noekeon_prng.h
new file mode 100644
index 0000000..ff9ca03
--- /dev/null
+++ b/dsa/noekeon_prng.h
@@ -0,0 +1,40 @@
+/* noekeon_prng.h */
+/*
+ *   This file is part of the AVR-Crypto-Lib.
+ *   Copyright (C) 2006, 2007, 2008  Daniel Otte (daniel.otte@rub.de)
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/**
+ * \author	Daniel Otte
+ * \date	2008-08-24
+ * \license GPLv3 or later
+ * \brief   random number generator based on noekeon running in CFB-mode
+ * 
+ */
+
+#ifndef PRNG_H_
+#define PRNG_H_
+
+#include <stdint.h>
+
+uint8_t random8(void);                /* returns the next generated byte */
+void random_block(void* dest);        /* writes a freshly generated 16 byte block to dest */
+void srandom32(uint32_t seed);        /* key = seed padded with zero bytes, state = 0 */
+void random_seed(const void* buffer); /* key = 16 bytes from buffer, state = 0 */
+void random_add(const void* buffer);  /* XORs 16 bytes from buffer and a new state block into the key */
+
+#endif /* PRNG_H_*/
+
+
diff --git a/dsa/sha1-asm.S b/dsa/sha1-asm.S
new file mode 100644
index 0000000..4d185f9
--- /dev/null
+++ b/dsa/sha1-asm.S
@@ -0,0 +1,883 @@
+/* sha1-asm.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * Author:	Daniel Otte
+ *
+ * License: GPLv3 or later
+*/
+; SHA1 implementation in assembler for AVR
+SHA1_BLOCK_BITS = 512	/* one message block is 64 bytes */
+SHA1_HASH_BITS = 160	/* the hash value is 20 bytes */
+
+.macro precall
+	/* push r0, r1, r18 - r27, r30 - r31, then clear r1 */
+	push r0
+	push r1
+	push r18
+	push r19
+	push r20
+	push r21
+	push r22
+	push r23
+	push r24
+	push r25
+	push r26
+	push r27
+	push r30
+	push r31
+	clr r1
+.endm
+
+.macro postcall /* restores everything precall pushed, in reverse order */
+	pop r31
+	pop r30
+	pop r27
+	pop r26
+	pop r25
+	pop r24
+	pop r23
+	pop r22
+	pop r21
+	pop r20
+	pop r19
+	pop r18
+	pop r1
+	pop r0
+.endm
+
+
+.macro hexdump length /* debug aid: sends CR LF, then dumps \length bytes starting at X, 16 per call */
+	push r27
+	push r26
+	ldi r25, '\r'
+	mov r24, r25
+	call uart_putc
+	ldi r25, '\n'
+	mov r24, r25
+	call uart_putc
+	pop r26
+	pop r27
+	movw r24, r26 /* first argument: address to dump */
+.if \length > 16
+	ldi r22, lo8(16) /* second argument: number of bytes */
+	ldi r23, hi8(16)
+	push r27
+	push r26
+	call uart_hexdump
+	pop r26
+	pop r27
+	adiw r26, 16
+	hexdump \length-16 /* the macro expands itself again for the remaining bytes */
+.else
+	ldi r22, lo8(\length)
+	ldi r23, hi8(\length)
+	call uart_hexdump
+.endif
+.endm
+
+.macro delay /* busy-wait helper; the body is commented out, so it expands to nothing */
+/*
+	push r0
+	push r1
+	clr r0
+1:	clr r1
+2:	dec r1
+	brne 2b
+	dec r0
+	brne 1b
+	pop r1
+	pop r0  // */
+.endm
+
+/* X points to Block; the body is commented out, so this expands to nothing */
+.macro dbg_hexdump length
+/*
+	precall
+	hexdump \length
+	postcall
+	// */
+.endm
+
+
+
+.section .text
+
+SPL = 0x3D	/* I/O address used below with in/out for the stack pointer low byte */
+SPH = 0x3E	/* I/O address of the stack pointer high byte */
+SREG = 0x3F	/* I/O address of the status register (saved/restored around cli) */
+
+
+;
+;sha1_ctx_t is:
+;
+; [h0][h1][h2][h3][h4][length]
+; hn is 32 bit large, length is 64 bit large
+
+;###########################################################
+
+.global sha1_ctx2hash
+; === sha1_ctx2hash ===
+; this function converts a state into a normal hash (bytestring)
+;  param1: the 16-bit destination pointer
+;	given in r25,r24 (r25 is most significant)
+;  param2: the 16-bit pointer to sha1_ctx structure
+;	given in r23,r22
+sha1_ctx2hash:
+	movw r26, r22	/* X = ctx */
+	movw r30, r24	/* Z = destination */
+	ldi r21, 5	/* five 32-bit words h0..h4 */
+	sbiw r26, 4	/* bias X so the adiw below lands one past the current word */
+1:
+	ldi r20, 4
+	adiw r26, 8
+2:
+		ld r0, -X	/* read the word from its last byte to its first ... */
+		st Z+, r0	/* ... and write forwards: each word is byte-reversed */
+	dec r20
+	brne 2b
+
+	dec r21
+	brne 1b
+
+	ret
+
+;###########################################################
+
+.global sha1
+; === sha1 ===
+; this function calculates SHA-1 hashes from messages in RAM
+;  param1: the 16-bit hash destination pointer
+;	given in r25,r24 (r25 is most significant)
+;  param2: the 16-bit pointer to message
+;	given in r23,r22
+;  param3: 32-bit length value (length of message in bits)
+;   given in r21,r20,r19,r18
+sha1:
+sha1_prolog:
+	push r8
+	push r9
+	push r10
+	push r11
+	push r12
+	push r13
+	push r16
+	push r17
+	in r30, SPL
+	in r31, SPH
+	sbiw r30, 5*4+8	/* room for a sha1_ctx_t: five 32-bit words plus the 64-bit length */
+	in r0, SREG
+	cli		/* interrupts are masked while the stack pointer is rewritten */
+	out SPL, r30
+	out SREG, r0
+	out SPH, r31
+
+	push r25	/* keep the destination pointer for sha1_ctx2hash */
+	push r24
+	adiw r30, 1
+	movw r16, r30	/* r17:r16 = pointer to the context on the stack */
+
+	movw r8, r18		/* backup of length*/
+	movw r10, r20
+
+	movw r12, r22	/* backup of msg-ptr */
+
+	movw r24, r16
+	rcall sha1_init
+	/* while the upper 16 bits of length are non-zero: hash one full block here */
+1:
+	tst r11
+	brne 2f
+	tst r10
+	breq 4f
+2:
+	movw r24, r16
+	movw r22, r12
+	rcall sha1_nextBlock
+	ldi r19, 64	/* advance the message pointer by one 64 byte block */
+	add r12, r19
+	adc r13, r1	/* relies on r1 being zero here */
+	/* length -= 512 */
+	ldi r19, 0x02
+	sub r9, r19
+	sbc r10, r1
+	sbc r11, r1
+	rjmp 1b
+
+4:
+	movw r24, r16
+	movw r22, r12
+	movw r20, r8	/* remaining length (low 16 bits) is handed to sha1_lastBlock */
+	rcall sha1_lastBlock
+
+	pop r24	/* destination pointer saved in the prolog */
+	pop r25
+	movw r22, r16
+	rcall sha1_ctx2hash
+
+sha1_epilog:
+	in r30, SPL
+	in r31, SPH
+	adiw r30, 5*4+8	/* release the context */
+	in r0, SREG
+	cli
+	out SPL, r30
+	out SREG, r0
+	out SPH, r31
+	pop r17
+	pop r16
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	pop r9
+	pop r8
+	ret
+
+;###########################################################
+
+
+; block MUST NOT be larger than 64 bytes
+
+.global sha1_lastBlock
+; === sha1_lastBlock ===
+; pads the final message data and hashes it; complete 64 byte blocks at the
+; start of the data are hashed first, so param3 may be 512 or larger
+;  param1: the 16-bit pointer to sha1_ctx structure
+;	given in r25,r24 (r25 is most significant)
+;  param2: a 16-bit pointer to the message data, given in r23,r22
+;  param3: a 16-bit integer specifying the length of the data in bits
+;	given in r21,r20
+sha1_lastBlock_localSpace = (SHA1_BLOCK_BITS/8+1)
+
+
+sha1_lastBlock:
+	cpi r21, 0x02			/* fewer than 512 bits left? */
+	brlo sha1_lastBlock_prolog
+	push r25
+	push r24
+	push r23
+	push r22
+	push r21
+	push r20
+	rcall sha1_nextBlock		/* hash one complete block */
+	pop r20
+	pop r21
+	pop r22
+	pop r23
+	pop r24
+	pop r25
+	subi r21, 2			/* length -= 512 */
+	ldi r19, 64
+	add r22, r19			/* advance msg pointer to the next block */
+	adc r23, r1			/* r1 == 0, cleared at the end of sha1_nextBlock */
+	rjmp sha1_lastBlock
+sha1_lastBlock_prolog:
+	/* allocate a 64 byte block buffer on the stack */
+	in r30, SPL
+	in r31, SPH
+	in r0, SREG
+	subi r30, lo8(64)
+	sbci r31, hi8(64) /* hi8(64) is 0, this only propagates the borrow */
+	cli
+	out SPL, r30
+	out SREG, r0
+	out SPH, r31
+
+	adiw r30, 1 /* SP points to next free byte on stack */
+	mov r18, r20 /* r20 = LSB(length) */
+	lsr r18
+	lsr r18
+	lsr r18
+	bst r21, 0	/* bit 8 of the bit length is bit 5 of the byte count */
+	bld r18, 5  /* now: r18 == length/8 (aka. length in bytes) */
+
+
+	movw r26, r22 /* X points to begin of msg */
+	tst r18
+	breq sha1_lastBlock_post_copy
+	mov r1, r18 /* r1 is the copy counter and is zero again after the loop */
+sha1_lastBlock_copy_loop:
+	ld r0, X+
+	st Z+, r0
+	dec r1
+	brne sha1_lastBlock_copy_loop
+sha1_lastBlock_post_copy:
+sha1_lastBlock_insert_stuffing_bit:
+	ldi r19, 0x80
+	mov r0,r19
+	ldi r19, 0x07
+	and r19, r20 /* if we are in bitmode */
+	breq 2f	/* no bitmode */
+1:
+	lsr r0 /* move the stuffing bit behind the last message bit */
+	dec r19
+	brne 1b
+	ld r19, X
+/* maybe we should do some ANDing here, just for safety */
+	or r0, r19
+2:
+	st Z+, r0
+	inc r18
+
+/* does the 8 byte length field still fit into this block? */
+	cpi r18, 64-8+1
+	brsh 0f
+	rjmp sha1_lastBlock_insert_zeros
+0:
+	/* no: this block is hashed without the length field */
+	/* first we have to fill it up with zeros */
+	ldi r19, 64
+	sub r19, r18
+	breq 2f
+1:
+	st Z+, r1
+	dec r19
+	brne 1b
+2:
+	sbiw r30, 63
+	sbiw r30,  1 /* Z points to the begin of the block again */
+	movw r22, r30
+
+	push r31
+	push r30
+	push r25
+	push r24
+	push r21
+	push r20
+	rcall sha1_nextBlock
+	pop r20
+	pop r21
+	pop r24
+	pop r25
+	pop r30
+	pop r31
+
+	/* sha1_nextBlock added 512 to state.length, take it back */
+	movw r26, r24
+	adiw r26, 4*5+1 /* we can skip the lowest byte */
+	ld r19, X
+	subi r19, hi8(512)
+	st X+, r19
+	ldi r18, 6
+1:
+	ld r19, X
+	sbci r19, 0
+	st X+, r19
+	dec r18
+	brne 1b
+
+;	clr r18 /* not necessary, the loop above leaves r18 == 0 */
+	/* Z still points to begin of block (restored by the pops above) */
+
+sha1_lastBlock_insert_zeros:
+	ldi r19, 64-8
+	sub r19, r18
+	breq sha1_lastBlock_insert_length
+	clr r1
+1:
+	st Z+, r1	/* r1 is still zero */
+	dec r19
+	brne 1b
+
+;	rjmp sha1_lastBlock_epilog
+sha1_lastBlock_insert_length:
+	movw r26, r24	/* X points to state */
+	adiw r26, 5*4	/* X points to (state.length) */
+	adiw r30, 8		/* Z points one after the last byte of block */
+	ld r0, X+
+	add r0, r20	/* total = state.length + remaining bits */
+	st -Z, r0	/* stored back to front, i.e. most significant byte first */
+	ld r0, X+
+	adc r0, r21
+	st -Z, r0
+	ldi r19, 6
+1:
+	ld r0, X+
+	adc r0, r1
+	st -Z, r0
+	dec r19
+	brne 1b
+
+	sbiw r30, 64-8 /* Z points to begin of block */
+	movw r22, r30
+	rcall sha1_nextBlock
+
+sha1_lastBlock_epilog:
+	in r30, SPL
+	in r31, SPH
+	in r0, SREG
+	adiw r30, 63 ; release the 64 byte buffer (adiw is limited to 63)
+	adiw r30,  1
+	cli
+	out SPL, r30
+	out SREG, r0
+	out SPH, r31
+	clr r1
+	ret
+
+/**/
+;###########################################################
+
+.global sha1_nextBlock
+; === sha1_nextBlock ===
+; this is the core function for calculating SHA-1 hashes; it hashes one
+; 64 byte block into the state and adds 512 to state.length
+;  param1: the 16-bit pointer to sha1_ctx structure, given in r25,r24
+;  param2: a 16-bit pointer to 64 byte block to hash
+;	given in r23,r22
+sha1_nextBlock_localSpace = (16+5+1)*4 ; 16 32-bit values for w array and 5 32-bit values for a array (84 byte used, 88 reserved)
+
+xtmp = 0
+xNULL = 1
+W1 = 10
+W2 = 11
+T1	= 12
+T2	= 13
+T3	= 14
+T4	= 15
+LoopC = 16
+S	  = 17
+tmp1 = 18
+tmp2 = 19
+tmp3 = 20
+tmp4 = 21
+F1 = 22
+F2 = 23
+F3 = 24
+F4 = 25
+
+/* byteorder: high number <--> high significance */
+sha1_nextBlock:
+ ; first save the registers used below and reserve room for local vars
+ 			 /* replace push & pop by mem ops? */
+	push r10
+	push r11
+	push r12
+	push r13
+	push r14
+	push r15
+	push r16
+	push r17
+	push r28
+	push r29
+	in r20, SPL
+	in r21, SPH
+	movw r18, r20			;backup SP
+;	movw r26, r20			; X points to free space on stack /* maybe removeable? */
+	movw r30, r22			; Z points to message
+	subi r20, lo8(sha1_nextBlock_localSpace) ;sbiw can do only up to 63
+	sbci r21, hi8(sha1_nextBlock_localSpace)
+	movw r26, r20			; X points to free space on stack
+	in r0, SREG
+	cli ; we want to be uninterrupted while updating SP
+	out SPL, r20
+	out SREG, r0
+	out SPH, r21
+
+	push r18
+	push r19 /* push old SP on new stack */
+	push r24
+	push r25 /* param1 will be needed later */
+
+	/* load a[] with state */
+	movw 28, r24 /* load pointer to state in Y */
+	adiw r26, 1 ; X++
+
+	ldi LoopC, 5*4
+1:	ld tmp1, Y+
+	st X+, tmp1
+	dec LoopC
+	brne 1b
+
+	movw W1, r26 /* save pointer to w[0] */
+	/* load w[] with endian fixed message */
+		/* we might also use the changeendian32() function at bottom */
+	movw r30, r22 /* mv param2 (pointer to msg) to Z */
+	ldi LoopC, 16
+1:
+	ldd tmp1, Z+3
+	st X+, tmp1
+	ldd tmp1, Z+2
+	st X+, tmp1
+	ldd tmp1, Z+1
+	st X+, tmp1
+	ld tmp1, Z
+	st X+, tmp1
+	adiw r30, 4
+	dec LoopC
+	brne 1b
+
+	;clr LoopC /* LoopC is named t in FIPS 180-2; it is already 0 here */
+	clr xtmp /* xtmp selects the current K constant and f function */
+sha1_nextBlock_mainloop:
+	mov S, LoopC
+	lsl S
+	lsl S
+	andi S, 0x3C /* S is a bytepointer so *4 */
+	/* load w[s] */
+	movw r26, W1
+	add r26, S /* X points at w[s] */
+	adc r27, xNULL
+	ld T1, X+
+	ld T2, X+
+	ld T3, X+
+	ld T4, X+
+
+/*
+	push r26
+	push r27
+	push T4
+	push T3
+	push T2
+	push T1
+	in r26, SPL
+	in r27, SPH
+	adiw r26, 1
+	dbg_hexdump 4
+	pop T1
+	pop T2
+	pop T3
+	pop T4
+	pop r27
+	pop r26
+*/
+
+	cpi LoopC, 16
+	brlt sha1_nextBlock_mainloop_core
+	/* update w[s]: xor in the words at offsets 2, 8 and 13 (mod 16) */
+	ldi tmp1, 2*4
+	rcall 1f
+	ldi tmp1, 8*4
+	rcall 1f
+	ldi tmp1, 13*4
+	rcall 1f
+	rjmp 2f
+1:		/* local subroutine: T ^= w[(s + tmp1/4) mod 16] */
+	add tmp1, S
+	andi tmp1, 0x3f
+	movw r26, W1
+	add r26, tmp1
+	adc r27, xNULL
+	ld tmp2, X+
+	eor T1, tmp2
+	ld tmp2, X+
+	eor T2, tmp2
+	ld tmp2, X+
+	eor T3, tmp2
+	ld tmp2, X+
+	eor T4, tmp2
+	ret
+2:	/* now we just have to do a ROTL(T) and save T back */
+	mov tmp2, T4
+	rol tmp2 /* moves the top bit of T4 into carry for the rotate */
+	rol T1
+	rol T2
+	rol T3
+	rol T4
+	movw r26, W1
+	add r26, S
+	adc r27, xNULL
+	st X+, T1
+	st X+, T2
+	st X+, T3
+	st X+, T4
+
+sha1_nextBlock_mainloop_core:	/* the core function; T=ROTL5(a) ....*/
+								/* T already contains w[s] */
+	movw r26, W1
+	sbiw r26, 4*1		/* X points at a[4] aka e */
+	ld tmp1, X+
+	add T1, tmp1
+	ld tmp1, X+
+	adc T2, tmp1
+	ld tmp1, X+
+	adc T3, tmp1
+	ld tmp1, X+
+	adc T4, tmp1		/* T = w[s]+e */
+	sbiw r26, 4*5		/* X points at a[0] aka a */
+	ld F1, X+
+	ld F2, X+
+	ld F3, X+
+	ld F4, X+
+	mov tmp1, F4		/* X points at a[1] aka b */
+	ldi tmp2, 5
+1:
+	rol tmp1 /* feeds the top bit of F into carry for each rotate step */
+	rol F1
+	rol F2
+	rol F3
+	rol F4
+	dec tmp2
+	brne 1b
+
+	add T1, F1
+	adc T2, F2
+	adc T3, F3
+	adc T4, F4 /* T = ROTL(a,5) + e + w[s] */
+
+	/* select k_t and f_t: xtmp is incremented at rounds 20, 40 and 60 */
+	ldi r30, lo8(sha1_nextBlock_xTable)
+	ldi r31, hi8(sha1_nextBlock_xTable)
+	add r30, xtmp
+	adc r31, xNULL
+	lpm tmp1, Z
+	cp tmp1, LoopC
+	brne 1f
+	inc xtmp
+1:	ldi r30, lo8(sha1_nextBlock_KTable)
+	ldi r31, hi8(sha1_nextBlock_KTable)
+	lsl xtmp
+	lsl xtmp /* xtmp*4: byte offset into the K table */
+	add r30, xtmp
+	adc r31, xNULL
+	lsr xtmp
+	lsr xtmp
+
+	lpm tmp1, Z+
+	add T1, tmp1
+	lpm tmp1, Z+
+	adc T2, tmp1
+	lpm tmp1, Z+
+	adc T3, tmp1
+	lpm tmp1, Z+
+	adc T4, tmp1
+			/* T = ROTL(a,5) + e + kt + w[s] */
+
+	/* Z-4 is just pointing to kt ... */
+	movw r28, r26 /* copy X in Y */
+	adiw r30, 3*4 /* now Z points to the right location in our jump-vector-table */
+	lsr r31 /* icall takes a word address, so halve the byte address */
+	ror r30
+
+	icall /* each call returns one byte of f_t(b,c,d) in tmp1 and advances Y */
+	mov F1, tmp1
+	icall
+	mov F2, tmp1
+	icall
+	mov F3, tmp1
+	icall
+
+	add T1, F1
+	adc T2, F2
+	adc T3, F3
+	adc T4, tmp1 /* T = ROTL5(a) + f_t(b,c,d) + e + k_t + w[s] */
+				 /* X points still at a[1] aka b, Y points at a[2] aka c */
+	/* update a[] */
+sha1_nextBlock_update_a:
+	/*first we move all vars in a[] "one up" e=d, d=c, c=b, b=a*/
+	//adiw r28, 3*4  /* Y should point at a[4] aka e */
+	movw r28, W1
+	sbiw r28, 4
+
+	ldi tmp2, 4*4
+1:
+	ld tmp1, -Y
+	std Y+4, tmp1
+	dec tmp2
+	brne 1b
+	/* Y points at a[0] aka a*/
+
+	movw r28, W1
+	sbiw r28, 5*4
+	/* store T in a[0] aka a */
+	st Y+, T1
+	st Y+, T2
+	st Y+, T3
+	st Y+, T4
+	/* Y points at a[1] aka b*/
+
+	/* rotate c (a[2]) right by 2 bits, which equals ROTL by 30 */
+	ldd T1, Y+1*4
+	ldd T2, Y+1*4+1
+	ldd T3, Y+1*4+2
+	ldd T4, Y+1*4+3
+	mov tmp1, T1
+	ldi tmp2, 2
+1:	ror tmp1
+	ror T4
+	ror T3
+	ror T2
+	ror T1
+	dec tmp2
+	brne 1b
+	std Y+1*4+0, T1
+	std Y+1*4+1, T2
+	std Y+1*4+2, T3
+	std Y+1*4+3, T4
+/*
+	push r27
+	push r26
+	movw r26, W1
+	sbiw r26, 4*5
+	dbg_hexdump 4*5
+	pop r26
+	pop r27
+*/
+	inc LoopC
+	cpi LoopC, 80
+	brge 1f
+	rjmp sha1_nextBlock_mainloop
+/**************************************/
+1:
+   /* little patch: move Y back from a[1] to a[0] */
+	sbiw r28, 4
+
+/* add a[] to state and inc length */
+	pop r27
+	pop r26		/* now X points to state (and Y still at a[0]) */
+	ldi tmp4, 5
+1:	clc
+	ldi tmp3, 4
+2:	ld tmp1, X
+	ld tmp2, Y+
+	adc tmp1, tmp2
+	st X+, tmp1
+	dec tmp3
+	brne 2b
+	dec tmp4
+	brne 1b
+
+	/* now length += 512 */
+	adiw r26, 1 /* we skip the least significant byte */
+	ld tmp1, X
+	ldi tmp2, hi8(512) /* 2 */
+	add tmp1, tmp2
+	st X+, tmp1
+	ldi tmp2, 6
+1:
+	ld tmp1, X
+	adc tmp1, xNULL
+	st X+, tmp1
+	dec tmp2
+	brne 1b
+
+; EPILOG
+sha1_nextBlock_epilog:
+/* now we should clean up the stack */
+	pop r21 /* old SP as pushed in the prolog (high byte first) */
+	pop r20
+	in r0, SREG
+	cli ; we want to be uninterrupted while updating SP
+	out SPL, r20
+	out SREG, r0
+	out SPH, r21
+
+	clr r1
+	pop r29
+	pop r28
+	pop r17
+	pop r16
+	pop r15
+	pop r14
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	ret
+
+sha1_nextBlock_xTable: /* round numbers compared against LoopC to advance xtmp */
+.byte 20,40,60,0
+sha1_nextBlock_KTable: /* round constants, indexed by xtmp*4 */
+.int	0x5a827999
+.int	0x6ed9eba1
+.int	0x8f1bbcdc
+.int	0xca62c1d6
+sha1_nextBlock_JumpTable: /* 4 byte entries, directly behind the K table */
+rjmp sha1_nextBlock_Ch
+	nop /* pads the entry to 4 bytes */
+rjmp sha1_nextBlock_Parity
+	nop
+rjmp sha1_nextBlock_Maj
+	nop
+rjmp sha1_nextBlock_Parity
+
+	 /* Y points into a[1] aka b and is advanced by one; return value in tmp1 */
+sha1_nextBlock_Ch: /* one byte of (b & c) ^ (~b & d); uses tmp2, tmp3 */
+	ld tmp1, Y+
+	mov tmp2, tmp1
+	com tmp2
+	ldd tmp3, Y+3	/* load from c */
+	and tmp1, tmp3
+	ldd tmp3, Y+7	/* load from d */
+	and tmp2, tmp3
+	eor tmp1, tmp2
+	ret
+
+sha1_nextBlock_Maj: /* one byte of (b & c) ^ (b & d) ^ (c & d) in tmp1; Y advances by one */
+	ld tmp1, Y+
+	mov tmp2, tmp1
+	ldd tmp3, Y+3	/* load from c */
+	and tmp1, tmp3
+	ldd tmp4, Y+7	/* load from d */
+	and tmp2, tmp4
+	eor tmp1, tmp2
+	and tmp3, tmp4
+	eor tmp1, tmp3
+	ret
+
+sha1_nextBlock_Parity: /* one byte of b ^ c ^ d in tmp1; Y advances by one */
+	ld tmp1, Y+
+	ldd tmp2, Y+3	/* load from c */
+	eor tmp1, tmp2
+	ldd tmp2, Y+7	/* load from d */
+	eor tmp1, tmp2
+	ret
+/*
+ch_str:			.asciz "\r\nCh"
+maj_str:		.asciz "\r\nMaj"
+parity_str:	.asciz "\r\nParity"
+*/
+;###########################################################
+
+.global sha1_init
+;void sha1_init(sha1_ctx_t *state){
+;	DEBUG_S("\r\nSHA1_INIT");
+;	state->h[0] = 0x67452301;
+;	state->h[1] = 0xefcdab89;
+;	state->h[2] = 0x98badcfe;
+;	state->h[3] = 0x10325476;
+;	state->h[4] = 0xc3d2e1f0;
+;	state->length = 0;
+;}
+; param1: (r25,r24) 16-bit pointer to sha1_ctx_t struct in ram
+; modifies: X(r26,r27), Z(r30,r31), r22, r23; expects r1 to be zero
+sha1_init:
+	movw r26, r24 ; (24,25) --> (26,27) load X with param1
+	ldi r30, lo8((sha1_init_vector))
+	ldi r31, hi8((sha1_init_vector))
+	ldi r22, 5*4 /* bytes to copy */
+sha1_init_vloop:
+	lpm r23, Z+ /* the init vector is read from program memory */
+	st X+, r23
+	dec r22
+	brne sha1_init_vloop
+	ldi r22, 8 /* the 8 length bytes are set to the value of r1 */
+sha1_init_lloop:
+	st X+, r1
+	dec r22
+	brne sha1_init_lloop
+	ret
+
+sha1_init_vector:
+.int 0x67452301;
+.int 0xefcdab89;
+.int 0x98badcfe;
+.int 0x10325476;
+.int 0xc3d2e1f0;
+
diff --git a/dsa/sha1.h b/dsa/sha1.h
new file mode 100644
index 0000000..6675d20
--- /dev/null
+++ b/dsa/sha1.h
@@ -0,0 +1,117 @@
+/* sha1.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file	sha1.h
+ * \author	Daniel Otte
+ * \email   daniel.otte@rub.de
+ * \date	2006-10-08
+ * \license GPLv3 or later
+ * \brief   SHA-1 declaration.
+ * \ingroup SHA-1
+ * 
+ */
+ 
+#ifndef SHA1_H_
+#define SHA1_H_
+
+#include <stdint.h>
+/** \def SHA1_HASH_BITS
+ * defines the size of a SHA-1 hash in bits
+ */
+
+/** \def SHA1_HASH_BYTES
+ * defines the size of a SHA-1 hash in bytes
+ */
+
+/** \def SHA1_BLOCK_BITS
+ * defines the size of a SHA-1 input block in bits
+ */
+
+/** \def SHA1_BLOCK_BYTES
+ * defines the size of a SHA-1 input block in bytes
+ */
+#define SHA1_HASH_BITS  160
+#define SHA1_HASH_BYTES (SHA1_HASH_BITS/8)
+#define SHA1_BLOCK_BITS 512
+#define SHA1_BLOCK_BYTES (SHA1_BLOCK_BITS/8)
+
+/** \typedef sha1_ctx_t
+ * \brief SHA-1 context type
+ *
+ * A variable of this type may hold the state of a SHA-1 hashing process
+ */
+typedef struct {
+	uint32_t h[5];
+	uint64_t length;
+} sha1_ctx_t;
+
+/** \typedef sha1_hash_t
+ * \brief hash value type
+ * A variable of this type may hold a SHA-1 hash value
+ */
+typedef uint8_t sha1_hash_t[SHA1_HASH_BITS/8];
+
+/** \fn sha1_init(sha1_ctx_t *state)
+ * \brief initializes a SHA-1 context
+ * This function sets a ::sha1_ctx_t variable to the initialization vector
+ * for SHA-1 hashing.
+ * \param state pointer to the SHA-1 context variable
+ */
+void sha1_init(sha1_ctx_t *state);
+
+/** \fn sha1_nextBlock(sha1_ctx_t *state, const void* block)
+ *  \brief process one input block
+ * This function processes one input block and updates the hash context
+ * accordingly
+ * \param state pointer to the state variable to update
+ * \param block pointer to the message block to process
+ */
+void sha1_nextBlock (sha1_ctx_t *state, const void* block);
+
+/** \fn sha1_lastBlock(sha1_ctx_t *state, const void* block, uint16_t length_b)
+ * \brief processes the given block and finalizes the context
+ * This function processes the last block in a SHA-1 hashing process.
+ * The block should have a maximum length of a single input block.
+ * \param state pointer to the state variable to update and finalize
+ * \param block pointer to the message block to process
+ * \param length_b length of the message block in bits
+ */
+void sha1_lastBlock (sha1_ctx_t *state, const void* block, uint16_t length_b);
+
+/** \fn sha1_ctx2hash(sha1_hash_t *dest, sha1_ctx_t *state)
+ * \brief convert a state variable into an actual hash value
+ * Writes the hash value corresponding to the state to the memory pointed to by dest.
+ * \param dest pointer to the hash value destination
+ * \param state pointer to the hash context
+ */
+void sha1_ctx2hash (sha1_hash_t *dest, sha1_ctx_t *state);
+
+/** \fn sha1(sha1_hash_t *dest, const void* msg, uint32_t length_b)
+ * \brief hashing a message which is located entirely in RAM
+ * This function automatically hashes a message which is entirely in RAM with
+ * the SHA-1 hashing algorithm.
+ * \param dest pointer to the hash value destination
+ * \param msg  pointer to the message which should be hashed
+ * \param length_b length of the message in bits
+ */
+void sha1(sha1_hash_t *dest, const void* msg, uint32_t length_b);
+
+
+
+#endif /*SHA1_H_*/
diff --git a/dsa/sha256-asm.S b/dsa/sha256-asm.S
new file mode 100644
index 0000000..e9729a1
--- /dev/null
+++ b/dsa/sha256-asm.S
@@ -0,0 +1,1040 @@
+/* sha256-asm.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * Author:	Daniel Otte
+ *
+ * License: GPLv3 or later
+*/
+; sha-256 implementation in assembler
+SHA256_BLOCK_BITS = 512
+SHA256_HASH_BITS = 256
+
+
+.macro precall
+	/* push r0, r1, r18 - r27, r30 - r31 and clear r1 */
+	push r0
+	push r1
+	push r18
+	push r19
+	push r20
+	push r21
+	push r22
+	push r23
+	push r24
+	push r25
+	push r26
+	push r27
+	push r30
+	push r31
+	clr r1
+.endm
+
+.macro postcall /* pops the registers pushed by precall, in reverse order */
+	pop r31
+	pop r30
+	pop r27
+	pop r26
+	pop r25
+	pop r24
+	pop r23
+	pop r22
+	pop r21
+	pop r20
+	pop r19
+	pop r18
+	pop r1
+	pop r0
+.endm
+
+
+.macro hexdump length /* passes X and \length to uart_hexdump, 16 bytes per call */
+	push r27
+	push r26
+	ldi r25, '\r'
+	mov r24, r25
+	call uart_putc /* CR */
+	ldi r25, '\n'
+	mov r24, r25
+	call uart_putc /* LF */
+	pop r26
+	pop r27
+	movw r24, r26 /* first argument: the pointer held in X */
+.if \length > 16
+	ldi r22, lo8(16) /* second argument: 16 bytes for this line */
+	ldi r23, hi8(16)
+	push r27
+	push r26
+	call uart_hexdump
+	pop r26
+	pop r27
+	adiw r26, 16
+	hexdump \length-16 /* recursive expansion for the remaining bytes */
+.else
+	ldi r22, lo8(\length)
+	ldi r23, hi8(\length)
+	call uart_hexdump
+.endif
+.endm
+
+/* X points to Block; wraps hexdump in precall/postcall to save and restore registers */
+.macro dbg_hexdump length
+	precall
+	hexdump \length
+	postcall
+.endm
+
+.section .text
+
+SPL = 0x3D
+SPH = 0x3E
+SREG = 0x3F
+
+
+;
+;sha256_ctx_t is:
+;
+; [h0][h1][h2][h3][h4][h5][h6][h7][length]
+; hn is 32 bit large, length is 64 bit large
+
+;###########################################################
+
+.global sha256_ctx2hash
+; === sha256_ctx2hash ===
+; this function converts a state into a normal hash (bytestring) by
+; copying the eight 32-bit state words with their bytes reversed
+;  param1: the 16-bit destination pointer, given in r25,r24
+;  param2: the 16-bit pointer to sha256_ctx structure
+;	given in r23,r22
+sha256_ctx2hash:
+	movw r26, r22
+	movw r30, r24
+	ldi r21, 8 /* 8 words */
+	sbiw r26, 4
+1:
+	ldi r20, 4 /* 4 bytes per word */
+	adiw r26, 8 /* X points one past the end of the next word */
+2:
+		ld r0, -X /* read the word back to front */
+		st Z+, r0
+	dec r20
+	brne 2b
+
+	dec r21
+	brne 1b
+
+	ret
+
+;###########################################################
+
+.global sha256
+; === sha256 ===
+; this function calculates SHA-256 hashes from messages in RAM
+;  param1: the 16-bit hash destination pointer
+;	given in r25,r24 (r25 is most significant)
+;  param2: the 16-bit pointer to message
+;	given in r23,r22
+;  param3: 32-bit length value (length of message in bits)
+;   given in r21,r20,r19,r18
+sha256:
+sha256_prolog:
+	push r8
+	push r9
+	push r10
+	push r11
+	push r12
+	push r13
+	push r16
+	push r17
+	in r30, SPL
+	in r31, SPH
+	sbiw r30, 8*4+8 /* reserve 40 bytes on the stack for the context */
+	in r0, SREG
+	cli
+	out SPL, r30
+	out SREG, r0
+	out SPH, r31
+
+	push r25 /* keep the destination pointer for sha256_ctx2hash */
+	push r24
+	adiw r30, 1
+	movw r16, r30 /* r17:r16 points to the context */
+	movw r8, r18		/* backup of length*/
+	movw r10, r20
+
+	movw r12, r22	/* backup of msg-ptr */
+
+	movw r24, r16
+	rcall sha256_init
+	/* while length > 0xffff: hash one block */
+1:
+	tst r11
+	brne 2f
+	tst r10
+	breq 4f
+2:
+	movw r24, r16
+	movw r22, r12
+	rcall sha256_nextBlock
+	ldi r19, 64
+	add r12, r19 /* msg-ptr += 64 */
+	adc r13, r1
+	/* length -= 512 */
+	ldi r19, 0x02
+	sub r9, r19
+	sbc r10, r1
+	sbc r11, r1
+	rjmp 1b
+
+4:
+	movw r24, r16
+	movw r22, r12
+	movw r20, r8 /* the remaining length now fits in 16 bits */
+	rcall sha256_lastBlock
+
+	pop r24
+	pop r25
+	movw r22, r16
+	rcall sha256_ctx2hash
+
+sha256_epilog:
+	in r30, SPL
+	in r31, SPH
+	adiw r30, 8*4+8 /* release the context */
+	in r0, SREG
+	cli
+	out SPL, r30
+	out SREG, r0
+	out SPH, r31
+	pop r17
+	pop r16
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	pop r9
+	pop r8
+	ret
+
+;###########################################################
+
+
+; block MUST NOT be larger than 64 bytes
+
+.global sha256_lastBlock
+; === sha256_lastBlock ===
+; pads the final message data and hashes it; complete 64 byte blocks at the
+; start of the data are hashed first, so param3 may be 512 or larger
+;  param1: the 16-bit pointer to sha256_ctx structure
+;	given in r25,r24 (r25 is most significant)
+;  param2: a 16-bit pointer to the message data, given in r23,r22
+;  param3: a 16-bit integer specifying the length of the data in bits
+;	given in r21,r20
+sha256_lastBlock_localSpace = (SHA256_BLOCK_BITS/8+1)
+
+
+sha256_lastBlock:
+	cpi r21, 0x02 /* fewer than 512 bits left? */
+	brlo sha256_lastBlock_prolog
+	push r25
+	push r24
+	push r23
+	push r22
+	push r21
+	push r20
+	rcall sha256_nextBlock /* hash one complete block */
+	pop r20
+	pop r21
+	pop r22
+	pop r23
+	pop r24
+	pop r25
+	subi r21, 0x02 /* length -= 512 */
+	ldi r19, 64
+	add r22, r19 /* advance msg pointer to the next block */
+	adc r23, r1
+	rjmp sha256_lastBlock
+sha256_lastBlock_prolog:
+	/* allocate a 64 byte block buffer on the stack */
+	in r30, SPL
+	in r31, SPH
+	in r0, SREG
+	subi r30, lo8(64)
+	sbci r31, hi8(64)
+	cli
+	out SPL, r30
+	out SREG,r0
+	out SPH, r31
+
+	adiw r30, 1 /* SP points to next free byte on stack */
+	mov r18, r20 /* r20 = LSB(length) */
+	lsr r18
+	lsr r18
+	lsr r18
+	bst r21, 0	/* bit 8 of the bit length is bit 5 of the byte count */
+	bld r18, 5  /* now: r18 == length/8 (aka. length in bytes) */
+
+
+	movw r26, r22 /* X points to begin of msg */
+	tst r18
+	breq sha256_lastBlock_post_copy
+	mov r1, r18 /* r1 is the copy counter and is zero again after the loop */
+sha256_lastBlock_copy_loop:
+	ld r0, X+
+	st Z+, r0
+	dec r1
+	brne sha256_lastBlock_copy_loop
+sha256_lastBlock_post_copy:
+sha256_lastBlock_insert_stuffing_bit:
+	ldi r19, 0x80
+	mov r0,r19
+	ldi r19, 0x07
+	and r19, r20 /* if we are in bitmode */
+	breq 2f	/* no bitmode */
+1:
+	lsr r0 /* move the stuffing bit behind the last message bit */
+	dec r19
+	brne 1b
+	ld r19, X
+/* maybe we should do some ANDing here, just for safety */
+	or r0, r19
+2:
+	st Z+, r0
+	inc r18
+
+/* does the 8 byte length field still fit into this block? */
+	cpi r18, 64-8+1
+	brsh 0f
+	rjmp sha256_lastBlock_insert_zeros
+0:
+	/* no: this block is hashed without the length field */
+	/* first we have to fill it up with zeros */
+	ldi r19, 64
+	sub r19, r18
+	breq 2f
+1:
+	st Z+, r1
+	dec r19
+	brne 1b
+2:
+	sbiw r30, 63
+	sbiw r30,  1 /* Z points to the begin of the block again */
+	movw r22, r30
+
+	push r31
+	push r30
+	push r25
+	push r24
+	push r21
+	push r20
+	rcall sha256_nextBlock
+	pop r20
+	pop r21
+	pop r24
+	pop r25
+	pop r30
+	pop r31
+
+	/* sha256_nextBlock added 512 to state.length, take it back */
+	movw r26, r24
+	adiw r26, 4*8+1 /* we can skip the lowest byte */
+	ld r19, X
+	subi r19, hi8(512)
+	st X+, r19
+	ldi r18, 6
+1:
+	ld r19, X
+	sbci r19, 0
+	st X+, r19
+	dec r18
+	brne 1b
+
+;	clr r18 /* not necessary, the loop above leaves r18 == 0 */
+	/* Z still points to begin of block (restored by the pops above) */
+
+sha256_lastBlock_insert_zeros:
+	ldi r19, 64-8
+	sub r19, r18
+	breq sha256_lastBlock_insert_length
+	clr r1
+1:
+	st Z+, r1	/* r1 is still zero */
+	dec r19
+	brne 1b
+
+;	rjmp sha256_lastBlock_epilog
+sha256_lastBlock_insert_length:
+	movw r26, r24	/* X points to state */
+	adiw r26, 8*4	/* X points to (state.length) */
+	adiw r30, 8		/* Z points one after the last byte of block */
+	ld r0, X+
+	add r0, r20	/* total = state.length + remaining bits */
+	st -Z, r0	/* stored back to front, i.e. most significant byte first */
+	ld r0, X+
+	adc r0, r21
+	st -Z, r0
+	ldi r19, 6
+1:
+	ld r0, X+
+	adc r0, r1
+	st -Z, r0
+	dec r19
+	brne 1b
+
+	sbiw r30, 64-8 /* Z points to begin of block */
+	movw r22, r30
+	rcall sha256_nextBlock
+
+sha256_lastBlock_epilog:
+	in r30, SPL
+	in r31, SPH
+	in r0, SREG
+	adiw r30, 63 ; release the 64 byte buffer (adiw is limited to 63)
+	adiw r30,  1
+	cli
+	out SPL, r30
+	out SREG,r0
+	out SPH, r31
+	clr r1
+	ret
+
+/**/
+;###########################################################
+
+.global sha256_nextBlock
+; === sha256_nextBlock ===
+; this is the core function for calculating SHA-256 hashes; it hashes one
+; 64 byte block into the state and adds 512 to state.length
+;  param1: the 16-bit pointer to sha256_ctx structure, given in r25,r24
+;  param2: a 16-bit pointer to 64 byte block to hash
+;	given in r23,r22
+sha256_nextBlock_localSpace = (64+8)*4 ; 64 32-bit values for w array and 8 32-bit values for a array (total 288 byte)
+
+Bck1 = 12
+Bck2 = 13
+Bck3 = 14
+Bck4 = 15
+Func1 = 22
+Func2 = 23
+Func3 = 24
+Func4 = 25
+Accu1 = 16
+Accu2 = 17
+Accu3 = 18
+Accu4 = 19
+XAccu1 = 8
+XAccu2 = 9
+XAccu3 = 10
+XAccu4 = 11
+T1	= 4
+T2	= 5
+T3	= 6
+T4	= 7
+LoopC = 1
+/* byteorder: high number <--> high significance */
+sha256_nextBlock:
+ ; first save the registers used below and reserve room for local vars
+	push r4 /* replace push & pop by mem ops? */
+	push r5
+	push r6
+	push r7
+	push r8
+	push r9
+	push r10
+	push r11
+	push r12
+	push r13
+	push r14
+	push r15
+	push r16
+	push r17
+	push r28
+	push r29
+	in r20, SPL
+	in r21, SPH
+	movw r18, r20			;backup SP
+;	movw r26, r20			; X points to free space on stack
+	movw r30, r22			; Z points to message
+	subi r20, lo8(sha256_nextBlock_localSpace) ;sbiw can do only up to 63
+	sbci r21, hi8(sha256_nextBlock_localSpace)
+	movw r26, r20			; X points to free space on stack
+	in r0, SREG
+	cli ; we want to be uninterrupted while updating SP
+	out SPL, r20
+	out SREG, r0
+	out SPH, r21
+	push r18 /* push old SP on new stack */
+	push r19
+	push r24
+	push r25 /* param1 will be needed later */
+ ; now we fill the w array with message (think about endianess)
+ 	adiw r26, 1 ; X++
+ 	ldi r20, 16
+sha256_nextBlock_wcpyloop:
+ 	ld r23, Z+
+ 	ld r22, Z+
+ 	ld r19, Z+
+ 	ld r18, Z+
+ 	st X+, r18
+ 	st X+, r19
+ 	st X+, r22
+	st X+, r23
+	dec r20
+	brne sha256_nextBlock_wcpyloop
+/*	for (i=16; i<64; ++i){
+		w[i] = SIGMA_b(w[i-2]) + w[i-7] + SIGMA_a(w[i-15]) + w[i-16];
+	} */
+	/* r25,r24,r23,r22 (r21,r20) are function values
+	   r19,r18,r17,r16 are the accumulator
+	   r15,r14,r13,r12 are backup1
+	   r11,r10,r9 ,r8  are xor accu
+	   r1 is round counter 								*/
+
+	ldi r20, 64-16
+	mov LoopC, r20
+sha256_nextBlock_wcalcloop:
+	movw r30, r26 ; cp X to Z
+	sbiw r30, 63
+	sbiw r30, 1 		; subtract 64 = 16*4
+	ld Accu1, Z+
+	ld Accu2, Z+
+	ld Accu3, Z+
+	ld Accu4, Z+ /* w[i] = w[i-16] */
+	ld Bck1, Z+
+	ld Bck2, Z+
+	ld Bck3, Z+
+	ld Bck4, Z+ /* backup = w[i-15] */
+	/* now sigma 0 */
+	mov Func1, Bck2
+	mov Func2, Bck3
+	mov Func3, Bck4
+	mov Func4, Bck1  /* prerotated by 8 */
+	ldi r20, 1
+	rcall bitrotl
+	movw XAccu1, Func1
+	movw XAccu3, Func3	 /* store ROTR(w[i-15],7) in xor accu */
+	movw Func1, Bck3
+	movw Func3, Bck1 /* prerotated by 16 */
+	ldi r20, 2
+	rcall bitrotr
+	eor XAccu1, Func1  /* xor ROTR(w[i-15], 18)*/
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4
+	ldi Func2, 3		 /* now shr3 */ /*we can destroy backup now*/
+sigma0_shr:
+	lsr Bck4
+	ror Bck3
+	ror Bck2
+	ror Bck1
+	dec Func2
+	brne sigma0_shr
+	eor XAccu1, Bck1
+	eor XAccu2, Bck2
+	eor XAccu3, Bck3
+	eor XAccu4, Bck4	/* xor SHR(w[i-15], 3)*/ /* xor accu == sigma0(w[i-15]) */
+	add Accu1, XAccu1
+	adc Accu2, XAccu2
+	adc Accu3, XAccu3
+	adc Accu4, XAccu4 /* finished with sigma0 */
+	ldd Func1, Z+7*4  /* now accu += w[i-7] */
+	ldd Func2, Z+7*4+1
+	ldd Func3, Z+7*4+2
+	ldd Func4, Z+7*4+3
+	add Accu1, Func1
+	adc Accu2, Func2
+	adc Accu3, Func3
+	adc Accu4, Func4
+	ldd Bck1, Z+12*4 /* now backup = w[i-2]*/
+	ldd Bck2, Z+12*4+1
+	ldd Bck3, Z+12*4+2
+	ldd Bck4, Z+12*4+3
+	/* now sigma 1 */
+	movw Func1, Bck3
+	movw Func3, Bck1 /* prerotated by 16 */
+	ldi r20, 1
+	rcall bitrotr
+	movw XAccu3, Func3
+	movw XAccu1, Func1	 /* store ROTR(w[i-2], 17) in xor accu */
+;	movw Func1, Bck3
+;	movw Func3, Bck1 /* prerotated by 16 */
+	ldi r20, 2 /* Func is rotated by 2 more bits, 19 in total */
+	rcall bitrotr
+	eor XAccu1, Func1  /* xor ROTR(w[i-2], 19)*/
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4
+	ldi Func2, 2	 /* now shr10 (dirty trick, skipping a byte) */ /*we can destroy backup now*/
+sigma1_shr:
+	lsr Bck4
+	ror Bck3
+	ror Bck2
+	dec Func2
+	brne sigma1_shr
+	eor XAccu1, Bck2
+	eor XAccu2, Bck3
+	eor XAccu3, Bck4  /* xor SHR(w[i-2], 10)*/ /* xor accu == sigma1(w[i-2]) */
+	add Accu1, XAccu1
+	adc Accu2, XAccu2
+	adc Accu3, XAccu3
+	adc Accu4, XAccu4 /* finished with sigma1 */
+	/* now store w[i] */
+	st X+, Accu1
+	st X+, Accu2
+	st X+, Accu3
+	st X+, Accu4
+	dec LoopC
+	breq 3f  ; skip if zero
+	rjmp sha256_nextBlock_wcalcloop
+3:
+	/* we are finished with w array X points one byte post w */
+/* init a array */
+	pop r31 /* fetch param1 (pointer to ctx) into Z and push it back */
+	pop r30
+	push r30
+	push r31
+	ldi r25, 8*4 /* 8 32-bit values to copy from ctx to a array */
+init_a_array:
+	ld r1, Z+
+	st X+, r1
+	dec r25
+	brne init_a_array
+
+/* now the real fun begins */
+/* for (i=0; i<64; ++i){
+			t1 = a[7] + SIGMA1(a[4]) + CH(a[4],a[5],a[6]) + k[i] + w[i];
+			t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]);
+			memmove(&(a[1]), &(a[0]), 7*4); 	// a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0];
+			a[4] += t1;
+			a[0] = t1 + t2;
+		} */
+	/* Y points to a[0], Z ('cause lpm wants it) points to k[i], X points to w[i] */
+	sbiw r26, 8*4  /* X pointed at a[7]+1, now at a[0] */
+	movw r28, r26
+	ldi r30, lo8(sha256_kv)
+	ldi r31, hi8(sha256_kv)
+	dec r27  /* X - (64*4 == 256) */
+	ldi r25, 64
+	mov LoopC, r25
+sha256_main_loop:
+	/* now calculate t1 */
+	 /*CH(x,y,z) = (x&y)^((~x)&z)*/
+	ldd T1, Y+5*4
+	ldd T2, Y+5*4+1
+	ldd T3, Y+5*4+2
+	ldd T4, Y+5*4+3 /* y in T */
+	ldd Func1, Y+4*4
+	ldd Func2, Y+4*4+1
+	ldd Func3, Y+4*4+2
+	ldd Func4, Y+4*4+3  /* x in Func */
+	ldd Bck1, Y+6*4
+	ldd Bck2, Y+6*4+1
+	ldd Bck3, Y+6*4+2
+	ldd Bck4, Y+6*4+3 /* z in Bck */
+	and T1, Func1
+	and T2, Func2
+	and T3, Func3
+	and T4, Func4
+	com Func1
+	com Func2
+	com Func3
+	com Func4
+	and Bck1, Func1
+	and Bck2, Func2
+	and Bck3, Func3
+	and Bck4, Func4
+	eor T1, Bck1
+	eor T2, Bck2
+	eor T3, Bck3
+	eor T4, Bck4 /* done, CH(x,y,z) is in T */
+	/* now SIGMA1(a[4]) */
+	ldd Bck4, Y+4*4		/* think about using it from Func reg above*/
+	ldd Bck1, Y+4*4+1
+	ldd Bck2, Y+4*4+2
+	ldd Bck3, Y+4*4+3 /* load prerotate by 8-bit */
+	movw Func1, Bck1
+	movw Func3, Bck3
+	ldi r20, 2
+	rcall bitrotl		/* rotr(x,6) */
+	movw XAccu1, Func1
+	movw XAccu3, Func3
+	movw Func1, Bck1
+	movw Func3, Bck3
+	ldi r20, 3
+	rcall bitrotr 	/* rotr(x,11) */
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4
+	movw Func1, Bck3 /* this prerotates further 16 bits*/
+	movw Func3, Bck1 /* so we have now prerotated by 24 bits*/
+	ldi r20, 1
+	rcall bitrotr 	/* rotr(x,25) */
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4 /* finished with SIGMA1, add it to T */
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4
+	/* now we've to add a[7], w[i] and k[i] */
+	ldd XAccu1, Y+4*7
+	ldd XAccu2, Y+4*7+1
+	ldd XAccu3, Y+4*7+2
+	ldd XAccu4, Y+4*7+3
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4 /* add a[7] */
+	ld XAccu1, X+
+	ld XAccu2, X+
+	ld XAccu3, X+
+	ld XAccu4, X+
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4 /* add w[i] */
+	lpm XAccu1, Z+
+	lpm XAccu2, Z+
+	lpm XAccu3, Z+
+	lpm XAccu4, Z+
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4 /* add k[i] */ /* finished with t1 */
+	/*now t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]) */ /*i did to much x86 asm, i always see 4 32bit regs*/
+		/* starting with MAJ(x,y,z) */
+	ldd Func1, Y+4*0+0
+	ldd Func2, Y+4*0+1
+	ldd Func3, Y+4*0+2
+	ldd Func4, Y+4*0+3 /* load x=a[0] */
+	ldd XAccu1, Y+4*1+0
+	ldd XAccu2, Y+4*1+1
+	ldd XAccu3, Y+4*1+2
+	ldd XAccu4, Y+4*1+3 /* load y=a[1] */
+	and XAccu1, Func1
+	and XAccu2, Func2
+	and XAccu3, Func3
+	and XAccu4, Func4	/* XAccu == (x & y) */
+	ldd Bck1, Y+4*2+0
+	ldd Bck2, Y+4*2+1
+	ldd Bck3, Y+4*2+2
+	ldd Bck4, Y+4*2+3 /* load z=a[2] */
+	and Func1, Bck1
+	and Func2, Bck2
+	and Func3, Bck3
+	and Func4, Bck4
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4	/* XAccu == (x & y) ^ (x & z) */
+	ldd Func1, Y+4*1+0
+	ldd Func2, Y+4*1+1
+	ldd Func3, Y+4*1+2
+	ldd Func4, Y+4*1+3 /* load y=a[1] */
+	and Func1, Bck1
+	and Func2, Bck2
+	and Func3, Bck3
+	and Func4, Bck4
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4	/* XAccu == Maj(x,y,z) == (x & y) ^ (x & z) ^ (y & z) */
+   	/* SIGMA0(a[0]) */
+	ldd Bck1, Y+4*0+0 /* we should combine this with above */
+	ldd Bck2, Y+4*0+1
+	ldd Bck3, Y+4*0+2
+	ldd Bck4, Y+4*0+3
+	movw Func1, Bck1
+	movw Func3, Bck3
+	ldi r20, 2
+	rcall bitrotr
+	movw Accu1, Func1
+	movw Accu3, Func3 /* Accu = rotr(a[0], 2) */
+	movw Func1, Bck3
+	movw Func3, Bck1 /* prerotate by 16 bits */
+	ldi r20, 3
+	rcall bitrotl
+	eor Accu1, Func1
+	eor Accu2, Func2
+	eor Accu3, Func3
+	eor Accu4, Func4 /* Accu ^= rotr(a[0], 13) */
+	mov Func1, Bck4
+	mov Func2, Bck1
+	mov Func3, Bck2
+	mov Func4, Bck3  /* prerotate by 24 bits */
+	ldi r20, 2
+	rcall bitrotl
+	eor Accu1, Func1
+	eor Accu2, Func2
+	eor Accu3, Func3
+	eor Accu4, Func4 /* Accu ^= rotr(a[0], 22) */
+	add Accu1, XAccu1 /* add previous result (MAJ)*/
+	adc Accu2, XAccu2
+	adc Accu3, XAccu3
+	adc Accu4, XAccu4
+	/* now we are finished with the computing stuff (t1 in T, t2 in Accu)*/
+	/* a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; */
+
+	ldi r21, 7*4
+	adiw r28, 7*4
+a_shift_loop:
+	ld  r25, -Y /* warning: this is PREdecrement */
+	std Y+4, r25
+	dec r21
+	brne a_shift_loop
+
+	ldd Bck1, Y+4*4+0 /* a[4] += t1 */
+	ldd Bck2, Y+4*4+1
+	ldd Bck3, Y+4*4+2
+	ldd Bck4, Y+4*4+3
+	add Bck1, T1
+	adc Bck2, T2
+	adc Bck3, T3
+	adc Bck4, T4
+	std Y+4*4+0, Bck1
+	std Y+4*4+1, Bck2
+	std Y+4*4+2, Bck3
+	std Y+4*4+3, Bck4
+	add Accu1, T1 /* a[0] = t1 + t2 */
+	adc Accu2, T2
+	adc Accu3, T3
+	adc Accu4, T4
+	std Y+4*0+0, Accu1
+	std Y+4*0+1, Accu2
+	std Y+4*0+2, Accu3
+	std Y+4*0+3, Accu4 /* a array updated */
+
+
+	dec LoopC
+	breq update_state
+	rjmp sha256_main_loop ;brne sha256_main_loop
+update_state:
+	/* update state */
+	/* pointers to state should still exist on the stack ;-) */
+	pop r31
+	pop r30
+	ldi r21, 8
+update_state_loop:
+	ldd Accu1, Z+0
+	ldd Accu2, Z+1
+	ldd Accu3, Z+2
+	ldd Accu4, Z+3
+	ld Func1, Y+
+	ld Func2, Y+
+	ld Func3, Y+
+	ld Func4, Y+
+	add Accu1, Func1
+	adc Accu2, Func2
+	adc Accu3, Func3
+	adc Accu4, Func4
+	st Z+, Accu1
+	st Z+, Accu2
+	st Z+, Accu3
+	st Z+, Accu4
+	dec r21
+	brne update_state_loop
+	/* now we just have to update the length */
+	adiw r30, 1 /* since we add 512, we can simply skip the LSB */
+	ldi r21, 2
+	ldi r22, 6
+	ld r20, Z
+	add r20, r21
+	st Z+, r20
+	clr r21 /* clr leaves the carry flag untouched */
+sha256_nextBlock_fix_length:
+	brcc sha256_nextBlock_epilog /* stop as soon as there is no carry left */
+	ld r20, Z
+	adc r20, r21
+	st Z+, r20
+	dec r22
+	brne sha256_nextBlock_fix_length
+
+; EPILOG
+sha256_nextBlock_epilog:
+/* now we should clean up the stack */
+
+	pop r21 /* old SP as pushed in the prolog (high byte first) */
+	pop r20
+	in r0, SREG
+	cli ; we want to be uninterrupted while updating SP
+	out SPL, r20
+	out SREG, r0
+	out SPH, r21
+	clr r1
+	pop r29
+	pop r28
+	pop r17
+	pop r16
+	pop r15
+	pop r14
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	pop r9
+	pop r8
+	pop r7
+	pop r6
+	pop r5
+	pop r4
+	ret
+
+sha256_kv: ; SHA-256 round constants K[0..63] in ProgMem; each 32-bit constant is stored as two 16-bit words, low word first
+.word	0x2f98, 0x428a, 0x4491, 0x7137, 0xfbcf, 0xb5c0, 0xdba5, 0xe9b5, 0xc25b, 0x3956, 0x11f1, 0x59f1, 0x82a4, 0x923f, 0x5ed5, 0xab1c
+.word	0xaa98, 0xd807, 0x5b01, 0x1283, 0x85be, 0x2431, 0x7dc3, 0x550c, 0x5d74, 0x72be, 0xb1fe, 0x80de, 0x06a7, 0x9bdc, 0xf174, 0xc19b
+.word	0x69c1, 0xe49b, 0x4786, 0xefbe, 0x9dc6, 0x0fc1, 0xa1cc, 0x240c, 0x2c6f, 0x2de9, 0x84aa, 0x4a74, 0xa9dc, 0x5cb0, 0x88da, 0x76f9
+.word	0x5152, 0x983e, 0xc66d, 0xa831, 0x27c8, 0xb003, 0x7fc7, 0xbf59, 0x0bf3, 0xc6e0, 0x9147, 0xd5a7, 0x6351, 0x06ca, 0x2967, 0x1429
+.word	0x0a85, 0x27b7, 0x2138, 0x2e1b, 0x6dfc, 0x4d2c, 0x0d13, 0x5338, 0x7354, 0x650a, 0x0abb, 0x766a, 0xc92e, 0x81c2, 0x2c85, 0x9272
+.word	0xe8a1, 0xa2bf, 0x664b, 0xa81a, 0x8b70, 0xc24b, 0x51a3, 0xc76c, 0xe819, 0xd192, 0x0624, 0xd699, 0x3585, 0xf40e, 0xa070, 0x106a
+.word	0xc116, 0x19a4, 0x6c08, 0x1e37, 0x774c, 0x2748, 0xbcb5, 0x34b0, 0x0cb3, 0x391c, 0xaa4a, 0x4ed8, 0xca4f, 0x5b9c, 0x6ff3, 0x682e
+.word	0x82ee, 0x748f, 0x636f, 0x78a5, 0x7814, 0x84c8, 0x0208, 0x8cc7, 0xfffa, 0x90be, 0x6ceb, 0xa450, 0xa3f7, 0xbef9, 0x78f2, 0xc671
+
+
+;###########################################################
+
+.global sha256_init
+;uint32_t sha256_init_vector[]={
+;  	0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
+;	0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 };
+;
+;void sha256_init(sha256_ctx_t *state){
+;	state->length=0;
+;	memcpy(state->h, sha256_init_vector, 8*4);
+;}
+; param1: (r24,r25) 16-bit pointer to sha256_ctx_t struct in ram
+; modifies: X(r26,r27), Z(r30,r31), r22, r23
+sha256_init:
+	movw r26, r24 ; (24,25) --> (26,27) load X with param1
+	ldi r30, lo8((sha256_init_vector)) ; Z = address of the initial-value table in program memory
+	ldi r31, hi8((sha256_init_vector))
+	ldi r22, 32+8 ; byte counter: 32 bytes for h[0..7] plus 8 zero bytes for the length field
+sha256_init_vloop:
+	lpm r23, Z+ ; fetch the next table byte from program memory
+	st X+, r23 ; and store it into the context in RAM
+	dec r22
+	brne sha256_init_vloop
+	ret
+
+sha256_init_vector: ; image copied by sha256_init: eight 32-bit initial hash words (low 16-bit word first), then 8 zero bytes for the length
+.word 0xE667, 0x6A09 ; 0x6A09E667
+.word 0xAE85, 0xBB67 ; 0xBB67AE85
+.word 0xF372, 0x3C6E ; 0x3C6EF372
+.word 0xF53A, 0xA54F ; 0xA54FF53A
+.word 0x527F, 0x510E ; 0x510E527F
+.word 0x688C, 0x9B05 ; 0x9B05688C
+.word 0xD9AB, 0x1F83 ; 0x1F83D9AB
+.word 0xCD19, 0x5BE0 ; 0x5BE0CD19
+.word 0x0000, 0x0000 ; length = 0 (first four bytes)
+.word 0x0000, 0x0000 ; length = 0 (last four bytes)
+
+;###########################################################
+
+.global rotl32
+; === ROTL32 ===
+; function that rotates a 32 bit word to the left
+;  param1: the 32-bit word to rotate
+;	given in r25,r24,r23,r22 (r25 is most significant)
+;  param2: an 8-bit value telling how often to rotate
+;	given in r20
+; modifies: r20, r21; the rotated word is left in r25,r24,r23,r22
+rotl32:
+	cpi r20, 8
+	brlo bitrotl ; fewer than 8 positions left: finish bit by bit
+	mov r21, r25 ; otherwise rotate left by one whole byte using register moves
+	mov r25, r24
+	mov r24, r23
+	mov r23, r22
+	mov r22, r21
+	subi r20, 8
+	rjmp rotl32
+bitrotl: ; bitwise part; the SHA-256 round code also enters here directly (rcall) with small counts in r20
+	clr r21 ; r21 collects the bits shifted out of r25
+	clc
+bitrotl_loop:
+	tst r20
+	breq fixrotl ; count is zero: nothing to shift
+2:
+	rol r22
+	rol r23
+	rol r24
+	rol r25
+	rol r21
+	dec r20
+	brne 2b
+fixrotl:
+	or r22, r21 ; put the collected bits back in at the low end
+	ret
+
+
+;###########################################################
+
+.global rotr32
+; === ROTR32 ===
+; function that rotates a 32 bit word to the right
+;  param1: the 32-bit word to rotate
+;	given in r25,r24,r23,r22 (r25 is most significant)
+;  param2: an 8-bit value telling how often to rotate
+;	given in r20
+; modifies: r20, r21; the rotated word is left in r25,r24,r23,r22
+rotr32:
+	cpi r20, 8
+	brlo bitrotr ; fewer than 8 positions left: finish bit by bit
+	mov r21, r22 ; otherwise rotate right by one whole byte using register moves
+	mov r22, r23
+	mov r23, r24
+	mov r24, r25
+	mov r25, r21
+	subi r20, 8
+	rjmp rotr32
+bitrotr: ; bitwise part; the SHA-256 round code also enters here directly (rcall) with small counts in r20
+	clr r21 ; r21 collects the bits shifted out of r22
+	clc
+bitrotr_loop:
+	tst r20
+	breq fixrotr ; count is zero: nothing to shift
+2:
+	ror r25
+	ror r24
+	ror r23
+	ror r22
+	ror r21
+	dec r20
+	brne 2b
+fixrotr:
+	or r25, r21 ; put the collected bits back in at the high end
+	ret
+
+
+;###########################################################
+
+.global change_endian32
+; === change_endian32 ===
+; function that reverses the byte order of a 32-bit word
+;  param1: the 32-bit word
+;	given in r25,r24,r23,r22 (r25 is most significant)
+;  modifies: r20, r21; the byte-swapped word is left in r25,r24,r23,r22
+change_endian32:
+	movw r20,  r22 ; (r22,r23) --> (r20,r21)
+	mov r22, r25 ; old most significant byte becomes the least significant one
+	mov r23, r24
+	mov r24, r21 ; old r23
+	mov r25, r20 ; old r22
+	ret
+
diff --git a/dsa/sha256.h b/dsa/sha256.h
new file mode 100644
index 0000000..24960a3
--- /dev/null
+++ b/dsa/sha256.h
@@ -0,0 +1,122 @@
+/* sha256.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file	sha256.h
+ * \author  Daniel Otte 
+ * \date    2006-05-16
+ * \license	GPLv3 or later
+ * 
+ */
+
+#ifndef SHA256_H_
+#define SHA256_H_
+
+#define __LITTLE_ENDIAN__
+
+
+#include <stdint.h>
+
+/** \def SHA256_HASH_BITS
+ * defines the size of a SHA-256 hash value in bits
+ */
+
+/** \def SHA256_HASH_BYTES
+ * defines the size of a SHA-256 hash value in bytes
+ */
+
+/** \def SHA256_BLOCK_BITS
+ * defines the size of a SHA-256 input block in bits
+ */
+
+/** \def SHA256_BLOCK_BYTES
+ * defines the size of a SHA-256 input block in bytes
+ */
+
+#define SHA256_HASH_BITS  256
+#define SHA256_HASH_BYTES (SHA256_HASH_BITS/8)
+#define SHA256_BLOCK_BITS 512
+#define SHA256_BLOCK_BYTES (SHA256_BLOCK_BITS/8)
+
+/** \typedef sha256_ctx_t
+ * \brief SHA-256 context type
+ * 
+ * A variable of this type may hold the state of a SHA-256 hashing process
+ */
+typedef struct {
+	uint32_t h[8];   /**< the eight 32-bit words of the running hash state */
+	uint64_t length; /**< message length counter; the assembler nextBlock adds 512 per block, i.e. it counts bits */
+} sha256_ctx_t;
+
+/** \typedef sha256_hash_t
+ * \brief SHA-256 hash value type
+ * 
+ * A variable of this type may hold the hash value produced by the
+ * sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state) function.
+ */
+typedef uint8_t sha256_hash_t[SHA256_HASH_BYTES];
+
+/** \fn void sha256_init(sha256_ctx_t *state)
+ * \brief initialise a SHA-256 context
+ * 
+ * This function sets a ::sha256_ctx_t to the initial values for hashing.
+ * \param state pointer to the SHA-256 hashing context
+ */
+void sha256_init(sha256_ctx_t *state);
+
+/** \fn void sha256_nextBlock (sha256_ctx_t* state, const void* block)
+ * \brief update the context with a given block
+ * 
+ * This function updates the SHA-256 hash context by processing the given block
+ * of fixed length.
+ * \param state pointer to the SHA-256 hash context
+ * \param block pointer to the block of fixed length (512 bit = 64 byte)
+ */
+void sha256_nextBlock (sha256_ctx_t* state, const void* block);
+
+/** \fn void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b)
+ * \brief finalize the context with the given block 
+ * 
+ * This function finalizes the SHA-256 hash context by processing the given block
+ * of variable length.
+ * \param state pointer to the SHA-256 hash context
+ * \param block pointer to the final block of variable length (its size is given by \a length_b)
+ * \param length_b the length of the block in bits
+ */
+void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b);
+
+/** \fn void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state)
+ * \brief convert the hash state into the hash value
+ * This function reads the context and writes the hash value to the destination
+ * \param dest pointer to the location where the hash value should be written
+ * \param state pointer to the SHA-256 hash context
+ */
+void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state);
+
+/** \fn void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b)
+ * \brief simple SHA-256 hashing function for direct hashing
+ * 
+ * This function automatically hashes a given message of arbitrary length with
+ * the SHA-256 hashing algorithm.
+ * \param dest pointer to the location where the hash value is going to be written to
+ * \param msg pointer to the message that is going to be hashed
+ * \param length_b length of the message in bits
+ */
+void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b);
+
+#endif /*SHA256_H_*/
diff --git a/hfal-performance.c b/hfal-performance.c
index 9bfaa85..47fccf8 100644
--- a/hfal-performance.c
+++ b/hfal-performance.c
@@ -27,6 +27,7 @@
 
 #include "hfal-performance.h"
 #include "hashfunction_descriptor.h"
+#include "stack_measuring.h"
 #include "cli.h"
 #include "performance_test.h"
 #include <stdint.h>
@@ -34,6 +35,8 @@
 #include <string.h>
 #include <avr/pgmspace.h>
 
+#define PATTERN_A 0xAA
+#define PATTERN_B 0x55
 
 static
 void printvalue(unsigned long v){
@@ -128,6 +131,87 @@ void hfal_performance(const hfdesc_t* hd){
 	}
 }
 
+/* Print the stack usage (bytes) of init, nextBlock, lastBlock and ctx2hash for the hash function described by hd (descriptor in program memory). */
+void hfal_stacksize(const hfdesc_t* hd){
+	hfdesc_t hf;
+	stack_measuring_ctx_t smctx;
+	uint16_t t1, t2;
+
+	memcpy_P(&hf, hd, sizeof(hfdesc_t));
+	if(hf.type!=HFDESC_TYPE_HASHFUNCTION)
+		return; /* checked before the size fields are used to dimension the buffers below */
+	uint8_t ctx[hf.ctxsize_B];
+	uint8_t data[(hf.blocksize_b+7)/8];
+	uint8_t digest[(hf.hashsize_b+7)/8];
+	cli_putstr_P(PSTR("\r\n\r\n === "));
+	cli_putstr_P(hf.name);
+	cli_putstr_P(PSTR(" stack-usage === "
+	                  "\r\n    type:             hashfunction"
+	                  "\r\n    hashsize (bits):    "));
+	printvalue(hf.hashsize_b);
+
+	cli_putstr_P(PSTR("\r\n    ctxsize (bytes):    "));
+	printvalue(hf.ctxsize_B);
+
+	cli_putstr_P(PSTR("\r\n    blocksize (bits):   "));
+	printvalue(hf.blocksize_b);
+	/* every call is measured twice, once per fill pattern (t1, t2), with interrupts off; the larger value is printed */
+	cli();
+	stack_measure_init(&smctx, PATTERN_A);
+	hf.init(&ctx);
+	t1 = stack_measure_final(&smctx);
+	stack_measure_init(&smctx, PATTERN_B);
+	hf.init(&ctx);
+	t2 = stack_measure_final(&smctx);
+	sei();
+
+	t1 = (t1>t2)?t1:t2;
+	cli_putstr_P(PSTR("\r\n    init (bytes):       "));
+	printvalue((unsigned long)t1);
+
+	cli();
+	stack_measure_init(&smctx, PATTERN_A);
+	hf.nextBlock(&ctx, data);
+	t1 = stack_measure_final(&smctx);
+	stack_measure_init(&smctx, PATTERN_B);
+	hf.nextBlock(&ctx, data);
+	t2 = stack_measure_final(&smctx);
+	sei();
+
+	t1 = (t1>t2)?t1:t2;
+	cli_putstr_P(PSTR("\r\n    nextBlock (bytes):  "));
+	printvalue((unsigned long)t1);
+
+	cli();
+	stack_measure_init(&smctx, PATTERN_A);
+	hf.lastBlock(&ctx, data, 0);
+	t1 = stack_measure_final(&smctx);
+	stack_measure_init(&smctx, PATTERN_B);
+	hf.lastBlock(&ctx, data, 0);
+	t2 = stack_measure_final(&smctx);
+	sei();
+
+	t1 = (t1>t2)?t1:t2;
+	cli_putstr_P(PSTR("\r\n    lastBlock (bytes):  "));
+	printvalue((unsigned long)t1);
+
+	cli();
+	stack_measure_init(&smctx, PATTERN_A);
+	hf.ctx2hash(digest, &ctx);
+	t1 = stack_measure_final(&smctx);
+	stack_measure_init(&smctx, PATTERN_B);
+	hf.ctx2hash(digest, &ctx);
+	t2 = stack_measure_final(&smctx);
+	sei();
+
+	t1 = (t1>t2)?t1:t2;
+	cli_putstr_P(PSTR("\r\n    ctx2hash (bytes):   "));
+	printvalue((unsigned long)t1);
+
+	if(hf.free){
+		hf.free(&ctx);
+	}
+}
 
 void hfal_performance_multiple(const hfdesc_t** hd_list){
 	const hfdesc_t* hd;
@@ -138,6 +222,7 @@ void hfal_performance_multiple(const hfdesc_t** hd_list){
 			return;
 		}
 		hfal_performance(hd);
+		hfal_stacksize(hd);
 		hd_list = (void*)((uint8_t*)hd_list + 2);
 	}
 }
diff --git a/host/get_performance.rb b/host/get_performance.rb
index 43096f2..b78ba99 100644
--- a/host/get_performance.rb
+++ b/host/get_performance.rb
@@ -79,27 +79,26 @@ end
 
 def readPerformanceVector(param)
   lb=""
-  buffer=""
   fname=""
   fout=0
   begin
     lb = read_line()
     if lb.match(/End of performance figures/)
       return false
-	end
-	if m=lb.match(/=== (.*) performance ===/) 
-	  fout.close if fout!=0
-	  fname=$dir+m[1]
-	  fname+="."+param if param != ""
-	  fname+=".txt"
-	  fout = File.open(fname, "w+")
-	  printf("> %s \n", fname)	
-	  fout.write(lb)
-    else
-	  if fout!=0 && lb!=""
-	    fout.write(lb)
-	  end	
-	end
+    end
+    if m=lb.match(/=== (.*) performance ===/) # header of a new algorithm: switch to a new output file
+      fout.close if fout!=0
+      fname=$dir+m[1]
+      fname+="."+param if param != ""
+      fname+=".txt"
+      fout = File.open(fname, "w+")
+      printf("> %s \n", fname)
+      fout.write(lb)
+    else
+      if fout!=0 && lb!="" # any other non-empty line goes to the file that is currently open
+        fout.write(lb)
+      end
+    end
   end while true
 end
 
diff --git a/host/performance2wiki.rb b/host/performance2wiki.rb
index a60cde1..cfdc3ce 100644
--- a/host/performance2wiki.rb
+++ b/host/performance2wiki.rb
@@ -56,10 +56,26 @@ def process_hashfunction(fin, name)
   lb = fin.readline()
   m = lb.match(/ctx2hash \(cycles\):[\s]*([\d]*)/)
   convtime = m[1].to_i()
+  begin
+    lb = fin.readline()
+  end until m = lb.match(/init \(bytes\):[\s]*([\d]*)/)
+  initstack = m[1].to_i()
+  lb = fin.readline()
+  m = lb.match(/nextBlock \(bytes\):[\s]*([\d]*)/)
+  nextblockstack = m[1].to_i()
+  lb = fin.readline()
+  m = lb.match(/lastBlock \(bytes\):[\s]*([\d]*)/)
+  lastblockstack = m[1].to_i()
+  lb = fin.readline()
+  m = lb.match(/ctx2hash \(bytes\):[\s]*([\d]*)/)
+  convstack = m[1].to_i()
+  s1 = (initstack>nextblockstack)?initstack:nextblockstack
+  s2 = (lastblockstack>convstack)?lastblockstack:convstack
+  stack = (s1>s2)?s1:s2
   
-  printf("| %20s || %3s || %3s || || %4d || || %4d || %4d ||" +
+  printf("| %20s || %3s || %3s || || %4d || %4d || %4d || %4d ||" +
          " %6d || %6d || %7.2f || %6d || || || \n|-\n" , 
-        name, $lang, $lang ,ctxsize, hashsize, blocksize, 
+        name, $lang, $lang ,ctxsize, stack, hashsize, blocksize, 
 	    inittime, nextblocktime, nextblocktime.to_f/(blocksize/8),
 		lastblocktime+convtime)
 end
diff --git a/mkfiles/001_hfal_std.mk b/mkfiles/001_hfal_std.mk
index 0404803..eed080c 100644
--- a/mkfiles/001_hfal_std.mk
+++ b/mkfiles/001_hfal_std.mk
@@ -1,2 +1,3 @@
 HFAL_STD = nessie_common.o nessie_hash_test.o performance_test.o \
-           hfal-basic.o hfal-performance.o hfal-nessie.o hfal-test.o shavs.o
+           hfal-basic.o hfal-performance.o hfal-nessie.o hfal-test.o shavs.o \
+           stack_measuring.o
diff --git a/mkfiles/bigint.mk b/mkfiles/bigint.mk
index de7100e..7e3a4b7 100644
--- a/mkfiles/bigint.mk
+++ b/mkfiles/bigint.mk
@@ -1,8 +1,8 @@
 # Makefile for BigInt
 ALGO_NAME := BIGINT
 
-# comment out the following line for removement of base64 from the build process
-ENCODINGS += $(ALGO_NAME)
+# comment out the following line to remove BigInt from the build process
+AUX += $(ALGO_NAME)
 
 $(ALGO_NAME)_DIR      := bigint/
 $(ALGO_NAME)_OBJ      := bigint.o bigint_io.o bigint_add_u.o 
diff --git a/mkfiles/dsa.mk b/mkfiles/dsa.mk
new file mode 100644
index 0000000..b3b64c1
--- /dev/null
+++ b/mkfiles/dsa.mk
@@ -0,0 +1,13 @@
+# Makefile for DSA
+ALGO_NAME := DSA
+
+# comment out the following line to remove DSA from the build process
+SIGNATURE += $(ALGO_NAME)
+
+$(ALGO_NAME)_DIR      := dsa/
+$(ALGO_NAME)_OBJ      := bigint.o bigint_io.o bigint_add_u.o sha1-asm.o dsa_sign.o dsa_verify.o dsa_key_blob.o base64_enc.o
+$(ALGO_NAME)_TEST_BIN := main-dsa-test.o $(CLI_STD) hfal_sha1.o $(HFAL_STD) \
+                         noekeon_asm.o noekeon_prng.o memxor.o 
+			 
+$(ALGO_NAME)_PERFORMANCE_TEST := performance
+
diff --git a/mkfiles/shabal.mk b/mkfiles/shabal.mk
index e2600d4..853b88b 100644
--- a/mkfiles/shabal.mk
+++ b/mkfiles/shabal.mk
@@ -1,7 +1,7 @@
-# Makefile for Skein
+# Makefile for Shabal
 ALGO_NAME := SHABAL
 
-# comment out the following line for removement of Skein from the build process
+# comment out the following line to remove Shabal from the build process
 HASHES += $(ALGO_NAME)
 
 $(ALGO_NAME)_DIR      := shabal/
diff --git a/mkfiles/shabal_c.mk b/mkfiles/shabal_c.mk
index 5e8082c..d55c033 100644
--- a/mkfiles/shabal_c.mk
+++ b/mkfiles/shabal_c.mk
@@ -1,7 +1,7 @@
-# Makefile for Skein
+# Makefile for Shabal
 ALGO_NAME := SHABAL_C
 
-# comment out the following line for removement of Skein from the build process
+# comment out the following line to remove Shabal from the build process
 HASHES += $(ALGO_NAME)
 
 $(ALGO_NAME)_DIR      := shabal/
diff --git a/stack_measuring.S b/stack_measuring.S
new file mode 100644
index 0000000..a70f32d
--- /dev/null
+++ b/stack_measuring.S
@@ -0,0 +1,96 @@
+/* stack_measuring.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2010 Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <avr/io.h>
+
+.extern __brkval
+
+.global stack_measure_init
+stack_measure_init: /* (r25:r24 = ctx pointer, r22 = pattern): record heap top and stack pointer in ctx, then fill the RAM between them with the pattern */
+	movw r30, r24 /* Z = ctx */
+	lds r20, __brkval /* r21:r20 = current __brkval */
+	lds r21, __brkval+1 /* NOTE(review): presumably still 0 before the first malloc; the fill would then start at address 1 - confirm */
+	in r0, _SFR_IO_ADDR(SREG)
+	cli /* interrupts off while the stack pointer is read */
+	in r26, _SFR_IO_ADDR(SPL)
+	out _SFR_IO_ADDR(SREG), r0
+	in r27, _SFR_IO_ADDR(SPH) /* X = stack pointer */
+	st Z+, r20 /* ctx->heap_top = __brkval */
+	st Z+, r21
+	st Z+, r26 /* ctx->stack_bottom = stack pointer */
+	st Z+, r27
+	st Z, r22 /* ctx->pattern = pattern */
+	movw r24, r26
+	sub r24, r20
+	sbc r25, r21
+	sbiw r24, 1 /* r25:r24 = stack pointer - heap top - 1 = number of bytes to fill below the stack pointer */
+	st X, r22 /* the byte the stack pointer addresses */
+1:  st -X, r22 /* then downwards; the last byte written is heap top + 1 */
+	sbiw r24, 1
+	brne 1b
+	ret
+
+.global stack_measure_final
+stack_measure_final: /* (r25:r24 = ctx pointer): returns in r25:r24 the byte count from the lowest overwritten byte up to the saved stack pointer, 0 if none */
+	movw r30, r24 /* Z = ctx */
+	lds r20, __brkval
+	lds r21, __brkval+1 /* r21:r20 = current __brkval */
+	ld r26, Z+
+	ld r27, Z+ /* X = ctx->heap_top */
+	cp r20, r26
+	cpc r21, r27
+	brlo 1f /* keep the saved heap top when it is the larger value */
+	movw r26, r20 /* otherwise start from the current __brkval */
+1:
+	adiw r26, 1 /* first byte to examine */
+	ld r24, Z+
+	ld r25, Z+ /* r25:r24 = ctx->stack_bottom */
+	ld r22, Z /* r22 = ctx->pattern */
+	sub r24, r26
+	sbc r25, r27
+	adiw r24, 2 /* bytes to examine plus one, because the loop decrements before it tests */
+1:
+	sbiw r24, 1
+	breq 2f /* every byte still holds the pattern: return 0 */
+	ld r20, X+
+	cpse r20, r22 /* byte equals the pattern: skip the ret and continue upwards */
+2:  ret
+	rjmp 1b
+
+/* for testing only
+.global stack_measure_usestack
+stack_measure_usestack:
+	adiw r24, 0
+	brne 2f
+1:	ret
+2:
+	in r0, _SFR_IO_ADDR(SREG)
+	cli
+	in r26, _SFR_IO_ADDR(SPL)
+	out _SFR_IO_ADDR(SREG), r0
+	in r27, _SFR_IO_ADDR(SPH)
+    st X, r22
+	sbiw r24, 1
+	breq 1b
+3:  st -X, r22
+	sbiw r24, 1
+	breq 1b
+	rjmp 3b
+
+	*/
diff --git a/stack_measuring.h b/stack_measuring.h
new file mode 100644
index 0000000..23ca63e
--- /dev/null
+++ b/stack_measuring.h
@@ -0,0 +1,36 @@
+/* stack_measuring.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2010 Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef STACK_MEASURING_H_
+#define STACK_MEASURING_H_
+
+#include <stdint.h>
+
+typedef struct {
+	uint16_t heap_top;     /* value of __brkval when stack_measure_init() ran */
+	uint16_t stack_bottom; /* stack pointer value when stack_measure_init() ran */
+	uint8_t pattern;       /* fill byte written into the RAM between the two */
+} stack_measuring_ctx_t;
+
+void stack_measure_init(stack_measuring_ctx_t* ctx, uint8_t pattern); /* fills the RAM between heap top and stack pointer with pattern and records both in ctx */
+uint16_t stack_measure_final(const stack_measuring_ctx_t* ctx); /* returns how many bytes, counted up to the recorded stack pointer, no longer hold the pattern */
+/* for testing only
+ void stack_measure_usestack(uint16_t size, uint8_t value); */
+
+#endif /* STACK_MEASURING_H_ */
diff --git a/test_src/main-bigint-test.c b/test_src/main-bigint-test.c
index 5b65d9e..8fd4136 100644
--- a/test_src/main-bigint-test.c
+++ b/test_src/main-bigint-test.c
@@ -1,7 +1,7 @@
-/* main-base64-test.c */
+/* main-bigint-test.c */
 /*
     This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008, 2009  Daniel Otte (daniel.otte@rub.de)
+    Copyright (C) 2008, 2009, 2010  Daniel Otte (daniel.otte@rub.de)
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -17,7 +17,7 @@
     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 /*
- * base64 test-suit
+ * bigint test-suit
  * 
 */
 
diff --git a/test_src/main-bmw-test.c b/test_src/main-bmw-test.c
index 1aadbf1..2da30a0 100644
--- a/test_src/main-bmw-test.c
+++ b/test_src/main-bmw-test.c
@@ -157,19 +157,19 @@ const char shavs_test1_str[] PROGMEM = "shavs_test1";
 const char shavs_test3_str[] PROGMEM = "shavs_test3";
 
 cmdlist_entry_t cmdlist[] PROGMEM = {
-	{ nessie_str,          NULL, testrun_nessie_bmw},
-	{ test_str,            NULL, testrun_stdtest_bmw},
-	{ testshort_str,       NULL, testshort},
-	{ testlshort_str,      NULL, testlshort},
-	{ test506_str,         NULL, test506},
-	{ test507_str,         NULL, test507},
-	{ performance_str,     NULL, performance_bmw},
-	{ shavs_list_str,      NULL, shavs_listalgos},
-	{ shavs_set_str,   (void*)1, (void_fpt)shavs_setalgo},
-	{ shavs_test1_str,     NULL, shavs_test1},
-	{ shavs_test3_str,     NULL, shavs_test3},
-	{ echo_str,        (void*)1, (void_fpt)echo_ctrl},
-	{ NULL,                NULL, NULL}
+	{ nessie_str,                  NULL, testrun_nessie_bmw            },
+	{ test_str,                    NULL, testrun_stdtest_bmw           },
+	{ testshort_str,               NULL, testshort                     },
+	{ testlshort_str,              NULL, testlshort                    },
+	{ test506_str,                 NULL, test506                       },
+	{ test507_str,                 NULL, test507                       },
+	{ performance_str,             NULL, performance_bmw               },
+	{ shavs_list_str,              NULL, shavs_listalgos               },
+	{ shavs_set_str,           (void*)1, (void_fpt)shavs_setalgo       },
+	{ shavs_test1_str,             NULL, shavs_test1                   },
+	{ shavs_test3_str,             NULL, shavs_test3                   },
+	{ echo_str,                (void*)1, (void_fpt)echo_ctrl           },
+	{ NULL,                        NULL, NULL                          }
 };
 
 int main (void){
diff --git a/test_src/main-dsa-test.c b/test_src/main-dsa-test.c
new file mode 100644
index 0000000..acbffa9
--- /dev/null
+++ b/test_src/main-dsa-test.c
@@ -0,0 +1,191 @@
+/* main-dsa-test.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2010 Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * DSA test-suit
+ *
+*/
+
+#include "config.h"
+
+#include "uart_i.h"
+#include "debug.h"
+
+#include "noekeon.h"
+#include "noekeon_prng.h"
+#include "bigint.h"
+#include "bigint_io.h"
+#include "dsa.h"
+#include "dsa_key_blob.h"
+
+#include "cli.h"
+#include "performance_test.h"
+#include "hfal_sha1.h"
+#include "base64_enc.h"
+#include "base64_dec.h"
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+
+char* algo_name = "DSA";
+
+/*****************************************************************************
+ *  additional validation-functions											 *
+ *****************************************************************************/
+
+dsa_ctx_t dsa_ctx;
+
+void load_fix_dsa(void){ /* fill the global dsa_ctx via load_dsa_key_blob() */
+	load_dsa_key_blob(&dsa_ctx);
+}
+
+/* Print "\r\n<pstr>: " and then the bytes of a, wordv[length_B-1] first, as colon-separated hex, 16 bytes per output line. */
+/* pstr is a string in program memory. A number with length_B == 0 prints no bytes, so nothing outside wordv is read. */
+void dsa_print_item(bigint_t* a, PGM_P pstr){
+	uint16_t i, n;
+	cli_putstr_P(PSTR("\r\n"));
+	cli_putstr_P(pstr);
+	cli_putstr_P(PSTR(": "));
+	n = a->length_B;
+	for(i=0; i<n; ++i){
+		if(i%16==0){
+			/* start a new, indented output line every 16 bytes */
+			cli_putstr_P(PSTR("\r\n    "));
+		}
+		cli_hexdump(a->wordv + (n-1-i), 1);
+		if(i+1<n){
+			/* separator only between bytes, none after the last one */
+			cli_putc(':');
+		}
+	}
+}
+
+/* Print, base64 encoded, the byte string 0x30 len { 0x02 len r, 0x02 len s } with r and s most significant byte first. */
+/* NOTE(review): this looks like a DER SEQUENCE of two INTEGERs; DER would need a leading 0x00 when the top bit is set - confirm. */
+void dsa_print_signature_b64(dsa_signature_t* s){
+	uint16_t k, len_r = s->r.length_B, len_s = s->s.length_B;
+	uint16_t total = len_r + len_s + 6; /* outer header (2) + two inner headers (2 each) + both payloads */
+	uint8_t bin_b[total];
+	uint8_t *out = bin_b;
+	*out++ = 0x30;
+	*out++ = total - 2;
+	*out++ = 0x02;
+	*out++ = len_r;
+	for(k=len_r; k>0; --k){
+		*out++ = s->r.wordv[k-1];
+	}
+	*out++ = 0x02;
+	*out++ = len_s;
+	for(k=len_s; k>0; --k){
+		*out++ = s->s.wordv[k-1];
+	}
+	char b64_b[total*4/3+5];
+	base64enc(b64_b, bin_b, total);
+	cli_putstr(b64_b);
+}
+
+void dsa_print_ctx(dsa_ctx_t* ctx){ /* dump private key, public key and the domain parameters P, Q, G with dsa_print_item() */
+	dsa_print_item(&(ctx->priv), PSTR("private"));
+	dsa_print_item(&(ctx->pub), PSTR("public"));
+	dsa_print_item(&(ctx->domain.p), PSTR("P"));
+	dsa_print_item(&(ctx->domain.q), PSTR("Q"));
+	dsa_print_item(&(ctx->domain.g), PSTR("G"));
+}
+
+void dsa_print_signature(const dsa_signature_t* sig){ /* print both signature values, r then s, through bigint_print_hex() */
+	cli_putstr_P(PSTR("\r\nDSA-Signature:\r\n r:"));
+	bigint_print_hex(&(sig->r));
+	cli_putstr_P(PSTR("\r\n s:"));
+	bigint_print_hex(&(sig->s));
+}
+
+void quick_test(void){ /* sign and verify the first 0..7 bytes of a fixed message with the loaded key, printing each signature and verdict */
+	uint8_t msg_len, verdict=0, message[] = {0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef};
+	dsa_signature_t dsa_sig;
+	load_fix_dsa(); /* has to run before the buffers below, which are sized from the loaded q */
+	uint8_t dsa_sig_r_b[dsa_ctx.domain.q.length_B],
+	        dsa_sig_s_b[dsa_ctx.domain.q.length_B];
+	dsa_print_ctx(&dsa_ctx);
+	dsa_sig.s.wordv = dsa_sig_s_b;
+	dsa_sig.r.wordv = dsa_sig_r_b;
+	cli_putstr_P(PSTR("\r\n\r\n=== DSA QUICK TEST ==="));
+	for(msg_len=0; msg_len<8; ++msg_len){
+		cli_putstr_P(PSTR("\r\n"));
+		cli_putc('1'+msg_len);
+		cli_putstr_P(PSTR(": message: "));
+		if (msg_len==0){
+			cli_putstr_P(PSTR("<empty>"));
+		}else{
+			cli_hexdump(message, msg_len);
+		}
+		cli_putstr_P(PSTR("\r\n computing signature ... "));
+		dsa_sign_message(&dsa_sig, message, msg_len*8, &sha1_desc, &dsa_ctx, random8);
+		dsa_print_signature(&dsa_sig);
+		cli_putstr_P(PSTR("\r\n base64:\r\n--- SIGNATURE ---\r\n "));
+		dsa_print_signature_b64(&dsa_sig);
+		cli_putstr_P(PSTR("\r\n verifying signature ... "));
+		verdict = dsa_verify_message(&dsa_sig, message, msg_len*8, &sha1_desc, &dsa_ctx);
+		cli_putstr_P(PSTR("\r\n verification: "));
+		if(verdict!=DSA_SIGNATURE_OK){
+			cli_putstr_P(PSTR("[FAIL]"));
+		}else{
+			cli_putstr_P(PSTR("[PASS]"));
+		}
+	}
+}
+
+void reset_prng(void){ /* hand a 16-byte all-zero buffer to random_seed() and report it */
+	uint8_t zero_seed[16];
+	memset(zero_seed, 0, sizeof(zero_seed));
+	random_seed(zero_seed);
+	cli_putstr_P(PSTR("\r\nPRNG reset"));
+}
+
+void testrun_performance_bigint(void){
+	/* empty stub: the handler listed for the "performance" command performs no measurement */
+}
+/*****************************************************************************
+ *  main																	 *
+ *****************************************************************************/
+
+const char echo_test_str[]        PROGMEM = "echo-test";
+const char reset_prng_str[]       PROGMEM = "reset-prng";
+const char quick_test_str[]       PROGMEM = "quick-test";
+const char performance_str[]      PROGMEM = "performance";
+const char echo_str[]             PROGMEM = "echo";
+
+cmdlist_entry_t cmdlist[] PROGMEM = { /* command table passed to cmd_interface() in main(); the all-NULL entry ends the list */
+	{ reset_prng_str,       NULL, reset_prng                    },
+	{ quick_test_str,       NULL, quick_test                    },
+	{ performance_str,      NULL, testrun_performance_bigint    },
+	{ echo_str,         (void*)1, (void_fpt)echo_ctrl           },
+	{ NULL,                 NULL, NULL                          }
+};
+
+int main (void){ /* hook the command line interface to uart0 and serve commands forever */
+	DEBUG_INIT();
+
+	cli_rx = (cli_rx_fpt)uart0_getc; /* console input comes from uart0 */
+	cli_tx = (cli_tx_fpt)uart0_putc; /* console output goes to uart0 */
+	for(;;){
+		cli_putstr_P(PSTR("\r\n\r\nCrypto-VS ("));
+		cli_putstr(algo_name);
+		cli_putstr_P(PSTR(")\r\nloaded and running\r\n"));
+		cmd_interface(cmdlist); /* the banner is printed again whenever this returns */
+	}
+}