diff --git a/arcfour-asm.S b/arcfour/arcfour-asm.S
similarity index 100%
rename from arcfour-asm.S
rename to arcfour/arcfour-asm.S
diff --git a/arcfour.c b/arcfour/arcfour.c
similarity index 100%
rename from arcfour.c
rename to arcfour/arcfour.c
diff --git a/arcfour.h b/arcfour/arcfour.h
similarity index 100%
rename from arcfour.h
rename to arcfour/arcfour.h
diff --git a/base64_dec.c b/base64/base64_dec.c
similarity index 100%
rename from base64_dec.c
rename to base64/base64_dec.c
diff --git a/base64_dec.h b/base64/base64_dec.h
similarity index 100%
rename from base64_dec.h
rename to base64/base64_dec.h
diff --git a/base64_enc.c b/base64/base64_enc.c
similarity index 100%
rename from base64_enc.c
rename to base64/base64_enc.c
diff --git a/base64_enc.h b/base64/base64_enc.h
similarity index 100%
rename from base64_enc.h
rename to base64/base64_enc.h
diff --git a/bmw_large.c b/bmw/bmw_large.c
similarity index 100%
rename from bmw_large.c
rename to bmw/bmw_large.c
diff --git a/bmw_large.h b/bmw/bmw_large.h
similarity index 100%
rename from bmw_large.h
rename to bmw/bmw_large.h
diff --git a/bmw_small.c b/bmw/bmw_small.c
similarity index 100%
rename from bmw_small.c
rename to bmw/bmw_small.c
diff --git a/bmw_small.h b/bmw/bmw_small.h
similarity index 100%
rename from bmw_small.h
rename to bmw/bmw_small.h
diff --git a/cast5-sbox.h b/cast5/cast5-sbox.h
similarity index 100%
rename from cast5-sbox.h
rename to cast5/cast5-sbox.h
diff --git a/cast5.c b/cast5/cast5.c
similarity index 100%
rename from cast5.c
rename to cast5/cast5.c
diff --git a/cast5.h b/cast5/cast5.h
similarity index 100%
rename from cast5.h
rename to cast5/cast5.h
diff --git a/cast6.c b/cast6/cast6.c
similarity index 100%
rename from cast6.c
rename to cast6/cast6.c
diff --git a/cast6.h b/cast6/cast6.h
similarity index 100%
rename from cast6.h
rename to cast6/cast6.h
diff --git a/cast6_sboxes.h b/cast6/cast6_sboxes.h
similarity index 100%
rename from cast6_sboxes.h
rename to cast6/cast6_sboxes.h
diff --git a/des.c b/des/des.c
similarity index 100%
rename from des.c
rename to des/des.c
diff --git a/des.h b/des/des.h
similarity index 100%
rename from des.h
rename to des/des.h
diff --git a/entropium.c b/entropium/entropium.c
similarity index 100%
rename from entropium.c
rename to entropium/entropium.c
diff --git a/entropium.h b/entropium/entropium.h
similarity index 100%
rename from entropium.h
rename to entropium/entropium.h
diff --git a/sha256-asm.S b/entropium/sha256-asm.S
similarity index 100%
rename from sha256-asm.S
rename to entropium/sha256-asm.S
diff --git a/sha256.h b/entropium/sha256.h
similarity index 100%
rename from sha256.h
rename to entropium/sha256.h
diff --git a/grain.c b/grain/grain.c
similarity index 100%
rename from grain.c
rename to grain/grain.c
diff --git a/grain.h b/grain/grain.h
similarity index 100%
rename from grain.h
rename to grain/grain.h
diff --git a/hmac-md5/base64_dec.c b/hmac-md5/base64_dec.c
new file mode 100644
index 0000000..f057f54
--- /dev/null
+++ b/hmac-md5/base64_dec.c
@@ -0,0 +1,246 @@
+/* base64_dec.c */
+/*
+ * This file is part of the AVR-Crypto-Lib.
+ * Copyright (C) 2006, 2007, 2008 Daniel Otte (daniel.otte@rub.de)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+/**
+ * base64 decoder (RFC3548)
+ * Author: Daniel Otte
+ * License: GPLv3
+ *
+ *
+ */
+
+#include <stdint.h>
+#include "base64_dec.h"
+
+#include "test_src/cli.h"
+
+/*
+ #define USE_GCC_EXTENSION
+*/
+#if 1
+
+#ifdef USE_GCC_EXTENSION
+
+static
+int ascii2bit6(char a){
+ switch(a){
+ case 'A'...'Z':
+ return a-'A';
+ case 'a'...'z':
+ return a-'a'+26;
+ case '0'...'9':
+ return a-'0'+52;
+ case '+':
+ case '-':
+ return 62;
+ case '/':
+ case '_':
+ return 63;
+ default:
+ return -1;
+ }
+}
+
+#else
+
+static
+uint8_t ascii2bit6(char a){
+ int r;
+ switch(a>>4){
+ case 0x5:
+ case 0x4:
+ r=a-'A';
+ if(r<0 || r>25){
+ return -1;
+ } else {
+ return r;
+ }
+ case 0x7:
+ case 0x6:
+ r=a-'a';
+ if(r<0 || r>25){
+ return -1;
+ } else {
+ return r+26;
+ }
+ break;
+ case 0x3:
+ if(a>'9')
+ return -1;
+ return a-'0'+52;
+ default:
+ break;
+ }
+ switch (a){
+ case '+':
+ case '-':
+ return 62;
+ case '/':
+ case '_':
+ return 63;
+ default:
+ return 0xff;
+ }
+}
+
+#endif
+
+#else
+
+static
+uint8_t ascii2bit6(uint8_t a){
+ if(a>='A' && a<='Z'){
+ return a-'A';
+ } else {
+ if(a>='a' && a<= 'z'){
+ return a-'a'+26;
+ } else {
+ if(a>='0' && a<='9'){
+ return a-'0'+52;
+ } else {
+ if(a=='+' || a=='-'){
+ return 62;
+ } else {
+ if(a=='/' || a=='_'){
+ return 63;
+ } else {
+ return 0xff;
+ }
+ }
+ }
+ }
+ }
+}
+
+#endif
+
+int base64_binlength(char* str, uint8_t strict){
+ int l=0;
+ uint8_t term=0;
+ for(;;){
+ if(*str=='\0')
+ break;
+ if(*str=='\n' || *str=='\r'){
+ str++;
+ continue;
+ }
+ if(*str=='='){
+ term++;
+ str++;
+ if(term==2){
+ break;
+ }
+ continue;
+ }
+ if(term)
+ return -1;
+ if(ascii2bit6(*str)==-1){
+ if(strict)
+ return -1;
+ } else {
+ l++;
+ }
+ str++;
+ }
+ switch(term){
+ case 0:
+ if(l%4!=0)
+ return -1;
+ return l/4*3;
+ case 1:
+ if(l%4!=3)
+ return -1;
+ return (l+1)/4*3-1;
+ case 2:
+ if(l%4!=2)
+ return -1;
+ return (l+2)/4*3-2;
+ default:
+ return -1;
+ }
+}
+
+/*
+ |543210543210543210543210|
+ |765432107654321076543210|
+
+ . . . .
+ |54321054|32105432|10543210|
+ |76543210|76543210|76543210|
+
+*/
+
+int base64dec(void* dest, char* b64str, uint8_t strict){
+ uint8_t buffer[4];
+ uint8_t idx=0;
+ uint8_t term=0;
+ for(;;){
+// cli_putstr_P(PSTR("\r\n DBG: got 0x"));
+// cli_hexdump(b64str, 1);
+ buffer[idx]= ascii2bit6(*b64str);
+// cli_putstr_P(PSTR(" --> 0x"));
+// cli_hexdump(buffer+idx, 1);
+
+ if(buffer[idx]==0xFF){
+ if(*b64str=='='){
+ term++;
+ b64str++;
+ if(term==2)
+ goto finalize; /* definitly the end */
+ }else{
+ if(*b64str == '\0'){
+ goto finalize; /* definitly the end */
+ }else{
+ if(*b64str == '\r' || *b64str == '\n' || !(strict)){
+ b64str++; /* charcters that we simply ignore */
+ }else{
+ return -1;
+ }
+ }
+ }
+ }else{
+ if(term)
+ return -1; /* this happens if we get a '=' in the stream */
+ idx++;
+ b64str++;
+ }
+ if(idx==4){
+ ((uint8_t*)dest)[0] = buffer[0]<<2 | buffer[1]>>4;
+ ((uint8_t*)dest)[1] = buffer[1]<<4 | buffer[2]>>2;
+ ((uint8_t*)dest)[2] = buffer[2]<<6 | buffer[3];
+ dest = (uint8_t*)dest +3;
+ idx=0;
+ }
+ }
+ finalize:
+ /* the final touch */
+ if(idx==0)
+ return 0;
+ if(term==1){
+ ((uint8_t*)dest)[0] = buffer[0]<<2 | buffer[1]>>4;
+ ((uint8_t*)dest)[1] = buffer[1]<<4 | buffer[2]>>2;
+ return 0;
+ }
+ if(term==2){
+ ((uint8_t*)dest)[0] = buffer[0]<<2 | buffer[1]>>4;
+ return 0;
+ }
+ return -1;
+}
diff --git a/hmac-md5/base64_dec.h b/hmac-md5/base64_dec.h
new file mode 100644
index 0000000..39beff8
--- /dev/null
+++ b/hmac-md5/base64_dec.h
@@ -0,0 +1,29 @@
+/* base64_dec.h */
+/*
+ * This file is part of the AVR-Crypto-Lib.
+ * Copyright (C) 2006, 2007, 2008 Daniel Otte (daniel.otte@rub.de)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#ifndef BASE64_DEC_H_
+#define BASE64_DEC_H_
+
+#include <stdint.h>
+
+int base64_binlength(char* str, uint8_t strict);
+int base64dec(void* dest, char* b64str, uint8_t strict);
+
+#endif /*BASE64_DEC_H_*/
diff --git a/hmac-md5/base64_enc.c b/hmac-md5/base64_enc.c
new file mode 100644
index 0000000..400f25c
--- /dev/null
+++ b/hmac-md5/base64_enc.c
@@ -0,0 +1,117 @@
+/* base64_enc.c */
+/*
+ * This file is part of the AVR-Crypto-Lib.
+ * Copyright (C) 2006, 2007, 2008 Daniel Otte (daniel.otte@rub.de)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+/**
+ * base64 encoder (RFC3548)
+ * Author: Daniel Otte
+ * License: GPLv3
+ *
+ *
+ */
+
+#include <stdint.h>
+#include "base64_enc.h"
+
+#if 1
+#include <avr/pgmspace.h>
+
+char base64_alphabet[64] PROGMEM = {
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
+ 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
+ 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
+ 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
+ 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
+ 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
+ 'w', 'x', 'y', 'z', '0', '1', '2', '3',
+ '4', '5', '6', '7', '8', '9', '+', '/' };
+
+static
+char bit6toAscii(uint8_t a){
+ a &= (uint8_t)0x3F;
+ return pgm_read_byte(base64_alphabet+a);
+}
+
+#else
+
+static
+char bit6toAscii(uint8_t a){
+ a &= (uint8_t)0x3F;
+
+ if(a<=25){
+ return a+'A';
+ } else {
+ if(a<=51){
+ return a-26+'a';
+ } else {
+ if(a<=61){
+ return a-52+'0';
+ } else {
+ if(a==62){
+ return '+';
+ } else {
+ return '/'; /* a == 63 */
+ }
+ }
+ }
+ }
+}
+
+#endif
+
+void base64enc(char* dest, void* src, uint16_t length){
+ uint16_t i,j;
+ uint8_t a[4];
+ for(i=0; i<length/3; ++i){ a[0]= (((uint8_t*)src)[i*3+0])>>2;
+ a[1]= (((((uint8_t*)src)[i*3+0])<<4) | ((((uint8_t*)src)[i*3+1])>>4)) & 0x3F;
+ a[2]= (((((uint8_t*)src)[i*3+1])<<2) | ((((uint8_t*)src)[i*3+2])>>6)) & 0x3F;
+ a[3]= (((uint8_t*)src)[i*3+2]) & 0x3F;
+ for(j=0; j<4; ++j){
+ *dest++=bit6toAscii(a[j]);
+ }
+ }
+ /* now we do the rest */
+ switch(length%3){
+ case 0:
+ break;
+ case 1:
+ a[0]=(((uint8_t*)src)[i*3+0])>>2;
+ a[1]=((((uint8_t*)src)[i*3+0])<<4)&0x3F;
+ *dest++ = bit6toAscii(a[0]);
+ *dest++ = bit6toAscii(a[1]);
+ *dest++ = '=';
+ *dest++ = '=';
+ break;
+ case 2:
+ a[0]= (((uint8_t*)src)[i*3+0])>>2;
+ a[1]= (((((uint8_t*)src)[i*3+0])<<4) | ((((uint8_t*)src)[i*3+1])>>4)) & 0x3F;
+ a[2]= ((((uint8_t*)src)[i*3+1])<<2) & 0x3F;
+ *dest++ = bit6toAscii(a[0]);
+ *dest++ = bit6toAscii(a[1]);
+ *dest++ = bit6toAscii(a[2]);
+ *dest++ = '=';
+ break;
+ default: /* this will not happen! */
+ break;
+ }
+/* finalize: */
+ *dest='\0';
+}
+
diff --git a/hmac-md5/base64_enc.h b/hmac-md5/base64_enc.h
new file mode 100644
index 0000000..9065132
--- /dev/null
+++ b/hmac-md5/base64_enc.h
@@ -0,0 +1,28 @@
+/* base64_enc.h */
+/*
+ * This file is part of the AVR-Crypto-Lib.
+ * Copyright (C) 2006, 2007, 2008 Daniel Otte (daniel.otte@rub.de)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#ifndef BASE64_ENC_H_
+#define BASE64_ENC_H_
+
+#include <stdint.h>
+
+void base64enc(char* dest, void* src, uint16_t length);
+
+#endif /*BASE64_ENC_H_*/
diff --git a/hmac-md5.c b/hmac-md5/hmac-md5.c
similarity index 100%
rename from hmac-md5.c
rename to hmac-md5/hmac-md5.c
diff --git a/hmac-md5.h b/hmac-md5/hmac-md5.h
similarity index 100%
rename from hmac-md5.h
rename to hmac-md5/hmac-md5.h
diff --git a/md5-asm.S b/hmac-md5/md5-asm.S
similarity index 100%
rename from md5-asm.S
rename to hmac-md5/md5-asm.S
diff --git a/md5.h b/hmac-md5/md5.h
similarity index 100%
rename from md5.h
rename to hmac-md5/md5.h
diff --git a/hmac-sha1.c b/hmac-sha1/hmac-sha1.c
similarity index 100%
rename from hmac-sha1.c
rename to hmac-sha1/hmac-sha1.c
diff --git a/hmac-sha1.h b/hmac-sha1/hmac-sha1.h
similarity index 100%
rename from hmac-sha1.h
rename to hmac-sha1/hmac-sha1.h
diff --git a/sha1-asm.S b/hmac-sha1/sha1-asm.S
similarity index 100%
rename from sha1-asm.S
rename to hmac-sha1/sha1-asm.S
diff --git a/sha1.h b/hmac-sha1/sha1.h
similarity index 100%
rename from sha1.h
rename to hmac-sha1/sha1.h
diff --git a/hmac-sha256.c b/hmac-sha256/hmac-sha256.c
similarity index 100%
rename from hmac-sha256.c
rename to hmac-sha256/hmac-sha256.c
diff --git a/hmac-sha256.h b/hmac-sha256/hmac-sha256.h
similarity index 100%
rename from hmac-sha256.h
rename to hmac-sha256/hmac-sha256.h
diff --git a/hmac-sha256/sha256-asm.S b/hmac-sha256/sha256-asm.S
new file mode 100644
index 0000000..d9eb6b6
--- /dev/null
+++ b/hmac-sha256/sha256-asm.S
@@ -0,0 +1,1042 @@
+/* sha256-asm.S */
+/*
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see .
+*/
+/*
+ * Author: Daniel Otte
+ *
+ * License: GPLv3 or later
+*/
+; sha-256 implementation in assembler
+SHA256_BLOCK_BITS = 512
+SHA256_HASH_BITS = 256
+
+.macro precall
+ /* push r18 - r27, r30 - r31*/
+ push r0
+ push r1
+ push r18
+ push r19
+ push r20
+ push r21
+ push r22
+ push r23
+ push r24
+ push r25
+ push r26
+ push r27
+ push r30
+ push r31
+ clr r1
+.endm
+
+.macro postcall
+ pop r31
+ pop r30
+ pop r27
+ pop r26
+ pop r25
+ pop r24
+ pop r23
+ pop r22
+ pop r21
+ pop r20
+ pop r19
+ pop r18
+ pop r1
+ pop r0
+.endm
+
+
+.macro hexdump length
+ push r27
+ push r26
+ ldi r25, '\r'
+ mov r24, r25
+ call uart_putc
+ ldi r25, '\n'
+ mov r24, r25
+ call uart_putc
+ pop r26
+ pop r27
+ movw r24, r26
+.if \length > 16
+ ldi r22, lo8(16)
+ ldi r23, hi8(16)
+ push r27
+ push r26
+ call uart_hexdump
+ pop r26
+ pop r27
+ adiw r26, 16
+ hexdump \length-16
+.else
+ ldi r22, lo8(\length)
+ ldi r23, hi8(\length)
+ call uart_hexdump
+.endif
+.endm
+
+/* X points to Block */
+.macro dbg_hexdump length
+ precall
+ hexdump \length
+ postcall
+.endm
+
+.section .text
+
+SPL = 0x3D
+SPH = 0x3E
+SREG = 0x3F
+
+
+;
+;sha256_ctx_t is:
+;
+; [h0][h1][h2][h3][h4][h5][h6][h7][length]
+; hn is 32 bit large, length is 64 bit large
+
+;###########################################################
+
+.global sha256_ctx2hash
+; === sha256_ctx2hash ===
+; this function converts a state into a normal hash (bytestring)
+; param1: the 16-bit destination pointer
+; given in r25,r24 (r25 is most significant)
+; param2: the 16-bit pointer to sha256_ctx structure
+; given in r23,r22
+sha256_ctx2hash:
+ movw r26, r22
+ movw r30, r24
+ ldi r21, 8
+ sbiw r26, 4
+1:
+ ldi r20, 4
+ adiw r26, 8
+2:
+ ld r0, -X
+ st Z+, r0
+ dec r20
+ brne 2b
+
+ dec r21
+ brne 1b
+
+ ret
+
+;###########################################################
+
+.global sha256
+; === sha256 ===
+; this function calculates SHA-256 hashes from messages in RAM
+; param1: the 16-bit hash destination pointer
+; given in r25,r24 (r25 is most significant)
+; param2: the 16-bit pointer to message
+; given in r23,r22
+; param3: 32-bit length value (length of message in bits)
+; given in r21,r20,r19,r18
+sha256:
+sha256_prolog:
+ push r8
+ push r9
+ push r10
+ push r11
+ push r12
+ push r13
+ push r16
+ push r17
+ in r16, SPL
+ in r17, SPH
+ subi r16, 8*4+8
+ sbci r17, 0
+ in r0, SREG
+ cli
+ out SPL, r16
+ out SPH, r17
+ out SREG, r0
+
+ push r25
+ push r24
+ inc r16
+ adc r17, r1
+
+ movw r8, r18 /* backup of length*/
+ movw r10, r20
+
+ movw r12, r22 /* backup pf msg-ptr */
+
+ movw r24, r16
+ rcall sha256_init
+ /* if length >= 512 */
+1:
+ tst r11
+ brne 4f
+ tst r10
+ brne 4f
+ mov r19, r9
+ cpi r19, 0x02
+ brlo 4f
+
+ movw r24, r16
+ movw r22, r12
+ rcall sha256_nextBlock
+ ldi r19, 0x64
+ add r22, r19
+ adc r23, r1
+ /* length -= 512 */
+ ldi r19, 0x02
+ sub r9, r19
+ sbc r10, r1
+ sbc r11, r1
+ rjmp 1b
+
+4:
+ movw r24, r16
+ movw r22, r12
+ movw r20, r8
+ rcall sha256_lastBlock
+
+ pop r24
+ pop r25
+ movw r22, r16
+ rcall sha256_ctx2hash
+
+sha256_epilog:
+ in r30, SPL
+ in r31, SPH
+ adiw r30, 8*4+8
+ in r0, SREG
+ cli
+ out SPL, r30
+ out SPH, r31
+ out SREG, r0
+ pop r17
+ pop r16
+ pop r13
+ pop r12
+ pop r11
+ pop r10
+ pop r9
+ pop r8
+ ret
+
+;###########################################################
+
+
+; block MUST NOT be larger than 64 bytes
+
+.global sha256_lastBlock
+; === sha256_lastBlock ===
+; this function does padding & Co. for calculating SHA-256 hashes
+; param1: the 16-bit pointer to sha256_ctx structure
+; given in r25,r24 (r25 is most significant)
+; param2: an 16-bit pointer to 64 byte block to hash
+; given in r23,r22
+; param3: an 16-bit integer specifing length of block in bits
+; given in r21,r20
+sha256_lastBlock_localSpace = (SHA256_BLOCK_BITS/8+1)
+
+
+sha256_lastBlock:
+ cpi r21, 0x02
+ brlo sha256_lastBlock_prolog
+ push r25
+ push r24
+ push r23
+ push r22
+ push r21
+ push r20
+ rcall sha256_nextBlock
+ pop r20
+ pop r21
+ pop r22
+ pop r23
+ pop r24
+ pop r25
+ subi r21, 0x02
+ subi r23, -2
+ rjmp sha256_lastBlock
+sha256_lastBlock_prolog:
+ /* allocate space on stack */
+ in r30, SPL
+ in r31, SPH
+ in r1, SREG
+ subi r30, lo8(64)
+ sbci r31, hi8(64)
+ cli
+ out SPL, r30
+ out SPH, r31
+ out SREG,r1
+
+ adiw r30, 1 /* SP points to next free byte on stack */
+ mov r18, r20 /* r20 = LSB(length) */
+ lsr r18
+ lsr r18
+ lsr r18
+ bst r21, 0 /* may be we should explain this ... */
+ bld r18, 5 /* now: r18 == length/8 (aka. length in bytes) */
+
+
+ movw r26, r22 /* X points to begin of msg */
+ tst r18
+ breq sha256_lastBlock_post_copy
+ mov r1, r18
+sha256_lastBlock_copy_loop:
+ ld r0, X+
+ st Z+, r0
+ dec r1
+ brne sha256_lastBlock_copy_loop
+sha256_lastBlock_post_copy:
+sha256_lastBlock_insert_stuffing_bit:
+ ldi r19, 0x80
+ mov r0,r19
+ ldi r19, 0x07
+ and r19, r20 /* if we are in bitmode */
+ breq 2f /* no bitmode */
+1:
+ lsr r0
+ dec r19
+ brne 1b
+ ld r19, X
+/* maybe we should do some ANDing here, just for safety */
+ or r0, r19
+2:
+ st Z+, r0
+ inc r18
+
+/* checking stuff here */
+ cpi r18, 64-8+1
+ brsh 0f
+ rjmp sha256_lastBlock_insert_zeros
+0:
+ /* oh shit, we landed here */
+ /* first we have to fill it up with zeros */
+ ldi r19, 64
+ sub r19, r18
+ breq 2f
+1:
+ st Z+, r1
+ dec r19
+ brne 1b
+2:
+ sbiw r30, 63
+ sbiw r30, 1
+ movw r22, r30
+
+ push r31
+ push r30
+ push r25
+ push r24
+ push r21
+ push r20
+ rcall sha256_nextBlock
+ pop r20
+ pop r21
+ pop r24
+ pop r25
+ pop r30
+ pop r31
+
+ /* now we should subtract 512 from length */
+ movw r26, r24
+ adiw r26, 4*8+1 /* we can skip the lowest byte */
+ ld r19, X
+ subi r19, hi8(512)
+ st X+, r19
+ ldi r18, 6
+1:
+ ld r19, X
+ sbci r19, 0
+ st X+, r19
+ dec r18
+ brne 1b
+
+; clr r18 /* not neccessary ;-) */
+ /* reset Z pointer to begin of block */
+
+sha256_lastBlock_insert_zeros:
+ ldi r19, 64-8
+ sub r19, r18
+ breq sha256_lastBlock_insert_length
+ clr r1
+1:
+ st Z+, r1 /* r1 is still zero */
+ dec r19
+ brne 1b
+
+; rjmp sha256_lastBlock_epilog
+sha256_lastBlock_insert_length:
+ movw r26, r24 /* X points to state */
+ adiw r26, 8*4 /* X points to (state.length) */
+ adiw r30, 8 /* Z points one after the last byte of block */
+ ld r0, X+
+ add r0, r20
+ st -Z, r0
+ ld r0, X+
+ adc r0, r21
+ st -Z, r0
+ ldi r19, 6
+1:
+ ld r0, X+
+ adc r0, r1
+ st -Z, r0
+ dec r19
+ brne 1b
+
+ sbiw r30, 64-8
+ movw r22, r30
+ rcall sha256_nextBlock
+
+sha256_lastBlock_epilog:
+ in r30, SPL
+ in r31, SPH
+ in r1, SREG
+ adiw r30, 63 ; lo8(64)
+ adiw r30, 1 ; hi8(64)
+ cli
+ out SPL, r30
+ out SPH, r31
+ out SREG,r1
+ clr r1
+ clr r0
+ ret
+
+/**/
+;###########################################################
+
+.global sha256_nextBlock
+; === sha256_nextBlock ===
+; this is the core function for calculating SHA-256 hashes
+; param1: the 16-bit pointer to sha256_ctx structure
+; given in r25,r24 (r25 is most significant)
+; param2: an 16-bit pointer to 64 byte block to hash
+; given in r23,r22
+sha256_nextBlock_localSpace = (64+8)*4 ; 64 32-bit values for w array and 8 32-bit values for a array (total 288 byte)
+
+Bck1 = 12
+Bck2 = 13
+Bck3 = 14
+Bck4 = 15
+Func1 = 22
+Func2 = 23
+Func3 = 24
+Func4 = 25
+Accu1 = 16
+Accu2 = 17
+Accu3 = 18
+Accu4 = 19
+XAccu1 = 8
+XAccu2 = 9
+XAccu3 = 10
+XAccu4 = 11
+T1 = 4
+T2 = 5
+T3 = 6
+T4 = 7
+LoopC = 1
+/* byteorder: high number <--> high significance */
+sha256_nextBlock:
+ ; initial, let's make some space ready for local vars
+ push r4 /* replace push & pop by mem ops? */
+ push r5
+ push r6
+ push r7
+ push r8
+ push r9
+ push r10
+ push r11
+ push r12
+ push r13
+ push r14
+ push r15
+ push r16
+ push r17
+ push r28
+ push r29
+ in r20, SPL
+ in r21, SPH
+ movw r18, r20 ;backup SP
+; movw r26, r20 ; X points to free space on stack
+ movw r30, r22 ; Z points to message
+ subi r20, lo8(sha256_nextBlock_localSpace) ;sbiw can do only up to 63
+ sbci r21, hi8(sha256_nextBlock_localSpace)
+ movw r26, r20 ; X points to free space on stack
+ in r0, SREG
+ cli ; we want to be uninterrupted while updating SP
+ out SPL, r20
+ out SPH, r21
+ out SREG, r0
+ push r18
+ push r19
+ push r24
+ push r25 /* param1 will be needed later */
+ ; now we fill the w array with message (think about endianess)
+ adiw r26, 1 ; X++
+ ldi r20, 16
+sha256_nextBlock_wcpyloop:
+ ld r23, Z+
+ ld r22, Z+
+ ld r19, Z+
+ ld r18, Z+
+ st X+, r18
+ st X+, r19
+ st X+, r22
+ st X+, r23
+ dec r20
+ brne sha256_nextBlock_wcpyloop
+/* for (i=16; i<64; ++i){
+ w[i] = SIGMA_b(w[i-2]) + w[i-7] + SIGMA_a(w[i-15]) + w[i-16];
+ } */
+ /* r25,r24,r23,r24 (r21,r20) are function values
+ r19,r18,r17,r16 are the accumulator
+ r15,r14,r13,rBck1 are backup1
+ r11,r10,r9 ,r8 are xor accu
+ r1 is round counter */
+
+ ldi r20, 64-16
+ mov LoopC, r20
+sha256_nextBlock_wcalcloop:
+ movw r30, r26 ; cp X to Z
+ sbiw r30, 63
+ sbiw r30, 1 ; substract 64 = 16*4
+ ld Accu1, Z+
+ ld Accu2, Z+
+ ld Accu3, Z+
+ ld Accu4, Z+ /* w[i] = w[i-16] */
+ ld Bck1, Z+
+ ld Bck2, Z+
+ ld Bck3, Z+
+ ld Bck4, Z+ /* backup = w[i-15] */
+ /* now sigma 0 */
+ mov Func1, Bck2
+ mov Func2, Bck3
+ mov Func3, Bck4
+ mov Func4, Bck1 /* prerotated by 8 */
+ ldi r20, 1
+ rcall bitrotl
+ movw XAccu1, Func1
+ movw XAccu3, Func3 /* store ROTR(w[i-15],7) in xor accu */
+ movw Func1, Bck3
+ movw Func3, Bck1 /* prerotated by 16 */
+ ldi r20, 2
+ rcall bitrotr
+ eor XAccu1, Func1 /* xor ROTR(w[i-15], 18)*/
+ eor XAccu2, Func2
+ eor XAccu3, Func3
+ eor XAccu4, Func4
+ ldi Func2, 3 /* now shr3 */ /*we can destroy backup now*/
+sigma0_shr:
+ lsr Bck4
+ ror Bck3
+ ror Bck2
+ ror Bck1
+ dec Func2
+ brne sigma0_shr
+ eor XAccu1, Bck1
+ eor XAccu2, Bck2
+ eor XAccu3, Bck3
+ eor XAccu4, Bck4 /* xor SHR(w[i-15], 3)*/ /* xor accu == sigma1(w[i-15]) */
+ add Accu1, XAccu1
+ adc Accu2, XAccu2
+ adc Accu3, XAccu3
+ adc Accu4, XAccu4 /* finished with sigma0 */
+ ldd Func1, Z+7*4 /* now accu += w[i-7] */
+ ldd Func2, Z+7*4+1
+ ldd Func3, Z+7*4+2
+ ldd Func4, Z+7*4+3
+ add Accu1, Func1
+ adc Accu2, Func2
+ adc Accu3, Func3
+ adc Accu4, Func4
+ ldd Bck1, Z+12*4 /* now backup = w[i-2]*/
+ ldd Bck2, Z+12*4+1
+ ldd Bck3, Z+12*4+2
+ ldd Bck4, Z+12*4+3
+ /* now sigma 1 */
+ movw Func1, Bck3
+ movw Func3, Bck1 /* prerotated by 16 */
+ ldi r20, 1
+ rcall bitrotr
+ movw XAccu3, Func3
+ movw XAccu1, Func1 /* store in ROTR(w[i-2], 17) xor accu */
+; movw Func1, Bck3
+; movw Func3, Bck1 /* prerotated by 16 */
+ ldi r20, 2
+ rcall bitrotr
+ eor XAccu1, Func1 /* xor ROTR(w[i-2], 19)*/
+ eor XAccu2, Func2
+ eor XAccu3, Func3
+ eor XAccu4, Func4
+ ldi Func2, 2 /* now shr10 (dirty trick, skipping a byte) */ /*we can destroy backup now*/
+sigma1_shr:
+ lsr Bck4
+ ror Bck3
+ ror Bck2
+ dec Func2
+ brne sigma1_shr
+ eor XAccu1, Bck2
+ eor XAccu2, Bck3
+ eor XAccu3, Bck4 /* xor SHR(w[i-2], 10)*/ /* xor accu == sigma1(w[i-15]) */
+ add Accu1, XAccu1
+ adc Accu2, XAccu2
+ adc Accu3, XAccu3
+ adc Accu4, XAccu4 /* finished with sigma0 */
+ /* now let's store the shit */
+ st X+, Accu1
+ st X+, Accu2
+ st X+, Accu3
+ st X+, Accu4
+ dec LoopC
+ breq 3f ; skip if zero
+ rjmp sha256_nextBlock_wcalcloop
+3:
+ /* we are finished with w array X points one byte post w */
+/* init a array */
+ pop r31
+ pop r30
+ push r30
+ push r31
+ ldi r25, 8*4 /* 8 32-bit values to copy from ctx to a array */
+init_a_array:
+ ld r1, Z+
+ st X+, r1
+ dec r25
+ brne init_a_array
+
+/* now the real fun begins */
+/* for (i=0; i<64; ++i){
+ t1 = a[7] + SIGMA1(a[4]) + CH(a[4],a[5],a[6]) + k[i] + w[i];
+ t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]);
+ memmove(&(a[1]), &(a[0]), 7*4); // a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0];
+ a[4] += t1;
+ a[0] = t1 + t2;
+ } */
+ /* Y points to a[0], Z ('cause lpm wants it) points to k[i], X points to w[i] */
+ sbiw r26, 8*4 /* X still points at a[7]+1*/
+ movw r28, r26
+ ldi r30, lo8(sha256_kv)
+ ldi r31, hi8(sha256_kv)
+ dec r27 /* X - (64*4 == 256) */
+ ldi r25, 64
+ mov LoopC, r25
+sha256_main_loop:
+ /* now calculate t1 */
+ /*CH(x,y,z) = (x&y)^((~x)&z)*/
+ ldd T1, Y+5*4
+ ldd T2, Y+5*4+1
+ ldd T3, Y+5*4+2
+ ldd T4, Y+5*4+3 /* y in T */
+ ldd Func1, Y+4*4
+ ldd Func2, Y+4*4+1
+ ldd Func3, Y+4*4+2
+ ldd Func4, Y+4*4+3 /* x in Func */
+ ldd Bck1, Y+6*4
+ ldd Bck2, Y+6*4+1
+ ldd Bck3, Y+6*4+2
+ ldd Bck4, Y+6*4+3 /* z in Bck */
+ and T1, Func1
+ and T2, Func2
+ and T3, Func3
+ and T4, Func4
+ com Func1
+ com Func2
+ com Func3
+ com Func4
+ and Bck1, Func1
+ and Bck2, Func2
+ and Bck3, Func3
+ and Bck4, Func4
+ eor T1, Bck1
+ eor T2, Bck2
+ eor T3, Bck3
+ eor T4, Bck4 /* done, CH(x,y,z) is in T */
+ /* now SIGMA1(a[4]) */
+ ldd Bck4, Y+4*4 /* think about using it from Func reg above*/
+ ldd Bck1, Y+4*4+1
+ ldd Bck2, Y+4*4+2
+ ldd Bck3, Y+4*4+3 /* load prerotate by 8-bit */
+ movw Func1, Bck1
+ movw Func3, Bck3
+ ldi r20, 2
+ rcall bitrotl /* rotr(x,6) */
+ movw XAccu1, Func1
+ movw XAccu3, Func3
+ movw Func1, Bck1
+ movw Func3, Bck3
+ ldi r20, 3
+ rcall bitrotr /* rotr(x,11) */
+ eor XAccu1, Func1
+ eor XAccu2, Func2
+ eor XAccu3, Func3
+ eor XAccu4, Func4
+ movw Func1, Bck3 /* this prerotates furteh 16 bits*/
+ movw Func3, Bck1 /* so we have now prerotated by 24 bits*/
+ ldi r20, 1
+ rcall bitrotr /* rotr(x,11) */
+ eor XAccu1, Func1
+ eor XAccu2, Func2
+ eor XAccu3, Func3
+ eor XAccu4, Func4 /* finished with SIGMA1, add it to T */
+ add T1, XAccu1
+ adc T2, XAccu2
+ adc T3, XAccu3
+ adc T4, XAccu4
+ /* now we've to add a[7], w[i] and k[i] */
+ ldd XAccu1, Y+4*7
+ ldd XAccu2, Y+4*7+1
+ ldd XAccu3, Y+4*7+2
+ ldd XAccu4, Y+4*7+3
+ add T1, XAccu1
+ adc T2, XAccu2
+ adc T3, XAccu3
+ adc T4, XAccu4 /* add a[7] */
+ ld XAccu1, X+
+ ld XAccu2, X+
+ ld XAccu3, X+
+ ld XAccu4, X+
+ add T1, XAccu1
+ adc T2, XAccu2
+ adc T3, XAccu3
+ adc T4, XAccu4 /* add w[i] */
+ lpm XAccu1, Z+
+ lpm XAccu2, Z+
+ lpm XAccu3, Z+
+ lpm XAccu4, Z+
+ add T1, XAccu1
+ adc T2, XAccu2
+ adc T3, XAccu3
+ adc T4, XAccu4 /* add k[i] */ /* finished with t1 */
+ /*now t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]) */ /*i did to much x86 asm, i always see 4 32bit regs*/
+ /* starting with MAJ(x,y,z) */
+ ldd Func1, Y+4*0+0
+ ldd Func2, Y+4*0+1
+ ldd Func3, Y+4*0+2
+ ldd Func4, Y+4*0+3 /* load x=a[0] */
+ ldd XAccu1, Y+4*1+0
+ ldd XAccu2, Y+4*1+1
+ ldd XAccu3, Y+4*1+2
+ ldd XAccu4, Y+4*1+3 /* load y=a[1] */
+ and XAccu1, Func1
+ and XAccu2, Func2
+ and XAccu3, Func3
+ and XAccu4, Func4 /* XAccu == (x & y) */
+ ldd Bck1, Y+4*2+0
+ ldd Bck2, Y+4*2+1
+ ldd Bck3, Y+4*2+2
+ ldd Bck4, Y+4*2+3 /* load z=a[2] */
+ and Func1, Bck1
+ and Func2, Bck2
+ and Func3, Bck3
+ and Func4, Bck4
+ eor XAccu1, Func1
+ eor XAccu2, Func2
+ eor XAccu3, Func3
+ eor XAccu4, Func4 /* XAccu == (x & y) ^ (x & z) */
+ ldd Func1, Y+4*1+0
+ ldd Func2, Y+4*1+1
+ ldd Func3, Y+4*1+2
+ ldd Func4, Y+4*1+3 /* load y=a[1] */
+ and Func1, Bck1
+ and Func2, Bck2
+ and Func3, Bck3
+ and Func4, Bck4
+ eor XAccu1, Func1
+ eor XAccu2, Func2
+ eor XAccu3, Func3
+ eor XAccu4, Func4 /* XAccu == Maj(x,y,z) == (x & y) ^ (x & z) ^ (y & z) */
+ /* SIGMA0(a[0]) */
+ ldd Bck1, Y+4*0+0 /* we should combine this with above */
+ ldd Bck2, Y+4*0+1
+ ldd Bck3, Y+4*0+2
+ ldd Bck4, Y+4*0+3
+ movw Func1, Bck1
+ movw Func3, Bck3
+ ldi r20, 2
+ rcall bitrotr
+ movw Accu1, Func1
+ movw Accu3, Func3 /* Accu = shr(a[0], 2) */
+ movw Func1, Bck3
+ movw Func3, Bck1 /* prerotate by 16 bits */
+ ldi r20, 3
+ rcall bitrotl
+ eor Accu1, Func1
+ eor Accu2, Func2
+ eor Accu3, Func3
+ eor Accu4, Func4 /* Accu ^= shr(a[0], 13) */
+ mov Func1, Bck4
+ mov Func2, Bck1
+ mov Func3, Bck2
+ mov Func4, Bck3 /* prerotate by 24 bits */
+ ldi r20, 2
+ rcall bitrotl
+ eor Accu1, Func1
+ eor Accu2, Func2
+ eor Accu3, Func3
+ eor Accu4, Func4 /* Accu ^= shr(a[0], 22) */
+ add Accu1, XAccu1 /* add previous result (MAJ)*/
+ adc Accu2, XAccu2
+ adc Accu3, XAccu3
+ adc Accu4, XAccu4
+ /* now we are finished with the computing stuff (t1 in T, t2 in Accu)*/
+ /* a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; */
+
+ ldi r21, 7*4
+ adiw r28, 7*4
+a_shift_loop:
+ ld r25, -Y /* warning: this is PREdecrement */
+ std Y+4, r25
+ dec r21
+ brne a_shift_loop
+
+ ldd Bck1, Y+4*4+0
+ ldd Bck2, Y+4*4+1
+ ldd Bck3, Y+4*4+2
+ ldd Bck4, Y+4*4+3
+ add Bck1, T1
+ adc Bck2, T2
+ adc Bck3, T3
+ adc Bck4, T4
+ std Y+4*4+0, Bck1
+ std Y+4*4+1, Bck2
+ std Y+4*4+2, Bck3
+ std Y+4*4+3, Bck4
+ add Accu1, T1
+ adc Accu2, T2
+ adc Accu3, T3
+ adc Accu4, T4
+ std Y+4*0+0, Accu1
+ std Y+4*0+1, Accu2
+ std Y+4*0+2, Accu3
+ std Y+4*0+3, Accu4 /* a array updated */
+
+
+ dec LoopC
+ breq update_state
+ rjmp sha256_main_loop ;brne sha256_main_loop
+update_state:
+ /* update state */
+ /* pointers to state should still exist on the stack ;-) */
+ pop r31
+ pop r30
+ ldi r21, 8
+update_state_loop:
+ ldd Accu1, Z+0
+ ldd Accu2, Z+1
+ ldd Accu3, Z+2
+ ldd Accu4, Z+3
+ ld Func1, Y+
+ ld Func2, Y+
+ ld Func3, Y+
+ ld Func4, Y+
+ add Accu1, Func1
+ adc Accu2, Func2
+ adc Accu3, Func3
+ adc Accu4, Func4
+ st Z+, Accu1
+ st Z+, Accu2
+ st Z+, Accu3
+ st Z+, Accu4
+ dec r21
+ brne update_state_loop
+ /* now we just have to update the length */
+ adiw r30, 1 /* since we add 512, we can simply skip the LSB */
+ ldi r21, 2
+ ldi r22, 6
+ ld r20, Z
+ add r20, r21
+ st Z+, r20
+ clr r21
+sha256_nextBlock_fix_length:
+ brcc sha256_nextBlock_epilog
+ ld r20, Z
+ adc r20, r21
+ st Z+, r20
+ dec r22
+ brne sha256_nextBlock_fix_length
+
+; EPILOG
+sha256_nextBlock_epilog:
+/* now we should clean up the stack */
+
+ pop r21
+ pop r20
+ in r0, SREG
+ cli ; we want to be uninterrupted while updating SP
+ out SPL, r20
+ out SPH, r21
+ out SREG, r0
+
+ clr r1
+ pop r29
+ pop r28
+ pop r17
+ pop r16
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop r11
+ pop r10
+ pop r9
+ pop r8
+ pop r7
+ pop r6
+ pop r5
+ pop r4
+ ret
+
+sha256_kv: ; round-key-vector stored in ProgMem
+.word 0x2f98, 0x428a, 0x4491, 0x7137, 0xfbcf, 0xb5c0, 0xdba5, 0xe9b5, 0xc25b, 0x3956, 0x11f1, 0x59f1, 0x82a4, 0x923f, 0x5ed5, 0xab1c
+.word 0xaa98, 0xd807, 0x5b01, 0x1283, 0x85be, 0x2431, 0x7dc3, 0x550c, 0x5d74, 0x72be, 0xb1fe, 0x80de, 0x06a7, 0x9bdc, 0xf174, 0xc19b
+.word 0x69c1, 0xe49b, 0x4786, 0xefbe, 0x9dc6, 0x0fc1, 0xa1cc, 0x240c, 0x2c6f, 0x2de9, 0x84aa, 0x4a74, 0xa9dc, 0x5cb0, 0x88da, 0x76f9
+.word 0x5152, 0x983e, 0xc66d, 0xa831, 0x27c8, 0xb003, 0x7fc7, 0xbf59, 0x0bf3, 0xc6e0, 0x9147, 0xd5a7, 0x6351, 0x06ca, 0x2967, 0x1429
+.word 0x0a85, 0x27b7, 0x2138, 0x2e1b, 0x6dfc, 0x4d2c, 0x0d13, 0x5338, 0x7354, 0x650a, 0x0abb, 0x766a, 0xc92e, 0x81c2, 0x2c85, 0x9272
+.word 0xe8a1, 0xa2bf, 0x664b, 0xa81a, 0x8b70, 0xc24b, 0x51a3, 0xc76c, 0xe819, 0xd192, 0x0624, 0xd699, 0x3585, 0xf40e, 0xa070, 0x106a
+.word 0xc116, 0x19a4, 0x6c08, 0x1e37, 0x774c, 0x2748, 0xbcb5, 0x34b0, 0x0cb3, 0x391c, 0xaa4a, 0x4ed8, 0xca4f, 0x5b9c, 0x6ff3, 0x682e
+.word 0x82ee, 0x748f, 0x636f, 0x78a5, 0x7814, 0x84c8, 0x0208, 0x8cc7, 0xfffa, 0x90be, 0x6ceb, 0xa450, 0xa3f7, 0xbef9, 0x78f2, 0xc671
+
+
+;###########################################################
+
+.global sha256_init
+;uint32_t sha256_init_vector[]={
+; 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
+; 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 };
+;
+;void sha256_init(sha256_ctx_t *state){
+; state->length=0;
+; memcpy(state->h, sha256_init_vector, 8*4);
+;}
+; param1: (r23,r24) 16-bit pointer to sha256_ctx_t struct in ram
+; modifys: Z(r30,r31), Func1, r22
+sha256_init:
+ movw r26, r24 ; (24,25) --> (26,27) load X with param1
+ ldi r30, lo8((sha256_init_vector))
+ ldi r31, hi8((sha256_init_vector))
+ ldi r22, 32+8
+sha256_init_vloop:
+ lpm r23, Z+
+ st X+, r23
+ dec r22
+ brne sha256_init_vloop
+ ret
+
+sha256_init_vector:
+.word 0xE667, 0x6A09
+.word 0xAE85, 0xBB67
+.word 0xF372, 0x3C6E
+.word 0xF53A, 0xA54F
+.word 0x527F, 0x510E
+.word 0x688C, 0x9B05
+.word 0xD9AB, 0x1F83
+.word 0xCD19, 0x5BE0
+.word 0x0000, 0x0000
+.word 0x0000, 0x0000
+
+;###########################################################
+
+.global rotl32
+; === ROTL32 ===
+; function that rotates a 32 bit word to the left
+; param1: the 32-bit word to rotate
+; given in r25,r24,r23,r22 (r25 is most significant)
+; param2: an 8-bit value telling how often to rotate
+; given in r20
+; modifys: r21, r22
+rotl32:
+ cpi r20, 8
+ brlo bitrotl
+ mov r21, r25
+ mov r25, r24
+ mov r24, r23
+ mov r23, r22
+ mov r22, r21
+ subi r20, 8
+ rjmp rotl32
+bitrotl:
+ clr r21
+ clc
+bitrotl_loop:
+ tst r20
+ breq fixrotl
+ rol r22
+ rol r23
+ rol r24
+ rol r25
+ rol r21
+ dec r20
+ rjmp bitrotl_loop
+fixrotl:
+ or r22, r21
+ ret
+
+
+;###########################################################
+
+.global rotr32
+; === ROTR32 ===
+; function that rotates a 32 bit word to the right
+; param1: the 32-bit word to rotate
+; given in r25,r24,r23,22 (r25 is most significant)
+; param2: an 8-bit value telling how often to rotate
+; given in r20
+; modifys: r21, r22
+rotr32:
+ cpi r20, 8
+ brlo bitrotr
+ mov r21, r22
+ mov r22, r23
+ mov r23, r24
+ mov r24, r25
+ mov r25, r21
+ subi r20, 8
+ rjmp rotr32
+bitrotr:
+ clr r21
+ clc
+bitrotr_loop:
+ tst r20
+ breq fixrotr
+ ror r25
+ ror r24
+ ror r23
+ ror r22
+ ror r21
+ dec r20
+ rjmp bitrotr_loop
+fixrotr:
+ or r25, r21
+ ret
+
+
+;###########################################################
+
+.global change_endian32
+; === change_endian32 ===
+; function that changes the endianess of a 32-bit word
+; param1: the 32-bit word
+; given in r25,r24,r23,22 (r25 is most significant)
+; modifys: r21, r22
+change_endian32:
+ movw r20, r22 ; (r22,r23) --> (r20,r21)
+ mov r22, r25
+ mov r23, r24
+ mov r24, r21
+ mov r25, r20
+ ret
+
diff --git a/hmac-sha256/sha256.h b/hmac-sha256/sha256.h
new file mode 100644
index 0000000..24960a3
--- /dev/null
+++ b/hmac-sha256/sha256.h
@@ -0,0 +1,122 @@
+/* sha256.h */
+/*
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file sha256.h
+ * \author Daniel Otte
+ * \date 2006-05-16
+ * \license GPLv3 or later
+ *
+ */
+
+#ifndef SHA256_H_
+#define SHA256_H_
+
+#define __LITTLE_ENDIAN__
+
+
+#include <stdint.h>
+
+/** \def SHA256_HASH_BITS
+ * defines the size of a SHA-256 hash value in bits
+ */
+
+/** \def SHA256_HASH_BYTES
+ * defines the size of a SHA-256 hash value in bytes
+ */
+
+/** \def SHA256_BLOCK_BITS
+ * defines the size of a SHA-256 input block in bits
+ */
+
+/** \def SHA256_BLOCK_BYTES
+ * defines the size of a SHA-256 input block in bytes
+ */
+
+#define SHA256_HASH_BITS 256
+#define SHA256_HASH_BYTES (SHA256_HASH_BITS/8)
+#define SHA256_BLOCK_BITS 512
+#define SHA256_BLOCK_BYTES (SHA256_BLOCK_BITS/8)
+
+/** \typedef sha256_ctx_t
+ * \brief SHA-256 context type
+ *
+ * A variable of this type may hold the state of a SHA-256 hashing process
+ */
+typedef struct {
+ uint32_t h[8];
+ uint64_t length;
+} sha256_ctx_t;
+
+/** \typedef sha256_hash_t
+ * \brief SHA-256 hash value type
+ *
+ * A variable of this type may hold the hash value produced by the
+ * sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state) function.
+ */
+typedef uint8_t sha256_hash_t[SHA256_HASH_BYTES];
+
+/** \fn void sha256_init(sha256_ctx_t *state)
+ * \brief initialise a SHA-256 context
+ *
+ * This function sets a ::sha256_ctx_t to the initial values for hashing.
+ * \param state pointer to the SHA-256 hashing context
+ */
+void sha256_init(sha256_ctx_t *state);
+
+/** \fn void sha256_nextBlock (sha256_ctx_t* state, const void* block)
+ * \brief update the context with a given block
+ *
+ * This function updates the SHA-256 hash context by processing the given block
+ * of fixed length.
+ * \param state pointer to the SHA-256 hash context
+ * \param block pointer to the block of fixed length (512 bit = 64 byte)
+ */
+void sha256_nextBlock (sha256_ctx_t* state, const void* block);
+
+/** \fn void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b)
+ * \brief finalize the context with the given block
+ *
+ * This function finalizes the SHA-256 hash context by processing the given block
+ * of variable length.
+ * \param state pointer to the SHA-256 hash context
+ * \param block pointer to the block of fixed length (512 bit = 64 byte)
+ * \param length_b the length of the block in bits
+ */
+void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b);
+
+/** \fn void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state)
+ * \brief convert the hash state into the hash value
+ * This function reads the context and writes the hash value to the destination
+ * \param dest pointer to the location where the hash value should be written
+ * \param state pointer to the SHA-256 hash context
+ */
+void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state);
+
+/** \fn void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b)
+ * \brief simple SHA-256 hashing function for direct hashing
+ *
+ * This function automatically hashes a given message of arbitrary length with
+ * the SHA-256 hashing algorithm.
+ * \param dest pointer to the location where the hash value is going to be written to
+ * \param msg pointer to the message thats going to be hashed
+ * \param length_b length of the message in bits
+ */
+void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b);
+
+#endif /*SHA256_H_*/
diff --git a/md5/md5-asm.S b/md5/md5-asm.S
new file mode 100644
index 0000000..de3b170
--- /dev/null
+++ b/md5/md5-asm.S
@@ -0,0 +1,977 @@
+/* md5-asm.S */
+/*
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * Author: Daniel Otte
+ * License: GPLv3 or later
+ * Date: 2008-11-15
+*/
+
+
+#include "avr-asm-macros.S"
+
+;###########################################################
+; S-BOX
+
+T_table:
+.hword 0xa478, 0xd76a, 0xb756, 0xe8c7, 0x70db, 0x2420, 0xceee, 0xc1bd, 0x0faf, 0xf57c
+.hword 0xc62a, 0x4787, 0x4613, 0xa830, 0x9501, 0xfd46, 0x98d8, 0x6980, 0xf7af, 0x8b44
+.hword 0x5bb1, 0xffff, 0xd7be, 0x895c, 0x1122, 0x6b90, 0x7193, 0xfd98, 0x438e, 0xa679
+.hword 0x0821, 0x49b4, 0x2562, 0xf61e, 0xb340, 0xc040, 0x5a51, 0x265e, 0xc7aa, 0xe9b6
+.hword 0x105d, 0xd62f, 0x1453, 0x0244, 0xe681, 0xd8a1, 0xfbc8, 0xe7d3, 0xcde6, 0x21e1
+.hword 0x07d6, 0xc337, 0x0d87, 0xf4d5, 0x14ed, 0x455a, 0xe905, 0xa9e3, 0xa3f8, 0xfcef
+.hword 0x02d9, 0x676f, 0x4c8a, 0x8d2a, 0x3942, 0xfffa, 0xf681, 0x8771, 0x6122, 0x6d9d
+.hword 0x380c, 0xfde5, 0xea44, 0xa4be, 0xcfa9, 0x4bde, 0x4b60, 0xf6bb, 0xbc70, 0xbebf
+.hword 0x7ec6, 0x289b, 0x27fa, 0xeaa1, 0x3085, 0xd4ef, 0x1d05, 0x0488, 0xd039, 0xd9d4
+.hword 0x99e5, 0xe6db, 0x7cf8, 0x1fa2, 0x5665, 0xc4ac, 0x2244, 0xf429, 0xff97, 0x432a
+.hword 0x23a7, 0xab94, 0xa039, 0xfc93, 0x59c3, 0x655b, 0xcc92, 0x8f0c, 0xf47d, 0xffef
+.hword 0x5dd1, 0x8584, 0x7e4f, 0x6fa8, 0xe6e0, 0xfe2c, 0x4314, 0xa301, 0x11a1, 0x4e08
+.hword 0x7e82, 0xf753, 0xf235, 0xbd3a, 0xd2bb, 0x2ad7, 0xd391, 0xeb86
+
+
+#define MD5_init_fast
+
+.global md5_init
+#ifndef MD5_init_fast
+;###########################################################
+;void md5_init(md5_ctx_t *state)
+; param1: (r24,r25) 16-bit pointer to md5_ctx_t struct in ram
+; modifys: Z(r30,r31), X(r25,r26)
+; size = 9+5*4 WORDS = 29 WORDS = 58 Bytes
+md5_init:
+ movw r26, r24 ; (24,25) --> (26,27) load X with param1
+ ldi r30, lo8(md5_init_vector)
+ ldi r31, hi8(md5_init_vector)
+ ldi r24, 16+4
+md5_init_vloop:
+ lpm r0, Z+
+ st X+, r0
+ dec r24
+ brne md5_init_vloop
+ ret
+
+md5_init_vector:
+.hword 0x2301, 0x6745
+.hword 0xAB89, 0xEFCD
+.hword 0xDCFE, 0x98BA
+.hword 0x5476, 0x1032
+.hword 0x0000, 0x0000
+
+#else
+;###########################################################
+.global md5_init_fast
+;void md5_init(md5_ctx_t *state)
+; param1: (r24,r25) 16-bit pointer to md5_ctx_t struct in ram
+; modifys: r23, r22
+; cycles = 1+16*3+4*2+4 = 1+48+12 = 61
+; size = 1+16*2+4+1 WORDS = 38 WORDS = 76 Bytes
+md5_init:
+md5_init_fast:
+ movw r26, r24
+ ldi r24, 0x01
+ st X+, r24
+ ldi r24, 0x23
+ st X+, r24
+ ldi r24, 0x45
+ st X+, r24
+ ldi r24, 0x67
+ st X+, r24
+ ldi r24, 0x89
+ st X+, r24
+ ldi r24, 0xAB
+ st X+, r24
+ ldi r24, 0xCD
+ st X+, r24
+ ldi r24, 0xEF
+ st X+, r24
+ ldi r24, 0xFE
+ st X+, r24
+ ldi r24, 0xDC
+ st X+, r24
+ ldi r24, 0xBA
+ st X+, r24
+ ldi r24, 0x98
+ st X+, r24
+ ldi r24, 0x76
+ st X+, r24
+ ldi r24, 0x54
+ st X+, r24
+ ldi r24, 0x32
+ st X+, r24
+ ldi r24, 0x10
+ st X+, r24
+ st X+, r1
+ st X+, r1
+ st X+, r1
+ st X+, r1
+ ret
+#endif
+;###########################################################
+
+/*
+static
+uint32_t md5_F(uint32_t x, uint32_t y, uint32_t z){
+ return ((x&y)|((~x)&z));
+}
+*/
+; x: r22-r25
+; y: r18-r21
+; z: r14-r17
+md5_F:
+ and r18, r22
+ and r19, r23
+ and r20, r24
+ and r21, r25
+ com r22
+ com r23
+ com r24
+ com r25
+ and r22, r14
+ and r23, r15
+ and r24, r16
+ and r25, r17
+ or r22, r18
+ or r23, r19
+ or r24, r20
+ or r25, r21
+ rjmp md5_core_F_exit
+
+/*
+static
+uint32_t md5_G(uint32_t x, uint32_t y, uint32_t z){
+ return ((x&z)|((~z)&y));
+}
+*/
+
+; x: r22-r25
+; y: r18-r21
+; z: r14-r17
+md5_G:
+ and r22, r14
+ and r23, r15
+ and r24, r16
+ and r25, r17
+ com r14
+ com r15
+ com r16
+ com r17
+ and r18, r14
+ and r19, r15
+ and r20, r16
+ and r21, r17
+ or r22, r18
+ or r23, r19
+ or r24, r20
+ or r25, r21
+ rjmp md5_core_F_exit
+/*
+static
+uint32_t md5_H(uint32_t x, uint32_t y, uint32_t z){
+ return (x^y^z);
+}
+*/
+; x: r22-r25
+; y: r18-r21
+; z: r14-r17
+md5_H:
+ eor r22, r18
+ eor r22, r14
+ eor r23, r19
+ eor r23, r15
+ eor r24, r20
+ eor r24, r16
+ eor r25, r21
+ eor r25, r17
+ rjmp md5_core_F_exit
+/*
+static
+uint32_t md5_I(uint32_t x, uint32_t y, uint32_t z){
+ return (y ^ (x | (~z)));
+}
+*/
+
+jump_table:
+ rjmp md5_F
+ rjmp md5_G
+ rjmp md5_H
+; rjmp md5_I
+
+; x: r22-r25
+; y: r18-r21
+; z: r14-r17
+md5_I:
+ com r14
+ com r15
+ com r16
+ com r17
+ or r22, r14
+ or r23, r15
+ or r24, r16
+ or r25, r17
+ eor r22, r18
+ eor r23, r19
+ eor r24, r20
+ eor r25, r21
+ rjmp md5_core_F_exit
+
+as_table:
+; (as+0)&3 (as+3)&3 (as+1)&3 (as+2)&3
+; Z X Y
+; AS_SAVE0 AS_SAVE1 AS_SAVE2 AS_SAVE3
+.byte 1*4, 0*4, 2*4, 3*4 ;as=1
+.byte 2*4, 1*4, 3*4, 0*4 ;as=2
+.byte 3*4, 2*4, 0*4, 1*4 ;as=3
+.byte 0*4, 3*4, 1*4, 2*4 ;as=4
+
+;###########################################################
+.global md5_core
+md5_core:
+ mov r21, r20
+ mov r20, r18
+ mov r19, r16
+ mov r18, r14
+; rjmp md5_core_asm
+/*
+void md5_core(uint32_t* a, void* block, uint8_t as, uint8_t s, uint8_t i, uint8_t fi){
+ uint32_t t;
+ md5_func_t* funcs[]={md5_F, md5_G, md5_H, md5_I};
+ as &= 0x3;
+ / * a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). * /
+ t = a[as] + funcs[fi](a[(as+1)&3], a[(as+2)&3], a[(as+3)&3]) + *((uint32_t*)block) + md5_T[i] ;
+ a[as]=a[(as+1)&3] + ROTL32(t, s);
+}
+*/
+; a: r24-r25
+; block: r22-r23
+; as: r21
+; s: r20
+; i: r19
+; fi: r18
+P_A0 = 24
+P_A1 = 25
+P_B0 = 22
+P_B1 = 23
+P_AS = 21
+P_S = 20
+P_I = 19
+P_FI = 18
+
+; x: r22-r25
+; y: r18-r21
+; z: r14-r17
+
+
+AS_SAVE0 = 4
+AS_SAVE1 = 5
+AS_SAVE2 = 6
+AS_SAVE3 = 7
+FI_SAVE = 8
+S_SAVE = 9
+ACCU0 = 10
+ACCU1 = 11
+ACCU2 = 12
+ACCU3 = 13
+ARG_X0 = 22
+ARG_X1 = 23
+ARG_X2 = 24
+ARG_X3 = 25
+ARG_Y0 = 18
+ARG_Y1 = 19
+ARG_Y2 = 20
+ARG_Y3 = 21
+ARG_Z0 = 14
+ARG_Z1 = 15
+ARG_Z2 = 16
+ARG_Z3 = 17
+
+
+md5_core_asm:
+ push r16
+ push r17
+ push_range 4, 8
+ ldi r30, lo8(T_table)
+ ldi r31, hi8(T_table)
+ lsl P_I
+ rol r1
+ lsl P_I
+ rol r1
+ add r30, P_I
+ adc r31, r1
+ clr r1
+ mov FI_SAVE, r18
+ /* loading T[i] into ACCU */
+ lpm ACCU0, Z+
+ lpm ACCU1, Z+
+ lpm ACCU2, Z+
+ lpm ACCU3, Z
+ /* add *block to ACCU */
+ movw r30, P_B0
+ ld r0, Z+
+ add ACCU0, r0
+ ld r0, Z+
+ adc ACCU1, r0
+ ld r0, Z+
+ adc ACCU2, r0
+ ld r0, Z+
+ adc ACCU3, r0
+ /* add a[as+0&3] to ACCU */
+ ldi r30, lo8(as_table)
+ ldi r31, hi8(as_table)
+ dec P_AS
+ andi P_AS, 0x03
+ lsl P_AS
+ lsl P_AS
+ add r30, r21
+ adc r31, r1 ; Z points to the correct row in as_table
+ lpm AS_SAVE0, Z+
+ lpm AS_SAVE1, Z+
+ lpm AS_SAVE2, Z+
+ lpm AS_SAVE3, Z
+ movw r26, r24 ; X points to a[0]
+ add r26, AS_SAVE0
+ adc r27, r1 ; X points at a[as&3]
+ ld r0, X+
+ add ACCU0, r0
+ ld r0, X+
+ adc ACCU1, r0
+ ld r0, X+
+ adc ACCU2, r0
+ ld r0, X+
+ adc ACCU3, r0
+ mov S_SAVE, r20
+
+ movw r28, r24
+ /* loading z value */
+ movw r26, r28
+ add r26, AS_SAVE1
+ adc r27, r1
+ ld ARG_Z0, X+
+ ld ARG_Z1, X+
+ ld ARG_Z2, X+
+ ld ARG_Z3, X
+
+ /* loading x value */
+ movw r26, r28
+ add r26, AS_SAVE2
+ adc r27, r1
+ ld ARG_X0, X+
+ ld ARG_X1, X+
+ ld ARG_X2, X+
+ ld ARG_X3, X
+
+ /* loading y value */
+ movw r26, r28
+ add r26, AS_SAVE3
+ adc r27, r1
+ ldi r30, pm_lo8(jump_table)
+ ldi r31, pm_hi8(jump_table)
+ add r30, FI_SAVE
+ adc r31, r1 ; Z points to the correct entry in our jump table
+ ld ARG_Y0, X+
+ ld ARG_Y1, X+
+ ld ARG_Y2, X+
+ ld ARG_Y3, X
+
+ ijmp /* calls the function pointed by Z */
+md5_core_F_exit:
+
+ /* add ACCU to result of f() */
+ add r22, ACCU0
+ adc r23, ACCU1
+ adc r24, ACCU2
+ adc r25, ACCU3
+
+ /* rotate */
+ mov r20, S_SAVE
+rotl32:
+ cpi r20, 8
+ brlo bitrotl
+ mov r21, r25
+ mov r25, r24
+ mov r24, r23
+ mov r23, r22
+ mov r22, r21
+ subi r20, 8
+ rjmp rotl32
+bitrotl:
+ mov r21, r25
+bitrotl_loop:
+ tst r20
+ breq fixrotl
+bitrotl_loop2:
+ lsl r21
+ rol r22
+ rol r23
+ rol r24
+ rol r25
+ dec r20
+ brne bitrotl_loop2
+fixrotl:
+
+ /* add a[(as+1)&3] */
+ movw r26, r28
+ add r26, AS_SAVE2
+ adc r27, r1
+ ld r0, X+
+ add r22, r0
+ ld r0, X+
+ adc r23, r0
+ ld r0, X+
+ adc r24, r0
+ ld r0, X
+ adc r25, r0
+
+ /* store result */
+ movw r26, r28
+ add r26, AS_SAVE0
+ adc r27, r1
+ st X+, r22
+ st X+, r23
+ st X+, r24
+ st X , r25
+md5_core_exit:
+ pop_range 4, 8
+ pop r17
+ pop r16
+ ret
+
+;###################################################################
+/*
+void md5_nextBlock(md5_ctx_t *state, void* block){
+ uint32_t a[4];
+ uint8_t m,n,i=0;
+
+ a[0]=state->a[0];
+ a[1]=state->a[1];
+ a[2]=state->a[2];
+ a[3]=state->a[3];
+
+ / * round 1 * /
+ uint8_t s1t[]={7,12,17,22}; // 1,-1 1,4 2,-1 3,-2
+ for(m=0;m<4;++m){
+ for(n=0;n<4;++n){
+ md5_core(a, &(((uint32_t*)block)[m*4+n]), 4-n, s1t[n],i++,0);
+ }
+ }
+ / * round 2 * /
+ uint8_t s2t[]={5,9,14,20}; // 1,-3 1,1 2,-2 2,4
+ for(m=0;m<4;++m){
+ for(n=0;n<4;++n){
+ md5_core(a, &(((uint32_t*)block)[(1+m*4+n*5)&0xf]), 4-n, s2t[n],i++,1);
+ }
+ }
+ / * round 3 * /
+ uint8_t s3t[]={4,11,16,23}; // 0,4 1,3 2,0 3,-1
+ for(m=0;m<4;++m){
+ for(n=0;n<4;++n){
+ md5_core(a, &(((uint32_t*)block)[(5-m*4+n*3)&0xf]), 4-n, s3t[n],i++,2);
+ }
+ }
+ / * round 4 * /
+ uint8_t s4t[]={6,10,15,21}; // 1,-2 1,2 2,-1 3,-3
+ for(m=0;m<4;++m){
+ for(n=0;n<4;++n){
+ md5_core(a, &(((uint32_t*)block)[(0-m*4+n*7)&0xf]), 4-n, s4t[n],i++,3);
+ }
+ }
+ state->a[0] += a[0];
+ state->a[1] += a[1];
+ state->a[2] += a[2];
+ state->a[3] += a[3];
+ state->counter++;
+}
+*/
+
+shift_table_1: .byte 7,12,17,22
+shift_table_2: .byte 5, 9,14,20
+shift_table_3: .byte 4,11,16,23
+shift_table_4: .byte 6,10,15,21
+
+index_table_r2:
+;(1+m*4+n*5)&0xf:
+ .byte 0x04, 0x18, 0x2c, 0x00
+ .byte 0x14, 0x28, 0x3c, 0x10
+ .byte 0x24, 0x38, 0x0c, 0x20
+ .byte 0x34, 0x08, 0x1c, 0x30
+
+index_table_r3:
+;(5-m*4+n*3)&0xf:
+ .byte 0x14, 0x20, 0x2c, 0x38
+ .byte 0x04, 0x10, 0x1c, 0x28
+ .byte 0x34, 0x00, 0x0c, 0x18
+ .byte 0x24, 0x30, 0x3c, 0x08
+
+index_table_r4:
+;(0-m*4+n*7)&0xf:
+ .byte 0x00, 0x1c, 0x38, 0x14
+ .byte 0x30, 0x0c, 0x28, 0x04
+ .byte 0x20, 0x3c, 0x18, 0x34
+ .byte 0x10, 0x2c, 0x08, 0x24
+
+APTR_REG = 2
+BPTR_REG = 4
+N_REG = 6
+M_REG = 7
+I_REG = 8
+.global md5_nextBlock
+md5_nextBlock:
+ stack_alloc 16
+ push_range 2, 17
+ push r28
+ push r29
+ push r24
+ push r25
+ adiw r30, 1 /* Z now points to the beginning of the allocated memory */
+ movw r2, r30
+ movw r4, r22
+ movw r26, r24
+ ldi r20, 16
+1:
+ ld r0, X+
+ st Z+, r0
+ dec r20
+ brne 1b
+ /* state now copied to stack memory */
+ clr I_REG
+ /* Round 1 */
+ clr M_REG
+ ldi r17, 4
+1:
+ clr N_REG
+ ldi r16, 4
+2:
+ movw r24, APTR_REG
+ movw r22, BPTR_REG
+ mov r0, M_REG
+ lsl r0
+ lsl r0
+ add r0, N_REG
+ lsl r0
+ lsl r0
+ add r22, r0
+ adc r23, r1
+ mov r21, r16
+ ldi r30, lo8(shift_table_1)
+ ldi r31, hi8(shift_table_1)
+ add r30, N_REG
+ adc r31, r1
+ lpm r20, Z
+ mov r19, I_REG
+ ldi r18, 0
+ rcall md5_core_asm
+ inc I_REG
+ inc N_REG
+ dec r16
+ brne 2b
+ inc M_REG
+ dec r17
+ brne 1b
+
+ /* Round 2 */
+ clr M_REG
+ ldi r17, 4
+1:
+ clr N_REG
+ ldi r16, 4
+2:
+ movw r24, APTR_REG
+ movw r22, BPTR_REG
+ ldi r30, lo8(index_table_r2)
+ ldi r31, hi8(index_table_r2)
+ mov r0, M_REG
+ lsl r0
+ lsl r0
+ add r0, N_REG
+ add r30, r0
+ adc r31, r1
+ lpm r0, Z
+ add r22, r0
+ adc r23, r1
+ mov r21, r16
+ ldi r30, lo8(shift_table_2)
+ ldi r31, hi8(shift_table_2)
+ add r30, N_REG
+ adc r31, r1
+ lpm r20, Z
+ mov r19, I_REG
+ ldi r18, 1
+ rcall md5_core_asm
+ inc I_REG
+ inc N_REG
+ dec r16
+ brne 2b
+ inc M_REG
+ dec r17
+ brne 1b
+
+ /* Round 3 */
+ clr M_REG
+ ldi r17, 4
+1:
+ clr N_REG
+ ldi r16, 4
+2:
+ movw r24, APTR_REG
+ movw r22, BPTR_REG
+ ldi r30, lo8(index_table_r3)
+ ldi r31, hi8(index_table_r3)
+ mov r0, M_REG
+ lsl r0
+ lsl r0
+ add r0, N_REG
+ add r30, r0
+ adc r31, r1
+ lpm r0, Z
+ add r22, r0
+ adc r23, r1
+ mov r21, r16
+ ldi r30, lo8(shift_table_3)
+ ldi r31, hi8(shift_table_3)
+ add r30, N_REG
+ adc r31, r1
+ lpm r20, Z
+ mov r19, I_REG
+ ldi r18, 2
+ rcall md5_core_asm
+ inc I_REG
+ inc N_REG
+ dec r16
+ brne 2b
+ inc M_REG
+ dec r17
+ brne 1b
+
+ /* Round 4 */
+ clr M_REG
+ ldi r17, 4
+1:
+ clr N_REG
+ ldi r16, 4
+2:
+ movw r24, APTR_REG
+ movw r22, BPTR_REG
+ ldi r30, lo8(index_table_r4)
+ ldi r31, hi8(index_table_r4)
+ mov r0, M_REG
+ lsl r0
+ lsl r0
+ add r0, N_REG
+ add r30, r0
+ adc r31, r1
+ lpm r0, Z
+ add r22, r0
+ adc r23, r1
+ mov r21, r16
+ ldi r30, lo8(shift_table_4)
+ ldi r31, hi8(shift_table_4)
+ add r30, N_REG
+ adc r31, r1
+ lpm r20, Z
+ mov r19, I_REG
+ ldi r18, 3
+ rcall md5_core_asm
+ inc I_REG
+ inc N_REG
+ dec r16
+ brne 2b
+ inc M_REG
+ dec r17
+ brne 1b
+
+
+ pop r27
+ pop r26 /* X now points to the context */
+ movw r30, APTR_REG
+ ldi r16, 4
+1:
+ ld r0, X
+ ld r2, Z+
+ add r0, r2
+ st X+, r0
+ ld r0, X
+ ld r2, Z+
+ adc r0, r2
+ st X+, r0
+ ld r0, X
+ ld r2, Z+
+ adc r0, r2
+ st X+, r0
+ ld r0, X
+ ld r2, Z+
+ adc r0, r2
+ st X+, r0
+ dec r16
+ brne 1b
+
+ ld r0, X
+ inc r0
+ st X+, r0
+ brne 2f
+ ld r0, X
+ inc r0
+ st X+, r0
+ brne 2f
+ ld r0, X
+ inc r0
+ st X+, r0
+ brne 2f
+ ld r0, X
+ inc r0
+ st X+, r0
+2:
+
+ pop r29
+ pop r28
+ pop_range 2, 17
+ stack_free 16
+ ret
+
+;###############################################################################
+/*
+void md5_lastBlock(md5_ctx_t *state, const void* block, uint16_t length_b){
+ uint16_t l;
+ uint8_t b[64];
+ while (length_b >= 512){
+ md5_nextBlock(state, block);
+ length_b -= 512;
+ block = ((uint8_t*)block) + 512/8;
+ }
+ memset(b, 0, 64);
+ memcpy(b, block, length_b/8);
+ / * insert padding one * /
+ l=length_b/8;
+ if(length_b%8){
+ uint8_t t;
+ t = ((uint8_t*)block)[l];
+ t |= (0x80>>(length_b%8));
+ b[l]=t;
+ }else{
+ b[l]=0x80;
+ }
+ / * insert length value * /
+ if(l+sizeof(uint64_t) >= 512/8){
+ md5_nextBlock(state, b);
+ state->counter--;
+ memset(b, 0, 64-8);
+ }
+ *((uint64_t*)&b[64-sizeof(uint64_t)]) = (state->counter * 512) + length_b;
+ md5_nextBlock(state, b);
+}
+*/
+; state_ptr : r24,r25
+; block_ptr : r22,r23
+; length_b : r20,r21
+.global md5_lastBlock
+md5_lastBlock:
+ stack_alloc_large 64
+ push_range 12, 17
+ push r30
+ push r31
+ movw r16, r20 /* length_b */
+ movw r14, r22 /* block_ptr */
+ movw r12, r24 /* state_ptr */
+ ldi r18, 64
+2:
+ cpi r17, 2 /* hi8(512) */
+ brlo 2f
+1:
+ movw r24, r12
+ movw r22, r14
+ rcall md5_nextBlock
+ add r14, r18
+ adc r15, r1
+ subi r17, 2
+ rjmp 2b
+2:
+ pop r31
+ pop r30
+
+ adiw r30, 1 /* adjust Z to point to buffer */
+ movw r26, r14
+ movw r24, r16
+ adiw r24, 7
+
+ lsr r25
+ ror r24
+ lsr r25
+ ror r24
+ lsr r24 /* r24 now holds how many bytes are to copy */
+ ldi r18, 64
+ sub r18, r24 /* r18 will hold the amount of used bytes in buffer */
+ tst r24
+4:
+ breq 5f
+ ld r0, X+
+ st Z+, r0
+ dec r24
+ rjmp 4b /* Z points to the byte after msg in buffer */
+5: /* append 1-bit */
+ mov r20, r16
+ ldi r19, 0x80
+ andi r20, 0x07
+ brne bit_fucking
+ st Z+, r19
+ dec r18 /* 'allocate' another byte in buffer */
+ rjmp after_bit_fucking
+bit_fucking:
+1:
+ lsr r19
+ dec r20
+ brne 1b
+ or r0, r19
+ st -Z, r0
+ adiw r30, 1
+after_bit_fucking:
+ clt
+ cpi r18, 8
+ brmi 2f
+ set /* store in t if the counter will also fit in this block (1 if fit)*/
+2:
+ tst r18
+ breq 2f
+1: /* fill remaning buffer with zeros */
+ st Z+, r1
+ dec r18
+ brne 1b
+2:
+ sbiw r30, 63
+ sbiw r30, 1
+ movw r14, r30 /* r14:r15 now points to buffer */
+ brts load_counter
+ /* counter does not fit, finalize this block */
+ movw r24, r12
+ movw r22, r14
+ rcall md5_nextBlock
+ movw r30, r14
+ ldi r20, 64-8
+3:
+ st Z+, r1
+ dec r20
+ brne 3b
+
+load_counter:
+ movw r26, r12 /* X points to state */
+ adiw r26, 16
+ ld r19, X+
+ ld r20, X+
+ ld r21, X+
+ ld r22, X+
+ brts post_counter_decrement /* do not decrement because counter fits */
+counter_decrement:
+ subi r19, 1
+ sbci r20, 0
+ sbci r21, 0
+ sbci r22, 0
+post_counter_decrement:
+ clr r18
+ clr r23
+ lsl r19
+ rol r20
+ rol r21
+ rol r22
+ rol r23
+ mov r18, r16 /* r16:r17 length_b */
+ add r19, r17
+ adc r20, r1
+ adc r21, r1
+ adc r22, r1
+ adc r23, r1
+ movw r30, r14
+ adiw r30, 64-8
+ st Z+, r18
+ st Z+, r19
+ st Z+, r20
+ st Z+, r21
+ st Z+, r22
+ st Z+, r23
+ st Z+, r1
+ st Z, r1
+
+ sbiw r30, 63
+; sbiw r30, 1
+ movw r24, r12
+ movw r22, r30
+ rcall md5_nextBlock
+md5_lastBlock_exit:
+ pop_range 12, 17
+ stack_free_large 64
+ ret
+
+
+;###############################################################################
+
+
+.global md5_ctx2hash
+md5_ctx2hash:
+ movw r26, r24
+ movw r30, r22
+ ldi r22, 16
+1:
+ ld r0, Z+
+ st X+, r0
+ dec r22
+ brne 1b
+ ret
+
+
+;###############################################################################
+
+
+.global md5
+md5:
+ stack_alloc 20
+ push_range 8, 17
+ adiw r30, 1
+ movw r8, r30 /* ctx */
+ movw r10, r24 /* dest */
+ movw r12, r22 /* msg */
+ movw r14, r18 /* length (low) */
+ movw r16, r20 /* length (high) */
+ movw r24, r30
+ rcall md5_init
+1:
+ tst r16
+ brne next_round
+ tst r17
+ breq last_round
+next_round:
+ movw r24, r8
+ movw r22, r12
+ rcall md5_nextBlock
+ ldi r22, 64
+ add r12, r22
+ adc r13, r1
+ ldi r22, 2
+ sub r15, r22
+ sbci r16, 0
+ sbci r17, 0
+ rjmp 1b
+last_round:
+ movw r24, r8
+ movw r22, r12
+ movw r20, r14
+ rcall md5_lastBlock
+ movw r24, r10
+ movw r22, r8
+ rcall md5_ctx2hash
+ pop_range 8, 17
+ stack_free 20
+ ret
+
+
+
diff --git a/md5.c b/md5/md5.c
similarity index 100%
rename from md5.c
rename to md5/md5.c
diff --git a/md5/md5.h b/md5/md5.h
new file mode 100644
index 0000000..6b65c4a
--- /dev/null
+++ b/md5/md5.h
@@ -0,0 +1,55 @@
+/* md5.h */
+/*
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * File: md5.h
+ * Author: Daniel Otte
+ * Date: 31.07.2006
+ * License: GPL
+ * Description: Implementation of the MD5 hash algorithm as described in RFC 1321
+ *
+ */
+
+
+#ifndef MD5_H_
+#define MD5_H_
+
+#include <stdint.h>
+
+
+#define MD5_HASH_BITS 128
+#define MD5_HASH_BYTES (MD5_HASH_BITS/8)
+#define MD5_BLOCK_BITS 512
+#define MD5_BLOCK_BYTES (MD5_BLOCK_BITS/8)
+
+
+typedef struct md5_ctx_st {
+ uint32_t a[4];
+ uint32_t counter;
+} md5_ctx_t;
+
+typedef uint8_t md5_hash_t[MD5_HASH_BYTES];
+
+
+void md5_init(md5_ctx_t *s);
+void md5_nextBlock(md5_ctx_t *state, const void* block);
+void md5_lastBlock(md5_ctx_t *state, const void* block, uint16_t length);
+void md5_ctx2hash(md5_hash_t* dest, const md5_ctx_t* state);
+void md5(md5_hash_t* dest, const void* msg, uint32_t length_b);
+
+#endif /*MD5_H_*/
diff --git a/md5_sbox.h b/md5/md5_sbox.h
similarity index 100%
rename from md5_sbox.h
rename to md5/md5_sbox.h
diff --git a/mickey128.c b/mickey128/mickey128.c
similarity index 100%
rename from mickey128.c
rename to mickey128/mickey128.c
diff --git a/mickey128.h b/mickey128/mickey128.h
similarity index 100%
rename from mickey128.h
rename to mickey128/mickey128.h
diff --git a/mkfiles/arcfour.mk b/mkfiles/arcfour.mk
index d62c144..ebc6858 100644
--- a/mkfiles/arcfour.mk
+++ b/mkfiles/arcfour.mk
@@ -4,6 +4,7 @@ ALGO_NAME := ARCFOUR
# comment out the following line for removement of ARCFOUR from the build process
STREAM_CIPHERS += $(ALGO_NAME)
+$(ALGO_NAME)_DIR := arcfour/
$(ALGO_NAME)_OBJ := arcfour-asm.o
$(ALGO_NAME)_TEST_BIN := main-arcfour-test.o $(CLI_STD) \
nessie_stream_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/arcfour_c.mk b/mkfiles/arcfour_c.mk
index 40a3a84..5bee9f0 100644
--- a/mkfiles/arcfour_c.mk
+++ b/mkfiles/arcfour_c.mk
@@ -4,10 +4,9 @@ ALGO_NAME := ARCFOUR_C
# comment out the following line for removement of ARCFOUR from the build process
STREAM_CIPHERS += $(ALGO_NAME)
+$(ALGO_NAME)_DIR := arcfour/
$(ALGO_NAME)_OBJ := arcfour.o
-$(ALGO_NAME)_TEST_BIN := main-arcfour-test.o $(CLI_STD) \
- nessie_stream_test.o nessie_common.o \
- performance_test.o
+$(ALGO_NAME)_TEST_BIN := main-arcfour-test.o $(CLI_STD) nessie_stream_test.o nessie_common.o performance_test.o
$(ALGO_NAME)_NESSIE_TEST := "nessie"
$(ALGO_NAME)_PERFORMANCE_TEST := "performance"
diff --git a/mkfiles/base64.mk b/mkfiles/base64.mk
index 58883c3..013b7ee 100644
--- a/mkfiles/base64.mk
+++ b/mkfiles/base64.mk
@@ -4,7 +4,7 @@ ALGO_NAME := BASE64
# comment out the following line for removement of base64 from the build process
ENCODINGS += $(ALGO_NAME)
-
+$(ALGO_NAME)_DIR := base64/
$(ALGO_NAME)_OBJ := base64_enc.o base64_dec.o
$(ALGO_NAME)_TEST_BIN := main-base64-test.o $(CLI_STD) \
performance_test.o noekeon_asm.o noekeon_prng.o memxor.o
diff --git a/mkfiles/bmw_c.mk b/mkfiles/bmw_c.mk
index 6305932..585bbb2 100644
--- a/mkfiles/bmw_c.mk
+++ b/mkfiles/bmw_c.mk
@@ -4,7 +4,7 @@ ALGO_NAME := BMW_C
# comment out the following line for removement of BlueMidnightWish from the build process
HASHES += $(ALGO_NAME)
-
+$(ALGO_NAME)_DIR := bmw/
$(ALGO_NAME)_OBJ := bmw_small.o bmw_large.o
$(ALGO_NAME)_TEST_BIN := main-bmw-test.o hfal_bmw_small.o hfal_bmw_large.o $(CLI_STD) $(HFAL_STD)
$(ALGO_NAME)_NESSIE_TEST := test nessie
diff --git a/mkfiles/cast5.mk b/mkfiles/cast5.mk
index 08ead6e..318a0e5 100644
--- a/mkfiles/cast5.mk
+++ b/mkfiles/cast5.mk
@@ -4,6 +4,7 @@ ALGO_NAME := CAST5
# comment out the following line for removement of CAST5 from the build process
BLOCK_CIPHERS += $(ALGO_NAME)
+$(ALGO_NAME)_DIR := cast5/
$(ALGO_NAME)_OBJ := cast5.o
$(ALGO_NAME)_TEST_BIN := main-cast5-test.o $(CLI_STD) \
nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/cast6.mk b/mkfiles/cast6.mk
index 3f8539f..f28800a 100644
--- a/mkfiles/cast6.mk
+++ b/mkfiles/cast6.mk
@@ -4,7 +4,7 @@ ALGO_NAME := CAST6
# comment out the following line for removement of CAST6 from the build process
BLOCK_CIPHERS += $(ALGO_NAME)
-
+$(ALGO_NAME)_DIR := cast6/
$(ALGO_NAME)_OBJ := cast6.o
$(ALGO_NAME)_TEST_BIN := main-cast6-test.o $(CLI_STD) \
nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/des.mk b/mkfiles/des.mk
index 5d9540d..180d9e1 100644
--- a/mkfiles/des.mk
+++ b/mkfiles/des.mk
@@ -4,6 +4,7 @@ ALGO_NAME := DES
# comment out the following line for removement of DES from the build process
BLOCK_CIPHERS += $(ALGO_NAME)
+$(ALGO_NAME)_DIR := des/
$(ALGO_NAME)_OBJ := des.o
$(ALGO_NAME)_TEST_BIN := main-des-test.o $(CLI_STD) \
nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/entropium.mk b/mkfiles/entropium.mk
index 02ad75b..e87b3de 100644
--- a/mkfiles/entropium.mk
+++ b/mkfiles/entropium.mk
@@ -4,6 +4,7 @@ ALGO_NAME := ENTROPIUM
# comment out the following line for removement of PRNG from the build process
PRNGS += $(ALGO_NAME)
+$(ALGO_NAME)_DIR := entropium/
$(ALGO_NAME)_OBJ := entropium.o sha256-asm.o
$(ALGO_NAME)_TEST_BIN := main-entropium-test.o $(CLI_STD) performance_test.o
diff --git a/mkfiles/grain.mk b/mkfiles/grain.mk
index 1b0da65..5e6638e 100644
--- a/mkfiles/grain.mk
+++ b/mkfiles/grain.mk
@@ -4,6 +4,7 @@ ALGO_NAME := GRAIN
# comment out the following line for removement of Grain from the build process
STREAM_CIPHERS += $(ALGO_NAME)
+$(ALGO_NAME)_DIR := grain/
$(ALGO_NAME)_OBJ := grain.o
$(ALGO_NAME)_TEST_BIN := main-grain-test.o $(CLI_STD) \
nessie_stream_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/hmac-md5.mk b/mkfiles/hmac-md5.mk
index 7d3f644..fd23627 100644
--- a/mkfiles/hmac-md5.mk
+++ b/mkfiles/hmac-md5.mk
@@ -4,9 +4,10 @@ ALGO_NAME := HMAC-MD5
# comment out the following line for removement of HMAC-MD5 from the build process
MACS += $(ALGO_NAME)
+$(ALGO_NAME)_DIR := hmac-md5/
$(ALGO_NAME)_OBJ := hmac-md5.o md5-asm.o
$(ALGO_NAME)_TEST_BIN := main-hmac-md5-test.o $(CLI_STD) \
- nessie_mac_test.o nessie_common.o base64_enc.o base64_dec.o
+ nessie_mac_test.o nessie_common.o
$(ALGO_NAME)_NESSIE_TEST := "nessie"
$(ALGO_NAME)_PERFORMANCE_TEST := "performance"
diff --git a/mkfiles/hmac-sha1.mk b/mkfiles/hmac-sha1.mk
index 9087400..40958a7 100644
--- a/mkfiles/hmac-sha1.mk
+++ b/mkfiles/hmac-sha1.mk
@@ -4,6 +4,7 @@ ALGO_NAME := HMAC-SHA1
# comment out the following line for removement of HMAC-SHA1 from the build process
MACS += $(ALGO_NAME)
+$(ALGO_NAME)_DIR := hmac-sha1/
$(ALGO_NAME)_OBJ := hmac-sha1.o sha1-asm.o
$(ALGO_NAME)_TEST_BIN := main-hmac-sha1-test.o $(CLI_STD) \
nessie_mac_test.o nessie_common.o
diff --git a/mkfiles/hmac-sha256.mk b/mkfiles/hmac-sha256.mk
index ba221ab..4b25ea9 100644
--- a/mkfiles/hmac-sha256.mk
+++ b/mkfiles/hmac-sha256.mk
@@ -4,6 +4,7 @@ ALGO_NAME := HMAC-SHA256
# comment out the following line for removement of HMAC-SHA256 from the build process
MACS += $(ALGO_NAME)
+$(ALGO_NAME)_DIR := hmac-sha256/
$(ALGO_NAME)_OBJ := hmac-sha256.o sha256-asm.o
$(ALGO_NAME)_TEST_BIN := main-hmac-sha256-test.o $(CLI_STD) \
nessie_mac_test.o nessie_common.o
diff --git a/mkfiles/md5.mk b/mkfiles/md5.mk
index 025e9eb..bebdaa3 100644
--- a/mkfiles/md5.mk
+++ b/mkfiles/md5.mk
@@ -4,6 +4,7 @@ ALGO_NAME := MD5
# comment out the following line for removement of MD5 from the build process
HASHES += $(ALGO_NAME)
+$(ALGO_NAME)_DIR := md5/
$(ALGO_NAME)_OBJ := md5-asm.o
$(ALGO_NAME)_TEST_BIN := main-md5-test.o hfal_md5.o $(CLI_STD) $(HFAL_STD)
$(ALGO_NAME)_NESSIE_TEST := "nessie"
diff --git a/mkfiles/md5_c.mk b/mkfiles/md5_c.mk
index 7b6bb1a..d7421e7 100644
--- a/mkfiles/md5_c.mk
+++ b/mkfiles/md5_c.mk
@@ -4,6 +4,7 @@ ALGO_NAME := MD5_C
# comment out the following line for removement of MD5 from the build process
HASHES += $(ALGO_NAME)
+$(ALGO_NAME)_DIR := md5/
$(ALGO_NAME)_OBJ := md5.o
$(ALGO_NAME)_TEST_BIN := main-md5-test.o hfal_md5.o $(CLI_STD) $(HFAL_STD)
$(ALGO_NAME)_NESSIE_TEST := "nessie"
diff --git a/mkfiles/mickey128.mk b/mkfiles/mickey128.mk
index 6c95c96..c7bc17d 100644
--- a/mkfiles/mickey128.mk
+++ b/mkfiles/mickey128.mk
@@ -4,6 +4,7 @@ ALGO_NAME := MICKEY128
# comment out the following line for removement of Mickey128 from the build process
STREAM_CIPHERS += $(ALGO_NAME)
+$(ALGO_NAME)_DIR := mickey128/
$(ALGO_NAME)_OBJ := mickey128.o
$(ALGO_NAME)_TEST_BIN := main-mickey128-test.o $(CLI_STD) \
nessie_stream_test.o nessie_common.o
diff --git a/mkfiles/present.mk b/mkfiles/present.mk
index 3a4012c..3c73f82 100644
--- a/mkfiles/present.mk
+++ b/mkfiles/present.mk
@@ -4,7 +4,7 @@ ALGO_NAME := PRESENT
# comment out the following line for removement of present from the build process
BLOCK_CIPHERS += $(ALGO_NAME)
-
+$(ALGO_NAME)_DIR := present/
$(ALGO_NAME)_OBJ := present.o
$(ALGO_NAME)_TEST_BIN := main-present-test.o $(CLI_STD) \
nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/rc5.mk b/mkfiles/rc5.mk
index 3a5f128..be82175 100644
--- a/mkfiles/rc5.mk
+++ b/mkfiles/rc5.mk
@@ -4,7 +4,7 @@ ALGO_NAME := RC5
# comment out the following line for removement of RC5 from the build process
BLOCK_CIPHERS += $(ALGO_NAME)
-
+$(ALGO_NAME)_DIR := rc5/
$(ALGO_NAME)_OBJ := rc5.o
$(ALGO_NAME)_TEST_BIN := main-rc5-test.o $(CLI_STD) nessie_bc_test.o \
nessie_common.o performance_test.o
diff --git a/mkfiles/rc6.mk b/mkfiles/rc6.mk
index e0bc603..a58b138 100644
--- a/mkfiles/rc6.mk
+++ b/mkfiles/rc6.mk
@@ -4,7 +4,7 @@ ALGO_NAME := RC6
# comment out the following line for removement of RC6 from the build process
BLOCK_CIPHERS += $(ALGO_NAME)
-
+$(ALGO_NAME)_DIR := rc6/
$(ALGO_NAME)_OBJ := rc6.o
$(ALGO_NAME)_TEST_BIN := main-rc6-test.o $(CLI_STD) \
nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/seed.mk b/mkfiles/seed.mk
index 1c2b605..07ceb3e 100644
--- a/mkfiles/seed.mk
+++ b/mkfiles/seed.mk
@@ -4,6 +4,7 @@ ALGO_NAME := SEED
# comment out the following line for removement of SEED from the build process
BLOCK_CIPHERS += $(ALGO_NAME)
+$(ALGO_NAME)_DIR := seed/
$(ALGO_NAME)_OBJ := seed-asm.o
$(ALGO_NAME)_TEST_BIN := main-seed-test.o $(CLI_STD) \
nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/seed_C.mk b/mkfiles/seed_C.mk
index 14aa8ac..7c77666 100644
--- a/mkfiles/seed_C.mk
+++ b/mkfiles/seed_C.mk
@@ -4,6 +4,7 @@ ALGO_NAME := SEED_C
# comment out the following line for removement of SEED from the build process
BLOCK_CIPHERS += $(ALGO_NAME)
+$(ALGO_NAME)_DIR := seed/
$(ALGO_NAME)_OBJ := seed_C.o
$(ALGO_NAME)_TEST_BIN := main-seed-test.o $(CLI_STD) \
nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/serpent-bitslice.mk b/mkfiles/serpent-bitslice.mk
index 6a9b76a..5719143 100644
--- a/mkfiles/serpent-bitslice.mk
+++ b/mkfiles/serpent-bitslice.mk
@@ -4,7 +4,7 @@ ALGO_NAME := SERPENT_BITSLICE
# comment out the following line for removement of serpent from the build process
BLOCK_CIPHERS += $(ALGO_NAME)
-
+$(ALGO_NAME)_DIR := serpent/
$(ALGO_NAME)_OBJ := serpent-asm.o serpent-sboxes-bitslice-asm.o memxor.o
$(ALGO_NAME)_TEST_BIN := main-serpent-test.o $(CLI_STD) \
nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/serpent_asm_bitslice.mk b/mkfiles/serpent_asm_bitslice.mk
index afd0868..a5956c3 100644
--- a/mkfiles/serpent_asm_bitslice.mk
+++ b/mkfiles/serpent_asm_bitslice.mk
@@ -4,7 +4,7 @@ ALGO_NAME := SERPENT_ASM_BITSLICE
# comment out the following line for removement of serpent from the build process
BLOCK_CIPHERS += $(ALGO_NAME)
-
+$(ALGO_NAME)_DIR := serpent/
$(ALGO_NAME)_OBJ := serpent-sboxes-bitslice-asm.o serpent-asm.o memxor.o
$(ALGO_NAME)_TEST_BIN := main-serpent-test.o $(CLI_STD) \
nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/serpent_asm_fast.mk b/mkfiles/serpent_asm_fast.mk
index 3e3a4fb..d9ff725 100644
--- a/mkfiles/serpent_asm_fast.mk
+++ b/mkfiles/serpent_asm_fast.mk
@@ -4,7 +4,7 @@ ALGO_NAME := SERPENT_ASM_FAST
# comment out the following line for removement of serpent from the build process
BLOCK_CIPHERS += $(ALGO_NAME)
-
+$(ALGO_NAME)_DIR := serpent/
$(ALGO_NAME)_OBJ := serpent-asm.o serpent-sboxes-fast.o memxor.o
$(ALGO_NAME)_TEST_BIN := main-serpent-test.o $(CLI_STD) \
nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/serpent_asm_small.mk b/mkfiles/serpent_asm_small.mk
index 6dde94b..4d6750e 100644
--- a/mkfiles/serpent_asm_small.mk
+++ b/mkfiles/serpent_asm_small.mk
@@ -4,7 +4,7 @@ ALGO_NAME := SERPENT_ASM_SMALL
# comment out the following line for removement of serpent from the build process
BLOCK_CIPHERS += $(ALGO_NAME)
-
+$(ALGO_NAME)_DIR := serpent/
$(ALGO_NAME)_OBJ := serpent-asm.o serpent-sboxes-small.o memxor.o
$(ALGO_NAME)_TEST_BIN := main-serpent-test.o $(CLI_STD) \
nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/serpent_c.mk b/mkfiles/serpent_c.mk
index dd3a69b..f52ced4 100644
--- a/mkfiles/serpent_c.mk
+++ b/mkfiles/serpent_c.mk
@@ -4,7 +4,7 @@ ALGO_NAME := SERPENT_C
# comment out the following line for removement of serpent from the build process
BLOCK_CIPHERS += $(ALGO_NAME)
-
+$(ALGO_NAME)_DIR := serpent/
$(ALGO_NAME)_OBJ := serpent.o serpent-sboxes_c.o memxor.o
$(ALGO_NAME)_TEST_BIN := main-serpent-test.o $(CLI_STD) \
nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/sha1.mk b/mkfiles/sha1.mk
index 45df051..c986aeb 100644
--- a/mkfiles/sha1.mk
+++ b/mkfiles/sha1.mk
@@ -4,6 +4,7 @@ ALGO_NAME := SHA1
# comment out the following line for removement of SHA1 from the build process
HASHES += $(ALGO_NAME)
+$(ALGO_NAME)_DIR := sha1/
$(ALGO_NAME)_OBJ := sha1-asm.o
$(ALGO_NAME)_TEST_BIN := main-sha1-test.o hfal_sha1.o $(CLI_STD) $(HFAL_STD) dump-decl.o dump-asm.o
$(ALGO_NAME)_NESSIE_TEST := "nessie"
diff --git a/mkfiles/sha1_c.mk b/mkfiles/sha1_c.mk
index 4b0b7ae..6998cbc 100644
--- a/mkfiles/sha1_c.mk
+++ b/mkfiles/sha1_c.mk
@@ -4,6 +4,7 @@ ALGO_NAME := SHA1_C
# comment out the following line for removement of SHA1 from the build process
HASHES += $(ALGO_NAME)
+$(ALGO_NAME)_DIR := sha1/
$(ALGO_NAME)_OBJ := sha1.o
$(ALGO_NAME)_TEST_BIN := main-sha1-test.o hfal_sha1.o dump-asm.o dump-decl.o $(CLI_STD) $(HFAL_STD)
$(ALGO_NAME)_NESSIE_TEST := "nessie"
diff --git a/mkfiles/sha256.mk b/mkfiles/sha256.mk
index 93ebdc7..9e56ed6 100644
--- a/mkfiles/sha256.mk
+++ b/mkfiles/sha256.mk
@@ -4,6 +4,7 @@ ALGO_NAME := SHA256
# comment out the following line for removement of SHA256 from the build process
HASHES += $(ALGO_NAME)
+$(ALGO_NAME)_DIR := sha256/
$(ALGO_NAME)_OBJ := sha256-asm.o
$(ALGO_NAME)_TEST_BIN := main-sha256-test.o dump-asm.o dump-decl.o hfal_sha256.o $(CLI_STD) $(HFAL_STD)
diff --git a/mkfiles/sha256_c.mk b/mkfiles/sha256_c.mk
index d52fe88..6c58677 100644
--- a/mkfiles/sha256_c.mk
+++ b/mkfiles/sha256_c.mk
@@ -4,6 +4,7 @@ ALGO_NAME := SHA256_C
# comment out the following line for removement of SHA256 from the build process
HASHES += $(ALGO_NAME)
+$(ALGO_NAME)_DIR := sha256/
$(ALGO_NAME)_OBJ := sha256.o
$(ALGO_NAME)_TEST_BIN := main-sha256-test.o $(CLI_STD) $(HFAL_STD) hfal_sha256.o dump-asm.o dump-decl.o
$(ALGO_NAME)_NESSIE_TEST := "nessie"
diff --git a/mkfiles/shabea.mk b/mkfiles/shabea.mk
index e5f4a68..9c05cb3 100644
--- a/mkfiles/shabea.mk
+++ b/mkfiles/shabea.mk
@@ -4,6 +4,7 @@ ALGO_NAME := SHABEA
# comment out the following line for removement of SHABEA from the build process
BLOCK_CIPHERS += $(ALGO_NAME)
+$(ALGO_NAME)_DIR := shabea/
$(ALGO_NAME)_OBJ := shabea.o sha256-asm.o memxor.o
$(ALGO_NAME)_TEST_BIN := main-shabea-test.o $(CLI_STD) \
nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/shacal1enc.mk b/mkfiles/shacal1enc.mk
index 63be6c8..3003638 100644
--- a/mkfiles/shacal1enc.mk
+++ b/mkfiles/shacal1enc.mk
@@ -1,10 +1,10 @@
-# Makefile for present
+# Makefile for shacal1
ALGO_NAME := SHACAL1ENC
-# comment out the following line for removement of present from the build process
+# comment out the following line for removement of shacal1 from the build process
BLOCK_CIPHERS += $(ALGO_NAME)
-
+$(ALGO_NAME)_DIR := shacal1/
$(ALGO_NAME)_OBJ := shacal1_enc.o sha1-asm.o
$(ALGO_NAME)_TEST_BIN := main-shacal1_enc-test.o $(CLI_STD) \
nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/shacal2enc.mk b/mkfiles/shacal2enc.mk
index e8a91a2..d5f0d9d 100644
--- a/mkfiles/shacal2enc.mk
+++ b/mkfiles/shacal2enc.mk
@@ -1,10 +1,10 @@
-# Makefile for present
+# Makefile for shacal2
ALGO_NAME := SHACAL2ENC
-# comment out the following line for removement of present from the build process
+# comment out the following line for removement of shacal2 from the build process
BLOCK_CIPHERS += $(ALGO_NAME)
-
+$(ALGO_NAME)_DIR := shacal2/
$(ALGO_NAME)_OBJ := shacal2_enc.o sha256-asm.o
$(ALGO_NAME)_TEST_BIN := main-shacal2_enc-test.o $(CLI_STD) \
nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/skipjack.mk b/mkfiles/skipjack.mk
index 56579b2..ef859df 100644
--- a/mkfiles/skipjack.mk
+++ b/mkfiles/skipjack.mk
@@ -4,6 +4,7 @@ ALGO_NAME := SKIPJACK
# comment out the following line for removement of skipjack from the build process
BLOCK_CIPHERS += $(ALGO_NAME)
+$(ALGO_NAME)_DIR := skipjack/
$(ALGO_NAME)_OBJ := skipjack.o
$(ALGO_NAME)_TEST_BIN := main-skipjack-test.o $(CLI_STD) \
nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/tdes.mk b/mkfiles/tdes.mk
index 7bdc41e..09bb96c 100644
--- a/mkfiles/tdes.mk
+++ b/mkfiles/tdes.mk
@@ -1,9 +1,10 @@
-# Makefile for DES
+# Makefile for Triple-DES
ALGO_NAME := TDES
-# comment out the following line for removement of DES from the build process
+# comment out the following line for removement of Triple-DES from the build process
BLOCK_CIPHERS += $(ALGO_NAME)
+$(ALGO_NAME)_DIR := des/
$(ALGO_NAME)_OBJ := des.o
$(ALGO_NAME)_TEST_BIN := main-tdes-test.o $(CLI_STD) \
nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/trivium.mk b/mkfiles/trivium.mk
index bbfba3b..a668ee7 100644
--- a/mkfiles/trivium.mk
+++ b/mkfiles/trivium.mk
@@ -4,6 +4,7 @@ ALGO_NAME := TRIVIUM
# comment out the following line for removement of Trivium from the build process
STREAM_CIPHERS += $(ALGO_NAME)
+$(ALGO_NAME)_DIR := trivium/
$(ALGO_NAME)_OBJ := trivium.o
$(ALGO_NAME)_TEST_BIN := main-trivium-test.o $(CLI_STD) \
nessie_stream_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/xtea.mk b/mkfiles/xtea.mk
index 68adcfb..f2d1169 100644
--- a/mkfiles/xtea.mk
+++ b/mkfiles/xtea.mk
@@ -4,6 +4,7 @@ ALGO_NAME := XTEA
# comment out the following line for removement of XTEA from the build process
BLOCK_CIPHERS += $(ALGO_NAME)
+$(ALGO_NAME)_DIR := xtea/
$(ALGO_NAME)_OBJ := xtea-asm.o
$(ALGO_NAME)_TEST_BIN := main-xtea-test.o $(CLI_STD) \
nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/xtea_c.mk b/mkfiles/xtea_c.mk
index dec8f8c..5bbd680 100644
--- a/mkfiles/xtea_c.mk
+++ b/mkfiles/xtea_c.mk
@@ -4,6 +4,7 @@ ALGO_NAME := XTEA_C
# comment out the following line for removement of XTEA from the build process
BLOCK_CIPHERS += $(ALGO_NAME)
+$(ALGO_NAME)_DIR := xtea/
$(ALGO_NAME)_OBJ := xtea.o
$(ALGO_NAME)_TEST_BIN := main-xtea-test.o $(CLI_STD) \
nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/present.c b/present/present.c
similarity index 100%
rename from present.c
rename to present/present.c
diff --git a/present.h b/present/present.h
similarity index 100%
rename from present.h
rename to present/present.h
diff --git a/rc5.c b/rc5/rc5.c
similarity index 100%
rename from rc5.c
rename to rc5/rc5.c
diff --git a/rc5.h b/rc5/rc5.h
similarity index 100%
rename from rc5.h
rename to rc5/rc5.h
diff --git a/rc6.c b/rc6/rc6.c
similarity index 100%
rename from rc6.c
rename to rc6/rc6.c
diff --git a/rc6.h b/rc6/rc6.h
similarity index 100%
rename from rc6.h
rename to rc6/rc6.h
diff --git a/seed-asm.S b/seed/seed-asm.S
similarity index 100%
rename from seed-asm.S
rename to seed/seed-asm.S
diff --git a/seed.h b/seed/seed.h
similarity index 100%
rename from seed.h
rename to seed/seed.h
diff --git a/seed_C.c b/seed/seed_C.c
similarity index 100%
rename from seed_C.c
rename to seed/seed_C.c
diff --git a/seed_sbox.h b/seed/seed_sbox.h
similarity index 100%
rename from seed_sbox.h
rename to seed/seed_sbox.h
diff --git a/serpent/memxor.S b/serpent/memxor.S
new file mode 100644
index 0000000..a32058b
--- /dev/null
+++ b/serpent/memxor.S
@@ -0,0 +1,66 @@
+/* memxor.S */
+/*
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+ * File: memxor.S
+ * Author: Daniel Otte
+ * Date: 2008-08-07
+ * License: GPLv3 or later
+ * Description: memxor, XORing one block into another
+ *
+ */
+
+/*
+ * void memxor(void* dest, const void* src, uint16_t n);
+ */
+ /*
+ * param dest is passed in r24:r25
+ * param src is passed in r22:r23
+ * param n is passed in r20:r21
+ */
+.global memxor
+memxor:
+ movw r30, r24
+ movw r26, r22
+ movw r24, r20
+ adiw r24, 0
+ breq 2f
+1:
+ ld r20, X+
+ ld r21, Z
+ eor r20, r21
+ st Z+, r20
+ sbiw r24, 1
+ brne 1b
+2:
+ ret
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/serpent/memxor.h b/serpent/memxor.h
new file mode 100644
index 0000000..a62a616
--- /dev/null
+++ b/serpent/memxor.h
@@ -0,0 +1,7 @@
+#ifndef MEMXOR_H_
+#define MEMXOR_H_
+#include <stdint.h>
+
+void memxor(void* dest, const void* src, uint16_t n);
+
+#endif
diff --git a/serpent-asm.S b/serpent/serpent-asm.S
similarity index 100%
rename from serpent-asm.S
rename to serpent/serpent-asm.S
diff --git a/serpent-sboxes-bitslice-asm.S b/serpent/serpent-sboxes-bitslice-asm.S
similarity index 100%
rename from serpent-sboxes-bitslice-asm.S
rename to serpent/serpent-sboxes-bitslice-asm.S
diff --git a/serpent-sboxes-bitslice.c b/serpent/serpent-sboxes-bitslice.c
similarity index 100%
rename from serpent-sboxes-bitslice.c
rename to serpent/serpent-sboxes-bitslice.c
diff --git a/serpent-sboxes-fast.S b/serpent/serpent-sboxes-fast.S
similarity index 100%
rename from serpent-sboxes-fast.S
rename to serpent/serpent-sboxes-fast.S
diff --git a/serpent-sboxes-small.S b/serpent/serpent-sboxes-small.S
similarity index 100%
rename from serpent-sboxes-small.S
rename to serpent/serpent-sboxes-small.S
diff --git a/serpent-sboxes.h b/serpent/serpent-sboxes.h
similarity index 100%
rename from serpent-sboxes.h
rename to serpent/serpent-sboxes.h
diff --git a/serpent-sboxes_c.c b/serpent/serpent-sboxes_c.c
similarity index 100%
rename from serpent-sboxes_c.c
rename to serpent/serpent-sboxes_c.c
diff --git a/serpent.c b/serpent/serpent.c
similarity index 100%
rename from serpent.c
rename to serpent/serpent.c
diff --git a/serpent.h b/serpent/serpent.h
similarity index 100%
rename from serpent.h
rename to serpent/serpent.h
diff --git a/sha1/sha1-asm.S b/sha1/sha1-asm.S
new file mode 100644
index 0000000..f571685
--- /dev/null
+++ b/sha1/sha1-asm.S
@@ -0,0 +1,886 @@
+/* sha1-asm.S */
+/*
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * Author: Daniel Otte
+ *
+ * License: GPLv3 or later
+*/
+; SHA1 implementation in assembler for AVR
+SHA1_BLOCK_BITS = 512
+SHA1_HASH_BITS = 160
+
+.macro precall
+ /* push r18 - r27, r30 - r31*/
+ push r0
+ push r1
+ push r18
+ push r19
+ push r20
+ push r21
+ push r22
+ push r23
+ push r24
+ push r25
+ push r26
+ push r27
+ push r30
+ push r31
+ clr r1
+.endm
+
+.macro postcall
+ pop r31
+ pop r30
+ pop r27
+ pop r26
+ pop r25
+ pop r24
+ pop r23
+ pop r22
+ pop r21
+ pop r20
+ pop r19
+ pop r18
+ pop r1
+ pop r0
+.endm
+
+
+.macro hexdump length
+ push r27
+ push r26
+ ldi r25, '\r'
+ mov r24, r25
+ call uart_putc
+ ldi r25, '\n'
+ mov r24, r25
+ call uart_putc
+ pop r26
+ pop r27
+ movw r24, r26
+.if \length > 16
+ ldi r22, lo8(16)
+ ldi r23, hi8(16)
+ push r27
+ push r26
+ call uart_hexdump
+ pop r26
+ pop r27
+ adiw r26, 16
+ hexdump \length-16
+.else
+ ldi r22, lo8(\length)
+ ldi r23, hi8(\length)
+ call uart_hexdump
+.endif
+.endm
+
+.macro delay
+/*
+ push r0
+ push r1
+ clr r0
+1: clr r1
+2: dec r1
+ brne 2b
+ dec r0
+ brne 1b
+ pop r1
+ pop r0 // */
+.endm
+
+/* X points to Block */
+.macro dbg_hexdump length
+/*
+ precall
+ hexdump \length
+ postcall
+ // */
+.endm
+
+
+
+.section .text
+
+SPL = 0x3D
+SPH = 0x3E
+SREG = 0x3F
+
+
+;
+;sha1_ctx_t is:
+;
+; [h0][h1][h2][h3][h4][length]
+; hn is 32 bit large, length is 64 bit large
+
+;###########################################################
+
+.global sha1_ctx2hash
+; === sha1_ctx2hash ===
+; this function converts a state into a normal hash (bytestring)
+; param1: the 16-bit destination pointer
+; given in r25,r24 (r25 is most significant)
+; param2: the 16-bit pointer to sha1_ctx structure
+; given in r23,r22
+sha1_ctx2hash:
+ movw r26, r22
+ movw r30, r24
+ ldi r21, 5
+ sbiw r26, 4
+1:
+ ldi r20, 4
+ adiw r26, 8
+2:
+ ld r0, -X
+ st Z+, r0
+ dec r20
+ brne 2b
+
+ dec r21
+ brne 1b
+
+ ret
+
+;###########################################################
+
+.global sha1
+; === sha1 ===
+; this function calculates SHA-1 hashes from messages in RAM
+; param1: the 16-bit hash destination pointer
+; given in r25,r24 (r25 is most significant)
+; param2: the 16-bit pointer to message
+; given in r23,r22
+; param3: 32-bit length value (length of message in bits)
+; given in r21,r20,r19,r18
+sha1:
+sha1_prolog:
+ push r8
+ push r9
+ push r10
+ push r11
+ push r12
+ push r13
+ push r16
+ push r17
+ in r16, SPL
+ in r17, SPH
+ subi r16, 5*4+8
+ sbci r17, 0
+ in r0, SREG
+ cli
+ out SPL, r16
+ out SPH, r17
+ out SREG, r0
+
+ push r25
+ push r24
+ inc r16
+ adc r17, r1
+
+ movw r8, r18 /* backup of length*/
+ movw r10, r20
+
+	movw r12, r22	/* backup of msg-ptr */
+
+ movw r24, r16
+ rcall sha1_init
+ /* if length >= 512 */
+1:
+ tst r11
+ brne 4f
+ tst r10
+ brne 4f
+ mov r19, r9
+ cpi r19, 0x02
+ brlo 4f
+
+ movw r24, r16
+ movw r22, r12
+ rcall sha1_nextBlock
+ ldi r19, 0x64
+ add r22, r19
+ adc r23, r1
+ /* length -= 512 */
+ ldi r19, 0x02
+ sub r9, r19
+ sbc r10, r1
+ sbc r11, r1
+ rjmp 1b
+
+4:
+ movw r24, r16
+ movw r22, r12
+ movw r20, r8
+ rcall sha1_lastBlock
+
+ pop r24
+ pop r25
+ movw r22, r16
+ rcall sha1_ctx2hash
+
+sha1_epilog:
+ in r30, SPL
+ in r31, SPH
+ adiw r30, 5*4+8
+ in r0, SREG
+ cli
+ out SPL, r30
+ out SPH, r31
+ out SREG, r0
+ pop r17
+ pop r16
+ pop r13
+ pop r12
+ pop r11
+ pop r10
+ pop r9
+ pop r8
+ ret
+
+;###########################################################
+
+
+; block MUST NOT be larger than 64 bytes
+
+.global sha1_lastBlock
+; === sha1_lastBlock ===
+; this function does padding & Co. for calculating SHA-1 hashes
+; param1: the 16-bit pointer to sha1_ctx structure
+; given in r25,r24 (r25 is most significant)
+; param2: an 16-bit pointer to 64 byte block to hash
+; given in r23,r22
+; param3: a 16-bit integer specifying length of block in bits
+; given in r21,r20
+sha1_lastBlock_localSpace = (SHA1_BLOCK_BITS/8+1)
+
+
+sha1_lastBlock:
+ cpi r21, 0x02
+ brlo sha1_lastBlock_prolog
+ push r25
+ push r24
+ push r23
+ push r22
+ push r21
+ push r20
+ rcall sha1_nextBlock
+ pop r20
+ pop r21
+ pop r22
+ pop r23
+ pop r24
+ pop r25
+ subi r21, 2
+ subi r23, -2
+ rjmp sha1_lastBlock
+sha1_lastBlock_prolog:
+ /* allocate space on stack */
+ in r30, SPL
+ in r31, SPH
+ in r1, SREG
+ subi r30, lo8(64)
+ sbci r31, hi8(64) /* ??? */
+ cli
+ out SPL, r30
+ out SPH, r31
+ out SREG,r1
+
+ adiw r30, 1 /* SP points to next free byte on stack */
+ mov r18, r20 /* r20 = LSB(length) */
+ lsr r18
+ lsr r18
+ lsr r18
+ bst r21, 0 /* may be we should explain this ... */
+ bld r18, 5 /* now: r18 == length/8 (aka. length in bytes) */
+
+
+ movw r26, r22 /* X points to begin of msg */
+ tst r18
+ breq sha1_lastBlock_post_copy
+ mov r1, r18
+sha1_lastBlock_copy_loop:
+ ld r0, X+
+ st Z+, r0
+ dec r1
+ brne sha1_lastBlock_copy_loop
+sha1_lastBlock_post_copy:
+sha1_lastBlock_insert_stuffing_bit:
+ ldi r19, 0x80
+ mov r0,r19
+ ldi r19, 0x07
+ and r19, r20 /* if we are in bitmode */
+ breq 2f /* no bitmode */
+1:
+ lsr r0
+ dec r19
+ brne 1b
+ ld r19, X
+/* maybe we should do some ANDing here, just for safety */
+ or r0, r19
+2:
+ st Z+, r0
+ inc r18
+
+/* checking stuff here */
+ cpi r18, 64-8+1
+ brsh 0f
+ rjmp sha1_lastBlock_insert_zeros
+0:
+	/* block has no room left for the 8-byte length field */
+ /* first we have to fill it up with zeros */
+ ldi r19, 64
+ sub r19, r18
+ breq 2f
+1:
+ st Z+, r1
+ dec r19
+ brne 1b
+2:
+ sbiw r30, 63
+ sbiw r30, 1
+ movw r22, r30
+
+ push r31
+ push r30
+ push r25
+ push r24
+ push r21
+ push r20
+ rcall sha1_nextBlock
+ pop r20
+ pop r21
+ pop r24
+ pop r25
+ pop r30
+ pop r31
+
+ /* now we should subtract 512 from length */
+ movw r26, r24
+ adiw r26, 4*5+1 /* we can skip the lowest byte */
+ ld r19, X
+ subi r19, hi8(512)
+ st X+, r19
+ ldi r18, 6
+1:
+ ld r19, X
+ sbci r19, 0
+ st X+, r19
+ dec r18
+ brne 1b
+
+; clr r18 /* not neccessary ;-) */
+ /* reset Z pointer to begin of block */
+
+sha1_lastBlock_insert_zeros:
+ ldi r19, 64-8
+ sub r19, r18
+ breq sha1_lastBlock_insert_length
+ clr r1
+1:
+ st Z+, r1 /* r1 is still zero */
+ dec r19
+ brne 1b
+
+; rjmp sha1_lastBlock_epilog
+sha1_lastBlock_insert_length:
+ movw r26, r24 /* X points to state */
+ adiw r26, 5*4 /* X points to (state.length) */
+ adiw r30, 8 /* Z points one after the last byte of block */
+ ld r0, X+
+ add r0, r20
+ st -Z, r0
+ ld r0, X+
+ adc r0, r21
+ st -Z, r0
+ ldi r19, 6
+1:
+ ld r0, X+
+ adc r0, r1
+ st -Z, r0
+ dec r19
+ brne 1b
+
+ sbiw r30, 64-8
+ movw r22, r30
+ rcall sha1_nextBlock
+
+sha1_lastBlock_epilog:
+ in r30, SPL
+ in r31, SPH
+ in r1, SREG
+ adiw r30, 63 ; lo8(64)
+ adiw r30, 1 ; hi8(64)
+ cli
+ out SPL, r30
+ out SPH, r31
+ out SREG,r1
+ clr r1
+ clr r0
+ ret
+
+/**/
+;###########################################################
+
+.global sha1_nextBlock
+; === sha1_nextBlock ===
+; this is the core function for calculating SHA-1 hashes
+; param1: the 16-bit pointer to sha1_ctx structure
+; given in r25,r24 (r25 is most significant)
+; param2: an 16-bit pointer to 64 byte block to hash
+; given in r23,r22
+sha1_nextBlock_localSpace = (16+5+1)*4 ; 16 32-bit values for w array and 5 32-bit values for a array (total 84 byte)
+
+xtmp = 0
+xNULL = 1
+W1 = 10
+W2 = 11
+T1 = 12
+T2 = 13
+T3 = 14
+T4 = 15
+LoopC = 16
+S = 17
+tmp1 = 18
+tmp2 = 19
+tmp3 = 20
+tmp4 = 21
+F1 = 22
+F2 = 23
+F3 = 24
+F4 = 25
+
+/* byteorder: high number <--> high significance */
+sha1_nextBlock:
+ ; initial, let's make some space ready for local vars
+ /* replace push & pop by mem ops? */
+ push r10
+ push r11
+ push r12
+ push r13
+ push r14
+ push r15
+ push r16
+ push r17
+ push r28
+ push r29
+ in r20, SPL
+ in r21, SPH
+ movw r18, r20 ;backup SP
+; movw r26, r20 ; X points to free space on stack /* maybe removeable? */
+ movw r30, r22 ; Z points to message
+ subi r20, lo8(sha1_nextBlock_localSpace) ;sbiw can do only up to 63
+ sbci r21, hi8(sha1_nextBlock_localSpace)
+ movw r26, r20 ; X points to free space on stack
+ in r0, SREG
+ cli ; we want to be uninterrupted while updating SP
+ out SPL, r20
+ out SPH, r21
+ out SREG, r0
+
+ push r18
+ push r19 /* push old SP on new stack */
+ push r24
+ push r25 /* param1 will be needed later */
+
+ /* load a[] with state */
+ movw 28, r24 /* load pointer to state in Y */
+ adiw r26, 1 ; X++
+
+ ldi LoopC, 5*4
+1: ld tmp1, Y+
+ st X+, tmp1
+ dec LoopC
+ brne 1b
+
+ movw W1, r26 /* save pointer to w[0] */
+ /* load w[] with endian fixed message */
+ /* we might also use the changeendian32() function at bottom */
+	movw r30, r22	/* mv param2 (pointer to msg) to Z */
+ ldi LoopC, 16
+1:
+ ldd tmp1, Z+3
+ st X+, tmp1
+ ldd tmp1, Z+2
+ st X+, tmp1
+ ldd tmp1, Z+1
+ st X+, tmp1
+ ld tmp1, Z
+ st X+, tmp1
+ adiw r30, 4
+ dec LoopC
+ brne 1b
+
+ ;clr LoopC /* LoopC is named t in FIPS 180-2 */
+ clr xtmp
+sha1_nextBlock_mainloop:
+ mov S, LoopC
+ lsl S
+ lsl S
+ andi S, 0x3C /* S is a bytepointer so *4 */
+ /* load w[s] */
+ movw r26, W1
+ add r26, S /* X points at w[s] */
+ adc r27, xNULL
+ ld T1, X+
+ ld T2, X+
+ ld T3, X+
+ ld T4, X+
+
+ /**/
+ push r26
+ push r27
+ push T4
+ push T3
+ push T2
+ push T1
+ in r26, SPL
+ in r27, SPH
+ adiw r26, 1
+ dbg_hexdump 4
+ pop T1
+ pop T2
+ pop T3
+ pop T4
+ pop r27
+ pop r26
+ /**/
+
+ cpi LoopC, 16
+ brlt sha1_nextBlock_mainloop_core
+ /* update w[s] */
+ ldi tmp1, 2*4
+ rcall 1f
+ ldi tmp1, 8*4
+ rcall 1f
+ ldi tmp1, 13*4
+ rcall 1f
+ rjmp 2f
+1: /* this might be "outsourced" to save the jump above */
+ add tmp1, S
+ andi tmp1, 0x3f
+ movw r26, W1
+ add r26, tmp1
+ adc r27, xNULL
+ ld tmp2, X+
+ eor T1, tmp2
+ ld tmp2, X+
+ eor T2, tmp2
+ ld tmp2, X+
+ eor T3, tmp2
+ ld tmp2, X+
+ eor T4, tmp2
+ ret
+2: /* now we just hav to do a ROTL(T) and save T back */
+ mov tmp2, T4
+ rol tmp2
+ rol T1
+ rol T2
+ rol T3
+ rol T4
+ movw r26, W1
+ add r26, S
+ adc r27, xNULL
+ st X+, T1
+ st X+, T2
+ st X+, T3
+ st X+, T4
+
+sha1_nextBlock_mainloop_core: /* ther core function; T=ROTL5(a) ....*/
+ /* T already contains w[s] */
+ movw r26, W1
+ sbiw r26, 4*1 /* X points at a[4] aka e */
+ ld tmp1, X+
+ add T1, tmp1
+ ld tmp1, X+
+ adc T2, tmp1
+ ld tmp1, X+
+ adc T3, tmp1
+ ld tmp1, X+
+ adc T4, tmp1 /* T = w[s]+e */
+ sbiw r26, 4*5 /* X points at a[0] aka a */
+ ld F1, X+
+ ld F2, X+
+ ld F3, X+
+ ld F4, X+
+ mov tmp1, F4 /* X points at a[1] aka b */
+ ldi tmp2, 5
+1:
+ rol tmp1
+ rol F1
+ rol F2
+ rol F3
+ rol F4
+ dec tmp2
+ brne 1b
+
+ add T1, F1
+ adc T2, F2
+ adc T3, F3
+ adc T4, F4 /* T = ROTL(a,5) + e + w[s] */
+
+	/* now select the round-dependent constant and function */
+ ldi r30, lo8(sha1_nextBlock_xTable)
+ ldi r31, hi8(sha1_nextBlock_xTable)
+ add r30, xtmp
+ adc r31, xNULL
+ lpm tmp1, Z
+ cp tmp1, LoopC
+ brne 1f
+ inc xtmp
+1: ldi r30, lo8(sha1_nextBlock_KTable)
+ ldi r31, hi8(sha1_nextBlock_KTable)
+ lsl xtmp
+ lsl xtmp
+ add r30, xtmp
+ adc r31, xNULL
+ lsr xtmp
+ lsr xtmp
+
+ lpm tmp1, Z+
+ add T1, tmp1
+ lpm tmp1, Z+
+ adc T2, tmp1
+ lpm tmp1, Z+
+ adc T3, tmp1
+ lpm tmp1, Z+
+ adc T4, tmp1
+ /* T = ROTL(a,5) + e + kt + w[s] */
+
+ /* Z-4 is just pointing to kt ... */
+ movw r28, r26 /* copy X in Y */
+	adiw r30, 3*4 /* now Z points to the right location in our jump-vector-table */
+ lsr r31
+ ror r30
+
+ icall
+ mov F1, tmp1
+ icall
+ mov F2, tmp1
+ icall
+ mov F3, tmp1
+ icall
+
+ add T1, F1
+ adc T2, F2
+ adc T3, F3
+ adc T4, tmp1 /* T = ROTL5(a) + f_t(b,c,d) + e + k_t + w[s] */
+ /* X points still at a[1] aka b, Y points at a[2] aka c */
+ /* update a[] */
+sha1_nextBlock_update_a:
+ /*first we move all vars in a[] "one up" e=d, d=c, c=b, b=a*/
+ //adiw r28, 3*4 /* Y should point at a[4] aka e */
+ movw r28, W1
+ sbiw r28, 4
+
+ ldi tmp2, 4*4
+1:
+ ld tmp1, -Y
+ std Y+4, tmp1
+ dec tmp2
+ brne 1b
+ /* Y points at a[0] aka a*/
+
+ movw r28, W1
+ sbiw r28, 5*4
+ /* store T in a[0] aka a */
+ st Y+, T1
+ st Y+, T2
+ st Y+, T3
+ st Y+, T4
+ /* Y points at a[1] aka b*/
+
+ /* rotate c */
+ ldd T1, Y+1*4
+ ldd T2, Y+1*4+1
+ ldd T3, Y+1*4+2
+ ldd T4, Y+1*4+3
+ mov tmp1, T1
+ ldi tmp2, 2
+1: ror tmp1
+ ror T4
+ ror T3
+ ror T2
+ ror T1
+ dec tmp2
+ brne 1b
+ std Y+1*4+0, T1
+ std Y+1*4+1, T2
+ std Y+1*4+2, T3
+ std Y+1*4+3, T4
+
+ push r27
+ push r26
+ movw r26, W1
+ sbiw r26, 4*5
+ dbg_hexdump 4*5
+ pop r26
+ pop r27
+
+ inc LoopC
+ cpi LoopC, 80
+ brge 1f
+ rjmp sha1_nextBlock_mainloop
+/**************************************/
+1:
+ /* littel patch */
+ sbiw r28, 4
+
+/* add a[] to state and inc length */
+ pop r27
+ pop r26 /* now X points to state (and Y still at a[0]) */
+ ldi tmp4, 5
+1: clc
+ ldi tmp3, 4
+2: ld tmp1, X
+ ld tmp2, Y+
+ adc tmp1, tmp2
+ st X+, tmp1
+ dec tmp3
+ brne 2b
+ dec tmp4
+ brne 1b
+
+ /* now length += 512 */
+ adiw r26, 1 /* we skip the least significant byte */
+ ld tmp1, X
+ ldi tmp2, hi8(512) /* 2 */
+ add tmp1, tmp2
+ st X+, tmp1
+ ldi tmp2, 6
+1:
+ ld tmp1, X
+ adc tmp1, xNULL
+ st X+, tmp1
+ dec tmp2
+ brne 1b
+
+; EPILOG
+sha1_nextBlock_epilog:
+/* now we should clean up the stack */
+ pop r21
+ pop r20
+ in r0, SREG
+ cli ; we want to be uninterrupted while updating SP
+ out SPL, r20
+ out SPH, r21
+ out SREG, r0
+
+ clr r1
+ pop r29
+ pop r28
+ pop r17
+ pop r16
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop r11
+ pop r10
+ ret
+
+sha1_nextBlock_xTable:
+.byte 20,40,60,0
+sha1_nextBlock_KTable:
+.int 0x5a827999
+.int 0x6ed9eba1
+.int 0x8f1bbcdc
+.int 0xca62c1d6
+sha1_nextBlock_JumpTable:
+rjmp sha1_nextBlock_Ch
+ nop
+rjmp sha1_nextBlock_Parity
+ nop
+rjmp sha1_nextBlock_Maj
+ nop
+rjmp sha1_nextBlock_Parity
+
+ /* X and Y still point at a[1] aka b ; return value in tmp1 */
+sha1_nextBlock_Ch:
+ ld tmp1, Y+
+ mov tmp2, tmp1
+ com tmp2
+ ldd tmp3, Y+3 /* load from c */
+ and tmp1, tmp3
+ ldd tmp3, Y+7 /* load from d */
+ and tmp2, tmp3
+ eor tmp1, tmp2
+ ret
+
+sha1_nextBlock_Maj:
+ ld tmp1, Y+
+ mov tmp2, tmp1
+ ldd tmp3, Y+3 /* load from c */
+ and tmp1, tmp3
+ ldd tmp4, Y+7 /* load from d */
+ and tmp2, tmp4
+ eor tmp1, tmp2
+ and tmp3, tmp4
+ eor tmp1, tmp3
+ ret
+
+sha1_nextBlock_Parity:
+ ld tmp1, Y+
+ ldd tmp2, Y+3 /* load from c */
+ eor tmp1, tmp2
+ ldd tmp2, Y+7 /* load from d */
+ eor tmp1, tmp2
+ ret
+/*
+ch_str: .asciz "\r\nCh"
+maj_str: .asciz "\r\nMaj"
+parity_str: .asciz "\r\nParity"
+*/
+;###########################################################
+
+.global sha1_init
+;void sha1_init(sha1_ctx_t *state){
+; DEBUG_S("\r\nSHA1_INIT");
+; state->h[0] = 0x67452301;
+; state->h[1] = 0xefcdab89;
+; state->h[2] = 0x98badcfe;
+; state->h[3] = 0x10325476;
+; state->h[4] = 0xc3d2e1f0;
+; state->length = 0;
+;}
+; param1: (Func3,r24) 16-bit pointer to sha1_ctx_t struct in ram
+; modifys: Z(r30,r31), Func1, r22
+sha1_init:
+ movw r26, r24 ; (24,25) --> (26,27) load X with param1
+ ldi r30, lo8((sha1_init_vector))
+ ldi r31, hi8((sha1_init_vector))
+ ldi r22, 5*4 /* bytes to copy */
+sha1_init_vloop:
+ lpm r23, Z+
+ st X+, r23
+ dec r22
+ brne sha1_init_vloop
+ ldi r22, 8
+sha1_init_lloop:
+ st X+, r1
+ dec r22
+ brne sha1_init_lloop
+ ret
+
+sha1_init_vector:
+.int 0x67452301;
+.int 0xefcdab89;
+.int 0x98badcfe;
+.int 0x10325476;
+.int 0xc3d2e1f0;
+
diff --git a/sha1.c b/sha1/sha1.c
similarity index 100%
rename from sha1.c
rename to sha1/sha1.c
diff --git a/sha1/sha1.h b/sha1/sha1.h
new file mode 100644
index 0000000..6675d20
--- /dev/null
+++ b/sha1/sha1.h
@@ -0,0 +1,117 @@
+/* sha1.h */
+/*
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file sha1.h
+ * \author Daniel Otte
+ * \email daniel.otte@rub.de
+ * \date 2006-10-08
+ * \license GPLv3 or later
+ * \brief SHA-1 declaration.
+ * \ingroup SHA-1
+ *
+ */
+
+#ifndef SHA1_H_
+#define SHA1_H_
+
+#include <stdint.h>
+/** \def SHA1_HASH_BITS
+ * defines the size of a SHA-1 hash in bits
+ */
+
+/** \def SHA1_HASH_BYTES
+ * defines the size of a SHA-1 hash in bytes
+ */
+
+/** \def SHA1_BLOCK_BITS
+ * defines the size of a SHA-1 input block in bits
+ */
+
+/** \def SHA1_BLOCK_BYTES
+ * defines the size of a SHA-1 input block in bytes
+ */
+#define SHA1_HASH_BITS 160
+#define SHA1_HASH_BYTES (SHA1_HASH_BITS/8)
+#define SHA1_BLOCK_BITS 512
+#define SHA1_BLOCK_BYTES (SHA1_BLOCK_BITS/8)
+
+/** \typedef sha1_ctx_t
+ * \brief SHA-1 context type
+ *
+ * A variable of this type may hold the state of a SHA-1 hashing process
+ */
+typedef struct {
+ uint32_t h[5];
+ uint64_t length;
+} sha1_ctx_t;
+
+/** \typedef sha1_hash_t
+ * \brief hash value type
+ * A variable of this type may hold a SHA-1 hash value
+ */
+typedef uint8_t sha1_hash_t[SHA1_HASH_BITS/8];
+
+/** \fn sha1_init(sha1_ctx_t *state)
+ * \brief initializes a SHA-1 context
+ * This function sets a ::sha1_ctx_t variable to the initialization vector
+ * for SHA-1 hashing.
+ * \param state pointer to the SHA-1 context variable
+ */
+void sha1_init(sha1_ctx_t *state);
+
+/** \fn sha1_nextBlock(sha1_ctx_t *state, const void* block)
+ * \brief process one input block
+ * This function processes one input block and updates the hash context
+ * accordingly
+ * \param state pointer to the state variable to update
+ * \param block pointer to the message block to process
+ */
+void sha1_nextBlock (sha1_ctx_t *state, const void* block);
+
+/** \fn sha1_lastBlock(sha1_ctx_t *state, const void* block, uint16_t length_b)
+ * \brief processes the given block and finalizes the context
+ * This function processes the last block in a SHA-1 hashing process.
+ * The block should have a maximum length of a single input block.
+ * \param state pointer to the state variable to update and finalize
+ * \param block pointer to the message block to process
+ * \param length_b length of the message block in bits
+ */
+void sha1_lastBlock (sha1_ctx_t *state, const void* block, uint16_t length_b);
+
+/** \fn sha1_ctx2hash(sha1_hash_t *dest, sha1_ctx_t *state)
+ * \brief convert a state variable into an actual hash value
+ * Writes the hash value corresponding to the state to the memory pointed by dest.
+ * \param dest pointer to the hash value destination
+ * \param state pointer to the hash context
+ */
+void sha1_ctx2hash (sha1_hash_t *dest, sha1_ctx_t *state);
+
+/** \fn sha1(sha1_hash_t *dest, const void* msg, uint32_t length_b)
+ * \brief hashing a message which in located entirely in RAM
+ * This function automatically hashes a message which is entirely in RAM with
+ * the SHA-1 hashing algorithm.
+ * \param dest pointer to the hash value destination
+ * \param msg pointer to the message which should be hashed
+ * \param length_b length of the message in bits
+ */
+void sha1(sha1_hash_t *dest, const void* msg, uint32_t length_b);
+
+
+
+#endif /*SHA1_H_*/
diff --git a/sha256/sha256-asm.S b/sha256/sha256-asm.S
new file mode 100644
index 0000000..d9eb6b6
--- /dev/null
+++ b/sha256/sha256-asm.S
@@ -0,0 +1,1042 @@
+/* sha256-asm.S */
+/*
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * Author: Daniel Otte
+ *
+ * License: GPLv3 or later
+*/
+; sha-256 implementation in assembler
+SHA256_BLOCK_BITS = 512
+SHA256_HASH_BITS = 256
+
+.macro precall
+	/* push r0, r1, r18 - r27, r30 - r31 (save all call-clobbered regs for debug output) */
+	push r0
+	push r1
+	push r18
+	push r19
+	push r20
+	push r21
+	push r22
+	push r23
+	push r24
+	push r25
+	push r26
+	push r27
+	push r30
+	push r31
+	clr r1 ; avr-gcc ABI: called C code expects r1 == 0
+.endm
+
+.macro postcall
+	pop r31
+	pop r30
+	pop r27
+	pop r26
+	pop r25
+	pop r24
+	pop r23
+	pop r22
+	pop r21
+	pop r20
+	pop r19
+	pop r18
+	pop r1
+	pop r0
+.endm
+
+
+.macro hexdump length
+	push r27 ; debug helper: print CRLF, then dump \length bytes at X via uart_hexdump
+	push r26
+	ldi r25, '\r'
+	mov r24, r25
+	call uart_putc
+	ldi r25, '\n'
+	mov r24, r25
+	call uart_putc
+	pop r26
+	pop r27
+	movw r24, r26
+.if \length > 16
+	ldi r22, lo8(16)
+	ldi r23, hi8(16)
+	push r27
+	push r26
+	call uart_hexdump
+	pop r26
+	pop r27
+	adiw r26, 16
+	hexdump \length-16 ; recurse for the remaining bytes (16 per chunk)
+.else
+	ldi r22, lo8(\length)
+	ldi r23, hi8(\length)
+	call uart_hexdump
+.endif
+.endm
+
+/* X points to Block */
+.macro dbg_hexdump length
+	precall ; register-preserving wrapper around hexdump
+	hexdump \length
+	postcall
+.endm
+
+.section .text
+
+SPL = 0x3D
+SPH = 0x3E
+SREG = 0x3F
+
+
+;
+;sha256_ctx_t is:
+;
+; [h0][h1][h2][h3][h4][h5][h6][h7][length]
+; hn is 32 bit large, length is 64 bit large
+
+;###########################################################
+
+.global sha256_ctx2hash
+; === sha256_ctx2hash ===
+; this function converts a state into a normal hash (bytestring)
+; param1: the 16-bit destination pointer
+;	given in r25,r24 (r25 is most significant)
+; param2: the 16-bit pointer to sha256_ctx structure
+;	given in r23,r22
+sha256_ctx2hash:
+	movw r26, r22 ; X = ctx (eight 32-bit words, little-endian in RAM)
+	movw r30, r24 ; Z = dest
+	ldi r21, 8 ; 8 state words h0..h7
+	sbiw r26, 4 ; pre-bias X so the first "adiw 8" lands one past h0
+1:
+	ldi r20, 4 ; 4 bytes per word
+	adiw r26, 8 ; X = one past the end of the current word
+2:
+	ld r0, -X ; read word bytes high-to-low ...
+	st Z+, r0 ; ... emitting the big-endian digest
+	dec r20
+	brne 2b
+
+	dec r21
+	brne 1b
+
+	ret
+
+;###########################################################
+
+.global sha256
+; === sha256 ===
+; this function calculates SHA-256 hashes from messages in RAM
+; param1: the 16-bit hash destination pointer
+;	given in r25,r24 (r25 is most significant)
+; param2: the 16-bit pointer to message
+;	given in r23,r22
+; param3: 32-bit length value (length of message in bits)
+;	given in r21,r20,r19,r18
+sha256:
+sha256_prolog:
+	push r8 ; save the call-saved registers we use
+	push r9
+	push r10
+	push r11
+	push r12
+	push r13
+	push r16
+	push r17
+	in r16, SPL ; allocate a sha256_ctx_t (8*4 byte state + 8 byte length) on the stack
+	in r17, SPH
+	subi r16, 8*4+8
+	sbci r17, 0
+	in r0, SREG
+	cli ; SP update must not be interrupted
+	out SPL, r16
+	out SPH, r17
+	out SREG, r0
+
+	push r25 ; save the hash destination for the end
+	push r24
+	subi r16, -1 ; r17:r16 = SP+1 = ctx address; proper 16-bit increment
+	sbci r17, -1 ;  (INC does not set C, so "inc r16 / adc r17, r1" lost the page wrap)
+
+	movw r8, r18 /* backup of length*/
+	movw r10, r20
+
+	movw r12, r22 /* backup of msg-ptr */
+
+	movw r24, r16
+	rcall sha256_init
+	/* if length >= 512 */
+1:
+	tst r11 ; any set bit above 2^16 means length >= 512, so hash a block
+	brne 2f ;  (branching to 4f here would truncate the length to 16 bit in lastBlock)
+	tst r10
+	brne 2f
+	mov r19, r9
+	cpi r19, 0x02 ; bits 15..8 >= 2  <=>  length >= 512
+	brlo 4f
+
+2:	movw r24, r16
+	movw r22, r12
+	rcall sha256_nextBlock
+	ldi r19, 64 ; advance the saved message pointer by one 64-byte block
+	add r12, r19 ;  (r22/r23 are reloaded from r12/r13 above, so they must not
+	adc r13, r1 ;   be advanced instead; and the step is 64, not 0x64 == 100)
+	/* length -= 512 */
+	ldi r19, 0x02
+	sub r9, r19
+	sbc r10, r1
+	sbc r11, r1
+	rjmp 1b
+
+4:
+	movw r24, r16 ; ctx
+	movw r22, r12 ; rest of the message
+	movw r20, r8 ; remaining length in bits (< 512)
+	rcall sha256_lastBlock
+
+	pop r24 ; restore hash destination
+	pop r25
+	movw r22, r16
+	rcall sha256_ctx2hash
+
+sha256_epilog:
+	in r30, SPL
+	in r31, SPH
+	adiw r30, 8*4+8 ; release the ctx stack frame
+	in r0, SREG
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG, r0
+	pop r17
+	pop r16
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	pop r9
+	pop r8
+	ret
+
+;###########################################################
+
+
+; block MUST NOT be larger than 64 bytes
+
+.global sha256_lastBlock
+; === sha256_lastBlock ===
+; this function does padding & Co. for calculating SHA-256 hashes
+; param1: the 16-bit pointer to sha256_ctx structure
+;	given in r25,r24 (r25 is most significant)
+; param2: an 16-bit pointer to 64 byte block to hash
+;	given in r23,r22
+; param3: an 16-bit integer specifying length of block in bits
+;	given in r21,r20
+sha256_lastBlock_localSpace = (SHA256_BLOCK_BITS/8+1)
+
+sha256_lastBlock:
+	cpi r21, 0x02 ; length_b >= 512? then hash one full block first and recurse
+	brlo sha256_lastBlock_prolog
+	push r25
+	push r24
+	push r23
+	push r22
+	push r21
+	push r20
+	rcall sha256_nextBlock
+	pop r20
+	pop r21
+	pop r22
+	pop r23
+	pop r24
+	pop r25
+	subi r21, 0x02 ; length_b -= 512
+	subi r22, -64 ; block pointer += 64 byte (one block)
+	sbci r23, -1 ;  ("subi r23, -2" advanced by 512 byte and hashed the wrong data)
+	rjmp sha256_lastBlock
+sha256_lastBlock_prolog:
+	/* allocate space on stack */
+	in r30, SPL
+	in r31, SPH
+	in r1, SREG
+	subi r30, lo8(64)
+	sbci r31, hi8(64)
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG,r1
+
+	adiw r30, 1 /* SP points to next free byte on stack */
+	mov r18, r20 /* r20 = LSB(length) */
+	lsr r18
+	lsr r18
+	lsr r18
+	bst r21, 0 /* bit 8 of the bit-length ... */
+	bld r18, 5 /* ... is bit 5 of the byte count; now: r18 == length/8 (aka. length in bytes) */
+
+
+	movw r26, r22 /* X points to begin of msg */
+	tst r18
+	breq sha256_lastBlock_post_copy
+	mov r1, r18 ; r1 is zero again once this copy loop finishes
+sha256_lastBlock_copy_loop:
+	ld r0, X+
+	st Z+, r0
+	dec r1
+	brne sha256_lastBlock_copy_loop
+sha256_lastBlock_post_copy:
+sha256_lastBlock_insert_stuffing_bit:
+	ldi r19, 0x80 ; the padding "1" bit, MSB-first
+	mov r0,r19
+	ldi r19, 0x07
+	and r19, r20 /* if we are in bitmode */
+	breq 2f /* no bitmode */
+1:
+	lsr r0 ; shift the stuffing bit behind the trailing message bits
+	dec r19
+	brne 1b
+	ld r19, X
+/* maybe we should do some ANDing here, just for safety */
+	or r0, r19
+2:
+	st Z+, r0
+	inc r18
+
+/* checking stuff here */
+	cpi r18, 64-8+1 ; does the 8-byte length field still fit in this block?
+	brsh 0f
+	rjmp sha256_lastBlock_insert_zeros
+0:
+	/* no room for the length field: zero-fill this block and process it */
+	ldi r19, 64
+	sub r19, r18
+	breq 2f
+1:
+	st Z+, r1 ; r1 == 0 here (the copy loop ran, since r18 >= 56)
+	dec r19
+	brne 1b
+2:
+	sbiw r30, 63 ; Z back to the begin of the block
+	sbiw r30, 1
+	movw r22, r30
+
+	push r31
+	push r30
+	push r25
+	push r24
+	push r21
+	push r20
+	rcall sha256_nextBlock
+	pop r20
+	pop r21
+	pop r24
+	pop r25
+	pop r30
+	pop r31
+
+	/* nextBlock counted the padding as data: subtract 512 from ctx.length */
+	movw r26, r24
+	adiw r26, 4*8+1 /* we can skip the lowest byte */
+	ld r19, X
+	subi r19, hi8(512)
+	st X+, r19
+	ldi r18, 6 ; propagate the borrow; loop also leaves r18 == 0 ...
+1:
+	ld r19, X
+	sbci r19, 0
+	st X+, r19
+	dec r18
+	brne 1b
+
+; clr r18 /* ... so this is not necessary ;-) */
+	/* reset Z pointer to begin of block */
+
+sha256_lastBlock_insert_zeros:
+	ldi r19, 64-8 ; zero-fill up to the length field
+	sub r19, r18
+	breq sha256_lastBlock_insert_length
+	clr r1
+1:
+	st Z+, r1 /* r1 is still zero */
+	dec r19
+	brne 1b
+
+; rjmp sha256_lastBlock_epilog
+sha256_lastBlock_insert_length:
+	movw r26, r24 /* X points to state */
+	adiw r26, 8*4 /* X points to (state.length) */
+	adiw r30, 8 /* Z points one after the last byte of block */
+	ld r0, X+ ; total bit count = state.length + length_b,
+	add r0, r20 ;  stored big-endian in the last 8 bytes of the block
+	st -Z, r0
+	ld r0, X+
+	adc r0, r21
+	st -Z, r0
+	ldi r19, 6
+1:
+	ld r0, X+
+	adc r0, r1
+	st -Z, r0
+	dec r19
+	brne 1b
+
+	sbiw r30, 64-8 ; Z back to the begin of the block
+	movw r22, r30
+	rcall sha256_nextBlock
+
+sha256_lastBlock_epilog:
+	in r30, SPL
+	in r31, SPH
+	in r1, SREG
+	adiw r30, 63 ; lo8(64)
+	adiw r30, 1 ; hi8(64)
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG,r1
+	clr r1 ; restore the avr-gcc zero register
+	clr r0
+	ret
+
+/**/
+;###########################################################
+
+.global sha256_nextBlock
+; === sha256_nextBlock ===
+; this is the core function for calculating SHA-256 hashes
+; param1: the 16-bit pointer to sha256_ctx structure
+;	given in r25,r24 (r25 is most significant)
+; param2: an 16-bit pointer to 64 byte block to hash
+;	given in r23,r22
+sha256_nextBlock_localSpace = (64+8)*4 ; 64 32-bit values for w array and 8 32-bit values for a array (total 288 byte)
+
+Bck1 = 12
+Bck2 = 13
+Bck3 = 14
+Bck4 = 15
+Func1 = 22
+Func2 = 23
+Func3 = 24
+Func4 = 25
+Accu1 = 16
+Accu2 = 17
+Accu3 = 18
+Accu4 = 19
+XAccu1 = 8
+XAccu2 = 9
+XAccu3 = 10
+XAccu4 = 11
+T1 = 4
+T2 = 5
+T3 = 6
+T4 = 7
+LoopC = 1
+/* byteorder: high number <--> high significance */
+sha256_nextBlock:
+	; initial, let's make some space ready for local vars
+	push r4 /* replace push & pop by mem ops? */
+	push r5
+	push r6
+	push r7
+	push r8
+	push r9
+	push r10
+	push r11
+	push r12
+	push r13
+	push r14
+	push r15
+	push r16
+	push r17
+	push r28
+	push r29
+	in r20, SPL
+	in r21, SPH
+	movw r18, r20 ;backup SP
+;	movw r26, r20 ; X points to free space on stack
+	movw r30, r22 ; Z points to message
+	subi r20, lo8(sha256_nextBlock_localSpace) ;sbiw can do only up to 63
+	sbci r21, hi8(sha256_nextBlock_localSpace)
+	movw r26, r20 ; X points to free space on stack
+	in r0, SREG
+	cli ; we want to be uninterrupted while updating SP
+	out SPL, r20
+	out SPH, r21
+	out SREG, r0
+	push r18 ; keep the old SP for the epilog
+	push r19
+	push r24
+	push r25 /* param1 will be needed later */
+	; now we fill the w array with message (think about endianess)
+	adiw r26, 1 ; X++
+	ldi r20, 16
+sha256_nextBlock_wcpyloop:
+	ld r23, Z+ ; load one big-endian 32-bit message word ...
+	ld r22, Z+
+	ld r19, Z+
+	ld r18, Z+
+	st X+, r18 ; ... and store it little-endian into w[]
+	st X+, r19
+	st X+, r22
+	st X+, r23
+	dec r20
+	brne sha256_nextBlock_wcpyloop
+/* for (i=16; i<64; ++i){
+	w[i] = SIGMA_b(w[i-2]) + w[i-7] + SIGMA_a(w[i-15]) + w[i-16];
+	} */
+	/* r25,r24,r23,r22 (r21,r20) are function values
+	   r19,r18,r17,r16 are the accumulator
+	   r15,r14,r13,r12 are backup1
+	   r11,r10,r9 ,r8 are xor accu
+	   r1 is round counter */
+
+	ldi r20, 64-16
+	mov LoopC, r20
+sha256_nextBlock_wcalcloop:
+	movw r30, r26 ; cp X to Z
+	sbiw r30, 63
+	sbiw r30, 1 ; subtract 64 = 16*4, Z now points to w[i-16]
+	ld Accu1, Z+
+	ld Accu2, Z+
+	ld Accu3, Z+
+	ld Accu4, Z+ /* w[i] = w[i-16] */
+	ld Bck1, Z+
+	ld Bck2, Z+
+	ld Bck3, Z+
+	ld Bck4, Z+ /* backup = w[i-15] */
+	/* now sigma 0 */
+	mov Func1, Bck2
+	mov Func2, Bck3
+	mov Func3, Bck4
+	mov Func4, Bck1 /* prerotated by 8 */
+	ldi r20, 1
+	rcall bitrotl
+	movw XAccu1, Func1
+	movw XAccu3, Func3 /* store ROTR(w[i-15],7) in xor accu */
+	movw Func1, Bck3
+	movw Func3, Bck1 /* prerotated by 16 */
+	ldi r20, 2
+	rcall bitrotr
+	eor XAccu1, Func1 /* xor ROTR(w[i-15], 18)*/
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4
+	ldi Func2, 3 /* now shr3 */ /*we can destroy backup now*/
+sigma0_shr:
+	lsr Bck4
+	ror Bck3
+	ror Bck2
+	ror Bck1
+	dec Func2
+	brne sigma0_shr
+	eor XAccu1, Bck1
+	eor XAccu2, Bck2
+	eor XAccu3, Bck3
+	eor XAccu4, Bck4 /* xor SHR(w[i-15], 3)*/ /* xor accu == sigma0(w[i-15]) */
+	add Accu1, XAccu1
+	adc Accu2, XAccu2
+	adc Accu3, XAccu3
+	adc Accu4, XAccu4 /* finished with sigma0 */
+	ldd Func1, Z+7*4 /* now accu += w[i-7] */
+	ldd Func2, Z+7*4+1
+	ldd Func3, Z+7*4+2
+	ldd Func4, Z+7*4+3
+	add Accu1, Func1
+	adc Accu2, Func2
+	adc Accu3, Func3
+	adc Accu4, Func4
+	ldd Bck1, Z+12*4 /* now backup = w[i-2]*/
+	ldd Bck2, Z+12*4+1
+	ldd Bck3, Z+12*4+2
+	ldd Bck4, Z+12*4+3
+	/* now sigma 1 */
+	movw Func1, Bck3
+	movw Func3, Bck1 /* prerotated by 16 */
+	ldi r20, 1
+	rcall bitrotr
+	movw XAccu3, Func3
+	movw XAccu1, Func1 /* store ROTR(w[i-2], 17) in xor accu */
+;	movw Func1, Bck3
+;	movw Func3, Bck1 /* prerotated by 16 */
+	ldi r20, 2
+	rcall bitrotr
+	eor XAccu1, Func1 /* xor ROTR(w[i-2], 19)*/
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4
+	ldi Func2, 2 /* now shr10 (dirty trick, skipping a byte) */ /*we can destroy backup now*/
+sigma1_shr:
+	lsr Bck4
+	ror Bck3
+	ror Bck2
+	dec Func2
+	brne sigma1_shr
+	eor XAccu1, Bck2
+	eor XAccu2, Bck3
+	eor XAccu3, Bck4 /* xor SHR(w[i-2], 10)*/ /* xor accu == sigma1(w[i-2]) */
+	add Accu1, XAccu1
+	adc Accu2, XAccu2
+	adc Accu3, XAccu3
+	adc Accu4, XAccu4 /* finished with sigma1 */
+	/* now let's store the result */
+	st X+, Accu1
+	st X+, Accu2
+	st X+, Accu3
+	st X+, Accu4
+	dec LoopC
+	breq 3f ; skip if zero
+	rjmp sha256_nextBlock_wcalcloop
+3:
+	/* we are finished with w array X points one byte post w */
+/* init a array */
+	pop r31
+	pop r30 ; Z = ctx; keep the pointer on the stack for update_state
+	push r30
+	push r31
+	ldi r25, 8*4 /* 8 32-bit values to copy from ctx to a array */
+init_a_array:
+	ld r1, Z+
+	st X+, r1
+	dec r25
+	brne init_a_array
+
+/* now the real fun begins */
+/* for (i=0; i<64; ++i){
+	t1 = a[7] + SIGMA1(a[4]) + CH(a[4],a[5],a[6]) + k[i] + w[i];
+	t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]);
+	memmove(&(a[1]), &(a[0]), 7*4); // a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0];
+	a[4] += t1;
+	a[0] = t1 + t2;
+	} */
+	/* Y points to a[0], Z ('cause lpm wants it) points to k[i], X points to w[i] */
+	sbiw r26, 8*4 /* X still points at a[7]+1*/
+	movw r28, r26
+	ldi r30, lo8(sha256_kv)
+	ldi r31, hi8(sha256_kv)
+	dec r27 /* X - (64*4 == 256) */
+	ldi r25, 64
+	mov LoopC, r25
+sha256_main_loop:
+	/* now calculate t1 */
+	/*CH(x,y,z) = (x&y)^((~x)&z)*/
+	ldd T1, Y+5*4
+	ldd T2, Y+5*4+1
+	ldd T3, Y+5*4+2
+	ldd T4, Y+5*4+3 /* y in T */
+	ldd Func1, Y+4*4
+	ldd Func2, Y+4*4+1
+	ldd Func3, Y+4*4+2
+	ldd Func4, Y+4*4+3 /* x in Func */
+	ldd Bck1, Y+6*4
+	ldd Bck2, Y+6*4+1
+	ldd Bck3, Y+6*4+2
+	ldd Bck4, Y+6*4+3 /* z in Bck */
+	and T1, Func1
+	and T2, Func2
+	and T3, Func3
+	and T4, Func4
+	com Func1
+	com Func2
+	com Func3
+	com Func4
+	and Bck1, Func1
+	and Bck2, Func2
+	and Bck3, Func3
+	and Bck4, Func4
+	eor T1, Bck1
+	eor T2, Bck2
+	eor T3, Bck3
+	eor T4, Bck4 /* done, CH(x,y,z) is in T */
+	/* now SIGMA1(a[4]) */
+	ldd Bck4, Y+4*4 /* think about using it from Func reg above*/
+	ldd Bck1, Y+4*4+1
+	ldd Bck2, Y+4*4+2
+	ldd Bck3, Y+4*4+3 /* load prerotated by 8-bit */
+	movw Func1, Bck1
+	movw Func3, Bck3
+	ldi r20, 2
+	rcall bitrotl /* rotr(x,6) */
+	movw XAccu1, Func1
+	movw XAccu3, Func3
+	movw Func1, Bck1
+	movw Func3, Bck3
+	ldi r20, 3
+	rcall bitrotr /* rotr(x,11) */
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4
+	movw Func1, Bck3 /* this prerotates further 16 bits*/
+	movw Func3, Bck1 /* so we have now prerotated by 24 bits*/
+	ldi r20, 1
+	rcall bitrotr /* rotr(x,25) */
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4 /* finished with SIGMA1, add it to T */
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4
+	/* now we've to add a[7], w[i] and k[i] */
+	ldd XAccu1, Y+4*7
+	ldd XAccu2, Y+4*7+1
+	ldd XAccu3, Y+4*7+2
+	ldd XAccu4, Y+4*7+3
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4 /* add a[7] */
+	ld XAccu1, X+
+	ld XAccu2, X+
+	ld XAccu3, X+
+	ld XAccu4, X+
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4 /* add w[i] */
+	lpm XAccu1, Z+
+	lpm XAccu2, Z+
+	lpm XAccu3, Z+
+	lpm XAccu4, Z+
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4 /* add k[i] */ /* finished with t1 */
+	/*now t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]) */ /*i did to much x86 asm, i always see 4 32bit regs*/
+	/* starting with MAJ(x,y,z) */
+	ldd Func1, Y+4*0+0
+	ldd Func2, Y+4*0+1
+	ldd Func3, Y+4*0+2
+	ldd Func4, Y+4*0+3 /* load x=a[0] */
+	ldd XAccu1, Y+4*1+0
+	ldd XAccu2, Y+4*1+1
+	ldd XAccu3, Y+4*1+2
+	ldd XAccu4, Y+4*1+3 /* load y=a[1] */
+	and XAccu1, Func1
+	and XAccu2, Func2
+	and XAccu3, Func3
+	and XAccu4, Func4 /* XAccu == (x & y) */
+	ldd Bck1, Y+4*2+0
+	ldd Bck2, Y+4*2+1
+	ldd Bck3, Y+4*2+2
+	ldd Bck4, Y+4*2+3 /* load z=a[2] */
+	and Func1, Bck1
+	and Func2, Bck2
+	and Func3, Bck3
+	and Func4, Bck4
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4 /* XAccu == (x & y) ^ (x & z) */
+	ldd Func1, Y+4*1+0
+	ldd Func2, Y+4*1+1
+	ldd Func3, Y+4*1+2
+	ldd Func4, Y+4*1+3 /* load y=a[1] */
+	and Func1, Bck1
+	and Func2, Bck2
+	and Func3, Bck3
+	and Func4, Bck4
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4 /* XAccu == Maj(x,y,z) == (x & y) ^ (x & z) ^ (y & z) */
+	/* SIGMA0(a[0]) */
+	ldd Bck1, Y+4*0+0 /* we should combine this with above */
+	ldd Bck2, Y+4*0+1
+	ldd Bck3, Y+4*0+2
+	ldd Bck4, Y+4*0+3
+	movw Func1, Bck1
+	movw Func3, Bck3
+	ldi r20, 2
+	rcall bitrotr
+	movw Accu1, Func1
+	movw Accu3, Func3 /* Accu = ROTR(a[0], 2) */
+	movw Func1, Bck3
+	movw Func3, Bck1 /* prerotate by 16 bits */
+	ldi r20, 3
+	rcall bitrotl
+	eor Accu1, Func1
+	eor Accu2, Func2
+	eor Accu3, Func3
+	eor Accu4, Func4 /* Accu ^= ROTR(a[0], 13) */
+	mov Func1, Bck4
+	mov Func2, Bck1
+	mov Func3, Bck2
+	mov Func4, Bck3 /* prerotate by 24 bits */
+	ldi r20, 2
+	rcall bitrotl
+	eor Accu1, Func1
+	eor Accu2, Func2
+	eor Accu3, Func3
+	eor Accu4, Func4 /* Accu ^= ROTR(a[0], 22) */
+	add Accu1, XAccu1 /* add previous result (MAJ)*/
+	adc Accu2, XAccu2
+	adc Accu3, XAccu3
+	adc Accu4, XAccu4
+	/* now we are finished with the computing stuff (t1 in T, t2 in Accu)*/
+	/* a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; */
+
+	ldi r21, 7*4
+	adiw r28, 7*4
+a_shift_loop:
+	ld r25, -Y /* warning: this is PREdecrement */
+	std Y+4, r25
+	dec r21
+	brne a_shift_loop
+
+	ldd Bck1, Y+4*4+0
+	ldd Bck2, Y+4*4+1
+	ldd Bck3, Y+4*4+2
+	ldd Bck4, Y+4*4+3
+	add Bck1, T1
+	adc Bck2, T2
+	adc Bck3, T3
+	adc Bck4, T4 /* a[4] += t1 */
+	std Y+4*4+0, Bck1
+	std Y+4*4+1, Bck2
+	std Y+4*4+2, Bck3
+	std Y+4*4+3, Bck4
+	add Accu1, T1
+	adc Accu2, T2
+	adc Accu3, T3
+	adc Accu4, T4 /* a[0] = t1 + t2 */
+	std Y+4*0+0, Accu1
+	std Y+4*0+1, Accu2
+	std Y+4*0+2, Accu3
+	std Y+4*0+3, Accu4 /* a array updated */
+
+
+	dec LoopC
+	breq update_state
+	rjmp sha256_main_loop ;brne sha256_main_loop
+update_state:
+	/* update state */
+	/* pointers to state should still exist on the stack ;-) */
+	pop r31
+	pop r30 ; Z = ctx
+	ldi r21, 8
+update_state_loop:
+	ldd Accu1, Z+0
+	ldd Accu2, Z+1
+	ldd Accu3, Z+2
+	ldd Accu4, Z+3
+	ld Func1, Y+
+	ld Func2, Y+
+	ld Func3, Y+
+	ld Func4, Y+
+	add Accu1, Func1
+	adc Accu2, Func2
+	adc Accu3, Func3
+	adc Accu4, Func4 /* h[i] += a[i] */
+	st Z+, Accu1
+	st Z+, Accu2
+	st Z+, Accu3
+	st Z+, Accu4
+	dec r21
+	brne update_state_loop
+	/* now we just have to update the length */
+	adiw r30, 1 /* since we add 512, we can simply skip the LSB */
+	ldi r21, 2
+	ldi r22, 6
+	ld r20, Z
+	add r20, r21 ; length += 512 (bit 9 lives in byte 1)
+	st Z+, r20
+	clr r21
+sha256_nextBlock_fix_length:
+	brcc sha256_nextBlock_epilog ; propagate a carry through the upper bytes
+	ld r20, Z
+	adc r20, r21
+	st Z+, r20
+	dec r22
+	brne sha256_nextBlock_fix_length
+
+; EPILOG
+sha256_nextBlock_epilog:
+/* now we should clean up the stack */
+
+	pop r21 ; restore the SP saved in the prolog
+	pop r20
+	in r0, SREG
+	cli ; we want to be uninterrupted while updating SP
+	out SPL, r20
+	out SPH, r21
+	out SREG, r0
+
+	clr r1 ; LoopC aliased r1; restore the avr-gcc zero register
+	pop r29
+	pop r28
+	pop r17
+	pop r16
+	pop r15
+	pop r14
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	pop r9
+	pop r8
+	pop r7
+	pop r6
+	pop r5
+	pop r4
+	ret
+
+sha256_kv: ; round-key-vector stored in ProgMem
+.word 0x2f98, 0x428a, 0x4491, 0x7137, 0xfbcf, 0xb5c0, 0xdba5, 0xe9b5, 0xc25b, 0x3956, 0x11f1, 0x59f1, 0x82a4, 0x923f, 0x5ed5, 0xab1c
+.word 0xaa98, 0xd807, 0x5b01, 0x1283, 0x85be, 0x2431, 0x7dc3, 0x550c, 0x5d74, 0x72be, 0xb1fe, 0x80de, 0x06a7, 0x9bdc, 0xf174, 0xc19b
+.word 0x69c1, 0xe49b, 0x4786, 0xefbe, 0x9dc6, 0x0fc1, 0xa1cc, 0x240c, 0x2c6f, 0x2de9, 0x84aa, 0x4a74, 0xa9dc, 0x5cb0, 0x88da, 0x76f9
+.word 0x5152, 0x983e, 0xc66d, 0xa831, 0x27c8, 0xb003, 0x7fc7, 0xbf59, 0x0bf3, 0xc6e0, 0x9147, 0xd5a7, 0x6351, 0x06ca, 0x2967, 0x1429
+.word 0x0a85, 0x27b7, 0x2138, 0x2e1b, 0x6dfc, 0x4d2c, 0x0d13, 0x5338, 0x7354, 0x650a, 0x0abb, 0x766a, 0xc92e, 0x81c2, 0x2c85, 0x9272
+.word 0xe8a1, 0xa2bf, 0x664b, 0xa81a, 0x8b70, 0xc24b, 0x51a3, 0xc76c, 0xe819, 0xd192, 0x0624, 0xd699, 0x3585, 0xf40e, 0xa070, 0x106a
+.word 0xc116, 0x19a4, 0x6c08, 0x1e37, 0x774c, 0x2748, 0xbcb5, 0x34b0, 0x0cb3, 0x391c, 0xaa4a, 0x4ed8, 0xca4f, 0x5b9c, 0x6ff3, 0x682e
+.word 0x82ee, 0x748f, 0x636f, 0x78a5, 0x7814, 0x84c8, 0x0208, 0x8cc7, 0xfffa, 0x90be, 0x6ceb, 0xa450, 0xa3f7, 0xbef9, 0x78f2, 0xc671
+
+
+;###########################################################
+
+.global sha256_init
+;uint32_t sha256_init_vector[]={
+;	0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
+;	0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 };
+;
+;void sha256_init(sha256_ctx_t *state){
+;	state->length=0;
+;	memcpy(state->h, sha256_init_vector, 8*4);
+;}
+; param1: (r25,r24) 16-bit pointer to sha256_ctx_t struct in ram
+; modifies: X(r26,r27), Z(r30,r31), r22, r23
+sha256_init:
+	movw r26, r24 ; (24,25) --> (26,27) load X with param1
+	ldi r30, lo8((sha256_init_vector))
+	ldi r31, hi8((sha256_init_vector))
+	ldi r22, 32+8 ; 32 byte state + 8 byte length counter
+sha256_init_vloop:
+	lpm r23, Z+ ; copy the init vector from flash ...
+	st X+, r23 ; ... into the context in RAM
+	dec r22
+	brne sha256_init_vloop
+	ret
+
+sha256_init_vector:
+.word 0xE667, 0x6A09
+.word 0xAE85, 0xBB67
+.word 0xF372, 0x3C6E
+.word 0xF53A, 0xA54F
+.word 0x527F, 0x510E
+.word 0x688C, 0x9B05
+.word 0xD9AB, 0x1F83
+.word 0xCD19, 0x5BE0
+.word 0x0000, 0x0000 ; the 64-bit length counter starts at zero
+.word 0x0000, 0x0000
+
+;###########################################################
+
+.global rotl32
+; === ROTL32 ===
+; function that rotates a 32 bit word to the left
+; param1: the 32-bit word to rotate
+;	given in r25,r24,r23,r22 (r25 is most significant)
+; param2: an 8-bit value telling how often to rotate
+;	given in r20
+; clobbers: r20, r21; result returned in r25,r24,r23,r22
+rotl32:
+	cpi r20, 8 ; rotate whole bytes first
+	brlo bitrotl
+	mov r21, r25 ; byte-rotate left: (r25,r24,r23,r22) <- (r24,r23,r22,r25)
+	mov r25, r24
+	mov r24, r23
+	mov r23, r22
+	mov r22, r21
+	subi r20, 8
+	rjmp rotl32
+bitrotl: ; entry point used by the SHA-256 core with prerotated input, r20 in 0..7
+	clr r21
+	clc
+bitrotl_loop:
+	tst r20
+	breq fixrotl
+	rol r22
+	rol r23
+	rol r24
+	rol r25
+	rol r21 ; collect the bits shifted out at the top ...
+	dec r20
+	rjmp bitrotl_loop
+fixrotl:
+	or r22, r21 ; ... and wrap them back into the low byte
+	ret
+
+
+;###########################################################
+
+.global rotr32
+; === ROTR32 ===
+; function that rotates a 32 bit word to the right
+; param1: the 32-bit word to rotate
+;	given in r25,r24,r23,r22 (r25 is most significant)
+; param2: an 8-bit value telling how often to rotate
+;	given in r20
+; clobbers: r20, r21; result returned in r25,r24,r23,r22
+rotr32:
+	cpi r20, 8 ; rotate whole bytes first
+	brlo bitrotr
+	mov r21, r22 ; byte-rotate right: (r25,r24,r23,r22) <- (r22,r25,r24,r23)
+	mov r22, r23
+	mov r23, r24
+	mov r24, r25
+	mov r25, r21
+	subi r20, 8
+	rjmp rotr32
+bitrotr: ; entry point used by the SHA-256 core with prerotated input, r20 in 0..7
+	clr r21
+	clc
+bitrotr_loop:
+	tst r20
+	breq fixrotr
+	ror r25
+	ror r24
+	ror r23
+	ror r22
+	ror r21 ; collect the bits shifted out at the bottom ...
+	dec r20
+	rjmp bitrotr_loop
+fixrotr:
+	or r25, r21 ; ... and wrap them back into the high byte
+	ret
+
+
+;###########################################################
+
+.global change_endian32
+; === change_endian32 ===
+; function that changes the endianess of a 32-bit word
+; param1: the 32-bit word
+;	given in r25,r24,r23,r22 (r25 is most significant)
+; clobbers: r20, r21; byte-reversed result returned in r25,r24,r23,r22
+change_endian32:
+	movw r20, r22 ; (r22,r23) --> (r20,r21)
+	mov r22, r25 ; byte-reverse: (r25,r24,r23,r22) <- (r22,r23,r24,r25)
+	mov r23, r24
+	mov r24, r21
+	mov r25, r20
+	ret
+
diff --git a/sha256.c b/sha256/sha256.c
similarity index 100%
rename from sha256.c
rename to sha256/sha256.c
diff --git a/sha256/sha256.h b/sha256/sha256.h
new file mode 100644
index 0000000..24960a3
--- /dev/null
+++ b/sha256/sha256.h
@@ -0,0 +1,122 @@
+/* sha256.h */
+/*
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file sha256.h
+ * \author Daniel Otte
+ * \date 2006-05-16
+ * \license GPLv3 or later
+ *
+ */
+
+#ifndef SHA256_H_
+#define SHA256_H_
+
+#define __LITTLE_ENDIAN__
+
+
+#include <stdint.h>
+
+/** \def SHA256_HASH_BITS
+ * defines the size of a SHA-256 hash value in bits
+ */
+
+/** \def SHA256_HASH_BYTES
+ * defines the size of a SHA-256 hash value in bytes
+ */
+
+/** \def SHA256_BLOCK_BITS
+ * defines the size of a SHA-256 input block in bits
+ */
+
+/** \def SHA256_BLOCK_BYTES
+ * defines the size of a SHA-256 input block in bytes
+ */
+
+#define SHA256_HASH_BITS 256
+#define SHA256_HASH_BYTES (SHA256_HASH_BITS/8)
+#define SHA256_BLOCK_BITS 512
+#define SHA256_BLOCK_BYTES (SHA256_BLOCK_BITS/8)
+
+/** \typedef sha256_ctx_t
+ * \brief SHA-256 context type
+ *
+ * A variable of this type may hold the state of a SHA-256 hashing process
+ */
+typedef struct {
+	uint32_t h[8];   /* chaining state H0..H7 (words in host byte order) */
+	uint64_t length; /* number of message bits processed so far */
+} sha256_ctx_t;
+
+/** \typedef sha256_hash_t
+ * \brief SHA-256 hash value type
+ *
+ * A variable of this type may hold the hash value produced by the
+ * sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state) function.
+ */
+typedef uint8_t sha256_hash_t[SHA256_HASH_BYTES];
+
+/** \fn void sha256_init(sha256_ctx_t *state)
+ * \brief initialise a SHA-256 context
+ *
+ * This function sets a ::sha256_ctx_t to the initial values for hashing.
+ * \param state pointer to the SHA-256 hashing context
+ */
+void sha256_init(sha256_ctx_t *state);
+
+/** \fn void sha256_nextBlock (sha256_ctx_t* state, const void* block)
+ * \brief update the context with a given block
+ *
+ * This function updates the SHA-256 hash context by processing the given block
+ * of fixed length.
+ * \param state pointer to the SHA-256 hash context
+ * \param block pointer to the block of fixed length (512 bit = 64 byte)
+ */
+void sha256_nextBlock (sha256_ctx_t* state, const void* block);
+
+/** \fn void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b)
+ * \brief finalize the context with the given block
+ *
+ * This function finalizes the SHA-256 hash context by processing the given block
+ * of variable length.
+ * \param state pointer to the SHA-256 hash context
+ * \param block pointer to the block of fixed length (512 bit = 64 byte)
+ * \param length_b the length of the block in bits
+ */
+void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b);
+
+/** \fn void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state)
+ * \brief convert the hash state into the hash value
+ * This function reads the context and writes the hash value to the destination
+ * \param dest pointer to the location where the hash value should be written
+ * \param state pointer to the SHA-256 hash context
+ */
+void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state);
+
+/** \fn void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b)
+ * \brief simple SHA-256 hashing function for direct hashing
+ *
+ * This function automatically hashes a given message of arbitrary length with
+ * the SHA-256 hashing algorithm.
+ * \param dest pointer to the location where the hash value is going to be written to
+ * \param msg pointer to the message that is going to be hashed
+ * \param length_b length of the message in bits
+ */
+void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b);
+
+#endif /*SHA256_H_*/
diff --git a/shabea/memxor.S b/shabea/memxor.S
new file mode 100644
index 0000000..a32058b
--- /dev/null
+++ b/shabea/memxor.S
@@ -0,0 +1,66 @@
+/* memxor.S */
+/*
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+ * File: memxor.S
+ * Author: Daniel Otte
+ * Date: 2008-08-07
+ * License: GPLv3 or later
+ * Description: memxor, XORing one block into another
+ *
+ */
+
+/*
+ * void memxor(void* dest, const void* src, uint16_t n);
+ */
+ /*
+ * param dest is passed in r24:r25
+ * param src is passed in r22:r23
+ * param n is passed in r20:r21
+ */
+.global memxor
+memxor:
+	movw r30, r24 ; Z = dest
+	movw r26, r22 ; X = src
+	movw r24, r20 ; r25:r24 = byte count n
+	adiw r24, 0 ; n == 0? (adiw with 0 only sets the flags)
+	breq 2f
+1:
+	ld r20, X+ ; fetch source byte
+	ld r21, Z ; fetch destination byte
+	eor r20, r21
+	st Z+, r20 ; dest[i] ^= src[i]
+	sbiw r24, 1
+	brne 1b
+2:
+	ret
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/shabea/memxor.h b/shabea/memxor.h
new file mode 100644
index 0000000..a62a616
--- /dev/null
+++ b/shabea/memxor.h
@@ -0,0 +1,7 @@
+#ifndef MEMXOR_H_
+#define MEMXOR_H_
+#include <stdint.h>
+
+void memxor(void* dest, const void* src, uint16_t n);
+
+#endif
diff --git a/shabea/sha256-asm.S b/shabea/sha256-asm.S
new file mode 100644
index 0000000..d9eb6b6
--- /dev/null
+++ b/shabea/sha256-asm.S
@@ -0,0 +1,1042 @@
+/* sha256-asm.S */
+/*
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * Author: Daniel Otte
+ *
+ * License: GPLv3 or later
+*/
+; sha-256 implementation in assembler
+SHA256_BLOCK_BITS = 512
+SHA256_HASH_BITS = 256
+
+.macro precall
+ /* push r18 - r27, r30 - r31*/
+ push r0
+ push r1
+ push r18
+ push r19
+ push r20
+ push r21
+ push r22
+ push r23
+ push r24
+ push r25
+ push r26
+ push r27
+ push r30
+ push r31
+ clr r1
+.endm
+
+.macro postcall
+ pop r31
+ pop r30
+ pop r27
+ pop r26
+ pop r25
+ pop r24
+ pop r23
+ pop r22
+ pop r21
+ pop r20
+ pop r19
+ pop r18
+ pop r1
+ pop r0
+.endm
+
+
+.macro hexdump length
+ push r27
+ push r26
+ ldi r25, '\r'
+ mov r24, r25
+ call uart_putc
+ ldi r25, '\n'
+ mov r24, r25
+ call uart_putc
+ pop r26
+ pop r27
+ movw r24, r26
+.if \length > 16
+ ldi r22, lo8(16)
+ ldi r23, hi8(16)
+ push r27
+ push r26
+ call uart_hexdump
+ pop r26
+ pop r27
+ adiw r26, 16
+ hexdump \length-16
+.else
+ ldi r22, lo8(\length)
+ ldi r23, hi8(\length)
+ call uart_hexdump
+.endif
+.endm
+
+/* X points to Block */
+.macro dbg_hexdump length
+ precall
+ hexdump \length
+ postcall
+.endm
+
+.section .text
+
+SPL = 0x3D
+SPH = 0x3E
+SREG = 0x3F
+
+
+;
+;sha256_ctx_t is:
+;
+; [h0][h1][h2][h3][h4][h5][h6][h7][length]
+; hn is 32 bit large, length is 64 bit large
+
+;###########################################################
+
+.global sha256_ctx2hash
+; === sha256_ctx2hash ===
+; this function converts a state into a normal hash (bytestring)
+; param1: the 16-bit destination pointer
+; given in r25,r24 (r25 is most significant)
+; param2: the 16-bit pointer to sha256_ctx structure
+; given in r23,r22
+sha256_ctx2hash:
+ movw r26, r22
+ movw r30, r24
+ ldi r21, 8
+ sbiw r26, 4
+1:
+ ldi r20, 4
+ adiw r26, 8
+2:
+ ld r0, -X
+ st Z+, r0
+ dec r20
+ brne 2b
+
+ dec r21
+ brne 1b
+
+ ret
+
+;###########################################################
+
+.global sha256
+; === sha256 ===
+; this function calculates SHA-256 hashes from messages in RAM
+; param1: the 16-bit hash destination pointer
+; given in r25,r24 (r25 is most significant)
+; param2: the 16-bit pointer to message
+; given in r23,r22
+; param3: 32-bit length value (length of message in bits)
+; given in r21,r20,r19,r18
+sha256:
+sha256_prolog:
+	push r8
+	push r9
+	push r10
+	push r11
+	push r12
+	push r13
+	push r16
+	push r17
+	in r16, SPL
+	in r17, SPH
+	subi r16, 8*4+8
+	sbci r17, 0
+	in r0, SREG
+	cli
+	out SPL, r16
+	out SPH, r17
+	out SREG, r0
+
+	push r25
+	push r24
+	inc r16
+	adc r17, r1
+
+	movw r8, r18 /* backup of length */
+	movw r10, r20
+
+	movw r12, r22 /* backup of msg-ptr */
+
+	movw r24, r16
+	rcall sha256_init
+	/* if length >= 512 */
+1:
+	tst r11
+	brne 4f
+	tst r10
+	brne 4f
+	mov r19, r9
+	cpi r19, 0x02
+	brlo 4f
+
+	movw r24, r16
+	movw r22, r12
+	rcall sha256_nextBlock
+	ldi r19, 64  /* one block is 64 bytes (was 0x64 == 100) */
+	add r12, r19 /* advance the SAVED msg-ptr; r22:r23 is clobbered by sha256_nextBlock */
+	adc r13, r1
+	/* length -= 512 */
+	ldi r19, 0x02
+	sub r9, r19
+	sbc r10, r1
+	sbc r11, r1
+	rjmp 1b
+
+4:
+	movw r24, r16
+	movw r22, r12
+	movw r20, r8
+	rcall sha256_lastBlock
+
+	pop r24
+	pop r25
+	movw r22, r16
+	rcall sha256_ctx2hash
+
+sha256_epilog:
+	in r30, SPL
+	in r31, SPH
+	adiw r30, 8*4+8
+	in r0, SREG
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG, r0
+	pop r17
+	pop r16
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	pop r9
+	pop r8
+	ret
+
+;###########################################################
+
+
+; block MUST NOT be larger than 64 bytes
+
+.global sha256_lastBlock
+; === sha256_lastBlock ===
+; this function does padding & Co. for calculating SHA-256 hashes
+; param1: the 16-bit pointer to sha256_ctx structure
+; given in r25,r24 (r25 is most significant)
+; param2: an 16-bit pointer to 64 byte block to hash
+; given in r23,r22
+; param3: an 16-bit integer specifing length of block in bits
+; given in r21,r20
+sha256_lastBlock_localSpace = (SHA256_BLOCK_BITS/8+1)
+
+
+sha256_lastBlock:
+ cpi r21, 0x02
+ brlo sha256_lastBlock_prolog
+ push r25
+ push r24
+ push r23
+ push r22
+ push r21
+ push r20
+ rcall sha256_nextBlock
+ pop r20
+ pop r21
+ pop r22
+ pop r23
+ pop r24
+ pop r25
+ subi r21, 0x02
+ subi r23, -2
+ rjmp sha256_lastBlock
+sha256_lastBlock_prolog:
+ /* allocate space on stack */
+ in r30, SPL
+ in r31, SPH
+ in r1, SREG
+ subi r30, lo8(64)
+ sbci r31, hi8(64)
+ cli
+ out SPL, r30
+ out SPH, r31
+ out SREG,r1
+
+ adiw r30, 1 /* SP points to next free byte on stack */
+ mov r18, r20 /* r20 = LSB(length) */
+ lsr r18
+ lsr r18
+ lsr r18
+ bst r21, 0 /* may be we should explain this ... */
+ bld r18, 5 /* now: r18 == length/8 (aka. length in bytes) */
+
+
+ movw r26, r22 /* X points to begin of msg */
+ tst r18
+ breq sha256_lastBlock_post_copy
+ mov r1, r18
+sha256_lastBlock_copy_loop:
+ ld r0, X+
+ st Z+, r0
+ dec r1
+ brne sha256_lastBlock_copy_loop
+sha256_lastBlock_post_copy:
+sha256_lastBlock_insert_stuffing_bit:
+ ldi r19, 0x80
+ mov r0,r19
+ ldi r19, 0x07
+ and r19, r20 /* if we are in bitmode */
+ breq 2f /* no bitmode */
+1:
+ lsr r0
+ dec r19
+ brne 1b
+ ld r19, X
+/* maybe we should do some ANDing here, just for safety */
+ or r0, r19
+2:
+ st Z+, r0
+ inc r18
+
+/* checking stuff here */
+ cpi r18, 64-8+1
+ brsh 0f
+ rjmp sha256_lastBlock_insert_zeros
+0:
+ /* oh shit, we landed here */
+ /* first we have to fill it up with zeros */
+ ldi r19, 64
+ sub r19, r18
+ breq 2f
+1:
+ st Z+, r1
+ dec r19
+ brne 1b
+2:
+ sbiw r30, 63
+ sbiw r30, 1
+ movw r22, r30
+
+ push r31
+ push r30
+ push r25
+ push r24
+ push r21
+ push r20
+ rcall sha256_nextBlock
+ pop r20
+ pop r21
+ pop r24
+ pop r25
+ pop r30
+ pop r31
+
+ /* now we should subtract 512 from length */
+ movw r26, r24
+ adiw r26, 4*8+1 /* we can skip the lowest byte */
+ ld r19, X
+ subi r19, hi8(512)
+ st X+, r19
+ ldi r18, 6
+1:
+ ld r19, X
+ sbci r19, 0
+ st X+, r19
+ dec r18
+ brne 1b
+
+; clr r18 /* not neccessary ;-) */
+ /* reset Z pointer to begin of block */
+
+sha256_lastBlock_insert_zeros:
+ ldi r19, 64-8
+ sub r19, r18
+ breq sha256_lastBlock_insert_length
+ clr r1
+1:
+ st Z+, r1 /* r1 is still zero */
+ dec r19
+ brne 1b
+
+; rjmp sha256_lastBlock_epilog
+sha256_lastBlock_insert_length:
+ movw r26, r24 /* X points to state */
+ adiw r26, 8*4 /* X points to (state.length) */
+ adiw r30, 8 /* Z points one after the last byte of block */
+ ld r0, X+
+ add r0, r20
+ st -Z, r0
+ ld r0, X+
+ adc r0, r21
+ st -Z, r0
+ ldi r19, 6
+1:
+ ld r0, X+
+ adc r0, r1
+ st -Z, r0
+ dec r19
+ brne 1b
+
+ sbiw r30, 64-8
+ movw r22, r30
+ rcall sha256_nextBlock
+
+sha256_lastBlock_epilog:
+ in r30, SPL
+ in r31, SPH
+ in r1, SREG
+ adiw r30, 63 ; lo8(64)
+ adiw r30, 1 ; hi8(64)
+ cli
+ out SPL, r30
+ out SPH, r31
+ out SREG,r1
+ clr r1
+ clr r0
+ ret
+
+/**/
+;###########################################################
+
+.global sha256_nextBlock
+; === sha256_nextBlock ===
+; this is the core function for calculating SHA-256 hashes
+; param1: the 16-bit pointer to sha256_ctx structure
+; given in r25,r24 (r25 is most significant)
+; param2: an 16-bit pointer to 64 byte block to hash
+; given in r23,r22
+sha256_nextBlock_localSpace = (64+8)*4 ; 64 32-bit values for w array and 8 32-bit values for a array (total 288 byte)
+
+Bck1 = 12
+Bck2 = 13
+Bck3 = 14
+Bck4 = 15
+Func1 = 22
+Func2 = 23
+Func3 = 24
+Func4 = 25
+Accu1 = 16
+Accu2 = 17
+Accu3 = 18
+Accu4 = 19
+XAccu1 = 8
+XAccu2 = 9
+XAccu3 = 10
+XAccu4 = 11
+T1 = 4
+T2 = 5
+T3 = 6
+T4 = 7
+LoopC = 1
+/* byteorder: high number <--> high significance */
+sha256_nextBlock:
+ ; initial, let's make some space ready for local vars
+ push r4 /* replace push & pop by mem ops? */
+ push r5
+ push r6
+ push r7
+ push r8
+ push r9
+ push r10
+ push r11
+ push r12
+ push r13
+ push r14
+ push r15
+ push r16
+ push r17
+ push r28
+ push r29
+ in r20, SPL
+ in r21, SPH
+ movw r18, r20 ;backup SP
+; movw r26, r20 ; X points to free space on stack
+ movw r30, r22 ; Z points to message
+ subi r20, lo8(sha256_nextBlock_localSpace) ;sbiw can do only up to 63
+ sbci r21, hi8(sha256_nextBlock_localSpace)
+ movw r26, r20 ; X points to free space on stack
+ in r0, SREG
+ cli ; we want to be uninterrupted while updating SP
+ out SPL, r20
+ out SPH, r21
+ out SREG, r0
+ push r18
+ push r19
+ push r24
+ push r25 /* param1 will be needed later */
+ ; now we fill the w array with message (think about endianess)
+ adiw r26, 1 ; X++
+ ldi r20, 16
+sha256_nextBlock_wcpyloop:
+ ld r23, Z+
+ ld r22, Z+
+ ld r19, Z+
+ ld r18, Z+
+ st X+, r18
+ st X+, r19
+ st X+, r22
+ st X+, r23
+ dec r20
+ brne sha256_nextBlock_wcpyloop
+/* for (i=16; i<64; ++i){
+ w[i] = SIGMA_b(w[i-2]) + w[i-7] + SIGMA_a(w[i-15]) + w[i-16];
+ } */
+ /* r25,r24,r23,r24 (r21,r20) are function values
+ r19,r18,r17,r16 are the accumulator
+ r15,r14,r13,rBck1 are backup1
+ r11,r10,r9 ,r8 are xor accu
+ r1 is round counter */
+
+ ldi r20, 64-16
+ mov LoopC, r20
+sha256_nextBlock_wcalcloop:
+ movw r30, r26 ; cp X to Z
+ sbiw r30, 63
+ sbiw r30, 1 ; substract 64 = 16*4
+ ld Accu1, Z+
+ ld Accu2, Z+
+ ld Accu3, Z+
+ ld Accu4, Z+ /* w[i] = w[i-16] */
+ ld Bck1, Z+
+ ld Bck2, Z+
+ ld Bck3, Z+
+ ld Bck4, Z+ /* backup = w[i-15] */
+ /* now sigma 0 */
+ mov Func1, Bck2
+ mov Func2, Bck3
+ mov Func3, Bck4
+ mov Func4, Bck1 /* prerotated by 8 */
+ ldi r20, 1
+ rcall bitrotl
+ movw XAccu1, Func1
+ movw XAccu3, Func3 /* store ROTR(w[i-15],7) in xor accu */
+ movw Func1, Bck3
+ movw Func3, Bck1 /* prerotated by 16 */
+ ldi r20, 2
+ rcall bitrotr
+ eor XAccu1, Func1 /* xor ROTR(w[i-15], 18)*/
+ eor XAccu2, Func2
+ eor XAccu3, Func3
+ eor XAccu4, Func4
+ ldi Func2, 3 /* now shr3 */ /*we can destroy backup now*/
+sigma0_shr:
+ lsr Bck4
+ ror Bck3
+ ror Bck2
+ ror Bck1
+ dec Func2
+ brne sigma0_shr
+ eor XAccu1, Bck1
+ eor XAccu2, Bck2
+ eor XAccu3, Bck3
+ eor XAccu4, Bck4 /* xor SHR(w[i-15], 3)*/ /* xor accu == sigma1(w[i-15]) */
+ add Accu1, XAccu1
+ adc Accu2, XAccu2
+ adc Accu3, XAccu3
+ adc Accu4, XAccu4 /* finished with sigma0 */
+ ldd Func1, Z+7*4 /* now accu += w[i-7] */
+ ldd Func2, Z+7*4+1
+ ldd Func3, Z+7*4+2
+ ldd Func4, Z+7*4+3
+ add Accu1, Func1
+ adc Accu2, Func2
+ adc Accu3, Func3
+ adc Accu4, Func4
+ ldd Bck1, Z+12*4 /* now backup = w[i-2]*/
+ ldd Bck2, Z+12*4+1
+ ldd Bck3, Z+12*4+2
+ ldd Bck4, Z+12*4+3
+ /* now sigma 1 */
+ movw Func1, Bck3
+ movw Func3, Bck1 /* prerotated by 16 */
+ ldi r20, 1
+ rcall bitrotr
+ movw XAccu3, Func3
+ movw XAccu1, Func1 /* store in ROTR(w[i-2], 17) xor accu */
+; movw Func1, Bck3
+; movw Func3, Bck1 /* prerotated by 16 */
+ ldi r20, 2
+ rcall bitrotr
+ eor XAccu1, Func1 /* xor ROTR(w[i-2], 19)*/
+ eor XAccu2, Func2
+ eor XAccu3, Func3
+ eor XAccu4, Func4
+ ldi Func2, 2 /* now shr10 (dirty trick, skipping a byte) */ /*we can destroy backup now*/
+sigma1_shr:
+ lsr Bck4
+ ror Bck3
+ ror Bck2
+ dec Func2
+ brne sigma1_shr
+ eor XAccu1, Bck2
+ eor XAccu2, Bck3
+ eor XAccu3, Bck4 /* xor SHR(w[i-2], 10)*/ /* xor accu == sigma1(w[i-15]) */
+ add Accu1, XAccu1
+ adc Accu2, XAccu2
+ adc Accu3, XAccu3
+ adc Accu4, XAccu4 /* finished with sigma0 */
+ /* now let's store the shit */
+ st X+, Accu1
+ st X+, Accu2
+ st X+, Accu3
+ st X+, Accu4
+ dec LoopC
+ breq 3f ; skip if zero
+ rjmp sha256_nextBlock_wcalcloop
+3:
+ /* we are finished with w array X points one byte post w */
+/* init a array */
+ pop r31
+ pop r30
+ push r30
+ push r31
+ ldi r25, 8*4 /* 8 32-bit values to copy from ctx to a array */
+init_a_array:
+ ld r1, Z+
+ st X+, r1
+ dec r25
+ brne init_a_array
+
+/* now the real fun begins */
+/* for (i=0; i<64; ++i){
+ t1 = a[7] + SIGMA1(a[4]) + CH(a[4],a[5],a[6]) + k[i] + w[i];
+ t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]);
+ memmove(&(a[1]), &(a[0]), 7*4); // a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0];
+ a[4] += t1;
+ a[0] = t1 + t2;
+ } */
+ /* Y points to a[0], Z ('cause lpm wants it) points to k[i], X points to w[i] */
+ sbiw r26, 8*4 /* X still points at a[7]+1*/
+ movw r28, r26
+ ldi r30, lo8(sha256_kv)
+ ldi r31, hi8(sha256_kv)
+ dec r27 /* X - (64*4 == 256) */
+ ldi r25, 64
+ mov LoopC, r25
+sha256_main_loop:
+ /* now calculate t1 */
+ /*CH(x,y,z) = (x&y)^((~x)&z)*/
+ ldd T1, Y+5*4
+ ldd T2, Y+5*4+1
+ ldd T3, Y+5*4+2
+ ldd T4, Y+5*4+3 /* y in T */
+ ldd Func1, Y+4*4
+ ldd Func2, Y+4*4+1
+ ldd Func3, Y+4*4+2
+ ldd Func4, Y+4*4+3 /* x in Func */
+ ldd Bck1, Y+6*4
+ ldd Bck2, Y+6*4+1
+ ldd Bck3, Y+6*4+2
+ ldd Bck4, Y+6*4+3 /* z in Bck */
+ and T1, Func1
+ and T2, Func2
+ and T3, Func3
+ and T4, Func4
+ com Func1
+ com Func2
+ com Func3
+ com Func4
+ and Bck1, Func1
+ and Bck2, Func2
+ and Bck3, Func3
+ and Bck4, Func4
+ eor T1, Bck1
+ eor T2, Bck2
+ eor T3, Bck3
+ eor T4, Bck4 /* done, CH(x,y,z) is in T */
+ /* now SIGMA1(a[4]) */
+ ldd Bck4, Y+4*4 /* think about using it from Func reg above*/
+ ldd Bck1, Y+4*4+1
+ ldd Bck2, Y+4*4+2
+ ldd Bck3, Y+4*4+3 /* load prerotate by 8-bit */
+ movw Func1, Bck1
+ movw Func3, Bck3
+ ldi r20, 2
+ rcall bitrotl /* rotr(x,6) */
+ movw XAccu1, Func1
+ movw XAccu3, Func3
+ movw Func1, Bck1
+ movw Func3, Bck3
+ ldi r20, 3
+ rcall bitrotr /* rotr(x,11) */
+ eor XAccu1, Func1
+ eor XAccu2, Func2
+ eor XAccu3, Func3
+ eor XAccu4, Func4
+ movw Func1, Bck3 /* this prerotates furteh 16 bits*/
+ movw Func3, Bck1 /* so we have now prerotated by 24 bits*/
+ ldi r20, 1
+ rcall bitrotr /* rotr(x,11) */
+ eor XAccu1, Func1
+ eor XAccu2, Func2
+ eor XAccu3, Func3
+ eor XAccu4, Func4 /* finished with SIGMA1, add it to T */
+ add T1, XAccu1
+ adc T2, XAccu2
+ adc T3, XAccu3
+ adc T4, XAccu4
+ /* now we've to add a[7], w[i] and k[i] */
+ ldd XAccu1, Y+4*7
+ ldd XAccu2, Y+4*7+1
+ ldd XAccu3, Y+4*7+2
+ ldd XAccu4, Y+4*7+3
+ add T1, XAccu1
+ adc T2, XAccu2
+ adc T3, XAccu3
+ adc T4, XAccu4 /* add a[7] */
+ ld XAccu1, X+
+ ld XAccu2, X+
+ ld XAccu3, X+
+ ld XAccu4, X+
+ add T1, XAccu1
+ adc T2, XAccu2
+ adc T3, XAccu3
+ adc T4, XAccu4 /* add w[i] */
+ lpm XAccu1, Z+
+ lpm XAccu2, Z+
+ lpm XAccu3, Z+
+ lpm XAccu4, Z+
+ add T1, XAccu1
+ adc T2, XAccu2
+ adc T3, XAccu3
+ adc T4, XAccu4 /* add k[i] */ /* finished with t1 */
+ /*now t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]) */ /*i did to much x86 asm, i always see 4 32bit regs*/
+ /* starting with MAJ(x,y,z) */
+ ldd Func1, Y+4*0+0
+ ldd Func2, Y+4*0+1
+ ldd Func3, Y+4*0+2
+ ldd Func4, Y+4*0+3 /* load x=a[0] */
+ ldd XAccu1, Y+4*1+0
+ ldd XAccu2, Y+4*1+1
+ ldd XAccu3, Y+4*1+2
+ ldd XAccu4, Y+4*1+3 /* load y=a[1] */
+ and XAccu1, Func1
+ and XAccu2, Func2
+ and XAccu3, Func3
+ and XAccu4, Func4 /* XAccu == (x & y) */
+ ldd Bck1, Y+4*2+0
+ ldd Bck2, Y+4*2+1
+ ldd Bck3, Y+4*2+2
+ ldd Bck4, Y+4*2+3 /* load z=a[2] */
+ and Func1, Bck1
+ and Func2, Bck2
+ and Func3, Bck3
+ and Func4, Bck4
+ eor XAccu1, Func1
+ eor XAccu2, Func2
+ eor XAccu3, Func3
+ eor XAccu4, Func4 /* XAccu == (x & y) ^ (x & z) */
+ ldd Func1, Y+4*1+0
+ ldd Func2, Y+4*1+1
+ ldd Func3, Y+4*1+2
+ ldd Func4, Y+4*1+3 /* load y=a[1] */
+ and Func1, Bck1
+ and Func2, Bck2
+ and Func3, Bck3
+ and Func4, Bck4
+ eor XAccu1, Func1
+ eor XAccu2, Func2
+ eor XAccu3, Func3
+ eor XAccu4, Func4 /* XAccu == Maj(x,y,z) == (x & y) ^ (x & z) ^ (y & z) */
+ /* SIGMA0(a[0]) */
+ ldd Bck1, Y+4*0+0 /* we should combine this with above */
+ ldd Bck2, Y+4*0+1
+ ldd Bck3, Y+4*0+2
+ ldd Bck4, Y+4*0+3
+ movw Func1, Bck1
+ movw Func3, Bck3
+ ldi r20, 2
+ rcall bitrotr
+ movw Accu1, Func1
+ movw Accu3, Func3 /* Accu = shr(a[0], 2) */
+ movw Func1, Bck3
+ movw Func3, Bck1 /* prerotate by 16 bits */
+ ldi r20, 3
+ rcall bitrotl
+ eor Accu1, Func1
+ eor Accu2, Func2
+ eor Accu3, Func3
+ eor Accu4, Func4 /* Accu ^= shr(a[0], 13) */
+ mov Func1, Bck4
+ mov Func2, Bck1
+ mov Func3, Bck2
+ mov Func4, Bck3 /* prerotate by 24 bits */
+ ldi r20, 2
+ rcall bitrotl
+ eor Accu1, Func1
+ eor Accu2, Func2
+ eor Accu3, Func3
+ eor Accu4, Func4 /* Accu ^= shr(a[0], 22) */
+ add Accu1, XAccu1 /* add previous result (MAJ)*/
+ adc Accu2, XAccu2
+ adc Accu3, XAccu3
+ adc Accu4, XAccu4
+ /* now we are finished with the computing stuff (t1 in T, t2 in Accu)*/
+ /* a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; */
+
+ ldi r21, 7*4
+ adiw r28, 7*4
+a_shift_loop:
+ ld r25, -Y /* warning: this is PREdecrement */
+ std Y+4, r25
+ dec r21
+ brne a_shift_loop
+
+ ldd Bck1, Y+4*4+0
+ ldd Bck2, Y+4*4+1
+ ldd Bck3, Y+4*4+2
+ ldd Bck4, Y+4*4+3
+ add Bck1, T1
+ adc Bck2, T2
+ adc Bck3, T3
+ adc Bck4, T4
+ std Y+4*4+0, Bck1
+ std Y+4*4+1, Bck2
+ std Y+4*4+2, Bck3
+ std Y+4*4+3, Bck4
+ add Accu1, T1
+ adc Accu2, T2
+ adc Accu3, T3
+ adc Accu4, T4
+ std Y+4*0+0, Accu1
+ std Y+4*0+1, Accu2
+ std Y+4*0+2, Accu3
+ std Y+4*0+3, Accu4 /* a array updated */
+
+
+ dec LoopC
+ breq update_state
+ rjmp sha256_main_loop ;brne sha256_main_loop
+update_state:
+ /* update state */
+ /* pointers to state should still exist on the stack ;-) */
+ pop r31
+ pop r30
+ ldi r21, 8
+update_state_loop:
+ ldd Accu1, Z+0
+ ldd Accu2, Z+1
+ ldd Accu3, Z+2
+ ldd Accu4, Z+3
+ ld Func1, Y+
+ ld Func2, Y+
+ ld Func3, Y+
+ ld Func4, Y+
+ add Accu1, Func1
+ adc Accu2, Func2
+ adc Accu3, Func3
+ adc Accu4, Func4
+ st Z+, Accu1
+ st Z+, Accu2
+ st Z+, Accu3
+ st Z+, Accu4
+ dec r21
+ brne update_state_loop
+ /* now we just have to update the length */
+ adiw r30, 1 /* since we add 512, we can simply skip the LSB */
+ ldi r21, 2
+ ldi r22, 6
+ ld r20, Z
+ add r20, r21
+ st Z+, r20
+ clr r21
+sha256_nextBlock_fix_length:
+ brcc sha256_nextBlock_epilog
+ ld r20, Z
+ adc r20, r21
+ st Z+, r20
+ dec r22
+ brne sha256_nextBlock_fix_length
+
+; EPILOG
+sha256_nextBlock_epilog:
+/* now we should clean up the stack */
+
+ pop r21
+ pop r20
+ in r0, SREG
+ cli ; we want to be uninterrupted while updating SP
+ out SPL, r20
+ out SPH, r21
+ out SREG, r0
+
+ clr r1
+ pop r29
+ pop r28
+ pop r17
+ pop r16
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop r11
+ pop r10
+ pop r9
+ pop r8
+ pop r7
+ pop r6
+ pop r5
+ pop r4
+ ret
+
+sha256_kv: ; round-key-vector stored in ProgMem
+.word 0x2f98, 0x428a, 0x4491, 0x7137, 0xfbcf, 0xb5c0, 0xdba5, 0xe9b5, 0xc25b, 0x3956, 0x11f1, 0x59f1, 0x82a4, 0x923f, 0x5ed5, 0xab1c
+.word 0xaa98, 0xd807, 0x5b01, 0x1283, 0x85be, 0x2431, 0x7dc3, 0x550c, 0x5d74, 0x72be, 0xb1fe, 0x80de, 0x06a7, 0x9bdc, 0xf174, 0xc19b
+.word 0x69c1, 0xe49b, 0x4786, 0xefbe, 0x9dc6, 0x0fc1, 0xa1cc, 0x240c, 0x2c6f, 0x2de9, 0x84aa, 0x4a74, 0xa9dc, 0x5cb0, 0x88da, 0x76f9
+.word 0x5152, 0x983e, 0xc66d, 0xa831, 0x27c8, 0xb003, 0x7fc7, 0xbf59, 0x0bf3, 0xc6e0, 0x9147, 0xd5a7, 0x6351, 0x06ca, 0x2967, 0x1429
+.word 0x0a85, 0x27b7, 0x2138, 0x2e1b, 0x6dfc, 0x4d2c, 0x0d13, 0x5338, 0x7354, 0x650a, 0x0abb, 0x766a, 0xc92e, 0x81c2, 0x2c85, 0x9272
+.word 0xe8a1, 0xa2bf, 0x664b, 0xa81a, 0x8b70, 0xc24b, 0x51a3, 0xc76c, 0xe819, 0xd192, 0x0624, 0xd699, 0x3585, 0xf40e, 0xa070, 0x106a
+.word 0xc116, 0x19a4, 0x6c08, 0x1e37, 0x774c, 0x2748, 0xbcb5, 0x34b0, 0x0cb3, 0x391c, 0xaa4a, 0x4ed8, 0xca4f, 0x5b9c, 0x6ff3, 0x682e
+.word 0x82ee, 0x748f, 0x636f, 0x78a5, 0x7814, 0x84c8, 0x0208, 0x8cc7, 0xfffa, 0x90be, 0x6ceb, 0xa450, 0xa3f7, 0xbef9, 0x78f2, 0xc671
+
+
+;###########################################################
+
+.global sha256_init
+;uint32_t sha256_init_vector[]={
+; 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
+; 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 };
+;
+;void sha256_init(sha256_ctx_t *state){
+; state->length=0;
+; memcpy(state->h, sha256_init_vector, 8*4);
+;}
+; param1: (r24,r25) 16-bit pointer to sha256_ctx_t struct in ram
+; modifys: Z(r30,r31), Func1, r22
+sha256_init:
+ movw r26, r24 ; (24,25) --> (26,27) load X with param1
+ ldi r30, lo8((sha256_init_vector))
+ ldi r31, hi8((sha256_init_vector))
+ ldi r22, 32+8
+sha256_init_vloop:
+ lpm r23, Z+
+ st X+, r23
+ dec r22
+ brne sha256_init_vloop
+ ret
+
+sha256_init_vector:
+.word 0xE667, 0x6A09
+.word 0xAE85, 0xBB67
+.word 0xF372, 0x3C6E
+.word 0xF53A, 0xA54F
+.word 0x527F, 0x510E
+.word 0x688C, 0x9B05
+.word 0xD9AB, 0x1F83
+.word 0xCD19, 0x5BE0
+.word 0x0000, 0x0000
+.word 0x0000, 0x0000
+
+;###########################################################
+
+.global rotl32
+; === ROTL32 ===
+; function that rotates a 32 bit word to the left
+; param1: the 32-bit word to rotate
+; given in r25,r24,r23,r22 (r25 is most significant)
+; param2: an 8-bit value telling how often to rotate
+; given in r20
+; modifys: r21, r22
+rotl32:
+ cpi r20, 8
+ brlo bitrotl
+ mov r21, r25
+ mov r25, r24
+ mov r24, r23
+ mov r23, r22
+ mov r22, r21
+ subi r20, 8
+ rjmp rotl32
+bitrotl:
+ clr r21
+ clc
+bitrotl_loop:
+ tst r20
+ breq fixrotl
+ rol r22
+ rol r23
+ rol r24
+ rol r25
+ rol r21
+ dec r20
+ rjmp bitrotl_loop
+fixrotl:
+ or r22, r21
+ ret
+
+
+;###########################################################
+
+.global rotr32
+; === ROTR32 ===
+; function that rotates a 32 bit word to the right
+; param1: the 32-bit word to rotate
+; given in r25,r24,r23,22 (r25 is most significant)
+; param2: an 8-bit value telling how often to rotate
+; given in r20
+; modifys: r21, r22
+rotr32:
+ cpi r20, 8
+ brlo bitrotr
+ mov r21, r22
+ mov r22, r23
+ mov r23, r24
+ mov r24, r25
+ mov r25, r21
+ subi r20, 8
+ rjmp rotr32
+bitrotr:
+ clr r21
+ clc
+bitrotr_loop:
+ tst r20
+ breq fixrotr
+ ror r25
+ ror r24
+ ror r23
+ ror r22
+ ror r21
+ dec r20
+ rjmp bitrotr_loop
+fixrotr:
+ or r25, r21
+ ret
+
+
+;###########################################################
+
+.global change_endian32
+; === change_endian32 ===
+; function that changes the endianess of a 32-bit word
+; param1: the 32-bit word
+; given in r25,r24,r23,22 (r25 is most significant)
+; modifys: r21, r22
+change_endian32:
+ movw r20, r22 ; (r22,r23) --> (r20,r21)
+ mov r22, r25
+ mov r23, r24
+ mov r24, r21
+ mov r25, r20
+ ret
+
diff --git a/shabea/sha256.h b/shabea/sha256.h
new file mode 100644
index 0000000..24960a3
--- /dev/null
+++ b/shabea/sha256.h
@@ -0,0 +1,122 @@
+/* sha256.h */
+/*
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file sha256.h
+ * \author Daniel Otte
+ * \date 2006-05-16
+ * \license GPLv3 or later
+ *
+ */
+
+#ifndef SHA256_H_
+#define SHA256_H_
+
+#define __LITTLE_ENDIAN__
+
+
+#include <stdint.h>
+
+/** \def SHA256_HASH_BITS
+ * defines the size of a SHA-256 hash value in bits
+ */
+
+/** \def SHA256_HASH_BYTES
+ * defines the size of a SHA-256 hash value in bytes
+ */
+
+/** \def SHA256_BLOCK_BITS
+ * defines the size of a SHA-256 input block in bits
+ */
+
+/** \def SHA256_BLOCK_BYTES
+ * defines the size of a SHA-256 input block in bytes
+ */
+
+#define SHA256_HASH_BITS 256
+#define SHA256_HASH_BYTES (SHA256_HASH_BITS/8)
+#define SHA256_BLOCK_BITS 512
+#define SHA256_BLOCK_BYTES (SHA256_BLOCK_BITS/8)
+
+/** \typedef sha256_ctx_t
+ * \brief SHA-256 context type
+ *
+ * A variable of this type may hold the state of a SHA-256 hashing process
+ */
+typedef struct {
+ uint32_t h[8];
+ uint64_t length;
+} sha256_ctx_t;
+
+/** \typedef sha256_hash_t
+ * \brief SHA-256 hash value type
+ *
+ * A variable of this type may hold the hash value produced by the
+ * sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state) function.
+ */
+typedef uint8_t sha256_hash_t[SHA256_HASH_BYTES];
+
+/** \fn void sha256_init(sha256_ctx_t *state)
+ * \brief initialise a SHA-256 context
+ *
+ * This function sets a ::sha256_ctx_t to the initial values for hashing.
+ * \param state pointer to the SHA-256 hashing context
+ */
+void sha256_init(sha256_ctx_t *state);
+
+/** \fn void sha256_nextBlock (sha256_ctx_t* state, const void* block)
+ * \brief update the context with a given block
+ *
+ * This function updates the SHA-256 hash context by processing the given block
+ * of fixed length.
+ * \param state pointer to the SHA-256 hash context
+ * \param block pointer to the block of fixed length (512 bit = 64 byte)
+ */
+void sha256_nextBlock (sha256_ctx_t* state, const void* block);
+
+/** \fn void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b)
+ * \brief finalize the context with the given block
+ *
+ * This function finalizes the SHA-256 hash context by processing the given block
+ * of variable length.
+ * \param state pointer to the SHA-256 hash context
+ * \param block pointer to the block of fixed length (512 bit = 64 byte)
+ * \param length_b the length of the block in bits
+ */
+void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b);
+
+/** \fn void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state)
+ * \brief convert the hash state into the hash value
+ * This function reads the context and writes the hash value to the destination
+ * \param dest pointer to the location where the hash value should be written
+ * \param state pointer to the SHA-256 hash context
+ */
+void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state);
+
+/** \fn void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b)
+ * \brief simple SHA-256 hashing function for direct hashing
+ *
+ * This function automatically hashes a given message of arbitrary length with
+ * the SHA-256 hashing algorithm.
+ * \param dest pointer to the location where the hash value is going to be written to
+ * \param msg pointer to the message thats going to be hashed
+ * \param length_b length of the message in bits
+ */
+void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b);
+
+#endif /*SHA256_H_*/
diff --git a/shabea.c b/shabea/shabea.c
similarity index 100%
rename from shabea.c
rename to shabea/shabea.c
diff --git a/shabea.h b/shabea/shabea.h
similarity index 100%
rename from shabea.h
rename to shabea/shabea.h
diff --git a/shacal1/sha1-asm.S b/shacal1/sha1-asm.S
new file mode 100644
index 0000000..f571685
--- /dev/null
+++ b/shacal1/sha1-asm.S
@@ -0,0 +1,886 @@
+/* sha1-asm.S */
+/*
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * Author: Daniel Otte
+ *
+ * License: GPLv3 or later
+*/
+; SHA1 implementation in assembler for AVR
+SHA1_BLOCK_BITS = 512
+SHA1_HASH_BITS = 160
+
+.macro precall
+ /* push r18 - r27, r30 - r31*/
+ push r0
+ push r1
+ push r18
+ push r19
+ push r20
+ push r21
+ push r22
+ push r23
+ push r24
+ push r25
+ push r26
+ push r27
+ push r30
+ push r31
+ clr r1
+.endm
+
+.macro postcall
+ pop r31
+ pop r30
+ pop r27
+ pop r26
+ pop r25
+ pop r24
+ pop r23
+ pop r22
+ pop r21
+ pop r20
+ pop r19
+ pop r18
+ pop r1
+ pop r0
+.endm
+
+
+.macro hexdump length
+ push r27
+ push r26
+ ldi r25, '\r'
+ mov r24, r25
+ call uart_putc
+ ldi r25, '\n'
+ mov r24, r25
+ call uart_putc
+ pop r26
+ pop r27
+ movw r24, r26
+.if \length > 16
+ ldi r22, lo8(16)
+ ldi r23, hi8(16)
+ push r27
+ push r26
+ call uart_hexdump
+ pop r26
+ pop r27
+ adiw r26, 16
+ hexdump \length-16
+.else
+ ldi r22, lo8(\length)
+ ldi r23, hi8(\length)
+ call uart_hexdump
+.endif
+.endm
+
+.macro delay
+/*
+ push r0
+ push r1
+ clr r0
+1: clr r1
+2: dec r1
+ brne 2b
+ dec r0
+ brne 1b
+ pop r1
+ pop r0 // */
+.endm
+
+/* X points to Block */
+.macro dbg_hexdump length
+/*
+ precall
+ hexdump \length
+ postcall
+ // */
+.endm
+
+
+
+.section .text
+
+SPL = 0x3D
+SPH = 0x3E
+SREG = 0x3F
+
+
+;
+;sha1_ctx_t is:
+;
+; [h0][h1][h2][h3][h4][length]
+; hn is 32 bit large, length is 64 bit large
+
+;###########################################################
+
+.global sha1_ctx2hash
+; === sha1_ctx2hash ===
+; this function converts a state into a normal hash (bytestring)
+; param1: the 16-bit destination pointer
+; given in r25,r24 (r25 is most significant)
+; param2: the 16-bit pointer to sha1_ctx structure
+; given in r23,r22
+sha1_ctx2hash:
+ movw r26, r22
+ movw r30, r24
+ ldi r21, 5
+ sbiw r26, 4
+1:
+ ldi r20, 4
+ adiw r26, 8
+2:
+ ld r0, -X
+ st Z+, r0
+ dec r20
+ brne 2b
+
+ dec r21
+ brne 1b
+
+ ret
+
+;###########################################################
+
+.global sha1
+; === sha1 ===
+; this function calculates SHA-1 hashes from messages in RAM
+; param1: the 16-bit hash destination pointer
+; given in r25,r24 (r25 is most significant)
+; param2: the 16-bit pointer to message
+; given in r23,r22
+; param3: 32-bit length value (length of message in bits)
+; given in r21,r20,r19,r18
+sha1:
+sha1_prolog:
+	push r8
+	push r9
+	push r10
+	push r11
+	push r12
+	push r13
+	push r16
+	push r17
+	in r16, SPL
+	in r17, SPH
+	subi r16, 5*4+8
+	sbci r17, 0
+	in r0, SREG
+	cli
+	out SPL, r16
+	out SPH, r17
+	out SREG, r0
+
+	push r25
+	push r24
+	inc r16
+	adc r17, r1
+
+	movw r8, r18 /* backup of length */
+	movw r10, r20
+
+	movw r12, r22 /* backup of msg-ptr */
+
+	movw r24, r16
+	rcall sha1_init
+	/* if length >= 512 */
+1:
+	tst r11
+	brne 4f
+	tst r10
+	brne 4f
+	mov r19, r9
+	cpi r19, 0x02
+	brlo 4f
+
+	movw r24, r16
+	movw r22, r12
+	rcall sha1_nextBlock
+	ldi r19, 64  /* one block is 64 bytes (was 0x64 == 100) */
+	add r12, r19 /* advance the SAVED msg-ptr; r22:r23 is clobbered by sha1_nextBlock */
+	adc r13, r1
+	/* length -= 512 */
+	ldi r19, 0x02
+	sub r9, r19
+	sbc r10, r1
+	sbc r11, r1
+	rjmp 1b
+
+4:
+	movw r24, r16
+	movw r22, r12
+	movw r20, r8
+	rcall sha1_lastBlock
+
+	pop r24
+	pop r25
+	movw r22, r16
+	rcall sha1_ctx2hash
+
+sha1_epilog:
+	in r30, SPL
+	in r31, SPH
+	adiw r30, 5*4+8
+	in r0, SREG
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG, r0
+	pop r17
+	pop r16
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	pop r9
+	pop r8
+	ret
+
+;###########################################################
+
+
+; block MUST NOT be larger than 64 bytes
+
+.global sha1_lastBlock
+; === sha1_lastBlock ===
+; this function does padding & Co. for calculating SHA-1 hashes
+; param1: the 16-bit pointer to sha1_ctx structure
+; given in r25,r24 (r25 is most significant)
+; param2: a 16-bit pointer to the 64 byte block to hash
+; given in r23,r22
+; param3: a 16-bit integer specifying the length of the block in bits
+; given in r21,r20
+sha1_lastBlock_localSpace = (SHA1_BLOCK_BITS/8+1)
+
+
+sha1_lastBlock:
+ cpi r21, 0x02 ; length >= 512 bits? then hash leading full blocks first
+ brlo sha1_lastBlock_prolog
+ push r25
+ push r24
+ push r23
+ push r22
+ push r21
+ push r20
+ rcall sha1_nextBlock
+ pop r20
+ pop r21
+ pop r22
+ pop r23
+ pop r24
+ pop r25
+ subi r21, 2 ; length -= 512 bits
+ subi r23, -2 ; NOTE(review): advances msg pointer by 512 BYTES (high byte += 2), but one block is only 64 bytes -- verify
+ rjmp sha1_lastBlock
+sha1_lastBlock_prolog:
+ /* allocate space on stack */
+ in r30, SPL
+ in r31, SPH
+ in r1, SREG ; r1 abused as scratch; zeroed again in the epilog
+ subi r30, lo8(64)
+ sbci r31, hi8(64) /* 16-bit SP -= 64 (hi8(64)==0) */
+ cli
+ out SPL, r30
+ out SPH, r31
+ out SREG,r1
+
+ adiw r30, 1 /* SP points to next free byte on stack */
+ mov r18, r20 /* r20 = LSB(length) */
+ lsr r18
+ lsr r18
+ lsr r18
+ bst r21, 0 /* bit0 of the high length byte ... */
+ bld r18, 5 /* ... is bit5 of length/8; now: r18 == length/8 (aka. length in bytes) */
+
+
+ movw r26, r22 /* X points to begin of msg */
+ tst r18
+ breq sha1_lastBlock_post_copy
+ mov r1, r18 ; r1 counts down to 0, so r1==0 after the loop
+sha1_lastBlock_copy_loop:
+ ld r0, X+
+ st Z+, r0
+ dec r1
+ brne sha1_lastBlock_copy_loop
+sha1_lastBlock_post_copy:
+sha1_lastBlock_insert_stuffing_bit:
+ ldi r19, 0x80
+ mov r0,r19
+ ldi r19, 0x07
+ and r19, r20 /* if we are in bitmode */
+ breq 2f /* no bitmode */
+1:
+ lsr r0 ; shift the 1-bit to just after the last message bit
+ dec r19
+ brne 1b
+ ld r19, X
+/* maybe we should do some ANDing here, just for safety */
+ or r0, r19
+2:
+ st Z+, r0
+ inc r18
+
+/* checking stuff here */
+ cpi r18, 64-8+1
+ brsh 0f
+ rjmp sha1_lastBlock_insert_zeros
+0:
+ /* no room left for the 64-bit length field in this block */
+ /* first we have to fill it up with zeros */
+ ldi r19, 64
+ sub r19, r18
+ breq 2f
+1:
+ st Z+, r1 ; r1 is zero here (the copy loop counted it down)
+ dec r19
+ brne 1b
+2:
+ sbiw r30, 63
+ sbiw r30, 1 ; Z back to begin of block (sbiw immediate max is 63)
+ movw r22, r30
+
+ push r31
+ push r30
+ push r25
+ push r24
+ push r21
+ push r20
+ rcall sha1_nextBlock
+ pop r20
+ pop r21
+ pop r24
+ pop r25
+ pop r30
+ pop r31
+
+ /* now we should subtract 512 from length */
+ movw r26, r24
+ adiw r26, 4*5+1 /* we can skip the lowest byte */
+ ld r19, X
+ subi r19, hi8(512)
+ st X+, r19
+ ldi r18, 6
+1:
+ ld r19, X
+ sbci r19, 0 ; propagate the borrow through the 64-bit length
+ st X+, r19
+ dec r18
+ brne 1b
+
+; clr r18 /* not necessary ;-) */
+ /* reset Z pointer to begin of block */
+
+sha1_lastBlock_insert_zeros:
+ ldi r19, 64-8
+ sub r19, r18
+ breq sha1_lastBlock_insert_length
+ clr r1
+1:
+ st Z+, r1 /* r1 is still zero */
+ dec r19
+ brne 1b
+
+; rjmp sha1_lastBlock_epilog
+sha1_lastBlock_insert_length:
+ movw r26, r24 /* X points to state */
+ adiw r26, 5*4 /* X points to (state.length) */
+ adiw r30, 8 /* Z points one after the last byte of block */
+ ld r0, X+ ; add this block's bit count to state.length ...
+ add r0, r20
+ st -Z, r0 ; ... storing the sum big-endian at the block's end
+ ld r0, X+
+ adc r0, r21
+ st -Z, r0
+ ldi r19, 6
+1:
+ ld r0, X+
+ adc r0, r1
+ st -Z, r0
+ dec r19
+ brne 1b
+
+ sbiw r30, 64-8 ; Z back to begin of block
+ movw r22, r30
+ rcall sha1_nextBlock
+
+sha1_lastBlock_epilog:
+ in r30, SPL
+ in r31, SPH
+ in r1, SREG
+ adiw r30, 63 ; lo8(64)
+ adiw r30, 1 ; hi8(64)
+ cli
+ out SPL, r30
+ out SPH, r31
+ out SREG,r1
+ clr r1 ; restore the avr-gcc zero register
+ clr r0
+ ret
+
+/**/
+;###########################################################
+
+.global sha1_nextBlock
+; === sha1_nextBlock ===
+; this is the core function for calculating SHA-1 hashes
+; param1: the 16-bit pointer to sha1_ctx structure
+; given in r25,r24 (r25 is most significant)
+; param2: a 16-bit pointer to the 64 byte block to hash
+; given in r23,r22
+sha1_nextBlock_localSpace = (16+5+1)*4 ; 16 32-bit values for w array and 5 32-bit values for a array (total 84 byte)
+
+xtmp = 0
+xNULL = 1
+W1 = 10
+W2 = 11
+T1 = 12
+T2 = 13
+T3 = 14
+T4 = 15
+LoopC = 16
+S = 17
+tmp1 = 18
+tmp2 = 19
+tmp3 = 20
+tmp4 = 21
+F1 = 22
+F2 = 23
+F3 = 24
+F4 = 25
+
+/* byteorder: high number <--> high significance */
+sha1_nextBlock:
+ ; initial, let's make some space ready for local vars
+ /* replace push & pop by mem ops? */
+ push r10
+ push r11
+ push r12
+ push r13
+ push r14
+ push r15
+ push r16
+ push r17
+ push r28
+ push r29
+ in r20, SPL
+ in r21, SPH
+ movw r18, r20 ;backup SP
+; movw r26, r20 ; X points to free space on stack /* maybe removeable? */
+ movw r30, r22 ; Z points to message
+ subi r20, lo8(sha1_nextBlock_localSpace) ;sbiw can do only up to 63
+ sbci r21, hi8(sha1_nextBlock_localSpace)
+ movw r26, r20 ; X points to free space on stack
+ in r0, SREG
+ cli ; we want to be uninterrupted while updating SP
+ out SPL, r20
+ out SPH, r21
+ out SREG, r0
+
+ push r18
+ push r19 /* push old SP on new stack */
+ push r24
+ push r25 /* param1 will be needed later */
+
+ /* load a[] with state */
+ movw 28, r24 /* load pointer to state in Y */ /* NOTE(review): bare 28 == r28; consider writing r28 for clarity */
+ adiw r26, 1 ; X++
+
+ ldi LoopC, 5*4
+1: ld tmp1, Y+
+ st X+, tmp1
+ dec LoopC
+ brne 1b
+
+ movw W1, r26 /* save pointer to w[0] */
+ /* load w[] with endian fixed message */
+ /* we might also use the changeendian32() function at bottom */
+ movw r30, r22 /* mv param2 (pointer to msg) to Z */
+ ldi LoopC, 16
+1:
+ ldd tmp1, Z+3 ; reverse each 4-byte group: big-endian words
+ st X+, tmp1
+ ldd tmp1, Z+2
+ st X+, tmp1
+ ldd tmp1, Z+1
+ st X+, tmp1
+ ld tmp1, Z
+ st X+, tmp1
+ adiw r30, 4
+ dec LoopC
+ brne 1b
+
+ ;clr LoopC /* LoopC is named t in FIPS 180-2 */
+ clr xtmp
+sha1_nextBlock_mainloop:
+ mov S, LoopC
+ lsl S
+ lsl S
+ andi S, 0x3C /* S is a bytepointer so *4; w[] is used as a 16-entry ring buffer */
+ /* load w[s] */
+ movw r26, W1
+ add r26, S /* X points at w[s] */
+ adc r27, xNULL
+ ld T1, X+
+ ld T2, X+
+ ld T3, X+
+ ld T4, X+
+
+ /* debug dump of w[s]; needs uart -- remove for production builds */
+ push r26
+ push r27
+ push T4
+ push T3
+ push T2
+ push T1
+ in r26, SPL
+ in r27, SPH
+ adiw r26, 1
+ dbg_hexdump 4
+ pop T1
+ pop T2
+ pop T3
+ pop T4
+ pop r27
+ pop r26
+ /**/
+
+ cpi LoopC, 16
+ brlt sha1_nextBlock_mainloop_core
+ /* update w[s] */
+ ldi tmp1, 2*4
+ rcall 1f
+ ldi tmp1, 8*4
+ rcall 1f
+ ldi tmp1, 13*4
+ rcall 1f
+ rjmp 2f
+1: /* xor w[s] with w[(s+offset) mod 16]; this might be "outsourced" to save the jump above */
+ add tmp1, S
+ andi tmp1, 0x3f
+ movw r26, W1
+ add r26, tmp1
+ adc r27, xNULL
+ ld tmp2, X+
+ eor T1, tmp2
+ ld tmp2, X+
+ eor T2, tmp2
+ ld tmp2, X+
+ eor T3, tmp2
+ ld tmp2, X+
+ eor T4, tmp2
+ ret
+2: /* now we just have to do a ROTL(T) and save T back */
+ mov tmp2, T4
+ rol tmp2 ; copy MSB into carry first
+ rol T1
+ rol T2
+ rol T3
+ rol T4
+ movw r26, W1
+ add r26, S
+ adc r27, xNULL
+ st X+, T1
+ st X+, T2
+ st X+, T3
+ st X+, T4
+
+sha1_nextBlock_mainloop_core: /* the core function; T=ROTL5(a) ....*/
+ /* T already contains w[s] */
+ movw r26, W1
+ sbiw r26, 4*1 /* X points at a[4] aka e */
+ ld tmp1, X+
+ add T1, tmp1
+ ld tmp1, X+
+ adc T2, tmp1
+ ld tmp1, X+
+ adc T3, tmp1
+ ld tmp1, X+
+ adc T4, tmp1 /* T = w[s]+e */
+ sbiw r26, 4*5 /* X points at a[0] aka a */
+ ld F1, X+
+ ld F2, X+
+ ld F3, X+
+ ld F4, X+
+ mov tmp1, F4 /* X points at a[1] aka b */
+ ldi tmp2, 5
+1:
+ rol tmp1 ; 32-bit ROTL by 1, five times
+ rol F1
+ rol F2
+ rol F3
+ rol F4
+ dec tmp2
+ brne 1b
+
+ add T1, F1
+ adc T2, F2
+ adc T3, F3
+ adc T4, F4 /* T = ROTL(a,5) + e + w[s] */
+
+ /* now we have to do the round-dependent stuff */
+ ldi r30, lo8(sha1_nextBlock_xTable)
+ ldi r31, hi8(sha1_nextBlock_xTable)
+ add r30, xtmp
+ adc r31, xNULL
+ lpm tmp1, Z
+ cp tmp1, LoopC ; move to the next 20-round stage when t hits the table value
+ brne 1f
+ inc xtmp
+1: ldi r30, lo8(sha1_nextBlock_KTable)
+ ldi r31, hi8(sha1_nextBlock_KTable)
+ lsl xtmp
+ lsl xtmp ; xtmp*4 == byte offset of k_t
+ add r30, xtmp
+ adc r31, xNULL
+ lsr xtmp
+ lsr xtmp
+
+ lpm tmp1, Z+
+ add T1, tmp1
+ lpm tmp1, Z+
+ adc T2, tmp1
+ lpm tmp1, Z+
+ adc T3, tmp1
+ lpm tmp1, Z+
+ adc T4, tmp1
+ /* T = ROTL(a,5) + e + kt + w[s] */
+
+ /* Z-4 is just pointing to kt ... */
+ movw r28, r26 /* copy X in Y */
+ adiw r30, 3*4 /* now Z points to the right location in our jump-vector-table */
+ lsr r31
+ ror r30 ; icall takes a word address
+
+ icall ; f_t is computed bytewise: one icall per byte
+ mov F1, tmp1
+ icall
+ mov F2, tmp1
+ icall
+ mov F3, tmp1
+ icall
+
+ add T1, F1
+ adc T2, F2
+ adc T3, F3
+ adc T4, tmp1 /* T = ROTL5(a) + f_t(b,c,d) + e + k_t + w[s] */
+ /* X points still at a[1] aka b, Y points at a[2] aka c */
+ /* update a[] */
+sha1_nextBlock_update_a:
+ /*first we move all vars in a[] "one up" e=d, d=c, c=b, b=a*/
+ //adiw r28, 3*4 /* Y should point at a[4] aka e */
+ movw r28, W1
+ sbiw r28, 4
+
+ ldi tmp2, 4*4
+1:
+ ld tmp1, -Y
+ std Y+4, tmp1
+ dec tmp2
+ brne 1b
+ /* Y points at a[0] aka a*/
+
+ movw r28, W1
+ sbiw r28, 5*4
+ /* store T in a[0] aka a */
+ st Y+, T1
+ st Y+, T2
+ st Y+, T3
+ st Y+, T4
+ /* Y points at a[1] aka b*/
+
+ /* rotate c */ /* c = ROTL30(b_old) == ROTR2 of the value just shifted up */
+ ldd T1, Y+1*4
+ ldd T2, Y+1*4+1
+ ldd T3, Y+1*4+2
+ ldd T4, Y+1*4+3
+ mov tmp1, T1
+ ldi tmp2, 2
+1: ror tmp1
+ ror T4
+ ror T3
+ ror T2
+ ror T1
+ dec tmp2
+ brne 1b
+ std Y+1*4+0, T1
+ std Y+1*4+1, T2
+ std Y+1*4+2, T3
+ std Y+1*4+3, T4
+
+ push r27 ; debug dump of a[]; needs uart -- remove for production builds
+ push r26
+ movw r26, W1
+ sbiw r26, 4*5
+ dbg_hexdump 4*5
+ pop r26
+ pop r27
+
+ inc LoopC
+ cpi LoopC, 80
+ brge 1f
+ rjmp sha1_nextBlock_mainloop
+/**************************************/
+1:
+ /* little patch */
+ sbiw r28, 4
+
+/* add a[] to state and inc length */
+ pop r27
+ pop r26 /* now X points to state (and Y still at a[0]) */
+ ldi tmp4, 5
+1: clc ; each 32-bit word gets its own carry chain
+ ldi tmp3, 4
+2: ld tmp1, X
+ ld tmp2, Y+
+ adc tmp1, tmp2
+ st X+, tmp1
+ dec tmp3
+ brne 2b
+ dec tmp4
+ brne 1b
+
+ /* now length += 512 */
+ adiw r26, 1 /* we skip the least significant byte */
+ ld tmp1, X
+ ldi tmp2, hi8(512) /* 2 */
+ add tmp1, tmp2
+ st X+, tmp1
+ ldi tmp2, 6
+1:
+ ld tmp1, X
+ adc tmp1, xNULL ; propagate the carry through the 64-bit length
+ st X+, tmp1
+ dec tmp2
+ brne 1b
+
+; EPILOG
+sha1_nextBlock_epilog:
+/* now we should clean up the stack */
+ pop r21
+ pop r20
+ in r0, SREG
+ cli ; we want to be uninterrupted while updating SP
+ out SPL, r20
+ out SPH, r21
+ out SREG, r0
+
+ clr r1 ; restore the avr-gcc zero register
+ pop r29
+ pop r28
+ pop r17
+ pop r16
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop r11
+ pop r10
+ ret
+
+sha1_nextBlock_xTable: ; round numbers where k_t / f_t change (stages of 20 rounds)
+.byte 20,40,60,0
+sha1_nextBlock_KTable: ; k_t constants, stored little-endian for lpm Z+
+.int 0x5a827999
+.int 0x6ed9eba1
+.int 0x8f1bbcdc
+.int 0xca62c1d6
+sha1_nextBlock_JumpTable: ; one 2-word slot per stage; indexed right after KTable
+rjmp sha1_nextBlock_Ch
+ nop
+rjmp sha1_nextBlock_Parity
+ nop
+rjmp sha1_nextBlock_Maj
+ nop
+rjmp sha1_nextBlock_Parity
+
+ /* X and Y still point at a[1] aka b ; each call consumes one byte via Y+ ; return value in tmp1 */
+sha1_nextBlock_Ch: ; Ch(x,y,z) = (x & y) ^ (~x & z), one byte at a time
+ ld tmp1, Y+
+ mov tmp2, tmp1
+ com tmp2
+ ldd tmp3, Y+3 /* load from c */
+ and tmp1, tmp3
+ ldd tmp3, Y+7 /* load from d */
+ and tmp2, tmp3
+ eor tmp1, tmp2
+ ret
+
+sha1_nextBlock_Maj: ; Maj(x,y,z) = (x&y) ^ (x&z) ^ (y&z), one byte at a time
+ ld tmp1, Y+
+ mov tmp2, tmp1
+ ldd tmp3, Y+3 /* load from c */
+ and tmp1, tmp3
+ ldd tmp4, Y+7 /* load from d */
+ and tmp2, tmp4
+ eor tmp1, tmp2
+ and tmp3, tmp4
+ eor tmp1, tmp3
+ ret
+
+sha1_nextBlock_Parity: ; Parity(x,y,z) = x ^ y ^ z, one byte at a time
+ ld tmp1, Y+
+ ldd tmp2, Y+3 /* load from c */
+ eor tmp1, tmp2
+ ldd tmp2, Y+7 /* load from d */
+ eor tmp1, tmp2
+ ret
+/*
+ch_str: .asciz "\r\nCh"
+maj_str: .asciz "\r\nMaj"
+parity_str: .asciz "\r\nParity"
+*/
+;###########################################################
+
+.global sha1_init
+;void sha1_init(sha1_ctx_t *state){
+; DEBUG_S("\r\nSHA1_INIT");
+; state->h[0] = 0x67452301;
+; state->h[1] = 0xefcdab89;
+; state->h[2] = 0x98badcfe;
+; state->h[3] = 0x10325476;
+; state->h[4] = 0xc3d2e1f0;
+; state->length = 0;
+;}
+; param1: (r25,r24) 16-bit pointer to sha1_ctx_t struct in ram
+; modifies: X(r26,r27), Z(r30,r31), r22, r23 ; relies on r1 == 0 (avr-gcc convention)
+sha1_init:
+ movw r26, r24 ; (24,25) --> (26,27) load X with param1
+ ldi r30, lo8((sha1_init_vector))
+ ldi r31, hi8((sha1_init_vector))
+ ldi r22, 5*4 /* bytes to copy */
+sha1_init_vloop:
+ lpm r23, Z+ ; copy init vector from flash into state->h[]
+ st X+, r23
+ dec r22
+ brne sha1_init_vloop
+ ldi r22, 8
+sha1_init_lloop:
+ st X+, r1 ; zero the 64-bit length (r1 is the zero register)
+ dec r22
+ brne sha1_init_lloop
+ ret
+
+sha1_init_vector:
+.int 0x67452301;
+.int 0xefcdab89;
+.int 0x98badcfe;
+.int 0x10325476;
+.int 0xc3d2e1f0;
+
diff --git a/shacal1/sha1.h b/shacal1/sha1.h
new file mode 100644
index 0000000..6675d20
--- /dev/null
+++ b/shacal1/sha1.h
@@ -0,0 +1,117 @@
+/* sha1.h */
+/*
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file sha1.h
+ * \author Daniel Otte
+ * \email daniel.otte@rub.de
+ * \date 2006-10-08
+ * \license GPLv3 or later
+ * \brief SHA-1 declaration.
+ * \ingroup SHA-1
+ *
+ */
+
+#ifndef SHA1_H_
+#define SHA1_H_
+
+#include <stdint.h>
+/** \def SHA1_HASH_BITS
+ * defines the size of a SHA-1 hash in bits
+ */
+
+/** \def SHA1_HASH_BYTES
+ * defines the size of a SHA-1 hash in bytes
+ */
+
+/** \def SHA1_BLOCK_BITS
+ * defines the size of a SHA-1 input block in bits
+ */
+
+/** \def SHA1_BLOCK_BYTES
+ * defines the size of a SHA-1 input block in bytes
+ */
+#define SHA1_HASH_BITS 160
+#define SHA1_HASH_BYTES (SHA1_HASH_BITS/8)
+#define SHA1_BLOCK_BITS 512
+#define SHA1_BLOCK_BYTES (SHA1_BLOCK_BITS/8)
+
+/** \typedef sha1_ctx_t
+ * \brief SHA-1 context type
+ *
+ * A variable of this type may hold the state of a SHA-1 hashing process
+ */
+typedef struct {
+ uint32_t h[5];
+ uint64_t length;
+} sha1_ctx_t;
+
+/** \typedef sha1_hash_t
+ * \brief hash value type
+ * A variable of this type may hold a SHA-1 hash value
+ */
+typedef uint8_t sha1_hash_t[SHA1_HASH_BITS/8];
+
+/** \fn sha1_init(sha1_ctx_t *state)
+ * \brief initializes a SHA-1 context
+ * This function sets a ::sha1_ctx_t variable to the initialization vector
+ * for SHA-1 hashing.
+ * \param state pointer to the SHA-1 context variable
+ */
+void sha1_init(sha1_ctx_t *state);
+
+/** \fn sha1_nextBlock(sha1_ctx_t *state, const void* block)
+ * \brief process one input block
+ * This function processes one input block and updates the hash context
+ * accordingly
+ * \param state pointer to the state variable to update
+ * \param block pointer to the message block to process
+ */
+void sha1_nextBlock (sha1_ctx_t *state, const void* block);
+
+/** \fn sha1_lastBlock(sha1_ctx_t *state, const void* block, uint16_t length_b)
+ * \brief processes the given block and finalizes the context
+ * This function processes the last block in a SHA-1 hashing process.
+ * The block should have a maximum length of a single input block.
+ * \param state pointer to the state variable to update and finalize
+ * \param block pointer to the message block to process
+ * \param length_b length of the message block in bits
+ */
+void sha1_lastBlock (sha1_ctx_t *state, const void* block, uint16_t length_b);
+
+/** \fn sha1_ctx2hash(sha1_hash_t *dest, sha1_ctx_t *state)
+ * \brief convert a state variable into an actual hash value
+ * Writes the hash value corresponding to the state to the memory pointed by dest.
+ * \param dest pointer to the hash value destination
+ * \param state pointer to the hash context
+ */
+void sha1_ctx2hash (sha1_hash_t *dest, sha1_ctx_t *state);
+
+/** \fn sha1(sha1_hash_t *dest, const void* msg, uint32_t length_b)
+ * \brief hashing a message which is located entirely in RAM
+ * This function automatically hashes a message which is entirely in RAM with
+ * the SHA-1 hashing algorithm.
+ * \param dest pointer to the hash value destination
+ * \param msg pointer to the message which should be hashed
+ * \param length_b length of the message in bits
+ */
+void sha1(sha1_hash_t *dest, const void* msg, uint32_t length_b);
+
+
+
+#endif /*SHA1_H_*/
diff --git a/shacal1_enc.c b/shacal1/shacal1_enc.c
similarity index 100%
rename from shacal1_enc.c
rename to shacal1/shacal1_enc.c
diff --git a/shacal1_enc.h b/shacal1/shacal1_enc.h
similarity index 100%
rename from shacal1_enc.h
rename to shacal1/shacal1_enc.h
diff --git a/shacal2/sha256-asm.S b/shacal2/sha256-asm.S
new file mode 100644
index 0000000..d9eb6b6
--- /dev/null
+++ b/shacal2/sha256-asm.S
@@ -0,0 +1,1042 @@
+/* sha256-asm.S */
+/*
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see .
+*/
+/*
+ * Author: Daniel Otte
+ *
+ * License: GPLv3 or later
+*/
+; sha-256 implementation in assembler
+SHA256_BLOCK_BITS = 512
+SHA256_HASH_BITS = 256
+
+.macro precall
+ /* save the call-clobbered regs: r0, r1, r18 - r27, r30 - r31 */
+ push r0
+ push r1
+ push r18
+ push r19
+ push r20
+ push r21
+ push r22
+ push r23
+ push r24
+ push r25
+ push r26
+ push r27
+ push r30
+ push r31
+ clr r1
+.endm
+
+.macro postcall
+ pop r31
+ pop r30
+ pop r27
+ pop r26
+ pop r25
+ pop r24
+ pop r23
+ pop r22
+ pop r21
+ pop r20
+ pop r19
+ pop r18
+ pop r1
+ pop r0
+.endm
+
+
+.macro hexdump length
+ push r27 ; debug helper: dumps \length bytes at X over uart (16 bytes per call, recursing)
+ push r26
+ ldi r25, '\r'
+ mov r24, r25
+ call uart_putc
+ ldi r25, '\n'
+ mov r24, r25
+ call uart_putc
+ pop r26
+ pop r27
+ movw r24, r26
+.if \length > 16
+ ldi r22, lo8(16)
+ ldi r23, hi8(16)
+ push r27
+ push r26
+ call uart_hexdump
+ pop r26
+ pop r27
+ adiw r26, 16
+ hexdump \length-16
+.else
+ ldi r22, lo8(\length)
+ ldi r23, hi8(\length)
+ call uart_hexdump
+.endif
+.endm
+
+/* X points to Block */
+.macro dbg_hexdump length
+ precall ; save registers, dump, restore
+ hexdump \length
+ postcall
+.endm
+
+.section .text
+
+SPL = 0x3D
+SPH = 0x3E
+SREG = 0x3F
+
+
+;
+;sha256_ctx_t is:
+;
+; [h0][h1][h2][h3][h4][h5][h6][h7][length]
+; hn is 32 bit large, length is 64 bit large
+
+;###########################################################
+
+.global sha256_ctx2hash
+; === sha256_ctx2hash ===
+; this function converts a state into a normal hash (bytestring)
+; copies the 8 little-endian 32-bit state words as big-endian bytes to dest
+; param1: the 16-bit destination pointer
+; given in r25,r24 (r25 is most significant)
+; param2: the 16-bit pointer to sha256_ctx structure
+; given in r23,r22
+sha256_ctx2hash:
+ movw r26, r22 ; X = ctx
+ movw r30, r24 ; Z = dest
+ ldi r21, 8 ; 8 words
+ sbiw r26, 4
+1:
+ ldi r20, 4 ; 4 bytes per word
+ adiw r26, 8 ; X to one past the current word ...
+2:
+ ld r0, -X ; ... then read it backwards (endian swap)
+ st Z+, r0
+ dec r20
+ brne 2b
+
+ dec r21
+ brne 1b
+
+ ret
+
+;###########################################################
+
+.global sha256
+; === sha256 ===
+; this function calculates SHA-256 hashes from messages in RAM
+; it allocates a sha256_ctx_t on the stack, feeds full 64-byte blocks to
+; sha256_nextBlock, pads via sha256_lastBlock and writes the hash to dest
+; param1: the 16-bit hash destination pointer
+; given in r25,r24 (r25 is most significant)
+; param2: the 16-bit pointer to message
+; given in r23,r22
+; param3: 32-bit length value (length of message in bits)
+; given in r21,r20,r19,r18
+sha256:
+sha256_prolog:
+ push r8
+ push r9
+ push r10
+ push r11
+ push r12
+ push r13
+ push r16
+ push r17
+ in r16, SPL
+ in r17, SPH
+ subi r16, 8*4+8 ; allocate sizeof(sha256_ctx_t) == 40 bytes on the stack
+ sbci r17, 0
+ in r0, SREG
+ cli
+ out SPL, r16
+ out SPH, r17
+ out SREG, r0
+
+ push r25
+ push r24
+ subi r16, lo8(-1) ; ctx = SP+1; 16-bit increment (inc does not set carry, so
+ sbci r17, hi8(-1) ; the former inc/adc pair lost the carry into the high byte)
+
+ movw r8, r18 /* backup of length */
+ movw r10, r20
+
+ movw r12, r22 /* backup of msg-ptr */
+
+ movw r24, r16
+ rcall sha256_init
+ /* while length >= 512 bits: process one full block per iteration */
+1:
+ tst r11
+ brne 4f
+ tst r10
+ brne 4f
+ mov r19, r9
+ cpi r19, 0x02
+ brlo 4f
+
+ movw r24, r16
+ movw r22, r12
+ rcall sha256_nextBlock
+ ldi r19, 64 ; advance the BACKUP msg pointer by one 64-byte block
+ add r12, r19 ; (adding 0x64 to r22/r23 was wrong twice: 0x64 == 100,
+ adc r13, r1 ; and r22/r23 get reloaded from r12 at the loop top)
+ /* length -= 512 */
+ ldi r19, 0x02
+ sub r9, r19
+ sbc r10, r1
+ sbc r11, r1
+ rjmp 1b
+
+4:
+ movw r24, r16
+ movw r22, r12
+ movw r20, r8 ; remaining length < 512 fits in 16 bits
+ rcall sha256_lastBlock
+
+ pop r24
+ pop r25
+ movw r22, r16
+ rcall sha256_ctx2hash
+
+sha256_epilog:
+ in r30, SPL
+ in r31, SPH
+ adiw r30, 8*4+8 ; release the ctx
+ in r0, SREG
+ cli
+ out SPL, r30
+ out SPH, r31
+ out SREG, r0
+ pop r17
+ pop r16
+ pop r13
+ pop r12
+ pop r11
+ pop r10
+ pop r9
+ pop r8
+ ret
+
+;###########################################################
+
+
+; block MUST NOT be larger than 64 bytes
+
+.global sha256_lastBlock
+; === sha256_lastBlock ===
+; this function does padding & Co. for calculating SHA-256 hashes
+; param1: the 16-bit pointer to sha256_ctx structure
+; given in r25,r24 (r25 is most significant)
+; param2: a 16-bit pointer to the 64 byte block to hash
+; given in r23,r22
+; param3: a 16-bit integer specifying the length of the block in bits
+; given in r21,r20
+sha256_lastBlock_localSpace = (SHA256_BLOCK_BITS/8+1)
+
+
+sha256_lastBlock:
+ cpi r21, 0x02 ; length >= 512 bits? then hash leading full blocks first
+ brlo sha256_lastBlock_prolog
+ push r25
+ push r24
+ push r23
+ push r22
+ push r21
+ push r20
+ rcall sha256_nextBlock
+ pop r20
+ pop r21
+ pop r22
+ pop r23
+ pop r24
+ pop r25
+ subi r21, 0x02 ; length -= 512 bits
+ subi r23, -2 ; NOTE(review): advances msg pointer by 512 BYTES (high byte += 2), but one block is only 64 bytes -- verify
+ rjmp sha256_lastBlock
+sha256_lastBlock_prolog:
+ /* allocate space on stack */
+ in r30, SPL
+ in r31, SPH
+ in r1, SREG ; r1 abused as scratch; zeroed again in the epilog
+ subi r30, lo8(64)
+ sbci r31, hi8(64) /* 16-bit SP -= 64 (hi8(64)==0) */
+ cli
+ out SPL, r30
+ out SPH, r31
+ out SREG,r1
+
+ adiw r30, 1 /* SP points to next free byte on stack */
+ mov r18, r20 /* r20 = LSB(length) */
+ lsr r18
+ lsr r18
+ lsr r18
+ bst r21, 0 /* bit0 of the high length byte ... */
+ bld r18, 5 /* ... is bit5 of length/8; now: r18 == length/8 (aka. length in bytes) */
+
+
+ movw r26, r22 /* X points to begin of msg */
+ tst r18
+ breq sha256_lastBlock_post_copy
+ mov r1, r18 ; r1 counts down to 0, so r1==0 after the loop
+sha256_lastBlock_copy_loop:
+ ld r0, X+
+ st Z+, r0
+ dec r1
+ brne sha256_lastBlock_copy_loop
+sha256_lastBlock_post_copy:
+sha256_lastBlock_insert_stuffing_bit:
+ ldi r19, 0x80
+ mov r0,r19
+ ldi r19, 0x07
+ and r19, r20 /* if we are in bitmode */
+ breq 2f /* no bitmode */
+1:
+ lsr r0 ; shift the 1-bit to just after the last message bit
+ dec r19
+ brne 1b
+ ld r19, X
+/* maybe we should do some ANDing here, just for safety */
+ or r0, r19
+2:
+ st Z+, r0
+ inc r18
+
+/* checking stuff here */
+ cpi r18, 64-8+1
+ brsh 0f
+ rjmp sha256_lastBlock_insert_zeros
+0:
+ /* no room left for the 64-bit length field in this block */
+ /* first we have to fill it up with zeros */
+ ldi r19, 64
+ sub r19, r18
+ breq 2f
+1:
+ st Z+, r1 ; r1 is zero here (the copy loop counted it down)
+ dec r19
+ brne 1b
+2:
+ sbiw r30, 63
+ sbiw r30, 1 ; Z back to begin of block (sbiw immediate max is 63)
+ movw r22, r30
+
+ push r31
+ push r30
+ push r25
+ push r24
+ push r21
+ push r20
+ rcall sha256_nextBlock
+ pop r20
+ pop r21
+ pop r24
+ pop r25
+ pop r30
+ pop r31
+
+ /* now we should subtract 512 from length */
+ movw r26, r24
+ adiw r26, 4*8+1 /* we can skip the lowest byte */
+ ld r19, X
+ subi r19, hi8(512)
+ st X+, r19
+ ldi r18, 6
+1:
+ ld r19, X
+ sbci r19, 0 ; propagate the borrow through the 64-bit length
+ st X+, r19
+ dec r18
+ brne 1b
+
+; clr r18 /* not necessary ;-) */
+ /* reset Z pointer to begin of block */
+
+sha256_lastBlock_insert_zeros:
+ ldi r19, 64-8
+ sub r19, r18
+ breq sha256_lastBlock_insert_length
+ clr r1
+1:
+ st Z+, r1 /* r1 is still zero */
+ dec r19
+ brne 1b
+
+; rjmp sha256_lastBlock_epilog
+sha256_lastBlock_insert_length:
+ movw r26, r24 /* X points to state */
+ adiw r26, 8*4 /* X points to (state.length) */
+ adiw r30, 8 /* Z points one after the last byte of block */
+ ld r0, X+ ; add this block's bit count to state.length ...
+ add r0, r20
+ st -Z, r0 ; ... storing the sum big-endian at the block's end
+ ld r0, X+
+ adc r0, r21
+ st -Z, r0
+ ldi r19, 6
+1:
+ ld r0, X+
+ adc r0, r1
+ st -Z, r0
+ dec r19
+ brne 1b
+
+ sbiw r30, 64-8 ; Z back to begin of block
+ movw r22, r30
+ rcall sha256_nextBlock
+
+sha256_lastBlock_epilog:
+ in r30, SPL
+ in r31, SPH
+ in r1, SREG
+ adiw r30, 63 ; lo8(64)
+ adiw r30, 1 ; hi8(64)
+ cli
+ out SPL, r30
+ out SPH, r31
+ out SREG,r1
+ clr r1 ; restore the avr-gcc zero register
+ clr r0
+ ret
+
+/**/
+;###########################################################
+
+.global sha256_nextBlock
+; === sha256_nextBlock ===
+; this is the core function for calculating SHA-256 hashes
+; param1: the 16-bit pointer to sha256_ctx structure
+; given in r25,r24 (r25 is most significant)
+; param2: a 16-bit pointer to the 64 byte block to hash
+; given in r23,r22
+sha256_nextBlock_localSpace = (64+8)*4 ; 64 32-bit values for w array and 8 32-bit values for a array (total 288 byte)
+
+Bck1 = 12
+Bck2 = 13
+Bck3 = 14
+Bck4 = 15
+Func1 = 22
+Func2 = 23
+Func3 = 24
+Func4 = 25
+Accu1 = 16
+Accu2 = 17
+Accu3 = 18
+Accu4 = 19
+XAccu1 = 8
+XAccu2 = 9
+XAccu3 = 10
+XAccu4 = 11
+T1 = 4
+T2 = 5
+T3 = 6
+T4 = 7
+LoopC = 1
+/* byteorder: high number <--> high significance */
+sha256_nextBlock:
+ ; initial, let's make some space ready for local vars
+ push r4 /* replace push & pop by mem ops? */
+ push r5
+ push r6
+ push r7
+ push r8
+ push r9
+ push r10
+ push r11
+ push r12
+ push r13
+ push r14
+ push r15
+ push r16
+ push r17
+ push r28
+ push r29
+ in r20, SPL
+ in r21, SPH
+ movw r18, r20 ;backup SP
+; movw r26, r20 ; X points to free space on stack
+ movw r30, r22 ; Z points to message
+ subi r20, lo8(sha256_nextBlock_localSpace) ;sbiw can do only up to 63
+ sbci r21, hi8(sha256_nextBlock_localSpace)
+ movw r26, r20 ; X points to free space on stack
+ in r0, SREG
+ cli ; we want to be uninterrupted while updating SP
+ out SPL, r20
+ out SPH, r21
+ out SREG, r0
+ push r18
+ push r19 ; old SP saved on the new stack
+ push r24
+ push r25 /* param1 will be needed later */
+ ; now we fill the w array with message (think about endianess)
+ adiw r26, 1 ; X++
+ ldi r20, 16
+sha256_nextBlock_wcpyloop:
+ ld r23, Z+ ; reverse each 4-byte group: big-endian words
+ ld r22, Z+
+ ld r19, Z+
+ ld r18, Z+
+ st X+, r18
+ st X+, r19
+ st X+, r22
+ st X+, r23
+ dec r20
+ brne sha256_nextBlock_wcpyloop
+/* for (i=16; i<64; ++i){
+ w[i] = SIGMA_b(w[i-2]) + w[i-7] + SIGMA_a(w[i-15]) + w[i-16];
+ } */
+ /* r25,r24,r23,r22 (r21,r20) are function values
+ r19,r18,r17,r16 are the accumulator
+ r15,r14,r13,r12 are backup1
+ r11,r10,r9 ,r8 are xor accu
+ r1 is round counter */
+
+ ldi r20, 64-16
+ mov LoopC, r20
+sha256_nextBlock_wcalcloop:
+ movw r30, r26 ; cp X to Z
+ sbiw r30, 63
+ sbiw r30, 1 ; subtract 64 = 16*4
+ ld Accu1, Z+
+ ld Accu2, Z+
+ ld Accu3, Z+
+ ld Accu4, Z+ /* w[i] = w[i-16] */
+ ld Bck1, Z+
+ ld Bck2, Z+
+ ld Bck3, Z+
+ ld Bck4, Z+ /* backup = w[i-15] */
+ /* now sigma 0 */
+ mov Func1, Bck2
+ mov Func2, Bck3
+ mov Func3, Bck4
+ mov Func4, Bck1 /* prerotated by 8 */
+ ldi r20, 1
+ rcall bitrotl /* net: rotr(x,7) */
+ movw XAccu1, Func1
+ movw XAccu3, Func3 /* store ROTR(w[i-15],7) in xor accu */
+ movw Func1, Bck3
+ movw Func3, Bck1 /* prerotated by 16 */
+ ldi r20, 2
+ rcall bitrotr
+ eor XAccu1, Func1 /* xor ROTR(w[i-15], 18)*/
+ eor XAccu2, Func2
+ eor XAccu3, Func3
+ eor XAccu4, Func4
+ ldi Func2, 3 /* now shr3 */ /*we can destroy backup now*/
+sigma0_shr:
+ lsr Bck4
+ ror Bck3
+ ror Bck2
+ ror Bck1
+ dec Func2
+ brne sigma0_shr
+ eor XAccu1, Bck1
+ eor XAccu2, Bck2
+ eor XAccu3, Bck3
+ eor XAccu4, Bck4 /* xor SHR(w[i-15], 3)*/ /* xor accu == sigma0(w[i-15]) */
+ add Accu1, XAccu1
+ adc Accu2, XAccu2
+ adc Accu3, XAccu3
+ adc Accu4, XAccu4 /* finished with sigma0 */
+ ldd Func1, Z+7*4 /* now accu += w[i-7] */
+ ldd Func2, Z+7*4+1
+ ldd Func3, Z+7*4+2
+ ldd Func4, Z+7*4+3
+ add Accu1, Func1
+ adc Accu2, Func2
+ adc Accu3, Func3
+ adc Accu4, Func4
+ ldd Bck1, Z+12*4 /* now backup = w[i-2]*/
+ ldd Bck2, Z+12*4+1
+ ldd Bck3, Z+12*4+2
+ ldd Bck4, Z+12*4+3
+ /* now sigma 1 */
+ movw Func1, Bck3
+ movw Func3, Bck1 /* prerotated by 16 */
+ ldi r20, 1
+ rcall bitrotr
+ movw XAccu3, Func3
+ movw XAccu1, Func1 /* store ROTR(w[i-2], 17) in xor accu */
+; movw Func1, Bck3
+; movw Func3, Bck1 /* prerotated by 16 */
+ ldi r20, 2
+ rcall bitrotr
+ eor XAccu1, Func1 /* xor ROTR(w[i-2], 19)*/
+ eor XAccu2, Func2
+ eor XAccu3, Func3
+ eor XAccu4, Func4
+ ldi Func2, 2 /* now shr10 (dirty trick, skipping a byte) */ /*we can destroy backup now*/
+sigma1_shr:
+ lsr Bck4
+ ror Bck3
+ ror Bck2
+ dec Func2
+ brne sigma1_shr
+ eor XAccu1, Bck2
+ eor XAccu2, Bck3
+ eor XAccu3, Bck4 /* xor SHR(w[i-2], 10)*/ /* xor accu == sigma1(w[i-2]) */
+ add Accu1, XAccu1
+ adc Accu2, XAccu2
+ adc Accu3, XAccu3
+ adc Accu4, XAccu4 /* finished with sigma1 */
+ /* now store the new w[i] */
+ st X+, Accu1
+ st X+, Accu2
+ st X+, Accu3
+ st X+, Accu4
+ dec LoopC
+ breq 3f ; skip if zero
+ rjmp sha256_nextBlock_wcalcloop
+3:
+ /* we are finished with w array X points one byte post w */
+/* init a array */
+ pop r31
+ pop r30
+ push r30 ; peek at the ctx pointer, keep it on the stack
+ push r31
+ ldi r25, 8*4 /* 8 32-bit values to copy from ctx to a array */
+init_a_array:
+ ld r1, Z+
+ st X+, r1
+ dec r25
+ brne init_a_array
+
+/* now the real fun begins */
+/* for (i=0; i<64; ++i){
+ t1 = a[7] + SIGMA1(a[4]) + CH(a[4],a[5],a[6]) + k[i] + w[i];
+ t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]);
+ memmove(&(a[1]), &(a[0]), 7*4); // a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0];
+ a[4] += t1;
+ a[0] = t1 + t2;
+ } */
+ /* Y points to a[0], Z ('cause lpm wants it) points to k[i], X points to w[i] */
+ sbiw r26, 8*4 /* X still points at a[7]+1*/
+ movw r28, r26
+ ldi r30, lo8(sha256_kv)
+ ldi r31, hi8(sha256_kv)
+ dec r27 /* X - (64*4 == 256) */
+ ldi r25, 64
+ mov LoopC, r25
+sha256_main_loop:
+ /* now calculate t1 */
+ /*CH(x,y,z) = (x&y)^((~x)&z)*/
+ ldd T1, Y+5*4
+ ldd T2, Y+5*4+1
+ ldd T3, Y+5*4+2
+ ldd T4, Y+5*4+3 /* y in T */
+ ldd Func1, Y+4*4
+ ldd Func2, Y+4*4+1
+ ldd Func3, Y+4*4+2
+ ldd Func4, Y+4*4+3 /* x in Func */
+ ldd Bck1, Y+6*4
+ ldd Bck2, Y+6*4+1
+ ldd Bck3, Y+6*4+2
+ ldd Bck4, Y+6*4+3 /* z in Bck */
+ and T1, Func1
+ and T2, Func2
+ and T3, Func3
+ and T4, Func4
+ com Func1
+ com Func2
+ com Func3
+ com Func4
+ and Bck1, Func1
+ and Bck2, Func2
+ and Bck3, Func3
+ and Bck4, Func4
+ eor T1, Bck1
+ eor T2, Bck2
+ eor T3, Bck3
+ eor T4, Bck4 /* done, CH(x,y,z) is in T */
+ /* now SIGMA1(a[4]) */
+ ldd Bck4, Y+4*4 /* think about using it from Func reg above*/
+ ldd Bck1, Y+4*4+1
+ ldd Bck2, Y+4*4+2
+ ldd Bck3, Y+4*4+3 /* load prerotated by 8-bit */
+ movw Func1, Bck1
+ movw Func3, Bck3
+ ldi r20, 2
+ rcall bitrotl /* rotr(x,6) */
+ movw XAccu1, Func1
+ movw XAccu3, Func3
+ movw Func1, Bck1
+ movw Func3, Bck3
+ ldi r20, 3
+ rcall bitrotr /* rotr(x,11) */
+ eor XAccu1, Func1
+ eor XAccu2, Func2
+ eor XAccu3, Func3
+ eor XAccu4, Func4
+ movw Func1, Bck3 /* this prerotates by a further 16 bits*/
+ movw Func3, Bck1 /* so we have now prerotated by 24 bits*/
+ ldi r20, 1
+ rcall bitrotr /* rotr(x,25) */
+ eor XAccu1, Func1
+ eor XAccu2, Func2
+ eor XAccu3, Func3
+ eor XAccu4, Func4 /* finished with SIGMA1, add it to T */
+ add T1, XAccu1
+ adc T2, XAccu2
+ adc T3, XAccu3
+ adc T4, XAccu4
+ /* now we've to add a[7], w[i] and k[i] */
+ ldd XAccu1, Y+4*7
+ ldd XAccu2, Y+4*7+1
+ ldd XAccu3, Y+4*7+2
+ ldd XAccu4, Y+4*7+3
+ add T1, XAccu1
+ adc T2, XAccu2
+ adc T3, XAccu3
+ adc T4, XAccu4 /* add a[7] */
+ ld XAccu1, X+
+ ld XAccu2, X+
+ ld XAccu3, X+
+ ld XAccu4, X+
+ add T1, XAccu1
+ adc T2, XAccu2
+ adc T3, XAccu3
+ adc T4, XAccu4 /* add w[i] */
+ lpm XAccu1, Z+
+ lpm XAccu2, Z+
+ lpm XAccu3, Z+
+ lpm XAccu4, Z+
+ add T1, XAccu1
+ adc T2, XAccu2
+ adc T3, XAccu3
+ adc T4, XAccu4 /* add k[i] */ /* finished with t1 */
+ /*now t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]) */ /*i did too much x86 asm, i always see 4 32bit regs*/
+ /* starting with MAJ(x,y,z) */
+ ldd Func1, Y+4*0+0
+ ldd Func2, Y+4*0+1
+ ldd Func3, Y+4*0+2
+ ldd Func4, Y+4*0+3 /* load x=a[0] */
+ ldd XAccu1, Y+4*1+0
+ ldd XAccu2, Y+4*1+1
+ ldd XAccu3, Y+4*1+2
+ ldd XAccu4, Y+4*1+3 /* load y=a[1] */
+ and XAccu1, Func1
+ and XAccu2, Func2
+ and XAccu3, Func3
+ and XAccu4, Func4 /* XAccu == (x & y) */
+ ldd Bck1, Y+4*2+0
+ ldd Bck2, Y+4*2+1
+ ldd Bck3, Y+4*2+2
+ ldd Bck4, Y+4*2+3 /* load z=a[2] */
+ and Func1, Bck1
+ and Func2, Bck2
+ and Func3, Bck3
+ and Func4, Bck4
+ eor XAccu1, Func1
+ eor XAccu2, Func2
+ eor XAccu3, Func3
+ eor XAccu4, Func4 /* XAccu == (x & y) ^ (x & z) */
+ ldd Func1, Y+4*1+0
+ ldd Func2, Y+4*1+1
+ ldd Func3, Y+4*1+2
+ ldd Func4, Y+4*1+3 /* load y=a[1] */
+ and Func1, Bck1
+ and Func2, Bck2
+ and Func3, Bck3
+ and Func4, Bck4
+ eor XAccu1, Func1
+ eor XAccu2, Func2
+ eor XAccu3, Func3
+ eor XAccu4, Func4 /* XAccu == Maj(x,y,z) == (x & y) ^ (x & z) ^ (y & z) */
+ /* SIGMA0(a[0]) */
+ ldd Bck1, Y+4*0+0 /* we should combine this with above */
+ ldd Bck2, Y+4*0+1
+ ldd Bck3, Y+4*0+2
+ ldd Bck4, Y+4*0+3
+ movw Func1, Bck1
+ movw Func3, Bck3
+ ldi r20, 2
+ rcall bitrotr
+ movw Accu1, Func1
+ movw Accu3, Func3 /* Accu = rotr(a[0], 2) */
+ movw Func1, Bck3
+ movw Func3, Bck1 /* prerotate by 16 bits */
+ ldi r20, 3
+ rcall bitrotl
+ eor Accu1, Func1
+ eor Accu2, Func2
+ eor Accu3, Func3
+ eor Accu4, Func4 /* Accu ^= rotr(a[0], 13) */
+ mov Func1, Bck4
+ mov Func2, Bck1
+ mov Func3, Bck2
+ mov Func4, Bck3 /* prerotate by 24 bits */
+ ldi r20, 2
+ rcall bitrotl
+ eor Accu1, Func1
+ eor Accu2, Func2
+ eor Accu3, Func3
+ eor Accu4, Func4 /* Accu ^= rotr(a[0], 22) */
+ add Accu1, XAccu1 /* add previous result (MAJ)*/
+ adc Accu2, XAccu2
+ adc Accu3, XAccu3
+ adc Accu4, XAccu4
+ /* now we are finished with the computing stuff (t1 in T, t2 in Accu)*/
+ /* a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; */
+
+ ldi r21, 7*4
+ adiw r28, 7*4
+a_shift_loop:
+ ld r25, -Y /* warning: this is PREdecrement */
+ std Y+4, r25
+ dec r21
+ brne a_shift_loop
+
+ ldd Bck1, Y+4*4+0 /* a[4] += t1 */
+ ldd Bck2, Y+4*4+1
+ ldd Bck3, Y+4*4+2
+ ldd Bck4, Y+4*4+3
+ add Bck1, T1
+ adc Bck2, T2
+ adc Bck3, T3
+ adc Bck4, T4
+ std Y+4*4+0, Bck1
+ std Y+4*4+1, Bck2
+ std Y+4*4+2, Bck3
+ std Y+4*4+3, Bck4
+ add Accu1, T1 /* a[0] = t1 + t2 */
+ adc Accu2, T2
+ adc Accu3, T3
+ adc Accu4, T4
+ std Y+4*0+0, Accu1
+ std Y+4*0+1, Accu2
+ std Y+4*0+2, Accu3
+ std Y+4*0+3, Accu4 /* a array updated */
+
+
+ dec LoopC
+ breq update_state
+ rjmp sha256_main_loop ;brne sha256_main_loop
+update_state:
+ /* update state */
+ /* pointers to state should still exist on the stack ;-) */
+ pop r31
+ pop r30
+ ldi r21, 8
+update_state_loop:
+ ldd Accu1, Z+0
+ ldd Accu2, Z+1
+ ldd Accu3, Z+2
+ ldd Accu4, Z+3
+ ld Func1, Y+
+ ld Func2, Y+
+ ld Func3, Y+
+ ld Func4, Y+
+ add Accu1, Func1
+ adc Accu2, Func2
+ adc Accu3, Func3
+ adc Accu4, Func4
+ st Z+, Accu1
+ st Z+, Accu2
+ st Z+, Accu3
+ st Z+, Accu4
+ dec r21
+ brne update_state_loop
+ /* now we just have to update the length */
+ adiw r30, 1 /* since we add 512, we can simply skip the LSB */
+ ldi r21, 2
+ ldi r22, 6
+ ld r20, Z
+ add r20, r21
+ st Z+, r20
+ clr r21
+sha256_nextBlock_fix_length:
+ brcc sha256_nextBlock_epilog ; stop once the carry has been absorbed
+ ld r20, Z
+ adc r20, r21
+ st Z+, r20
+ dec r22
+ brne sha256_nextBlock_fix_length
+
+; EPILOG
+sha256_nextBlock_epilog:
+/* now we should clean up the stack */
+
+ pop r21
+ pop r20
+ in r0, SREG
+ cli ; we want to be uninterrupted while updating SP
+ out SPL, r20
+ out SPH, r21
+ out SREG, r0
+
+ clr r1 ; restore the avr-gcc zero register
+ pop r29
+ pop r28
+ pop r17
+ pop r16
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop r11
+ pop r10
+ pop r9
+ pop r8
+ pop r7
+ pop r6
+ pop r5
+ pop r4
+ ret
+
+sha256_kv: ; round-key-vector stored in ProgMem
+; the 64 32-bit SHA-256 round constants K[0..63]; each 32-bit constant is
+; stored as two little-endian 16-bit words, low halfword first
+; (e.g. K[0] = 0x428a2f98 is stored as 0x2f98, 0x428a)
+.word 0x2f98, 0x428a, 0x4491, 0x7137, 0xfbcf, 0xb5c0, 0xdba5, 0xe9b5, 0xc25b, 0x3956, 0x11f1, 0x59f1, 0x82a4, 0x923f, 0x5ed5, 0xab1c
+.word 0xaa98, 0xd807, 0x5b01, 0x1283, 0x85be, 0x2431, 0x7dc3, 0x550c, 0x5d74, 0x72be, 0xb1fe, 0x80de, 0x06a7, 0x9bdc, 0xf174, 0xc19b
+.word 0x69c1, 0xe49b, 0x4786, 0xefbe, 0x9dc6, 0x0fc1, 0xa1cc, 0x240c, 0x2c6f, 0x2de9, 0x84aa, 0x4a74, 0xa9dc, 0x5cb0, 0x88da, 0x76f9
+.word 0x5152, 0x983e, 0xc66d, 0xa831, 0x27c8, 0xb003, 0x7fc7, 0xbf59, 0x0bf3, 0xc6e0, 0x9147, 0xd5a7, 0x6351, 0x06ca, 0x2967, 0x1429
+.word 0x0a85, 0x27b7, 0x2138, 0x2e1b, 0x6dfc, 0x4d2c, 0x0d13, 0x5338, 0x7354, 0x650a, 0x0abb, 0x766a, 0xc92e, 0x81c2, 0x2c85, 0x9272
+.word 0xe8a1, 0xa2bf, 0x664b, 0xa81a, 0x8b70, 0xc24b, 0x51a3, 0xc76c, 0xe819, 0xd192, 0x0624, 0xd699, 0x3585, 0xf40e, 0xa070, 0x106a
+.word 0xc116, 0x19a4, 0x6c08, 0x1e37, 0x774c, 0x2748, 0xbcb5, 0x34b0, 0x0cb3, 0x391c, 0xaa4a, 0x4ed8, 0xca4f, 0x5b9c, 0x6ff3, 0x682e
+.word 0x82ee, 0x748f, 0x636f, 0x78a5, 0x7814, 0x84c8, 0x0208, 0x8cc7, 0xfffa, 0x90be, 0x6ceb, 0xa450, 0xa3f7, 0xbef9, 0x78f2, 0xc671
+
+
+;###########################################################
+
+.global sha256_init
+;uint32_t sha256_init_vector[]={
+;	0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
+;	0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 };
+;
+;void sha256_init(sha256_ctx_t *state){
+;	state->length=0;
+;	memcpy(state->h, sha256_init_vector, 8*4);
+;}
+; param1: (r24,r25) 16-bit pointer to sha256_ctx_t struct in ram
+;	(avr-gcc ABI: first pointer argument arrives in r25:r24; the old
+;	"(r23,r24)" note contradicted the movw below)
+; modifies: r22, r23, X(r26,r27), Z(r30,r31)
+sha256_init:
+	movw r26, r24 ; (24,25) --> (26,27) load X with param1
+	ldi r30, lo8((sha256_init_vector))
+	ldi r31, hi8((sha256_init_vector))
+	ldi r22, 32+8 ; copy 32 bytes of H0..H7 plus 8 zero bytes that clear state->length
+sha256_init_vloop:
+	lpm r23, Z+ ; read init byte from program memory
+	st X+, r23 ; store it into the context struct in RAM
+	dec r22
+	brne sha256_init_vloop
+	ret
+
+sha256_init_vector:
+; SHA-256 initial hash values H0..H7, each 32-bit value stored as two
+; little-endian 16-bit words, low halfword first
+; (e.g. H0 = 0x6A09E667 is stored as 0xE667, 0x6A09)
+.word 0xE667, 0x6A09
+.word 0xAE85, 0xBB67
+.word 0xF372, 0x3C6E
+.word 0xF53A, 0xA54F
+.word 0x527F, 0x510E
+.word 0x688C, 0x9B05
+.word 0xD9AB, 0x1F83
+.word 0xCD19, 0x5BE0
+; eight trailing zero bytes: sha256_init copies 32+8 bytes, so these
+; zero the 64-bit length field that follows h[8] in sha256_ctx_t
+.word 0x0000, 0x0000
+.word 0x0000, 0x0000
+
+;###########################################################
+
+.global rotl32
+; === ROTL32 ===
+; function that rotates a 32 bit word to the left
+; param1: the 32-bit word to rotate
+;	given in r25,r24,r23,r22 (r25 is most significant)
+; param2: an 8-bit value telling how often to rotate
+;	given in r20
+; result is returned in r25,r24,r23,r22
+; modifies: r20 (counted down to 0), r21 (scratch)
+rotl32:
+	cpi r20, 8
+	brlo bitrotl ; fewer than 8 bits remaining -> rotate bit by bit
+	mov r21, r25 ; rotate left by one whole byte: r25 wraps around to r22
+	mov r25, r24
+	mov r24, r23
+	mov r23, r22
+	mov r22, r21
+	subi r20, 8
+	rjmp rotl32
+bitrotl: ; also used as a direct rcall entry point elsewhere; requires r20 < 8
+	clr r21 ; r21 collects the bits shifted out of the MSB
+	clc ; start with a clean carry so no stray bit enters r22
+bitrotl_loop:
+	tst r20
+	breq fixrotl
+	rol r22
+	rol r23
+	rol r24
+	rol r25
+	rol r21 ; capture the bit rotated out of r25
+	dec r20
+	rjmp bitrotl_loop
+fixrotl:
+	or r22, r21 ; feed the captured high bits back into the low end
+	ret
+
+
+;###########################################################
+
+.global rotr32
+; === ROTR32 ===
+; function that rotates a 32 bit word to the right
+; param1: the 32-bit word to rotate
+;	given in r25,r24,r23,r22 (r25 is most significant)
+; param2: an 8-bit value telling how often to rotate
+;	given in r20
+; result is returned in r25,r24,r23,r22
+; modifies: r20 (counted down to 0), r21 (scratch)
+rotr32:
+	cpi r20, 8
+	brlo bitrotr ; fewer than 8 bits remaining -> rotate bit by bit
+	mov r21, r22 ; rotate right by one whole byte: r22 wraps around to r25
+	mov r22, r23
+	mov r23, r24
+	mov r24, r25
+	mov r25, r21
+	subi r20, 8
+	rjmp rotr32
+bitrotr: ; also used as a direct rcall entry point elsewhere; requires r20 < 8
+	clr r21 ; r21 collects the bits shifted out of the LSB
+	clc ; start with a clean carry so no stray bit enters r25
+bitrotr_loop:
+	tst r20
+	breq fixrotr
+	ror r25
+	ror r24
+	ror r23
+	ror r22
+	ror r21 ; capture the bit rotated out of r22
+	dec r20
+	rjmp bitrotr_loop
+fixrotr:
+	or r25, r21 ; feed the captured low bits back into the high end
+	ret
+
+
+;###########################################################
+
+.global change_endian32
+; === change_endian32 ===
+; function that changes the endianess of a 32-bit word
+; param1: the 32-bit word
+;	given in r25,r24,r23,r22 (r25 is most significant)
+; result is returned in r25,r24,r23,r22 with the byte order reversed
+; modifies: r20, r21 (scratch copy of the low halfword; the old note
+;	"r21, r22" did not match the code below)
+change_endian32:
+	movw r20, r22 ; (r22,r23) --> (r20,r21)
+	mov r22, r25 ; new LSB = old MSB
+	mov r23, r24
+	mov r24, r21
+	mov r25, r20 ; new MSB = old LSB
+	ret
+
diff --git a/shacal2/sha256.h b/shacal2/sha256.h
new file mode 100644
index 0000000..24960a3
--- /dev/null
+++ b/shacal2/sha256.h
@@ -0,0 +1,122 @@
+/* sha256.h */
+/*
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file sha256.h
+ * \author Daniel Otte
+ * \date 2006-05-16
+ * \license GPLv3 or later
+ *
+ */
+
+#ifndef SHA256_H_
+#define SHA256_H_
+
+#define __LITTLE_ENDIAN__
+
+
+#include <stdint.h>
+
+/** \def SHA256_HASH_BITS
+ * defines the size of a SHA-256 hash value in bits
+ */
+
+/** \def SHA256_HASH_BYTES
+ * defines the size of a SHA-256 hash value in bytes
+ */
+
+/** \def SHA256_BLOCK_BITS
+ * defines the size of a SHA-256 input block in bits
+ */
+
+/** \def SHA256_BLOCK_BYTES
+ * defines the size of a SHA-256 input block in bytes
+ */
+
+#define SHA256_HASH_BITS 256
+#define SHA256_HASH_BYTES (SHA256_HASH_BITS/8)
+#define SHA256_BLOCK_BITS 512
+#define SHA256_BLOCK_BYTES (SHA256_BLOCK_BITS/8)
+
+/** \typedef sha256_ctx_t
+ * \brief SHA-256 context type
+ *
+ * A variable of this type may hold the state of a SHA-256 hashing process
+ */
+typedef struct {
+	uint32_t h[8];   /* intermediate hash state, eight 32-bit words */
+	uint64_t length; /* number of message bits processed so far */
+} sha256_ctx_t;
+
+/** \typedef sha256_hash_t
+ * \brief SHA-256 hash value type
+ *
+ * A variable of this type may hold the hash value produced by the
+ * sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state) function.
+ */
+typedef uint8_t sha256_hash_t[SHA256_HASH_BYTES];
+
+/** \fn void sha256_init(sha256_ctx_t *state)
+ * \brief initialise a SHA-256 context
+ *
+ * This function sets a ::sha256_ctx_t to the initial values for hashing.
+ * \param state pointer to the SHA-256 hashing context
+ */
+void sha256_init(sha256_ctx_t *state);
+
+/** \fn void sha256_nextBlock (sha256_ctx_t* state, const void* block)
+ * \brief update the context with a given block
+ *
+ * This function updates the SHA-256 hash context by processing the given block
+ * of fixed length.
+ * \param state pointer to the SHA-256 hash context
+ * \param block pointer to the block of fixed length (512 bit = 64 byte)
+ */
+void sha256_nextBlock (sha256_ctx_t* state, const void* block);
+
+/** \fn void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b)
+ * \brief finalize the context with the given block
+ *
+ * This function finalizes the SHA-256 hash context by processing the given block
+ * of variable length.
+ * \param state pointer to the SHA-256 hash context
+ * \param block pointer to the block of fixed length (512 bit = 64 byte)
+ * \param length_b the length of the block in bits
+ */
+void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b);
+
+/** \fn void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state)
+ * \brief convert the hash state into the hash value
+ * This function reads the context and writes the hash value to the destination
+ * \param dest pointer to the location where the hash value should be written
+ * \param state pointer to the SHA-256 hash context
+ */
+void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state);
+
+/** \fn void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b)
+ * \brief simple SHA-256 hashing function for direct hashing
+ *
+ * This function automaticaly hashes a given message of arbitary length with
+ * the SHA-256 hashing algorithm.
+ * \param dest pointer to the location where the hash value is going to be written to
+ * \param msg pointer to the message thats going to be hashed
+ * \param length_b length of the message in bits
+ */
+void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b);
+
+#endif /*SHA256_H_*/
diff --git a/shacal2_enc.c b/shacal2/shacal2_enc.c
similarity index 100%
rename from shacal2_enc.c
rename to shacal2/shacal2_enc.c
diff --git a/shacal2_enc.h b/shacal2/shacal2_enc.h
similarity index 100%
rename from shacal2_enc.h
rename to shacal2/shacal2_enc.h
diff --git a/skipjack.c b/skipjack/skipjack.c
similarity index 100%
rename from skipjack.c
rename to skipjack/skipjack.c
diff --git a/skipjack.h b/skipjack/skipjack.h
similarity index 100%
rename from skipjack.h
rename to skipjack/skipjack.h
diff --git a/test_src/main-hmac-md5-test.c b/test_src/main-hmac-md5-test.c
index dfbcead..f61e119 100644
--- a/test_src/main-hmac-md5-test.c
+++ b/test_src/main-hmac-md5-test.c
@@ -28,9 +28,10 @@
#include "md5.h"
#include "hmac-md5.h"
+/*
#include "base64_enc.h"
#include "base64_dec.h"
-
+*/
#include "nessie_mac_test.h"
#include
@@ -109,6 +110,7 @@ void strhexdump(char* dest, void* src, uint16_t length){
}
}
+/*
void cram_md5_interactive(void){
char key[101];
char msg_b64[101];
@@ -137,9 +139,9 @@ void cram_md5_interactive(void){
cli_putstr_P(PSTR("\r\nresponse: "));
cli_hexdump(hmac, HMAC_MD5_BYTES);
cli_putstr_P(PSTR("\r\nresponse (b64): "));
- cli_putstr(msg_b64);
-
+ cli_putstr(msg_b64);
}
+*/
void md5_interactive(void){
@@ -164,7 +166,7 @@ const char test_str[] PROGMEM = "test";
/* const char performance_str[] PROGMEM = "performance"; */
const char echo_str[] PROGMEM = "echo";
const char hmd5i_str[] PROGMEM = "hmac-md5";
-const char crammd5i_str[] PROGMEM = "cram-md5";
+/* const char crammd5i_str[] PROGMEM = "cram-md5"; */
const char md5i_str[] PROGMEM = "md5";
@@ -172,7 +174,7 @@ cmdlist_entry_t cmdlist[] PROGMEM = {
{ nessie_str, NULL, testrun_nessie_hmacmd5},
{ test_str, NULL, testrun_test_hmacmd5},
{ hmd5i_str, NULL, hmacmd5_interactive},
- { crammd5i_str, NULL, cram_md5_interactive},
+/* { crammd5i_str, NULL, cram_md5_interactive}, */
{ md5i_str, NULL, md5_interactive},
/* { performance_str, NULL, testrun_performance_hmacmd5}, */
{ echo_str, (void*)1, (void_fpt)echo_ctrl},
diff --git a/trivium.c b/trivium/trivium.c
similarity index 100%
rename from trivium.c
rename to trivium/trivium.c
diff --git a/trivium.h b/trivium/trivium.h
similarity index 100%
rename from trivium.h
rename to trivium/trivium.h
diff --git a/xtea-asm.S b/xtea/xtea-asm.S
similarity index 100%
rename from xtea-asm.S
rename to xtea/xtea-asm.S
diff --git a/xtea.c b/xtea/xtea.c
similarity index 100%
rename from xtea.c
rename to xtea/xtea.c
diff --git a/xtea.h b/xtea/xtea.h
similarity index 100%
rename from xtea.h
rename to xtea/xtea.h