new MD5 in ASM with C (working on pure ASM implementation) plus enhancements in asm macros

small changes in MD5 C-implementation (might be a little faster)
little bug fixed in Nessie-Hash-Test
This commit is contained in:
bg 2008-11-25 01:23:22 +00:00
parent 7c4486d332
commit 5ac75cfae2
16 changed files with 769 additions and 68 deletions

View File

@ -14,15 +14,18 @@ include mkfiles/*.mk
ALGORITHMS = $(BLOCK_CIPHERS) $(STREAM_CIPHERS) $(HASHES) $(PRNGS) $(MACS)
ALGORITHMS_OBJ = $(patsubst %,%_OBJ, $(ALGORITHMS))
ALGORITHMS_TEST_BIN = $(patsubst %,%_TEST_BIN, $(ALGORITHMS))
define OBJinBINDIR_TEMPLATE
$(1) = $(2)
endef
$(foreach a, $(ALGORITHMS_OBJ), $(eval $(call OBJinBINDIR_TEMPLATE, $(a), $(patsubst %.o,$(BIN_DIR)%.o,$($(a))))))
ALGORITHMS_TEST_BIN = $(patsubst %,%_TEST_BIN, $(ALGORITHMS))
$(foreach a, $(ALGORITHMS_TEST_BIN), $(eval $(call OBJinBINDIR_TEMPLATE, $(a), $(patsubst %.o,$(TESTBIN_DIR)%.o,$($(a))))))
ALGORITHMS_TEST_BIN_IMM = $(foreach a, $(ALGORITHMS_TEST_BIN), $($(a)))
#ALGORITHMS_TEST_BIN_IMM = $(foreach a, $(ALGORITHMS_TEST_BIN), $($(a)))
ALGORITHMS_NESSIE_TEST = $(patsubst %,%_NESSIE_TEST, $(ALGORITHMS))
ALGORITHMS_PERFORMANCE_TEST = $(patsubst %,%_PERORMANCE_TEST, $(ALGORITHMS))
@ -73,7 +76,7 @@ info:
@echo " $(MACS)"
@echo " PRNG functions:"
@echo " $(PRNGS)"
# @echo " ALGORITHMS_TEST_BIN"
# @echo " ALGORITHMS_TEST_BIN:"
# @echo " $(ALGORITHMS_TEST_BIN)"
# @echo " ALGORITHMS_TEST_TARGET_ELF:"
# @echo " $(ALGORITHMS_TEST_TARGET_ELF)"
@ -116,6 +119,15 @@ endef
$(foreach algo, $(ALGORITHMS), $(eval $(call OBJ_TEMPLATE, $(algo), $($(algo)_OBJ))))
#-------------------------------------------------------------------------------
define TESTBIN_TEMPLATE
$(1)_TEST_BIN: $(2)
endef
$(foreach algo, $(ALGORITHMS), $(eval $(call TESTBIN_TEMPLATE, $(algo), $($(algo)_TEST_BIN))))
#-------------------------------------------------------------------------------
$(BLOCK_CIPHERS_OBJ): $(patsubst %,%_OBJ, $(BLOCK_CIPHERS))
@ -124,8 +136,6 @@ $(HASHES_OBJ): $(patsubst %,%_OBJ, $(HASHES))
$(PRNGS_OBJ): $(patsubst %,%_OBJ, $(PRNGS))
$(MACS_OBJ): $(patsubst %,%_OBJ, $(MACS))
$(ALGORITHMS_TEST_BIN): $(ALGORITHMS_TEST_BIN_IMM)
#-------------------------------------------------------------------------------
define SIZE_TEMPLATE

View File

@ -16,16 +16,16 @@
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
/*
* File: arcfour.c
* Author: Daniel Otte
* email: daniel.otte@rub.de
* Date: 2006-06-07
* License: GPLv3 or later
* Description: Implementation of the ARCFOUR (RC4 compatible) stream cipher algorithm.
*
*
*/
#include <stdint.h>
#include "arcfour.h"
@ -38,7 +38,7 @@ void arcfour_init(const void *key, uint8_t length_B, arcfour_ctx_t *ctx){
uint16_t x,y=0;
for(x=0; x<= 255; ++x)
ctx->s[x]=x;
for(x=0; x<= 255; ++x){
y += ctx->s[x] + ((uint8_t*)key)[x % length_B];
y &= 0xff;
@ -46,7 +46,7 @@ void arcfour_init(const void *key, uint8_t length_B, arcfour_ctx_t *ctx){
t = ctx->s[y];
ctx->s[y] = ctx->s[x];
ctx->s[x] = t;
}
}
ctx->i = ctx->j = 0;
}

View File

@ -16,29 +16,29 @@
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
/*
* File: arcfour.h
* Author: Daniel Otte
* Date: 2006-06-07
* License: GPLv3+
* Description: Implementation of the ARCFOUR (RC4 compatible) stream cipher algorithm.
* Description: Implementation of the ARCFOUR (RC4 compatible) stream cipher algorithm.
*/
/**
/**
* \file arcfour.h
* \author Daniel Otte
* \date 2006-06-07
* \license GPLv3+
* \brief Implementation of the ARCFOUR (RC4 compatible) stream cipher algorithm.
*
* \brief Implementation of the ARCFOUR (RC4 compatible) stream cipher algorithm.
*
* This header file defines the interface of the ARCFOUR cipher implementation.
*
*
* This implementation aims to be compatible with the ARCFOUR description
* availabe at
* available at
* http://www.mozilla.org/projects/security/pki/nss/draft-kaukonen-cipher-arcfour-03.txt
*/
#ifndef ARCFOUR_H_
#define ARCFOUR_H_
@ -46,46 +46,46 @@
/** \typedef arcfour_ctx_t
* \brief type for arcfour context
*
*
* A variable of this type may contain a complete ARCFOUR context.
* The context is used to store the state of the cipher and gets
* created by the arcfour_init(arcfour_ctx_t *c, uint8_t *key, uint8_t length_B)
* function. The context is of the fixed size of 258 bytes
*/
/** \struct arcfour_ctx_st
* \brief base for ::arcfour_ctx_t
*
*
* The struct holds the two indices and the S-Box
*/
typedef struct arcfour_ctx_st {
uint8_t i,j;
uint8_t s[256];
} arcfour_ctx_t;
/** \fn void arcfour_init(arcfour_ctx_t *ctx, void *key, uint8_t length_B)
* \brief setup a context with a key
*
*
* This function sets up a ::arcfour_ctx_t context using
* the supplied key of the given length.
* \param ctx pointer to the context
* \param key pointer to the key
* \param length_B length of the key in bytes (between 1 and 255)
*/
void arcfour_init(const void *key, uint8_t length_B, arcfour_ctx_t *ctx);
/** \fn uint8_t arcfour_gen(arcfour_ctx_t *ctx)
* \brief generates a byte of keystream
*
*
* This function generates the next byte of keystream
* from the supplied ::arcfour_ctx_t context which is updated acordingly
*
* from the supplied ::arcfour_ctx_t context which is updated accordingly
*
* \param ctx pointer to the context
* \return byte of keystream
*/
uint8_t arcfour_gen(arcfour_ctx_t *ctx);
#endif

View File

@ -67,8 +67,8 @@
sbiw \reg1, \size
cli
out _SFR_IO_ADDR(SPH), \reg2
out _SFR_IO_ADDR(SPL), \reg1
out _SFR_IO_ADDR(SREG), r0
out _SFR_IO_ADDR(SPL), \reg1
.endm
.macro stack_free size:req, reg1=r30, reg2=r31
@ -78,8 +78,8 @@
adiw \reg1, \size
cli
out _SFR_IO_ADDR(SPH), \reg2
out _SFR_IO_ADDR(SPL), \reg1
out _SFR_IO_ADDR(SREG), r0
out _SFR_IO_ADDR(SPL), \reg1
.endm
/*******************************************************************************

View File

@ -1,9 +1,9 @@
OBJ = $(SERPENT_OBJ)
MCU_TARGET = atmega644
OPTIMIZE = -Os
DEFS = -D$(call uc, $(MCU_TARGET))
FLASHCMD = avrdude -p $(MCU_TARGET) -P /dev/ttyUSB0 -c avr911 -U flash:w:# no space at the end
#FLASHCMD = avrdude -p $(MCU_TARGET) -c usbasp -U flash:w:# no space at the end
DEP_DIR = deps/
BIN_DIR = bin/
TESTBIN_DIR = test_bin/
@ -21,7 +21,7 @@ CC = avr-gcc
override CFLAGS = -MMD -MF$(DEP_DIR)$(patsubst %.c,%.d,$(patsubst $(TESTSRC_DIR)%,%,$<)) -I. -gdwarf-2 -pedantic -std=c99 -Wall -Wstrict-prototypes $(OPTIMIZE) -mmcu=$(MCU_TARGET) $(DEFS)
override LDFLAGS = -gdwarf-2 -Wl,-Map,
override ASFLAGS = -mmcu=$(MCU_TARGET) -gdwarf-2
override ASFLAGS = -mmcu=$(MCU_TARGET) -Wa,--gdwarf-2
SIZESTAT_FILE = sizestats.txt

View File

@ -20,13 +20,17 @@
#define __CONFIG_H__
#include <avr/io.h>
#define F_CPU 16000000 /* Oszillator-Frequenz in Hz */
// #define F_CPU 14745600
#define DEBUG uart
/* uart.[ch] defines */
#define UART_INTERRUPT 1
#define UART_BAUD_RATE 38400
#define UART_BAUD_RATE 9600
#define UART_RXBUFSIZE 16
#define UART_TXBUFSIZE 16
#define UART_LINE_BUFFER_SIZE 40
@ -41,8 +45,6 @@
#define UART_CTS_BIT 1
*/
//#define ATMEGA644 /* this is now done by make */
#define CLI_AUTO_HELP
#endif

521
md5-asm.S Normal file
View File

@ -0,0 +1,521 @@
/* md5-asm.S */
/*
This file is part of the Crypto-avr-lib/microcrypt-lib.
Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* Author: Daniel Otte
* License: GPLv3 or later
* Date: 2008-11-15
*/
.include "avr-asm-macros.S"
;###########################################################
; T table (labelled S-BOX in the original header, but it is not a substitution box)
; The 64 32-bit additive step constants of MD5, i.e. md5_T[] of the C
; reference code. md5_core_asm reads entry i with lpm at T_table + 4*i.
; Every constant occupies two .hword entries, low half first, so its four
; bytes are little-endian in memory: 0xa478, 0xd76a encodes 0xd76aa478.
T_table:
.hword 0xa478, 0xd76a, 0xb756, 0xe8c7, 0x70db, 0x2420, 0xceee, 0xc1bd, 0x0faf, 0xf57c
.hword 0xc62a, 0x4787, 0x4613, 0xa830, 0x9501, 0xfd46, 0x98d8, 0x6980, 0xf7af, 0x8b44
.hword 0x5bb1, 0xffff, 0xd7be, 0x895c, 0x1122, 0x6b90, 0x7193, 0xfd98, 0x438e, 0xa679
.hword 0x0821, 0x49b4, 0x2562, 0xf61e, 0xb340, 0xc040, 0x5a51, 0x265e, 0xc7aa, 0xe9b6
.hword 0x105d, 0xd62f, 0x1453, 0x0244, 0xe681, 0xd8a1, 0xfbc8, 0xe7d3, 0xcde6, 0x21e1
.hword 0x07d6, 0xc337, 0x0d87, 0xf4d5, 0x14ed, 0x455a, 0xe905, 0xa9e3, 0xa3f8, 0xfcef
.hword 0x02d9, 0x676f, 0x4c8a, 0x8d2a, 0x3942, 0xfffa, 0xf681, 0x8771, 0x6122, 0x6d9d
.hword 0x380c, 0xfde5, 0xea44, 0xa4be, 0xcfa9, 0x4bde, 0x4b60, 0xf6bb, 0xbc70, 0xbebf
.hword 0x7ec6, 0x289b, 0x27fa, 0xeaa1, 0x3085, 0xd4ef, 0x1d05, 0x0488, 0xd039, 0xd9d4
.hword 0x99e5, 0xe6db, 0x7cf8, 0x1fa2, 0x5665, 0xc4ac, 0x2244, 0xf429, 0xff97, 0x432a
.hword 0x23a7, 0xab94, 0xa039, 0xfc93, 0x59c3, 0x655b, 0xcc92, 0x8f0c, 0xf47d, 0xffef
.hword 0x5dd1, 0x8584, 0x7e4f, 0x6fa8, 0xe6e0, 0xfe2c, 0x4314, 0xa301, 0x11a1, 0x4e08
.hword 0x7e82, 0xf753, 0xf235, 0xbd3a, 0xd2bb, 0x2ad7, 0xd391, 0xeb86
; Build-time switch: with the define below in place the unrolled variant of
; md5_init is assembled; remove the define to get the compact loop variant.
#define MD5_init_fast
.global md5_init
#ifndef MD5_init_fast
;###########################################################
;void md5_init(md5_ctx_t *state)
; Compact variant: copies the 20 bytes of md5_init_vector from program
; memory to *state in a byte loop (16 bytes for a[0..3] plus 4 zero bytes,
; presumably the block counter - confirm against md5_ctx_t).
; param1: (r24,r25) 16-bit pointer to md5_ctx_t struct in ram
; modifies: r0, r24, X(r26,r27), Z(r30,r31)
; size = 9+5*4 WORDS = 29 WORDS = 58 Bytes
; NOTE(review): the vector is 5*4 bytes = 10 words, so 9+10 = 19 words looks
; closer than 29 - confirm
md5_init:
movw r26, r24 ; (24,25) --> (26,27) load X with param1
ldi r30, lo8(md5_init_vector)
ldi r31, hi8(md5_init_vector)
ldi r24, 16+4 ; byte count: 16 bytes of a[] + 4 zero bytes
md5_init_vloop:
lpm r0, Z+
st X+, r0
dec r24
brne md5_init_vloop
ret
; initial values 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476 and a zero
; word, each stored low half first (little-endian in memory)
md5_init_vector:
.hword 0x2301, 0x6745
.hword 0xAB89, 0xEFCD
.hword 0xDCFE, 0x98BA
.hword 0x5476, 0x1032
.hword 0x0000, 0x0000
#else
;###########################################################
.global md5_init_fast
;void md5_init(md5_ctx_t *state)
; Unrolled variant: writes the 16 initial state bytes with ldi/st pairs and
; then four bytes taken from r1. This assumes r1 holds zero on entry
; (presumably the avr-gcc zero register convention - confirm).
; param1: (r24,r25) 16-bit pointer to md5_ctx_t struct in ram
; modifies: r24, X(r26,r27)
; cycles = 1+16*3+4*2+4 = 1+48+12 = 61
; size = 1+16*2+4+1 WORDS = 38 WORDS = 76 Bytes
md5_init:
md5_init_fast:
movw r26, r24 ; X = state
; a[0] = 0x67452301, least significant byte first
ldi r24, 0x01
st X+, r24
ldi r24, 0x23
st X+, r24
ldi r24, 0x45
st X+, r24
ldi r24, 0x67
st X+, r24
; a[1] = 0xefcdab89
ldi r24, 0x89
st X+, r24
ldi r24, 0xAB
st X+, r24
ldi r24, 0xCD
st X+, r24
ldi r24, 0xEF
st X+, r24
; a[2] = 0x98badcfe
ldi r24, 0xFE
st X+, r24
ldi r24, 0xDC
st X+, r24
ldi r24, 0xBA
st X+, r24
ldi r24, 0x98
st X+, r24
; a[3] = 0x10325476
ldi r24, 0x76
st X+, r24
ldi r24, 0x54
st X+, r24
ldi r24, 0x32
st X+, r24
ldi r24, 0x10
st X+, r24
; four bytes following a[] are filled from r1 (expected to be zero)
st X+, r1
st X+, r1
st X+, r1
st X+, r1
ret
#endif
;###########################################################
/*
static
uint32_t md5_F(uint32_t x, uint32_t y, uint32_t z){
return ((x&y)|((~x)&z));
}
*/
; Round function F. It is entered with ijmp through jump_table from
; md5_core_asm and leaves with rjmp md5_core_F_exit, so it must not be
; reached with call/rcall (there is no ret).
; x: r22-r25
; y: r18-r21
; z: r14-r17
; result: r22-r25; y (r18-r21) is overwritten, z is left unchanged
md5_F:
and r18, r22 ; y = x & y
and r19, r23
and r20, r24
and r21, r25
com r22 ; x = ~x
com r23
com r24
com r25
and r22, r14 ; x = (~x) & z
and r23, r15
and r24, r16
and r25, r17
or r22, r18 ; x = (x & y) | ((~x) & z)
or r23, r19
or r24, r20
or r25, r21
rjmp md5_core_F_exit
/*
static
uint32_t md5_G(uint32_t x, uint32_t y, uint32_t z){
return ((x&z)|((~z)&y));
}
*/
; Round function G. Entered with ijmp through jump_table, left with
; rjmp md5_core_F_exit (no ret).
; x: r22-r25
; y: r18-r21
; z: r14-r17
; result: r22-r25; both y (r18-r21) and z (r14-r17) are overwritten
md5_G:
and r22, r14 ; x = x & z
and r23, r15
and r24, r16
and r25, r17
com r14 ; z = ~z
com r15
com r16
com r17
and r18, r14 ; y = (~z) & y
and r19, r15
and r20, r16
and r21, r17
or r22, r18 ; x = (x & z) | ((~z) & y)
or r23, r19
or r24, r20
or r25, r21
rjmp md5_core_F_exit
/*
static
uint32_t md5_H(uint32_t x, uint32_t y, uint32_t z){
return (x^y^z);
}
*/
; Round function H. Entered with ijmp through jump_table, left with
; rjmp md5_core_F_exit (no ret).
; x: r22-r25
; y: r18-r21
; z: r14-r17
; result: r22-r25; y and z are left unchanged
md5_H:
eor r22, r18 ; byte 0: x ^ y ^ z
eor r22, r14
eor r23, r19 ; byte 1
eor r23, r15
eor r24, r20 ; byte 2
eor r24, r16
eor r25, r21 ; byte 3
eor r25, r17
rjmp md5_core_F_exit
/*
static
uint32_t md5_I(uint32_t x, uint32_t y, uint32_t z){
return (y ^ (x | (~z)));
}
*/
; Dispatch table for the round functions. md5_core_asm adds fi (0..3) to the
; word address of jump_table and executes ijmp; every rjmp is one word, so
; entry fi is reached directly. There is no fourth rjmp: fi = 3 lands on the
; word right after the table, which is the first instruction of md5_I.
; Nothing may be inserted between the table and md5_I.
jump_table:
rjmp md5_F
rjmp md5_G
rjmp md5_H
; rjmp md5_I
; Round function I, left with rjmp md5_core_F_exit (no ret).
; x: r22-r25
; y: r18-r21
; z: r14-r17
; result: r22-r25; z (r14-r17) is overwritten, y is left unchanged
md5_I:
com r14 ; z = ~z
com r15
com r16
com r17
or r22, r14 ; x = x | (~z)
or r23, r15
or r24, r16
or r25, r17
eor r22, r18 ; x = y ^ (x | (~z))
eor r23, r19
eor r24, r20
eor r25, r21
rjmp md5_core_F_exit
; Byte offsets into the working array a[4] (index * 4) for one MD5 step.
; md5_core_asm selects the row with ((as-1)&3)*4 and loads the four entries
; into AS_SAVE0..AS_SAVE3:
;   AS_SAVE0 = offset of a[as&3]      (word that is updated)
;   AS_SAVE1 = offset of a[(as+3)&3]  (z argument)
;   AS_SAVE2 = offset of a[(as+1)&3]  (x argument, also added after the rotate)
;   AS_SAVE3 = offset of a[(as+2)&3]  (y argument)
as_table:
; (as+0)&3 (as+3)&3 (as+1)&3 (as+2)&3
; Z X Y
; AS_SAVE0 AS_SAVE1 AS_SAVE2 AS_SAVE3
.byte 1*4, 0*4, 2*4, 3*4 ;as=1
.byte 2*4, 1*4, 3*4, 0*4 ;as=2
.byte 3*4, 2*4, 0*4, 1*4 ;as=3
.byte 0*4, 3*4, 1*4, 2*4 ;as=4
;###########################################################
; md5_core: performs one MD5 step on the working array a[4]:
;   a[as&3] = a[(as+1)&3] + ROTL32(a[as&3] + f(...) + *block + T[i], s)
; (see the C reference code below). md5_core is the externally visible entry
; point; it only repacks its arguments and falls through to md5_core_asm.
.global md5_core
md5_core:
; Move as, s, i and fi from the registers they arrive in (r20, r18, r16, r14;
; presumably the avr-gcc argument registers for the prototype below - confirm)
; into the compact r21, r20, r19, r18 layout used by md5_core_asm.
mov r21, r20
mov r20, r18
mov r19, r16
mov r18, r14
; rjmp md5_core_asm
/*
void md5_core(uint32_t* a, void* block, uint8_t as, uint8_t s, uint8_t i, uint8_t fi){
uint32_t t;
md5_func_t* funcs[]={md5_F, md5_G, md5_H, md5_I};
as &= 0x3;
/ * a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). * /
t = a[as] + funcs[fi](a[(as+1)&3], a[(as+2)&3], a[(as+3)&3]) + *((uint32_t*)block) + md5_T[i] ;
a[as]=a[(as+1)&3] + ROTL32(t, s);
}
*/
; Register layout on entry to md5_core_asm:
; a: r24-r25
; block: r22-r23
; as: r21
; s: r20
; i: r19
; fi: r18
P_A0 = 24
P_A1 = 25
P_B0 = 22
P_B1 = 23
P_AS = 21
P_S = 20
P_I = 19
P_FI = 18
; Argument registers of the round functions md5_F/G/H/I:
; x: r22-r25
; y: r18-r21
; z: r14-r17
; Registers holding values across the round function:
; AS_SAVE0..3 = the as_table row, FI_SAVE = fi, S_SAVE = s,
; ACCU0..3 = running 32-bit sum T[i] + *block + a[as&3]
AS_SAVE0 = 4
AS_SAVE1 = 5
AS_SAVE2 = 6
AS_SAVE3 = 7
FI_SAVE = 8
S_SAVE = 9
ACCU0 = 10
ACCU1 = 11
ACCU2 = 12
ACCU3 = 13
ARG_X0 = 22
ARG_X1 = 23
ARG_X2 = 24
ARG_X3 = 25
ARG_Y0 = 18
ARG_Y1 = 19
ARG_Y2 = 20
ARG_Y3 = 21
ARG_Z0 = 14
ARG_Z1 = 15
ARG_Z2 = 16
ARG_Z3 = 17
md5_core_asm:
; save Y (r28,r29) and r4-r17, all of which are used below;
; push_range comes from avr-asm-macros.S (presumably pushes r4..r17 - confirm)
push r28
push r29
push_range 4, 17
; Z = T_table + 4*i; the bits shifted out of P_I are collected in r1
; (expected to be zero on entry), which is cleared again afterwards
ldi r30, lo8(T_table)
ldi r31, hi8(T_table)
lsl P_I
rol r1
lsl P_I
rol r1
add r30, P_I
adc r31, r1
clr r1
mov FI_SAVE, r18 ; keep fi, r18 is reused for the y argument
/* loading T[i] into ACCU */
lpm ACCU0, Z+
lpm ACCU1, Z+
lpm ACCU2, Z+
lpm ACCU3, Z
/* add *block to ACCU */
movw r30, P_B0 ; Z = block (pointer into ram)
ld r0, Z+
add ACCU0, r0
ld r0, Z+
adc ACCU1, r0
ld r0, Z+
adc ACCU2, r0
ld r0, Z+
adc ACCU3, r0
/* add a[as+0&3] to ACCU */
; select the as_table row: Z = as_table + ((as-1)&3)*4
ldi r30, lo8(as_table)
ldi r31, hi8(as_table)
dec P_AS
andi P_AS, 0x03
lsl P_AS
lsl P_AS
add r30, r21
adc r31, r1 ; Z points to the correct row in as_table
lpm AS_SAVE0, Z+
lpm AS_SAVE1, Z+
lpm AS_SAVE2, Z+
lpm AS_SAVE3, Z
movw r26, r24 ; X points to a[0]
add r26, AS_SAVE0
adc r27, r1 ; X points at a[as&3]
ld r0, X+
add ACCU0, r0
ld r0, X+
adc ACCU1, r0
ld r0, X+
adc ACCU2, r0
ld r0, X+
adc ACCU3, r0
mov S_SAVE, r20 ; keep s, r20 is reused for the y argument
movw r28, r24 ; Y = a, r24/r25 are reused for the x argument
/* loading z value */
movw r26, r28
add r26, AS_SAVE1
adc r27, r1
ld ARG_Z0, X+
ld ARG_Z1, X+
ld ARG_Z2, X+
ld ARG_Z3, X
/* loading x value */
movw r26, r28
add r26, AS_SAVE2
adc r27, r1
ld ARG_X0, X+
ld ARG_X1, X+
ld ARG_X2, X+
ld ARG_X3, X
/* loading y value */
movw r26, r28
add r26, AS_SAVE3
adc r27, r1
; Z is set up before y is loaded; FI_SAVE is used because r18 is about to be
; overwritten by ARG_Y0
ldi r30, pm_lo8(jump_table)
ldi r31, pm_hi8(jump_table)
add r30, FI_SAVE
adc r31, r1 ; Z points to the correct entry in our jump table
ld ARG_Y0, X+
ld ARG_Y1, X+
ld ARG_Y2, X+
ld ARG_Y3, X
ijmp /* jumps to the round function selected by Z; it comes back via rjmp md5_core_F_exit */
md5_core_F_exit:
/* add ACCU to result of f() */
add r22, ACCU0
adc r23, ACCU1
adc r24, ACCU2
adc r25, ACCU3
/* rotate */
mov r20, S_SAVE
; rotate r22-r25 left by r20 bits: whole bytes first, remaining bits afterwards
rotl32:
cpi r20, 8
brlo bitrotl
mov r21, r25 ; rotate left by one byte, r21 is scratch
mov r25, r24
mov r24, r23
mov r23, r22
mov r22, r21
subi r20, 8
rjmp rotl32
bitrotl:
mov r21, r25 ; copy of the top byte; its high bits are fed into bit 0
bitrotl_loop:
tst r20
breq fixrotl ; nothing left to rotate
bitrotl_loop2:
lsl r21 ; carry = next bit leaving the top of the 32-bit value
rol r22
rol r23
rol r24
rol r25
dec r20
brne bitrotl_loop2
fixrotl:
/* add a[(as+1)&3] */
movw r26, r28
add r26, AS_SAVE2
adc r27, r1
ld r0, X+
add r22, r0
ld r0, X+
adc r23, r0
ld r0, X+
adc r24, r0
ld r0, X
adc r25, r0
/* store result */
; written to a[as&3]
movw r26, r28
add r26, AS_SAVE0
adc r27, r1
st X+, r22
st X+, r23
st X+, r24
st X , r25
md5_core_exit:
; restore the registers saved at md5_core_asm, in reverse order
pop_range 4, 17
pop r29
pop r28
ret
;###################################################################
/*
void md5_nextBlock(md5_ctx_t *state, void* block){
uint32_t a[4];
uint8_t m,n,i=0;
a[0]=state->a[0];
a[1]=state->a[1];
a[2]=state->a[2];
a[3]=state->a[3];
/ * round 1 * /
uint8_t s1t[]={7,12,17,22}; // 1,-1 1,4 2,-1 3,-2
for(m=0;m<4;++m){
for(n=0;n<4;++n){
md5_core(a, &(((uint32_t*)block)[m*4+n]), 4-n, s1t[n],i++,0);
}
}
/ * round 2 * /
uint8_t s2t[]={5,9,14,20}; // 1,-3 1,1 2,-2 2,4
for(m=0;m<4;++m){
for(n=0;n<4;++n){
md5_core(a, &(((uint32_t*)block)[(1+m*4+n*5)&0xf]), 4-n, s2t[n],i++,1);
}
}
/ * round 3 * /
uint8_t s3t[]={4,11,16,23}; // 0,4 1,3 2,0 3,-1
for(m=0;m<4;++m){
for(n=0;n<4;++n){
md5_core(a, &(((uint32_t*)block)[(5-m*4+n*3)&0xf]), 4-n, s3t[n],i++,2);
}
}
/ * round 4 * /
uint8_t s4t[]={6,10,15,21}; // 1,-2 1,2 2,-1 3,-3
for(m=0;m<4;++m){
for(n=0;n<4;++n){
md5_core(a, &(((uint32_t*)block)[(0-m*4+n*7)&0xf]), 4-n, s4t[n],i++,3);
}
}
state->a[0] += a[0];
state->a[1] += a[1];
state->a[2] += a[2];
state->a[3] += a[3];
state->counter++;
}
*/
/*
shift_table:
.byte 7,12,17,22
.byte 5, 9,14,20
.byte 4,11,16,23
.byte 6,10,15,21
md5_nextBlock:
stack_alloc 4*4
stack_free 4*4
*/

135
md5-stub.c Normal file
View File

@ -0,0 +1,135 @@
/* md5-stub.c */
/*
This file is part of the Crypto-avr-lib/microcrypt-lib.
Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "md5.h"
#include "uart.h"
#include <stdint.h>
#include <string.h>
#undef DEBUG
void md5_core(uint32_t* a, void* block, uint8_t as, uint8_t s, uint8_t i, uint8_t fi);
/*
#define ROTL32(x,n) (((x)<<(n)) | ((x)>>(32-(n))))
static
void md5_core(uint32_t* a, void* block, uint8_t as, uint8_t s, uint8_t i, uint8_t fi){
uint32_t t;
md5_func_t* funcs[]={md5_F, md5_G, md5_H, md5_I};
as &= 0x3;
// * a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). * /
#ifdef DEBUG
char funcc[]={'*', '-', '+', '~'};
uart_putstr("\r\n DBG: md5_core [");
uart_putc(funcc[fi]);
uart_hexdump(&as, 1); uart_putc(' ');
uart_hexdump(&k, 1); uart_putc(' ');
uart_hexdump(&s, 1); uart_putc(' ');
uart_hexdump(&i, 1); uart_putc(']');
#endif
t = a[as] + funcs[fi](a[(as+1)&3], a[(as+2)&3], a[(as+3)&3]) + *((uint32_t*)block) + md5_T[i] ;
a[as]=a[(as+1)&3] + ROTL32(t, s);
}
*/
/**
 * \brief hash one complete 512-bit block into the MD5 state
 *
 * Copies state->a into a working array, runs the 64 MD5 steps through
 * md5_core() (16 steps per round, round number == fi), adds the working
 * array back onto state->a and increments state->counter.
 *
 * \param state pointer to the MD5 context to update
 * \param block pointer to the 64-byte message block, read as 16 32-bit words
 */
void md5_nextBlock(md5_ctx_t *state, void* block){
	uint32_t *words = (uint32_t*)block;
	uint32_t a[4];
	/* rotate amounts, one row per round */
	uint8_t shift[4][4] = {
		{ 7, 12, 17, 22},  /* round 1 */
		{ 5,  9, 14, 20},  /* round 2 */
		{ 4, 11, 16, 23},  /* round 3 */
		{ 6, 10, 15, 21}   /* round 4 */
	};
	/* word index of a step is (base + mstep*m + nstep*n) & 0xf */
	int8_t base[4]  = { 0,  1,  5,  0};
	int8_t mstep[4] = { 4,  4, -4, -4};
	int8_t nstep[4] = { 1,  5,  3,  7};
	uint8_t round, m, n, k, idx;
	uint8_t step=0;

	/* this requires other mixed sboxes */
#ifdef DEBUG
	uart_putstr("\r\n DBG: md5_nextBlock: block:\r\n");
	uart_hexdump(block, 16); uart_putstr("\r\n");
	uart_hexdump(block+16, 16); uart_putstr("\r\n");
	uart_hexdump(block+32, 16); uart_putstr("\r\n");
	uart_hexdump(block+48, 16); uart_putstr("\r\n");
#endif

	for(k=0; k<4; ++k){
		a[k] = state->a[k];
	}

	for(round=0; round<4; ++round){
		for(m=0; m<4; ++m){
			for(n=0; n<4; ++n){
				idx = (base[round] + mstep[round]*m + nstep[round]*n) & 0xf;
				md5_core(a, &(words[idx]), 4-n, shift[round][n], step, round);
				++step;
			}
		}
	}

	for(k=0; k<4; ++k){
		state->a[k] += a[k];
	}
	state->counter++;
}
/**
 * \brief hash the final part of a message and finish the MD5 computation
 *
 * Any complete 512-bit blocks contained in the input are passed to
 * md5_nextBlock() first. The remaining bits are copied into a zeroed
 * 64-byte buffer, followed by the padding one-bit and, in the last eight
 * bytes, the total message length in bits (state->counter * 512 + remaining
 * bits). If the padding bit leaves no room for the length field, an
 * additional block is hashed; state->counter is decremented afterwards so
 * that this padding-only block is not counted as message data.
 *
 * \param state    pointer to the MD5 context
 * \param block    pointer to the remaining message data
 * \param length_b number of message bits in block (may be 512 or more)
 */
void md5_lastBlock(md5_ctx_t *state, void* block, uint16_t length_b){
	uint16_t l;  /* number of complete bytes in the final block */
	uint8_t r;   /* number of message bits in the trailing partial byte */
	uint8_t b[64];
	while (length_b >= 512){
		md5_nextBlock(state, block);
		length_b -= 512;
		block = ((uint8_t*)block) + 512/8;
	}
	memset(b, 0, 64);
	l = length_b/8;
	r = length_b%8;
	memcpy(b, block, l);
	/* insert padding one */
	if(r){
		/* keep only the r valid (most significant) bits of the partial byte;
		 * anything the caller left in the lower bits must not enter the hash */
		uint8_t t;
		t = ((uint8_t*)block)[l] & (uint8_t)(0xFF00>>r);
		t |= (0x80>>r);
		b[l]=t;
	}else{
		b[l]=0x80;
	}
	/* insert length value */
	if(l+sizeof(uint64_t) >= 512/8){
		/* no room for the 64-bit length behind the padding bit */
		md5_nextBlock(state, b);
		state->counter--;
		memset(b, 0, 64);
	}
	/* widen before multiplying so the bit count cannot wrap in the width of
	 * counter; the store relies on a little-endian target without alignment
	 * restrictions, as the original code did */
	*((uint64_t*)&b[64-sizeof(uint64_t)]) = ((uint64_t)state->counter * 512) + length_b;
	md5_nextBlock(state, b);
}

27
md5.c
View File

@ -40,19 +40,23 @@ void md5_init(md5_ctx_t *s){
s->a[2] = 0x98badcfe;
s->a[3] = 0x10325476;
}
static
uint32_t md5_F(uint32_t x, uint32_t y, uint32_t z){
return ((x&y)|((~x)&z));
}
static
uint32_t md5_G(uint32_t x, uint32_t y, uint32_t z){
return ((x&z)|((~z)&y));
}
static
uint32_t md5_H(uint32_t x, uint32_t y, uint32_t z){
return (x^y^z);
}
static
uint32_t md5_I(uint32_t x, uint32_t y, uint32_t z){
return (y ^ (x | (~z)));
}
@ -61,7 +65,8 @@ typedef uint32_t md5_func_t(uint32_t, uint32_t, uint32_t);
#define ROTL32(x,n) (((x)<<(n)) | ((x)>>(32-(n))))
void md5_core(uint32_t* a, uint8_t as, void* block, uint8_t k, uint8_t s, uint8_t i, uint8_t fi){
static
void md5_core(uint32_t* a, void* block, uint8_t as, uint8_t s, uint8_t i, uint8_t fi){
uint32_t t;
md5_func_t* funcs[]={md5_F, md5_G, md5_H, md5_I};
as &= 0x3;
@ -75,7 +80,7 @@ void md5_core(uint32_t* a, uint8_t as, void* block, uint8_t k, uint8_t s, uint8_
uart_hexdump(&s, 1); uart_putc(' ');
uart_hexdump(&i, 1); uart_putc(']');
#endif
t = a[as] + funcs[fi](a[(as+1)&3], a[(as+2)&3], a[(as+3)&3]) + ((uint32_t*)block)[k] + md5_T[i] ;
t = a[as] + funcs[fi](a[(as+1)&3], a[(as+2)&3], a[(as+3)&3]) + *((uint32_t*)block) + md5_T[i] ;
a[as]=a[(as+1)&3] + ROTL32(t, s);
}
@ -97,31 +102,31 @@ void md5_nextBlock(md5_ctx_t *state, void* block){
a[3]=state->a[3];
/* round 1 */
uint8_t s1t[]={7,12,17,22};
uint8_t s1t[]={7,12,17,22}; // 1,-1 1,4 2,-1 3,-2
for(m=0;m<4;++m){
for(n=0;n<4;++n){
md5_core(a, 4-n, block, m*4+n, s1t[n],i++,0);
md5_core(a, &(((uint32_t*)block)[m*4+n]), 4-n, s1t[n],i++,0);
}
}
/* round 2 */
uint8_t s2t[]={5,9,14,20};
uint8_t s2t[]={5,9,14,20}; // 1,-3 1,1 2,-2 2,4
for(m=0;m<4;++m){
for(n=0;n<4;++n){
md5_core(a, 4-n, block, (1+m*4+n*5)&0xf, s2t[n],i++,1);
md5_core(a, &(((uint32_t*)block)[(1+m*4+n*5)&0xf]), 4-n, s2t[n],i++,1);
}
}
/* round 3 */
uint8_t s3t[]={4,11,16,23};
uint8_t s3t[]={4,11,16,23}; // 0,4 1,3 2,0 3,-1
for(m=0;m<4;++m){
for(n=0;n<4;++n){
md5_core(a, 4-n, block, (5-m*4+n*3)&0xf, s3t[n],i++,2);
md5_core(a, &(((uint32_t*)block)[(5-m*4+n*3)&0xf]), 4-n, s3t[n],i++,2);
}
}
/* round 4 */
uint8_t s4t[]={6,10,15,21};
uint8_t s4t[]={6,10,15,21}; // 1,-2 1,2 2,-1 3,-3
for(m=0;m<4;++m){
for(n=0;n<4;++n){
md5_core(a, 4-n, block, (0-m*4+n*7)&0xf, s4t[n],i++,3);
md5_core(a, &(((uint32_t*)block)[(0-m*4+n*7)&0xf]), 4-n, s4t[n],i++,3);
}
}
state->a[0] += a[0];

12
mkfiles/md5_asm.mk Normal file
View File

@ -0,0 +1,12 @@
# Makefile fragment for MD5 (variant built from md5-asm.S plus md5-stub.c)
# ALGO_NAME is the prefix of the per-algorithm variables defined below.
ALGO_NAME := MD5_ASM
# comment out the following line to remove MD5_ASM from the build process
HASHES += $(ALGO_NAME)
# object files that make up the algorithm itself
$(ALGO_NAME)_OBJ := md5-asm.o md5-stub.o
# object files of the test program for this algorithm
$(ALGO_NAME)_TEST_BIN := main-md5-test.o debug.o uart.o serial-tools.o \
nessie_hash_test.o nessie_common.o cli.o performance_test.o
$(ALGO_NAME)_NESSIE_TEST := "nessie"
# NOTE(review): the suffix is spelled PEROFRMANCE here, while the top-level
# Makefile builds names with the suffix _PERORMANCE_TEST; confirm which
# spelling is actually looked up before relying on this variable.
$(ALGO_NAME)_PEROFRMANCE_TEST := "performance"

View File

@ -919,24 +919,18 @@ sha256_kv: ; round-key-vector stored in ProgMem
; state->length=0;
; memcpy(state->h, sha256_init_vector, 8*4);
;}
; param1: (Func3,r24) 16-bit pointer to sha256_ctx_t struct in ram
; param1: (r23,r24) 16-bit pointer to sha256_ctx_t struct in ram
; modifys: Z(r30,r31), Func1, r22
sha256_init:
movw r26, r24 ; (24,25) --> (26,27) load X with param1
ldi r30, lo8((sha256_init_vector))
ldi r31, hi8((sha256_init_vector))
ldi r22, 32
ldi r22, 32+8
sha256_init_vloop:
lpm r23, Z+
st X+, r23
dec r22
brne sha256_init_vloop
ldi r22, 8
clr r1 ;this should not be needed
sha256_init_lloop:
st X+, r1
dec r22
brne sha256_init_lloop
ret
sha256_init_vector:
@ -948,6 +942,8 @@ sha256_init_vector:
.word 0x688C, 0x9B05
.word 0xD9AB, 0x1F83
.word 0xCD19, 0x5BE0
.word 0x0000, 0x0000
.word 0x0000, 0x0000
;###########################################################
@ -968,7 +964,7 @@ rotl32:
mov r23, r22
mov r22, r21
subi r20, 8
rjmp rotr32
rjmp rotl32
bitrotl:
clr r21
clc

View File

@ -80,9 +80,11 @@ int16_t execcommand_d0_P(const char* str, PGM_P v, void(*fpt[])(void) ){
if(i!=-1){
if(fpt[i])
fpt[i]();
return i;
}else{
cli_auto_help_P(v);
return -1;
}
cli_auto_help_P(v);
return -1;
}

View File

@ -85,7 +85,12 @@ void testrun_nessie_md5(void){
void testrun_md5(void){
md5_ctx_t s;
char* testv[]={"", "a", "abc", "message digest", "abcdefghijklmnopqrstuvwxyz",
char* testv[]={
"",
"a",
"abc",
"message digest",
"abcdefghijklmnopqrstuvwxyz",
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
"12345678901234567890123456789012345678901234567890123456789012345678901234567890"};
uint8_t i;
@ -94,10 +99,10 @@ void testrun_md5(void){
for(i=0; i<7; ++i){
uart_putstr("\r\n MD5 (\"");
uart_putstr(testv[i]);
uart_putstr("\") = \r\n");
uart_putstr("\") = \r\n\t");
md5_init(&s);
md5_lastBlock(&s, testv[i], strlen(testv[i])*8);
uart_hexdump(&s.a[0], 16);
uart_hexdump(&(s.a[0]), 16);
}
}
@ -141,7 +146,7 @@ void testrun_performance_md5(void){
/*****************************************************************************
* main *
* main *
*****************************************************************************/
int main (void){

View File

@ -151,12 +151,22 @@ static
void tv4_hash(void){
uint8_t ctx[nessie_hash_ctx.ctx_size_B];
uint8_t hash[(nessie_hash_ctx.hashsize_b+7)/8];
uint8_t block[256/8];
uint16_t n=256;
uint8_t block[nessie_hash_ctx.hashsize_b/8];
uint16_t n=nessie_hash_ctx.hashsize_b;
uint32_t i;
uart_putstr_P(PSTR("\r\n message="));
uart_putstr(PSTR("256 zero bits"));
if(nessie_hash_ctx.hashsize_b>=10000)
uart_putc('0' + (nessie_hash_ctx.hashsize_b/10000)%10);
if(nessie_hash_ctx.hashsize_b>=1000)
uart_putc('0' + (nessie_hash_ctx.hashsize_b/1000)%10);
if(nessie_hash_ctx.hashsize_b>=100)
uart_putc('0' + (nessie_hash_ctx.hashsize_b/100)%10);
if(nessie_hash_ctx.hashsize_b>=10)
uart_putc('0' + (nessie_hash_ctx.hashsize_b/10)%10);
uart_putc('0' + nessie_hash_ctx.hashsize_b%10);
uart_putstr_P(PSTR(" zero bits"));
memset(block, 0, 256/8);
nessie_hash_ctx.hash_init(ctx);

View File

@ -51,13 +51,13 @@ ISR(TIMER1_OVF_vect){
}
void calibrateTimer(void){
volatile uint8_t i;
startTimer(1);
stopTimer();
const_overhead = TCNT1;
startTimer(1);
TCNT1=0xFFFE;
; ; ; ;
// asm volatile("NOP\n"::); asm volatile("NOP\n"::);
i++;
stopTimer();
int_overhead = TCNT1;
}

View File

@ -33,6 +33,9 @@
#define UBRRH UBRR0H
#define UBRRL UBRR0L
#define URSEL UMSEL
#define USART_UDRE_vect USART0_UDRE_vect
#define USART_RXC_vect USART0_RX_vect
#define UCSRA UCSR0A
#endif
#ifdef ATMEGA644