some bigint stuff in ASM

2011-07-05 19:34:03 +00:00 · 2011-07-05 19:34:03 +00:00 · 0ce5b31e97
parent ca330e4062
commit 0ce5b31e97
9 changed files with 501 additions and 18 deletions
--- a/avr-asm-macros.S
+++ b/avr-asm-macros.S
@ -26,6 +26,8 @@
 *
 */
 //#ifndef AVR_ASM_MACROS__S__
 //#define AVR_ASM_MACROS__S__
 #include <avr/io.h>
 /*******************************************************************************
@ -144,4 +146,5 @@
 *******************************************************************************/
 //#endif /* AVR_ASM_MACROS__S__ */
--- a/bigint/bigint.c
+++ b/bigint/bigint.c
@ -77,9 +77,9 @@ void bigint_adjust(bigint_t* a){
 /******************************************************************************/
 /*
  * Copies the value of src into dest.
  *
  * dest->wordv must already point to storage of at least src->length_B
  * bytes; the pointer itself is left untouched, only the bytes it refers to
  * and the length_B/info fields are overwritten.
  * The byte copy is done exactly once (a second identical memcpy would only
  * repeat the same work).
  */
 void bigint_copy(bigint_t* dest, const bigint_t* src){
 	memcpy(dest->wordv, src->wordv, src->length_B);
 	dest->length_B = src->length_B;
 	dest->info = src->info;
 }
 /******************************************************************************/
@ -611,7 +611,6 @@ void bigint_reduce(bigint_t* a, const bigint_t* r){
 	while(bigint_cmp_u(a,r)>=0){
 		bigint_sub_u(a,a,r);
 	}
 	bigint_adjust(a);
 }
 /******************************************************************************/
--- a/bigint/bigint_add_u.S
+++ b/bigint/bigint_add_u.S
@ -26,7 +26,6 @@
 *
 */
 #include "avr-asm-macros.S"
 /*
 param dest: r24:r25
@ -132,6 +131,6 @@ bigint_add_u:
 9:
 	pop_range 24, 25
 	pop_range 28, 29
-	jmp bigint_adjust
+	rjmp bigint_adjust
--- a/bigint/bigint_adjust.S
+++ b/bigint/bigint_adjust.S
@ -0,0 +1,110 @@
 /* bigint_adjust.S */
 /*
    This file is part of the ARM-Crypto-Lib.
    Copyright (C) 2006-2010 Daniel Otte (daniel.otte@rub.de)
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 /*
 void bigint_adjust(bigint_t* a){
 	while(a->length_B!=0 && a->wordv[a->length_B-1]==0){
 		a->length_B--;
 	}
 	if(a->length_B==0){
 		a->info=0;
 		return;
 	}
 	uint8_t t;
 	uint8_t i = 0x07;
 	t = a->wordv[a->length_B-1];
 	while((t&0x80)==0 && i){
 		t<<=1;
 		i--;
 	}
 	SET_FBS(a, i);
 }
 */
 .global bigint_adjust
 /*
  * void bigint_adjust(bigint_t* a)
  *
  * Normalises a: drops most significant zero bytes by shrinking length_B and
  * stores the index (0..7) of the highest set bit of the remaining top byte
  * in the low three bits of info. If every byte is zero, length_B and info
  * are both set to 0.
  *
  * in:       r24:r25 = a
  * layout:   +0/+1 length_B, +2 info, +3/+4 wordv pointer (as accessed below)
  * clobbers: r23, r24, r25, X (r26:r27), Z (r30:r31), SREG
  * r1 is stored as the constant 0 (avr-gcc zero register convention).
  */
 bigint_adjust:
 	movw r30, r24        /* Z = a */
 	ldd r24, Z+0         /* r24:r25 = a->length_B */
 	ldd r25, Z+1
 	ldd r26, Z+3         /* X = a->wordv */
 	ldd r27, Z+4
 	add r26, r24         /* X = one past the most significant byte */
 	adc r27, r25
 20:
 	/* walk down from the top byte until a non-zero byte is found */
 	sbiw r24, 1
 	brmi 30f             /* no bytes left: the value is zero */
 	ld r23, -X
 	tst r23
 	brne 40f
 	rjmp 20b
 30:
 	/* value is zero: clear both length bytes and the info byte */
 	std Z+0, r1
 	std Z+1, r1
 	std Z+2, r1
 	ret
 40:
 	/* r24:r25 is the index of the top non-zero byte; length = index + 1 */
 	adiw r24, 1
 	std Z+0, r24
 	std Z+1, r25
 	/* r25 = position of the highest set bit of r23 (r23 != 0 here) */
 	clr r24
 50:
 	sbrc r23, 0          /* bit set at position r24? then remember it */
 	mov r25, r24
 	inc r24
 	lsr r23
 	brne 50b             /* stop as soon as no set bits remain */
 	/* replace the low three bits of info, keep the other five */
 	ldd r23, Z+2
 	andi r23, 0xF8
 	or r23, r25
 	std Z+2, r23
 bigint_adjust_ret:
 	ret
 /*
 void bigint_copy(bigint_t* dest, const bigint_t* src){
 	memcpy(dest->wordv, src->wordv, src->length_B);
 	dest->length_B = src->length_B;
 	dest->info = src->info;
 }
 */
 .global bigint_copy
 /*
  * in:       r24:r25 = dest, r22:r23 = src
  * Copies length_B (2 bytes) and info (1 byte) from src to dest, then copies
  * length_B bytes from src->wordv to dest->wordv. The wordv pointer stored in
  * dest is only read, never written.
  * clobbers: r22, r23, r24, r25, X (r26:r27), Z (r30:r31), SREG
  */
 bigint_copy:
 	movw r30, r22        /* Z walks the source descriptor */
 	movw r26, r24        /* X walks the destination descriptor */
 	/* header: length_B into r24:r25 (kept as byte counter), info via r22 */
 	ld r24, Z+
 	ld r25, Z+
 	ld r22, Z+
 	st X+, r24
 	st X+, r25
 	st X+, r22
 	/* swap the descriptor pointers for the two wordv pointers */
 	ld r22, Z+
 	ld r23, Z
 	movw r30, r22        /* Z = src->wordv */
 	ld r22, X+
 	ld r23, X
 	movw r26, r22        /* X = dest->wordv */
 	/* byte-wise copy; the counter is checked before every byte */
 	rjmp 11f
 10:
 	ld r22, Z+
 	st X+, r22
 11:
 	sbiw r24, 1
 	brpl 10b
 	ret
--- a/bigint/bigint_asm.S
+++ b/bigint/bigint_asm.S
@ -0,0 +1,334 @@
 /* bigint_asm.S */
 /*
    This file is part of the ARM-Crypto-Lib.
    Copyright (C) 2006-2010 Daniel Otte (daniel.otte@rub.de)
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 #include "avr-asm-macros.S"
 #include "bigint_adjust.S"
 #include "bigint_add_u.S"
 /******************************************************************************/
 /*
 void bigint_add_scale_u(bigint_t* dest, const bigint_t* a, uint16_t scale){
 	uint16_t i,j=0;
 	uint16_t t=0;
 	if(scale>dest->length_B)
 		memset(dest->wordv+dest->length_B, 0, scale-dest->length_B);
 	for(i=scale; i<a->length_B+scale; ++i,++j){
 		t = a->wordv[j] + t;
 		if(dest->length_B>i){
 			t += dest->wordv[i];
 		}
 		dest->wordv[i] = (uint8_t)t;
 		t>>=8;
 	}
 	while(t){
 		if(dest->length_B>i){
 			t = dest->wordv[i] + t;
 		}
 		dest->wordv[i] = (uint8_t)t;
 		t>>=8;
 		++i;
 	}
 	if(dest->length_B < i){
 		dest->length_B = i;
 	}
 	bigint_adjust(dest);
 }
 */
 /*
  * Register assignments for the disabled draft of bigint_add_scale_u below.
  * Each *_0/*_1 pair names the low/high byte of one 16-bit value. The pairs
  * are moved with movw, so the low byte sits in an even register and the
  * high byte in the register directly above it.
  */
 DST_SIZE_0 = 22
 DST_SIZE_1 = 23
 SRC_SIZE_0 = 20
 SRC_SIZE_1 = 21 /* partner of r20; must not share r23 with DST_SIZE_1 */
 SCALE_0    = 18
 SCALE_1    = 19
 DST_CTX_0  =  6
 DST_CTX_1  =  7
 SRC_CTX_0  =  8
 SRC_CTX_1  =  9
 TMP_0      = 10
 TMP_1      = 11
 .global bigint_add_scale_u
 #if 0
 /*
  * Earlier draft of bigint_add_scale_u. It is excluded from assembly by the
  * surrounding #if 0 and kept for reference only.
  *
  * in: r24:r25 = dest, r22:r23 = src, r20:r21 = scale
  * The T flag is set when src_length + scale is not lower than dst_length.
  * NOTE(review): the draft was never enabled; its loop counts and carry
  * handling have not been checked here.
  */
 bigint_add_scale_u:
 	push_range 6, 11
 	movw r30, r24 /* dest ptr */
 	movw r26, r22 /* src ptr */
 	movw r24, r20 /* scale */
 	movw DST_CTX_0, r30
 	movw SRC_CTX_0, r26
 	movw SCALE_0, r24
 	/* pad dst with zeros if scale > dst_length */
 	ld DST_SIZE_0, Z+
 	ld DST_SIZE_1, Z+
 	sub r24, DST_SIZE_0
 	sbc r25, DST_SIZE_1
 	ldd TMP_0, Z+1 /* load tmp with DST_WORDV */
 	ldd TMP_1, Z+2
 	movw r30, TMP_0
 	brmi 20f /* branch if DST_SIZE > scale */
 	add r30, DST_SIZE_0
 	adc r31, DST_SIZE_1
 10:
 	/* store (scale - dst_length) zero bytes behind the current end of dst */
 	sbiw r24, 1
 	brmi 25f
 	st Z+, r1
 	rjmp 10b
 20:
 	add r30, r20 /* add scale to DST_WORDV */
 	adc r31, r21
 	/* add src to dest until one of the two ends */
 25:
 	ld SRC_SIZE_0, X+
 	ld SRC_SIZE_1, X+
 	adiw r26, 1 /* step over the info byte */
 	ld TMP_0, X+ /* load tmp with SRC_WORDV */
 	ld TMP_1, X
 	movw r26, TMP_0
 	movw r24, SRC_SIZE_0
 	add r24, SCALE_0
 	adc r25, SCALE_1
 	clt
 	cp  r24, DST_SIZE_0
 	cpc r25, DST_SIZE_1
 	brlo 30f
 	set
 	movw r24, DST_SIZE_0
 30:
 	adiw r24, 0
 	breq 35f
 	inc r25
 	clc
 31:
 	/* dec leaves the carry flag alone, so the adc chain survives the loop */
 	ld TMP_0, X+
 	ld TMP_1, Z
 	adc TMP_1, TMP_0
 	st Z+, TMP_1
 	dec r24
 	brne 31b
 	dec r25
 	brne 31b
 35:
 	rol TMP_1
 	brts 40f
 	/* dst is longer than src+scale */
 	ror TMP_1
 38:
 	ld TMP_0, Z
 	adc TMP_0, r1
 	st Z+, TMP_0
 	brcs 38b
 	rjmp 90f
 40:
 	/* dst is shorter than src+scale */
 	movw r24, SRC_SIZE_0
 	sub r24, DST_SIZE_0
 	sbc r25, DST_SIZE_1
 	add r24, SCALE_0
 	adc r25, SCALE_1
 	adiw r24, 0
 	breq 90f
 	inc r25
 	ror TMP_1
 45:
 	ld TMP_0, X+
 	adc TMP_0, r1
 	st Z+, TMP_0
 	dec r24
 	brne 45b
 	dec r25
 	brne 45b
 90:
 	/* hand dest to bigint_adjust as a tail call */
 	movw r24, DST_CTX_0
 	pop_range 6, 11
 	rjmp bigint_adjust
 #endif
 /******************************************************************************/
 /******************************************************************************/
 /******************************************************************************/
 /*
  * Register assignments for bigint_add_scale_u. Each *_0/*_1 pair is the
  * low/high byte of one 16-bit value and is moved as a pair with movw.
  */
 DST_LEN_0 = 22
 DST_LEN_1 = 23
 SRC_LEN_0 = 20
 SRC_LEN_1 = 21
 SCALE_0   = 18
 SCALE_1   = 19
 DST_CTX_0 =  6
 DST_CTX_1 =  7
 SRC_CTX_0 =  8
 SRC_CTX_1 =  9
 TMP_0     = 10
 TMP_1     = 11
 /*
  * bigint_add_scale_u: adds the bytes of src to dest starting at byte offset
  * scale inside dest->wordv, then tail-calls bigint_adjust on dest.
  *
  * in:  r24:r25 = dest, r22:r23 = src, r20:r21 = scale
  * The descriptor layout used here is +0/+1 length, +2 info, +3/+4 wordv.
  * r6..r11 are saved with push_range and restored with pop_range before the
  * jump to bigint_adjust (macros come from avr-asm-macros.S).
  * r1 is used as a zero operand and, temporarily, as a carry backup.
  *
  * The routine still carries TODO markers; the NOTE(review) comments below
  * mark spots that should be confirmed on the target.
  */
 bigint_add_scale_u:
 	push_range 6, 11
 	movw r30, r24 /* dest ptr */
 	movw r26, r22 /* src ptr */
 	movw r24, r20 /* scale */
 	movw DST_CTX_0, r30
 	/* SRC_CTX is written here but not read again in this routine */
 	movw SRC_CTX_0, r26
 	movw SCALE_0, r24
 	/* pad dest with zeros to length of SRC_LENGTH + scale */
 	ld SRC_LEN_0, X+
 	ld SRC_LEN_1, X+
 	/* step over the info byte to reach the wordv pointer */
 	adiw r26, 1
 	ld TMP_0, X+
 	ld TMP_1, X+
 	movw r26, TMP_0 /* move SRC_WORDV to X */
 	ldd DST_LEN_0, Z+0
 	ldd DST_LEN_1, Z+1
    ldd TMP_0, Z+3
    ldd TMP_1, Z+4
    movw r30, TMP_0 /* move DEST_WORDV to Z */
 	/* TMP = r24:r25 = scale - DST_LEN; the sign selects the path */
 	movw TMP_0, SCALE_0
 	sub TMP_0, DST_LEN_0
 	sbc TMP_1, DST_LEN_1
 	movw r24, TMP_0
 	brmi 40f /* no padding needed since DST_LEN > scale */
 	add r30, DST_LEN_0 /* add DST_LEN to Z (DEST_WORDV)*/
 	adc r31, DST_LEN_1
 	/* pad and copy src in front of dest */
 10: /* padding loop */
 	sbiw r24, 1
 	brmi 11f
 	st Z+, r1
 	rjmp 10b
 11:
 	/* start of copy */
 	movw r24, SRC_LEN_0
 12: /* copy loop */
 	sbiw r24, 1
 	brmi 13f
 	ld TMP_0, X+
 	st Z+, TMP_0
 	rjmp 12b
 13:
 	/* new dest length = scale + SRC_LEN, written into the dest descriptor */
 	movw TMP_0, SCALE_0
 	add TMP_0, SRC_LEN_0
 	adc TMP_1, SRC_LEN_1
 	movw r30, DST_CTX_0
 	std Z+0, TMP_0
 	std Z+1, TMP_1
 	movw r24, r30
 99:
 	/* common exit: r24:r25 = dest, bigint_adjust returns to our caller */
 	pop_range 6, 11
 	rjmp bigint_adjust
 40:
    /* TODO */
    /* Z points at DST_WORDV */
    /* X points at SRC_WORDV */
    /* r24:r25 contains scale - DST_LEN (negative) */
    /* set T bit if DST_LEN > SRC_LEN + scale */
    clt
    add r30, SCALE_0
    adc r31, SCALE_1
    /* TMP = scale - DST_LEN + SRC_LEN; its sign is tested by brpl */
    add TMP_0, SRC_LEN_0
    adc TMP_1, SRC_LEN_1
 	brpl 41f
 	set
 	/* DST_LEN > SRC_LEN + scale && DST_LEN > scale */
 	/*
 	       +-------+-------+ SRC + scale
 	   +------+------------+ DST
 	*/
 	movw r24, SRC_LEN_0
 	rjmp 44f
 41:
 	/* DST_LEN <= SRC_LEN + scale && DST_LEN > scale */
 	/*
 	       +-------+-------+ SRC + scale
 	          +------------+ DST
 	*/
 	com r24 /* negate r24:r25 */
 	com r25
 	adiw r24, 1
 44:
 	clc
 45:
 	/*
 	 * Add r24:r25 source bytes to dest; dec keeps the carry flag intact.
 	 * NOTE(review): brpl looks at bit 7 of the decremented low byte, so a
 	 * count whose low byte is above 0x80 seems to leave the loop early -
 	 * confirm with operands longer than 128 bytes.
 	 */
 	dec r24
 	brpl 46f
 	dec r25
 	brmi 50f
 46:	ld TMP_0, X+
 	ld TMP_1, Z
 	adc TMP_0, TMP_1
 	st Z+, TMP_0
 	rjmp 45b
 50:
    /* do the overhanging part */
    /* carry is parked in r1 while Z is borrowed for address arithmetic */
    rol r1
    movw r24, r30
    movw r30, DST_CTX_0
    ldd TMP_0, Z+3
    ldd TMP_1, Z+4
    movw r30, TMP_0
    add r30, DST_LEN_0
    adc r31, DST_LEN_1
    adiw r30, 1
    /*
     * NOTE(review): this writes r1 (holding the parked carry) to
     * wordv[DST_LEN + 1]; both the offset and the stored value look
     * unintended if the aim is to clear the byte behind dest - confirm.
     */
    st Z, r1
    movw r30, r24
    ror r1
 	brtc 60f
 	/* T set: dest extends past src + scale, ripple the carry upwards */
 51:	brcc 53f
 52:	ld TMP_0, Z
 	adc TMP_0, r1
 	st Z+, TMP_0
 	brcs 52b
 53:
    /* TODO */
    /*
     * New length = Z - DEST_WORDV, stored into the dest descriptor.
     * NOTE(review): when T is set and the carry dies out before the old
     * end of dest, Z is still below wordv + DST_LEN, so this appears to
     * shorten dest - confirm.
     */
    movw r24, r30
    movw r30, DST_CTX_0
    ldd TMP_0, Z+3
    ldd TMP_1, Z+4
    sub r24, TMP_0
    sbc r25, TMP_1
    std Z+0, r24
    std Z+1, r25
    movw r24, r30
    rjmp 99b
 	/* T clear: src + scale reaches past dest, copy the rest of src + carry */
 60: rol r1 /* backup carry */
 	movw r24, SRC_LEN_0
    add r24, SCALE_0
    adc r25, SCALE_1
    sub r24, DST_LEN_0
    sbc r25, DST_LEN_1
    ror r1 /* restore carry */
    /* NOTE(review): same dec/brpl counter as at label 45, same concern */
 61: dec r24
    brpl 62f
    dec r25
    brmi 63f
 62: ld TMP_0, X+
    adc TMP_0, r1
    st Z+, TMP_0
    rjmp 61b
 63:
 	/* a final carry becomes one extra byte with the value 1 */
 	brcc 53b
 	ldi r24, 1
 	st Z+, r24
 	rjmp 53b
--- a/doc/acl_blockciphers.texi
+++ b/doc/acl_blockciphers.texi
@ -1,8 +1,7 @@
@c acl_blockcipher.texi
@section Block ciphers
-@subsection What a block cipher does
+ A block cipher is an algorithm which turns an input of fixed length into an 
 A block cipher is a algorithm which turn an input of fixed length into an 
 output of the same length (enciphering or encrypting). The transformation is 
 specified by a key which has to be of a fixed length, or a length of a given 
 set or range.
--- a/host/bigint_test.rb
+++ b/host/bigint_test.rb
@ -254,17 +254,18 @@ def add_scale_test(a, b, scale)
  begin
    line = $sp.gets()
    line = "" if line==nil
-    puts("DBG got: "+line) if $debug
+    puts("DBG got (#{__LINE__}): "+line) if $debug
    if /^Error:.*/.match(line)
      puts line
      return false
    end
  end while not /[\s]*enter a:[\s]*/.match(line)
  puts("DBG put (#{__LINE__}): "+a.to_s(16)+" ") if $debug
  $sp.print(a.to_s(16)+" ")
  begin
    line = $sp.gets()
    line = "" if line==nil
-    puts("DBG got: "+line) if $debug
+    puts("DBG got (#{__LINE__}): "+line) if $debug
    if /^Error:.*/.match(line)
      puts line
      return false
@ -274,17 +275,17 @@ def add_scale_test(a, b, scale)
  begin
    line = $sp.gets()
    line = "" if line==nil
-    puts("DBG got: "+line) if $debug
+    puts("DBG got (#{__LINE__}): "+line) if $debug
    if /^Error:.*/.match(line)
      puts line
      return false
    end
  end while not /[\s]*enter scale:[\s]*/.match(line)
-  $sp.print(scale.to_s(16)+"\n")
+  $sp.print(scale.to_s(10)+"\r")
  begin
    line = $sp.gets()
    line = "" if line==nil
-    puts("DBG got: "+line) if $debug
+    puts("DBG got (#{__LINE__}): "+line) if $debug
    if /^Error:.*/.match(line)
      puts line
      return false
@ -295,12 +296,13 @@ def add_scale_test(a, b, scale)
  s_ = m[3].to_i(16)
  c_ = m[4].to_i(16)
  line.chomp!
-  if(a_== a && b_ == b && c_ == (a+b))
+  should = a + (b<<(8*scale))
  if(a_== a && b_ == b && s_ == scale && c_ == should )
    $logfile.printf("[pass]: %s\n", line)
    return true
  else
-    $logfile.printf("[fail (%s%s%s)]: %s", (a==a_)?"":"a", (b==b_)?"":"b", (c_==a+b)?"":"c",line)
+    # one %s per mismatch flag (a, b, s, c) plus one for the echoed line
+    $logfile.printf("[fail (%s%s%s%s)]: %s", (a==a_)?"":"a", (b==b_)?"":"b", (scale==s_)?"":"s",(c_==should)?"":"c",line)
-    $logfile.printf(" ; should %s + %s = %s\n", a.to_s(16), b.to_s(16), (a+b).to_s(16))
+    $logfile.printf(" ; should %s + %s << 8*%s = %s\n", a.to_s(16), b.to_s(16), scale.to_s(16), should.to_s(16))
    return false
  end
  return false
@ -552,6 +554,41 @@ def run_test_add(skip=0)
  end while length_a_B<4096/8
 end
 ################################################################################
 # run_test_add_scale                                                           #
 ################################################################################
 # Drives add_scale_test over growing operand sizes.
 #
 # Starts with operands of skip+1 bytes and grows by one byte per pass until
 # the size reaches 4096/8 bytes. Every pass runs two rounds of 17 sweeps
 # over the scales 0..300; each operand pair is tested in both orders and
 # every result is handed to screen_progress.
 def run_test_add_scale(skip=0)
  bytes_a = skip+1
  bytes_b = skip+1
  loop do
    $size = bytes_a
    # round 1: both operands drawn from the full bytes_a-byte range
    17.times do
      0.upto(300) do |shift|
        x = rand(256**bytes_a)
        y = rand(256**bytes_a)
        screen_progress(add_scale_test(x, y, shift))
        screen_progress(add_scale_test(y, x, shift))
      end
    end
    # round 2: second operand drawn from a random size of 0..bytes_b bytes
    17.times do
      0.upto(300) do |shift|
        y_bytes = rand(bytes_b+1)
        x = rand(256**bytes_a)
        y = rand(256**y_bytes)
        screen_progress(add_scale_test(x, y, shift))
        screen_progress(add_scale_test(y, x, shift))
      end
    end
    bytes_a += 1
    bytes_b += 1
    break unless bytes_a<4096/8
  end
 end
 ################################################################################
 # run_test_mul                                                                 #
 ################################################################################
@ -762,12 +799,14 @@ $logfile.printf("seed = 0x%X\n", 0xdeadbeef)
 tests = Hash.new
 tests['a'] = proc {|x| run_test_add(x) }
 tests['m'] = proc {|x| run_test_mul(x) }
 tests['x'] = proc {|x| run_test_add_scale(x) }
 tests['s'] = proc {|x| run_test_square(x) }
 tests['r'] = proc {|x| run_test_reduce(x) }
 tests['e'] = proc {|x| run_test_expmod(x) }
 tests['g'] = proc {|x| run_test_gcdext(x) }
 init_str = Hash.new
 init_str['a'] = 'add-test'
 init_str['x'] = 'add-scale-test'
 init_str['m'] = 'mul-test'
 init_str['s'] = 'square-test'
 init_str['r'] = 'reduce-test'
--- a/mkfiles/bigint.mk
+++ b/mkfiles/bigint.mk
@ -2,7 +2,7 @@
 ALGO_NAME := BIGINT
 # comment out the following line to remove BigInt from the build process
-#AUX += $(ALGO_NAME)
+AUX += $(ALGO_NAME)
 $(ALGO_NAME)_DIR      := bigint/
 $(ALGO_NAME)_INCDIR   := memxor/ noekeon/
--- a/test_src/main-bigint-test.c
+++ b/test_src/main-bigint-test.c
@ -104,12 +104,12 @@ void test_add_scale_bigint(void){
 	for(;;){
 		cli_putstr_P(PSTR("\r\nenter a:"));
 		if(bigint_read_hex_echo(&a)){
-			cli_putstr_P(PSTR("\r\n end add test"));
+			cli_putstr_P(PSTR("\r\n end add-scale test"));
 			return;
 		}
 		cli_putstr_P(PSTR("\r\nenter b:"));
 		if(bigint_read_hex_echo(&b)){
-			cli_putstr_P(PSTR("\r\n end add test"));
+			cli_putstr_P(PSTR("\r\n end add-scale test"));
 			return;
 		}
 		cli_putstr_P(PSTR("\r\nenter scale:"));
@ -140,8 +140,8 @@ void test_add_scale_bigint(void){
 			free(b.wordv);
 			continue;
 		}
 		bigint_copy(&c, &a);
 		c.wordv = c_b;
 		bigint_copy(&c, &a);
 		bigint_add_scale_u(&c, &b, scale);
 		bigint_print_hex(&c);
 		cli_putstr_P(PSTR("\r\n"));