even more serpent in asm

This commit is contained in:
bg 2008-08-10 23:19:11 +00:00
parent c1553054f9
commit 0f66a12e93
6 changed files with 623 additions and 14 deletions

View File

@ -5,7 +5,7 @@ ALGO_NAME := SERPENT_ASM_SMALL
BLOCK_CIPHERS += $(ALGO_NAME)
$(ALGO_NAME)_OBJ := serpent.o serpent-sboxes-small.o memxor.o
$(ALGO_NAME)_OBJ := serpent.o serpent-asm.o serpent-sboxes-small.o memxor.o
$(ALGO_NAME)_TEST_BIN := main-serpent-test.o debug.o uart.o serial-tools.o \
nessie_bc_test.o nessie_common.o cli.o performance_test.o
$(ALGO_NAME)_NESSIE_TEST := "nessie"

591
serpent-asm.S Normal file
View File

@ -0,0 +1,591 @@
/* serpent_asm.S */
/*
This file is part of the Crypto-avr-lib/microcrypt-lib.
Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* File: serpent_sboxes.S
* Author: Daniel Otte
* Date: 2008-08-07
* License: GPLv3 or later
* Description: Implementation of the serpent sbox function.
*
*/
#include <avr/io.h>
/*******************************************************************************
* MACRO SECTION *
*******************************************************************************/
.macro push_ p1:req, p2:vararg
push \p1
.ifnb \p2
push_ \p2
.endif
.endm
.macro pop_ p1:req, p2:vararg
pop \p1
.ifnb \p2
pop_ \p2
.endif
.endm
.macro push_range from:req, to:req
push \from
.if \to-\from
push_range "(\from+1)",\to
.endif
.endm
.macro pop_range from:req, to:req
pop \to
.if \to-\from
pop_range \from,"(\to-1)"
.endif
.endm
.macro stack_alloc size:req, reg1=r30, reg2=r31
in r0, _SFR_IO_ADDR(SREG)
cli
in \reg1, _SFR_IO_ADDR(SPL)
in \reg2, _SFR_IO_ADDR(SPH)
sbiw \reg1, \size
out _SFR_IO_ADDR(SPH), \reg2
out _SFR_IO_ADDR(SPL), \reg1
out _SFR_IO_ADDR(SREG), r0
.endm
.macro stack_free size:req, reg1=r30, reg2=r31
in r0, _SFR_IO_ADDR(SREG)
cli
in \reg1, _SFR_IO_ADDR(SPL)
in \reg2, _SFR_IO_ADDR(SPH)
adiw \reg1, \size
out _SFR_IO_ADDR(SPH), \reg2
out _SFR_IO_ADDR(SPL), \reg1
out _SFR_IO_ADDR(SREG), r0
.endm
/*******************************************************************************
* END of MACRO SECTION *
*******************************************************************************/
/*
static void serpent_lt(uint8_t *b){
X0 = rotl32(X0, 13);
X2 = rotl32(X2, 3);
X1 ^= X0 ^ X2;
X3 ^= X2 ^ (X0 << 3);
X1 = rotl32(X1, 1);
X3 = rotl32(X3, 7);
X0 ^= X1 ^ X3;
X2 ^= X3 ^ (X1 << 7);
X0 = rotl32(X0, 5);
X2 = rotr32(X2, 10);
}
*/
#if 0
A0 = 4
A1 = 5
A2 = 6
A3 = 7
B0 = 8
B1 = 9
B2 = 10
B3 = 11
C0 = 12
C1 = 13
C2 = 14
C3 = 15
D0 = 16
D1 = 17
D2 = 18
D3 = 19
T0 = 20
T1 = 21
T2 = 22
T3 = 23
.global serpent_lt
serpent_lt:
push_range 4, 17
movw r26, r24
ld A2, X+
ld A3, X+
ld A0, X+
ld A1, X+
ldi r20, 3
mov r0, A0
1:
lsr r0
ror A3
ror A2
ror A1
ror A0
dec r20
brne 1b
ld B0, X+
ld B1, X+
ld B2, X+
ld B3, X+
ld C2, X+
ld C3, X+
ld C0, X+
ld C1, X+
ldi r20, 3
mov r0, C0
1:
lsr r0
ror C3
ror C2
ror C1
ror C0
dec r20
brne 1b
ld D0, X+
ld D1, X+
ld D2, X+
ld D3, X+
/* X1 ^= X0 ^ X2; */
eor B0, A0
eor B0, C0
eor B1, A1
eor B1, C1
eor B2, A2
eor B2, C2
eor B3, A3
eor B3, C3
/* X3 ^= X2 ^ (X0 << 3); */
mov T0, A0
mov T1, A1
mov T2, A2
mov T3, A3
ldi r24, 3
1:
lsl T0
rol T1
rol T2
rol T3
dec r24
brne 1b
eor C0, B0
eor C0, T0
eor C1, B1
eor C1, T1
eor C2, B2
eor C2, T2
eor C3, B3
eor C3, T3
/* X1 = rotl32(X1, 1); */
mov r0, B3
lsl r0
rol B0
rol B1
rol B2
rol B3
/* X3 = rotl32(X3, 7); */
mov r0, D3
mov D3, D2
mov D2, D1
mov D1, D0
mov D0, r0
lsr r0
ror D3
ror D2
ror D1
ror D0
/* X0 ^= X1 ^ X3; */
eor A0, B0
eor A0, D0
eor A1, B1
eor A1, D1
eor A2, B2
eor A2, D2
eor A3, B3
eor A3, D3
/* X2 ^= X3 ^ (X1 << 7); */
mov T1, B0
mov T2, B1
mov T3, B2
clr T0
mov r0, B3
lsr r0
ror T2
ror T1
ror T0
eor C0, D0
eor C0, T0
eor C1, D1
eor C1, T1
eor C2, D2
eor C2, T2
eor C3, D3
eor C3, T3
/* X0 = rotl32(X0, 5); */
ldi r24, 5
mov r0, A3
1:
lsl r0
rol A0
rol A1
rol A2
rol A3
dec r24
brne 1b
/* X2 = rotr32(X2, 10); */
mov r0, C0
mov C0, C1
mov C1, C2
mov C2, C3
mov C3, r0
ldi r24, 2
1:
lsr r0
ror C2
ror C1
ror C0
ror C3
dec r24
brne 1b
clr r31
ldi r30, D3+1
ldi r24, 16
1:
ld r0, -Z
st -X, r0
dec r24
brne 1b
pop_range 4, 17
ret
#endif
T0 = 22
T1 = 23
T2 = 24
T3 = 25
TT = 21
/* rotate the data word (4 byte) pointed to by X by r20 bits to the right */
memrotr32:
ld T0, X+
ld T1, X+
ld T2, X+
ld T3, X+
mov TT, T0
1:
lsr TT
ror T3
ror T2
ror T1
ror T0
dec r20
brne 1b
st -X, T3
st -X, T2
st -X, T1
st -X, T0
ret
/* rotate the data word (4 byte) pointed to by X by r20 bits to the left */
memrotl32:
ld T0, X+
ld T1, X+
ld T2, X+
ld T3, X+
mov TT, T3
1:
lsl TT
rol T0
rol T1
rol T2
rol T3
dec r20
brne 1b
st -X, T3
st -X, T2
st -X, T1
st -X, T0
ret
/* xor the dataword (4 byte) pointed by Z into X */
memeor32:
ldi T2, 4
1:
ld T0, X
ld T1, Z+
eor T0, T1
st X+, T0
dec T2
brne 1b
ret
.global serpent_lt
serpent_lt:
/* X0 := X0 <<< 13 */
movw r26, r24
ldi r20, 7
rcall memrotl32
ldi r20, 6
rcall memrotl32
/* X2 := X2 <<< 3 */
adiw r26, 8
ldi r20, 3
rcall memrotl32
/* X1 ^= X2 */
movw r30, r26
sbiw r26, 4
rcall memeor32
/* X1 ^= X0 */
sbiw r26, 4
sbiw r30, 12
rcall memeor32
/* X3 ^= X2 */
movw r30, r26
adiw r26, 4
rcall memeor32
/* T := X0 */
sbiw r26, 16
ld r18, X+
ld r19, X+
ld r20, X+
ld r21, X+
/* T := T<<3 */
ldi r22, 3
1:
lsl r18
rol r19
rol r20
rol r21
dec r22
brne 1b
clr r31
/* X3 ^= T */
adiw r26, 8
ldi r30, 18
rcall memeor32
/* X1 := X1<<<1 */
sbiw r26, 12
ldi r20, 1
rcall memrotl32
/* X3 := X3<<<7 */
adiw r26, 8
ldi r20, 7
rcall memrotl32
/* X0 ^= X3 */
movw r30, r26
sbiw r26, 12
rcall memeor32
/* X0 ^= X1 */
movw r30, r26
sbiw r26, 4
rcall memeor32
/* X2 ^= X3 */
adiw r26, 4
adiw r30, 4
rcall memeor32
/* T := X1<<<8 */
sbiw r26, 8
ld r19, X+
ld r20, X+
ld r21, X+
ld r18, X+
/* T := T>>>1; T&=0xfffffff8 */
lsr r18
ror r21
ror r20
ror r19
clr r18
ror r18
clr r31
ldi r30, 18
/* X2 ^= T */
rcall memeor32
/* X0 := X0 <<< 5 */
sbiw r26, 12
ldi r20, 5
rcall memrotl32
/* X3 := X3 >>> 10 */
adiw r26, 8
ldi r20, 7
rcall memrotr32
ldi r20, 3
rcall memrotr32
ret
.global serpent_inv_lt
serpent_inv_lt:
/* X0 := X0 >>> 5 */
movw r26, r24
ldi r20, 5
rcall memrotr32
/* X2 := X2 <<< 10 */
adiw r26, 8
ldi r20, 7
rcall memrotl32
ldi r20, 3
rcall memrotl32
/* X2 ^= X3 */
movw r30, r26
adiw r30, 4
rcall memeor32
sbiw r26, 4
sbiw r30, 12
/* T := X1<<7 */
ld r19, Z+
ld r20, Z+
ld r21, Z+
ld r18, Z+
lsr r18
ror r21
ror r20
ror r19
clr r18
ror r18
clr r31
/* X2 ^= T */
ldi r30, 18
rcall memeor32
/* X0 ^= X1 */
sbiw r26, 12
movw r30, r26
adiw r30, 4
rcall memeor32
/* X0 ^= X3 */
sbiw r26, 4
adiw r30, 4
rcall memeor32
/* X1 := X1>>>1 */
ldi r20, 1
rcall memrotr32
/* X3 := X3>>>7 */
adiw r26, 8
ldi r20, 7
rcall memrotr32
/* X3 ^= X2 */
sbiw r30, 8
rcall memeor32
sbiw r26, 4
/* T:= X0<<3 */
sbiw r30, 12
ld r18, Z+
ld r19, Z+
ld r20, Z+
ld r21, Z+
ldi r24, 3
1:
lsl r18
rol r19
rol r20
rol r21
dec r24
brne 1b
/* X3 ^= T */
clr r31
ldi r30, 18
rcall memeor32
/* X1 ^= X0 */
sbiw r26, 12
movw r30, r26
sbiw r30, 4
rcall memeor32
/* X1 ^= X2 */
movw r26, r30
adiw r30, 4
rcall memeor32
/* X2 := X2 >>> 3 */
ldi r20, 3
rcall memrotr32
/* X0 := X0 >>> 13 */
sbiw r26, 8
ldi r20, 7
rcall memrotr32
ldi r20, 6
rcall memrotr32
ret
/*
#define GOLDEN_RATIO 0x9e3779b9l
static uint32_t serpent_gen_w(uint32_t * b, uint8_t i){
uint32_t ret;
ret = b[0] ^ b[3] ^ b[5] ^ b[7] ^ GOLDEN_RATIO ^ (uint32_t)i;
ret = rotl32(ret, 11);
return ret;
}
*/
/*
* param b is passed in r24:r25
* param i is passed in r22
* return value is returned in r22.r23.r24.r25
*/
.global serpent_gen_w
serpent_gen_w:
movw r30, r24
/* ^i^b[0]*/
ld r21, Z+
eor r22, r21
ld r23, Z+
ld r24, Z+
ld r25, Z+
/* ^b[3]^b[5]^[b7] */
adiw r30, 4
ldi r20, 3
1:
adiw r30, 4
ld r21, Z+
eor r22, r21
ld r21, Z+
eor r23, r21
ld r21, Z+
eor r24, r21
ld r21, Z+
eor r25, r21
dec r20
brne 1b
/* ^0x9e3779b9l */
ldi r21, 0xb9
eor r22, r21
ldi r21, 0x79
eor r23, r21
ldi r21, 0x37
eor r24, r21
ldi r21, 0x9e
eor r25, r21
/* <<<11 */
mov r21, r25
mov r25, r24
mov r24, r23
mov r23, r22
mov r22, r21
mov r21, r25
ldi r20, 3
1:
lsl r21
rol r22
rol r23
rol r24
rol r25
dec r20
brne 1b
ret

View File

@ -83,7 +83,7 @@
.endm
/*******************************************************************************
* END of MACRO SECTION *
* END of MACRO SECTION *
*******************************************************************************/
serpent_sbox_fast:

View File

@ -83,7 +83,7 @@
.endm
/*******************************************************************************
* END of MACRO SECTION *
* END of MACRO SECTION *
*******************************************************************************/
serpent_sbox:
.byte 0x83, 0x1F, 0x6A, 0xB5, 0xDE, 0x24, 0x07, 0xC9

View File

@ -47,7 +47,9 @@ uint32_t rotr32(uint32_t a, uint8_t n){
#define X2 (((uint32_t*)b)[2])
#define X3 (((uint32_t*)b)[3])
static void lt(uint8_t *b){
void serpent_lt(uint8_t *b);
/*
static void serpent_lt(uint8_t *b){
X0 = rotl32(X0, 13);
X2 = rotl32(X2, 3);
X1 ^= X0 ^ X2;
@ -59,8 +61,12 @@ static void lt(uint8_t *b){
X0 = rotl32(X0, 5);
X2 = rotr32(X2, 10);
}
*/
static void inv_lt(uint8_t *b){
static void serpent_inv_lt(uint8_t *b);
/*
static void serpent_inv_lt(uint8_t *b){
X2 = rotl32(X2, 10);
X0 = rotr32(X0, 5);
X2 ^= X3 ^ (X1 << 7);
@ -72,17 +78,19 @@ static void inv_lt(uint8_t *b){
X2 = rotr32(X2, 3);
X0 = rotr32(X0, 13);
}
*/
uint32_t serpent_gen_w(uint32_t * b, uint8_t i);
/*
#define GOLDEN_RATIO 0x9e3779b9l
static uint32_t gen_w(uint32_t * b, uint8_t i){
static uint32_t serpent_gen_w(uint32_t * b, uint8_t i){
uint32_t ret;
ret = b[0] ^ b[3] ^ b[5] ^ b[7] ^ GOLDEN_RATIO ^ (uint32_t)i;
ret = rotl32(ret, 11);
return ret;
}
/* key must be 256bit (32 byte) large! */
*/
void serpent_init(const void* key, uint16_t keysize, serpent_ctx_t* ctx){
uint32_t buffer[8];
uint8_t i,j;
@ -97,7 +105,7 @@ void serpent_init(const void* key, uint16_t keysize, serpent_ctx_t* ctx){
}
for(i=0; i<33; ++i){
for(j=0; j<4; ++j){
ctx->k[i][j] = gen_w(buffer, i*4+j);
ctx->k[i][j] = serpent_gen_w(buffer, i*4+j);
memmove(buffer, &(buffer[1]), 7*4); /* shift buffer one to the "left" */
buffer[7] = ctx->k[i][j];
}
@ -113,7 +121,7 @@ void serpent_enc(void* buffer, const serpent_ctx_t* ctx){
for(i=0; i<31; ++i){
memxor(buffer, ctx->k[i], 16);
sbox128(buffer, i);
lt((uint8_t*)buffer);
serpent_lt((uint8_t*)buffer);
}
memxor(buffer, ctx->k[i], 16);
sbox128(buffer, i);
@ -130,7 +138,7 @@ void serpent_dec(void* buffer, const serpent_ctx_t* ctx){
memxor((uint8_t*)buffer, ctx->k[i], 16);
--i;
for(; i>=0; --i){
inv_lt(buffer);
serpent_inv_lt(buffer);
inv_sbox128(buffer, i);
memxor(buffer, ctx->k[i], 16);
}

View File

@ -21,8 +21,7 @@
* \author Daniel Otte
* \date 2007-06-07
* \brief SHABEA - a SHA Based Encryption Algorithm declarations
* \par License
* GPL
* \license GPLv3 or later
*
* SHABEAn-r where n is the blocksize and r the number of round used
*
@ -31,5 +30,16 @@
#ifndef SHABEA_H_
#define SHABEA_H_
void shabea256(void * block, void * key, uint16_t keysize_b, uint8_t enc, uint8_t rounds);
/** \fn void shabea256(void * block, const void * key, uint16_t keysize_b, uint8_t enc, uint8_t rounds);
* \brief shabea256 encryption/decryption
*
* \param block pointer to a 256 bit (32 byte block) to en/decrypt
* \param key pointer to the key material
* \param keysize_b length of the key in bits
* \param enc controls if encryption (1) or decryption (0) is done
* \param rounds rounds to be done by the cipher (it is not recommended to use less then four rounds)
*/
void shabea256(void * block, const void * key, uint16_t keysize_b,
uint8_t enc, uint8_t rounds);
#endif /*SHABEA_H_*/