optimized xtea asm implementation

This commit is contained in:
bg 2011-09-02 18:42:52 +00:00
parent 35dc9566e4
commit b246a2a058
13 changed files with 454 additions and 613 deletions

View File

@ -36,18 +36,24 @@
/**
 * \brief initialise an ARCFOUR context from a key
 *
 * Fills the state array with the identity permutation and then mixes the key
 * into it: for every state index x the running index y is advanced by
 * s[x] + key[x mod key_length] and s[x] / s[y] are swapped.
 *
 * \param key      pointer to the key bytes
 * \param length_b length of the key in bits; only whole bytes are used
 * \param ctx      context to initialise (s[] is indexed 0..255 here)
 *
 * \note All counters are uint8_t on purpose: they wrap from 255 to 0, which
 *       both terminates the do/while loops after 256 iterations and performs
 *       the "mod 256" on y for free.
 * \note length_b/8 is truncated to 8 bits. A value of 0 (length_b < 8 or
 *       length_b == 2048) makes the key index wrap only at 256, i.e. 256 key
 *       bytes are read. Callers must not pass keys shorter than one byte.
 */
void arcfour_init(const void *key, uint16_t length_b, arcfour_ctx_t *ctx){
	uint8_t t;
	uint8_t length_B = length_b/8;
	uint8_t x=0, y=0;
	uint8_t k=0; /* index of the next key byte, always < key length */
	const uint8_t *kptr = (const uint8_t*)key;

	/* identity permutation; ++x wraps to 0 after 255 and ends the loop */
	do{
		ctx->s[x] = x;
	}while(++x);

	/* key mixing */
	do{
		y += ctx->s[x] + kptr[k];
		/* wrap the key index every length_B bytes (equivalent to x % length_B) */
		if(++k == length_B){
			k = 0;
		}
		/* ctx->s[y] <--> ctx->s[x] */
		t = ctx->s[y];
		ctx->s[y] = ctx->s[x];
		ctx->s[x] = t;
	}while(++x);

	ctx->i = ctx->j = 0;
}

View File

@ -0,0 +1,51 @@
# --- target device and compiler behaviour ---------------------------------
# MCU_TARGET is handed to the compiler via -mmcu and to avrdude via -p;
# F_CPU is exported to the sources as -DF_CPU (see DEFS below).
MCU_TARGET = atmega128
F_CPU = 14745600
OPTIMIZE = -Os # -Os
DEBUG = -gdwarf-2
WARNING = -pedantic -Wall -Wstrict-prototypes
# --- programmer / flashing --------------------------------------------------
PROGRAMMER = jtagmkII
PROG_PORT = usb
# NOTE(review): `uc` is not defined in this fragment; presumably an
# upper-casing helper provided by the including Makefile - confirm.
DEFS = -D$(call uc, $(MCU_TARGET)) -DF_CPU=$(F_CPU)
# The image name is appended directly after "flash:w:", hence the trailing
# '#' which keeps whitespace out of the variable value.
FLASHCMD = avrdude -p $(MCU_TARGET) -P $(PROG_PORT) -c $(PROGRAMMER) -U flash:w:# no space at the end
#FLASHCMD = avrdude -p $(MCU_TARGET) -c usbasp -U flash:w:# no space at the end
RESETCMD = avrdude -p $(MCU_TARGET) -P $(PROG_PORT) -c $(PROGRAMMER)
# --- directory layout (all values end in '/') -------------------------------
DEP_DIR = deps/
TEST_DIR = test/
BIN_DIR = bin/
TESTSRC_DIR = test_src/
#uisp -dprog=bsd -dlpt=/dev/parport1 --upload if=$(PRG).hex
ERASECMD =
# --- serial port and logging settings for test / speed runs -----------------
TESTPORT = /dev/ttyUSB0
TESTPORTBAUDR = 115200
TESTLOG_DIR = testlog/#
TESTPREFIX = nessie-
SPEEDTOOL = host/get_performance.rb
SPEEDLOG_DIR = speed_log/
SPEEDPREFIX =
SPEEDCMD = performance
SIZE_DIR = size_log/#
LIST_DIR = listings/#
STAT_DIR = stats/#
AUTOASM_DIR = autoasm/#
AUTOASM_OPT = -S
# --- toolchain --------------------------------------------------------------
CC = avr-gcc
CSTD = c99
# CFLAGS_A is the $(call ...) form of CFLAGS: the object name comes from $(1)
# instead of $@. Both write the dependency file for that object into DEP_DIR.
override CFLAGS_A = -MMD -MF$(DEP_DIR)$(patsubst %.o,%.d,$(notdir $(1))) $(DEBUG) $(WARNING) -std=$(CSTD) $(OPTIMIZE) -mmcu=$(MCU_TARGET) $(DEFS)
override CFLAGS = -MMD -MF$(DEP_DIR)$(patsubst %.o,%.d,$(notdir $@)) $(DEBUG) $(WARNING) -std=$(CSTD) $(OPTIMIZE) -mmcu=$(MCU_TARGET) $(DEFS)
# LDFLAGS ends in "-Map," so the user of this variable has to append the map
# file name directly.
override LDFLAGS = -gdwarf-2 -Wl,-Map,
override ASFLAGS = -mmcu=$(MCU_TARGET) -Wa,--gdwarf-2
SIZESTAT_FILE = sizestats.txt
OBJCOPY = avr-objcopy
OBJDUMP = avr-objdump
SIZE = avr-size
READELF = readelf
# --- host side helper tools -------------------------------------------------
RUBY = ruby
GET_TEST = host/get_test.rb
MAKE = make
MAKE2GRAPH = ~/bin/make2graph.rb
TWOPI = twopi

View File

@ -1,13 +1,15 @@
MCU_TARGET = atmega644
F_CPU = 20000000
OPTIMIZE = -Os # -Os
DEBUG = -gdwarf-2
WARNING = -pedantic -Wall -Wstrict-prototypes
PROGRAMMER = avr911
DEFS = -D$(call uc, $(MCU_TARGET))
FLASHCMD = avrdude -p $(MCU_TARGET) -P /dev/ttyUSB0 -c $(PROGRAMMER) -U flash:w:# no space at the end
PROG_PORT = /dev/ttyUSB0
DEFS = -D$(call uc, $(MCU_TARGET)) -DF_CPU=$(F_CPU)
FLASHCMD = avrdude -p $(MCU_TARGET) -P $(PROG_PORT) -c $(PROGRAMMER) -U flash:w:# no space at the end
#FLASHCMD = avrdude -p $(MCU_TARGET) -c usbasp -U flash:w:# no space at the end
RESETCMD = avrdude -p $(MCU_TARGET) -P /dev/ttyUSB0 -c $(PROGRAMMER)
RESETCMD = avrdude -p $(MCU_TARGET) -P $(PROG_PORT) -c $(PROGRAMMER)
DEP_DIR = deps/
TEST_DIR = test/
BIN_DIR = bin/

View File

@ -48,7 +48,7 @@ void xtea_dummy_dec(void* block, void* key){
const bcdesc_t xtea_desc PROGMEM = {
BCDESC_TYPE_BLOCKCIPHER,
BC_INIT_TYPE_2,
BC_INIT_TYPE_1,
xtea_str,
16,
64,

View File

@ -5,7 +5,9 @@ ALGO_NAME := A51
STREAM_CIPHERS += $(ALGO_NAME)
$(ALGO_NAME)_OBJ := A5_1.o
$(ALGO_NAME)_TEST_BIN := main-a5_1-test.o nessie_stream_test.o nessie_common.o $(CLI_STD)
$(ALGO_NAME)_DIR := a51/
$(ALGO_NAME)_INCDIR := memxor/ scal/
$(ALGO_NAME)_TEST_BIN := main-a5_1-test.o $(CLI_STD) $(SCAL_STD)
$(ALGO_NAME)_NESSIE_TEST := "nessie"
$(ALGO_NAME)_PERFORMANCE_TEST := "performance"

View File

@ -157,7 +157,7 @@
.macro CLEAR_BIT_IO io:req bit:req reg:req
.if _SFR_IO_REG_P(\io)
cbi _SFR_IO_ADDR(\io), bit
cbi _SFR_IO_ADDR(\io), \bit
.else
lds \reg, _SFR_MEM_ADDR(\io)
andi \reg, ~_BV(\bit)
@ -167,7 +167,7 @@
.macro SET_BIT_IO io:req bit:req reg:req
.if _SFR_IO_REG_P(\io)
sbi _SFR_IO_ADDR(\io),bit
sbi _SFR_IO_ADDR(\io), \bit
.else
lds \reg, _SFR_MEM_ADDR(\io)
ori \reg, _BV(\bit)

View File

@ -19,7 +19,7 @@
#ifndef __CONFIG_H__
#define __CONFIG_H__
#include <avr/io.h>
#define F_CPU 20000000
//#define F_CPU 20000000
// #define F_CPU 16000000 /* oscillator-frequency in Hz */
// #define F_CPU 14745600

View File

@ -415,6 +415,10 @@ ram_read_block:
* param addr: r20:r23
* param length: r18
*/
#ifdef EEWE
# define EEPE EEWE
#endif
.global ee_read_block
ee_read_block:
movw r26, r24

View File

@ -54,6 +54,25 @@ void testrun_performance_xtea(void){
bcal_performance_multiple(algolist);
}
/**
 * \brief XTEA round-trip demonstration on the command line interface
 *
 * Encrypts an all-zero 64-bit block in place with the 128-bit key
 * 80 00 .. 00, prints the ciphertext, decrypts it in place again and prints
 * the result. Key, plaintext, ciphertext and recovered plaintext are dumped
 * as hex.
 */
void test_xtea(void){
	uint8_t key[16] = { 0x80 };  /* first byte 0x80, remaining bytes zero */
	uint8_t block[8] = { 0 };    /* all-zero block, processed in place */

	cli_putstr_P(PSTR("\r\n*** XTEA test ***\r\n key: "));
	cli_hexdump(key, sizeof(key));

	cli_putstr_P(PSTR("\r\n plain: "));
	cli_hexdump(block, sizeof(block));

	xtea_enc(block, block, key);
	cli_putstr_P(PSTR("\r\n crypt: "));
	cli_hexdump(block, sizeof(block));

	xtea_dec(block, block, key);
	cli_putstr_P(PSTR("\r\n plain: "));
	cli_hexdump(block, sizeof(block));
}
/*****************************************************************************
* main *
*****************************************************************************/
@ -65,7 +84,7 @@ const char echo_str[] PROGMEM = "echo";
cmdlist_entry_t cmdlist[] PROGMEM = {
{ nessie_str, NULL, testrun_nessie_xtea},
{ test_str, NULL, testrun_nessie_xtea},
{ test_str, NULL, test_xtea},
{ performance_str, NULL, testrun_performance_xtea},
{ echo_str, (void*)1, (void_fpt)echo_ctrl},
{ NULL, NULL, NULL}

View File

@ -1,6 +1,6 @@
/* uart_defs.h */
/*
This file is part of the AVR-uart_ni.
This file is part of the AVR-uart_i.
Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
@ -16,19 +16,30 @@
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* \file uart_defs.h
* \email daniel.otte@rub.de
* \author Daniel Otte
* \date 2009-07-24
* \license GPLv3 or later
* \addtogroup uart_config
* \brief definitions for uart configuration
* \details
* This file declares some macros for use in uart configuration
*/
/*@{*/
#ifndef UART_DEFS_H_
#define UART_DEFS_H_
#define UART_PARATY_NONE 0 /** \def UART_PARATY_NONE define no paraty */
#define UART_PARATY_EVEN 2 /** \def UART_PARATY_EVEN define even paraty */
#define UART_PARATY_ODD 3 /** \def UART_PARATY_ODD define odd paraty */
#define UART_STOPBITS_1 0 /** \def UART_STOPBITS_1 define 1 stop bit */
#define UART_STOPBITS_2 1 /** \def UART_STOPBITS_2 define 2 stop bits */
#define UART_DATABITS_5 0 /** \def UART_DATABITS_5 define 5 data bits */
#define UART_DATABITS_6 1 /** \def UART_DATABITS_6 define 6 data bits */
#define UART_DATABITS_7 2 /** \def UART_DATABITS_7 define 7 data bits */
#define UART_DATABITS_8 3 /** \def UART_DATABITS_8 define 8 data bits */
#define UART_DATABITS_9 7 /** \def UART_DATABITS_9 define 9 data bits */
#define UART_PARATY_NONE 0 /**< define no paraty */
#define UART_PARATY_EVEN 2 /**< define even paraty */
#define UART_PARATY_ODD 3 /**< define odd paraty */
#define UART_STOPBITS_1 0 /**< define 1 stop bit */
#define UART_STOPBITS_2 1 /**< define 2 stop bits */
#define UART_DATABITS_5 0 /**< define 5 data bits */
#define UART_DATABITS_6 1 /**< define 6 data bits */
#define UART_DATABITS_7 2 /**< define 7 data bits */
#define UART_DATABITS_8 3 /**< define 8 data bits */
#define UART_DATABITS_9 7 /**< define 9 data bits */
/*@}*/
#endif /* UART_DEFS_H_ */

View File

@ -19,13 +19,54 @@
/**
* \file uart_i.h
* \email daniel.otte@rub.de
* \author Daniel Otte
* \author Daniel Otte
* \date 2009-07-24
* \license GPLv3 or later
* \ingroup uart_i
* \brief declaration for non-interrupt uart
* \defgroup uart_i
* \brief declaration for interrupt based uart
* \details
* This implementation of the uart-interface of AVR microcontrollers uses the
* interrupt architecture and can be used to handle serial communication in the
* background.
* The uart is configured at compile-time by some special defines starting with
* \a UART0_ for configuring the first uart and \a UART1_ for the second.
* Some settings use symbolic values defined in uart_defs.h .
* The following options are available:
* - \a *_I enables the interrupt based driver for this uart
* - \a 0 disables driver
* - \a 1 enables driver
* - \a *_BAUD_RATE sets the baudrate for this uart (value is the baudrate)
* - \a *_STOPBITS sets the amount of stop bits for this uart
* - \a UART_STOPBITS_1 for one stop bit
* - \a UART_STOPBITS_2 for two stop bits
* - \a *_DATABITS sets the amount of data bits for this uart
* - \a UART_DATABITS_5 for five data bits
* - \a UART_DATABITS_6 for six data bits
* - \a UART_DATABITS_7 for seven data bits
* - \a UART_DATABITS_8 for eight data bits
* - \a *_PARATY sets the mode for paraty calculation for this uart
* - \a UART_PARATY_NONE ignore paraty
* - \a UART_PARATY_ODD odd paraty
* - \a UART_PARATY_EVEN even paraty
* - \a *_RXBUFFER_SIZE size of the receive buffer in bytes
* - \a *_TXBUFFER_SIZE size of the transmit buffer in bytes
* - \a *_SWFLOWCTRL enable/disable software flow control (via XON & XOFF)
* - \a 0 disable software flow control
* - \a 1 enable software flow control
* - \a *_THRESH_HIGH set upper limit for the rx buffer, which causes an XOFF
* to be sent when crossed (only relevant if software flow
* control is enabled)
* - \a *_THRESH_LOW set lower limit for the rx buffer, which causes an XON to
* be sent when crossed and an XOFF has been sent previously
* (only relevant if software flow control is enabled)
* - \a *_HOOK enable/disable implementation of the hook feature
* (\ref uart0_sethook())
* - \a 0 disable hook feature
* - \a 1 enable hook feature
*
*/
/*@{*/
#ifndef UART_I_H_
#define UART_I_H_
@ -33,61 +74,82 @@
#include "circularbytebuffer.h"
#include <stdint.h>
/**
* \brief storage type for uart0 context
*
* This type is used to store uart0 specific global variables.
* It contains the receive and transmit buffer instances and, when necessary,
* a pointer to the hook function and an indicator if the hook is
* currently executed.
* If software flow control is enabled it also contains flags for flow control.
*/
typedef struct{
circularbytebuffer_t rxb;
circularbytebuffer_t txb;
circularbytebuffer_t rxb; /**< recieve buffer */
circularbytebuffer_t txb; /**< transmitt buffer*/
#if UART0_HOOK
void(*hook)(uint8_t);
volatile uint8_t hook_running;
void(*hook)(uint8_t); /**< pointer to the hook function */
volatile uint8_t hook_running; /**< flag indicating if the hook is running */
#endif
#if UART0_SWFLOWCTRL
volatile uint8_t txon;
volatile uint8_t rxon;
volatile uint8_t txon; /**< flag indicating if we are allowed to send data */
volatile uint8_t rxon; /**< flag indicating if we have send an \a XOFF */
#endif
} uart0_ctx_t;
/**
* \brief storage type for uart1 context
*
* This type is used to store uart1 specific global variables.
* It contains the receive and transmit buffer instances and, when necessary,
* a pointer to the hook function and an indicator if the hook is
* currently executed.
* If software flow control is enabled it also contains flags for flow control.
*/
typedef struct{
circularbytebuffer_t rxb;
circularbytebuffer_t txb;
circularbytebuffer_t rxb; /**< recieve buffer */
circularbytebuffer_t txb; /**< transmitt buffer */
#if UART1_HOOK
void(*hook)(uint8_t);
volatile uint8_t hook_running;
void(*hook)(uint8_t); /**< pointer to the hook function */
volatile uint8_t hook_running; /**< flag indicating if the hook is running */
#endif
#if UART1_SWFLOWCTRL
volatile uint8_t txon;
volatile uint8_t rxon;
volatile uint8_t txon; /**< flag indicating if we are allowed to send data */
volatile uint8_t rxon; /**< flag indicating if we have send an \a XOFF */
#endif
} uart1_ctx_t;
#if UART0_I
/** \fn uart0_init(void)
/**
* \brief initialize uart0.
* This function initializes the first uart according to the parameter specifyed
*
* This function initializes the first uart according to the parameter specified
* in config.h .
*/
void uart0_init(void);
/** \fn uart0_putc(uint16_t)
/**
* \brief send data through uart0.
* This function sends data through the first uart
*
* This function sends data through the first uart
* (the data size is defined in config.h).
* \param c data to send
*/
void uart0_putc(uint16_t c);
/** \fn uart0_getc(void)
/**
* \brief read data from uart0.
* This function reads data from the first uart
*
* This function reads data from the first uart
* (the data size is defined in config.h).
* \return data received by uart0
*/
uint16_t uart0_getc(void);
/** \fn uart0_dataavail(void)
/**
* \brief checks if data is available.
*
*
* This function checks the state of the input buffer of uart0 and
* returns if data is available or not.
* \return zero if no data is available else a value different from zero is returned
@ -95,6 +157,20 @@ uint16_t uart0_getc(void);
uint8_t uart0_dataavail(void);
#if UART0_HOOK
/**
* \brief sets the hook for uart0.
*
* This function modifies the way the software handles incoming data.
* When the hook is set to \a NULL (which is the default) incoming data is buffered
* in a special ringbuffer and read by \ref uart0_getc(). If the hook is set to a
* different value, this value is interpreted as a function pointer. The hook (the
* function the function pointer points to) is called with the received data
* as its single parameter. Any value returned by the hook is discarded.
* \note If the hook is set, \ref uart0_getc() will not return, as the
* ringbuffer is bypassed.
* \param fpt pointer to the handler function for received data
*/
void uart0_sethook(void(*fpt)(uint8_t));
#endif
@ -102,38 +178,58 @@ void uart0_sethook(void(*fpt)(uint8_t));
#endif /* UART0_I */
#if UART1_I
/** \fn uart1_init(void)
/**
* \brief initialize uart1.
*
* This function initializes the second uart according to the parameter specified
* in config.h .
*/
void uart1_init(void);
/** \fn uart1_putc(uint16_t)
/**
* \brief send data through uart1.
* This function sends data through the second uart
*
* This function sends data through the second uart
* (the data size is defined in config.h).
* \param c data to send
*/
void uart1_putc(uint16_t c);
/** \fn uart1_getc(void)
/**
* \brief read data from uart1.
* This function reads data from the second uart
*
* This function reads data from the second uart
* (the data size is defined in config.h).
* \return data received by uart1
*/
uint16_t uart1_getc(void);
/** \fn uart1_dataavail(void)
/**
* \brief checks if data is available.
*
* This function checks the state of the input buffer of uart1 and
* returns if data is available or not.
* \return zero if no data is available else a value different from zero is returned
*/
uint8_t uart1_dataavail(void);
void uart0_sethook(void(*fpt)(uint8_t));
/**
* \brief sets the hook for uart1.
*
* This function modifies the way the software handles incoming data.
* When the hook is set to \a NULL (which is the default) incoming data is buffered
* in a special ringbuffer and read by \ref uart1_getc(). If the hook is set to a
* different value, this value is interpreted as a function pointer. The hook (the
* function the function pointer points to) is called with the received data
* as its single parameter. Any value returned by the hook is discarded.
* \note If the hook is set, \ref uart1_getc() will not return, as the
* ringbuffer is bypassed.
* \param fpt pointer to the handler function for received data
*/
void uart1_sethook(void(*fpt)(uint8_t));
#endif
/*@}*/
#endif /* UART_I_H_ */

View File

@ -1,7 +1,7 @@
/* xtea-asm.S */
/* xtea-enc.S */
/*
This file is part of the AVR-Crypto-Lib.
Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
This file is part of the AVR-Crypto-Lib.
Copyright (C) 2006-2011 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -16,570 +16,221 @@
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/* xtea-asm.S
* Author: Daniel Otte
* Date: 2006-06-06
* License: GPLv3 or later
* Implementation of XTEA for AVR
* include xtea.h in your C-Project to use this functions.
*/
V01 = 2
V02 = 3
V03 = 4
V04 = 5
V11 = 6
V12 = 7
V13 = 8
V14 = 9
Accu1 = 14
Accu2 = 15
Accu3 = 16
Accu4 = 17
Sum1 = 18
Sum2 = 19
Sum3 = 20
Sum4 = 21
Func1 = 22
Func2 = 23
Func3 = 24
Func4 = 25
C = 28 /* der kleine Zaehler fuer zwischendurch */
#include "avr-asm-macros.S"
/*
 * Register allocation. Every group is a 32-bit value stored least
 * significant byte first in four consecutive registers (the shift and
 * add/adc chains in the routines below fix that byte order).
 *   B (r4..r7)    second operand / scratch word
 *   A (r8..r11)   accumulator word
 *   V1 (r12..r15) second half of the data block
 *   V0 (r16..r19) first half of the data block
 *   S (r20..r23)  running sum
 */
B0 = 4
B1 = 5
B2 = 6
B3 = 7
A0 = 8
A1 = 9
A2 = 10
A3 = 11
V10 = 12
V11 = 13
V12 = 14
V13 = 15
V00 = 16
V01 = 17
V02 = 18
V03 = 19
S0 = 20
S1 = 21
S2 = 22
S3 = 23
/*
 * xchg_V0V1: exchange the two 32-bit block halves V0 and V1.
 * Works on register pairs with movw and uses r26:r27 as the temporary,
 * so whatever was in r26:r27 before the call is lost.
 */
xchg_V0V1:
/* low words: tmp = V1.lo; V1.lo = V0.lo; V0.lo = tmp */
movw r26, V10
movw V10, V00
movw V00, r26
/* high words, same pattern */
movw r26, V12
movw V12, V02
movw V02, r26
ret
/*
 * eor_AB: A ^= B over all four bytes. B is left unchanged.
 */
eor_AB:
eor A0, B0
eor A1, B1
eor A2, B2
eor A3, B3
ret
/*
 * g_func: A = ((V1 << 4) ^ (V1 >> 5)) + V1
 * Input:  V1
 * Output: A
 * Overwrites B (ends up holding V1 >> 5) and r24 (shift counter, 0 on exit).
 */
g_func:
/* both A and B start as copies of V1 */
movw A0, V10
movw A2, V12
movw B0, V10
movw B2, V12
/* A <<= 4, one bit per iteration */
ldi r24, 4
10:
lsl A0
rol A1
rol A2
rol A3
dec r24
brne 10b
/* B >>= 5 (logical), one bit per iteration; 10b now refers to the label below */
ldi r24, 5
10:
lsr B3
ror B2
ror B1
ror B0
dec r24
brne 10b
/* A = (V1 << 4) ^ (V1 >> 5) */
rcall eor_AB
/* A += V1 */
add A0, V10
adc A1, V11
adc A2, V12
adc A3, V13
ret
/*
 * sum_plus_k: V0 (+/-)= A ^ (S + key_word)
 * Input:  r24   byte offset selector; only bits 2..3 are used, giving an
 *               offset of 0, 4, 8 or 12
 *         Z     base address the offset is added to (the callers are
 *               expected to keep the key pointer here)
 *         A     value to be combined, S running sum
 *         T     flag selecting the direction: set -> add, clear -> subtract
 * Output: V0 updated
 * Overwrites A, B, r24, r26:r27 (X).
 * NOTE(review): relies on r1 being zero (avr-gcc convention) for the carry
 * propagation into r27 - confirm no caller violates that.
 */
sum_plus_k:
andi r24, (3<<2)
/* X = Z + offset */
movw r26, r30
add r26, r24
adc r27, r1
/* B = 32-bit word at X, least significant byte first */
ld B0, X+
ld B1, X+
ld B2, X+
ld B3, X+
/* B += S */
add B0, S0
adc B1, S1
adc B2, S2
adc B3, S3
/* A ^= B */
rcall eor_AB
/* T clear: take the subtracting path at 20 */
brtc 20f
add V00, A0
adc V01, A1
adc V02, A2
adc V03, A3
ret
20: sub V00, A0
sbc V01, A1
sbc V02, A2
sbc V03, A3
ret
/*
 * main1: half round that updates V0 from V1:
 *   V0 (+/-)= (((V1 << 4) ^ (V1 >> 5)) + V1) ^ (S + k[S & 3])
 * The word index S & 3 is turned into a byte offset by shifting the low sum
 * byte left twice; sum_plus_k masks the result to bits 2..3.
 * Direction (+ or -) is chosen by the T flag inside sum_plus_k.
 */
main1:
rcall g_func
mov r24, S0
lsl r24
lsl r24
rcall sum_plus_k
ret
/*
 * main2: half round that updates V1 from V0:
 *   V1 (+/-)= (((V0 << 4) ^ (V0 >> 5)) + V0) ^ (S + k[(S >> 11) & 3])
 * Implemented by swapping V0 and V1, running the same helpers as main1 and
 * swapping back. Bits 11..12 of S are bits 3..4 of the second sum byte, so
 * one right shift puts them at bits 2..3, where sum_plus_k expects the byte
 * offset.
 */
main2:
rcall xchg_V0V1
rcall g_func
mov r24, S1
lsr r24
rcall sum_plus_k
rcall xchg_V0V1
ret
.global xtea_enc
; == xtea_enc ==
; xtea encrytion function
; param1: 16-bit pointer to destination for encrypted block
; given in r25,r24
; param2: 16-bit pointer to the block (64-bit) which is to encrypt
; given in r23,r22
; param3: 16-bit pointer to the key (128-bit)
; given in r21,r20
;
xtea_enc:
/* prolog */
push r2
push r3
push r4
push r5
push r6
push r7
push r8
push r9
push r14
push r15
push r16
push r17
push r28
/* load the block */
movw r26, r22 /* X points to block */
movw r30, r20 /* Z points to key */
ld V01, X+
ld V02, X+
ld V03, X+
ld V04, X+
ld V11, X+
ld V12, X+
ld V13, X+
ld V14, X+
; push r25
; push r24
movw r26, r24 /* X points to destination */
ldi Func1, 32
mov r0, Func1 /* r0 is cycle-counter */
clr Sum1
clr Sum2
movw Sum3, Sum1
clt
1:
movw Accu1, V11
movw Accu3, V13
ldi C, 4
2: lsl Accu1
rol Accu2
rol Accu3
rol Accu4
dec C
brne 2b /* Accu == V1 << 4 */
movw Func1, V11
movw Func3, V13
ldi C, 5
3: lsr Func4
ror Func3
ror Func2
ror Func1
dec C
brne 3b /* Func == V1 >> 5 */
eor Accu1, Func1
eor Accu2, Func2
eor Accu3, Func3
eor Accu4, Func4
add Accu1, V11
adc Accu2, V12
adc Accu3, V13
adc Accu4, V14 /* Accu == ( (V1<<4)^(V1>>5) ) + V1 */
brtc 4f
mov C, Sum2
lsr C
andi C,(0x03 <<2)
clt
rjmp 5f
4:
mov C, Sum1 /* calc key offset */
andi C, 0x03
lsl C
lsl C
set
5:
add r30, C
adc r31, r1
ld Func1, Z
ldd Func2, Z+1
ldd Func3, Z+2
ldd Func4, Z+3 /* Func = key[sum & 3] */
sub r30, C
sbci r31, 0
add Func1, Sum1
adc Func2, Sum2
adc Func3, Sum3
adc Func4, Sum4
eor Accu1, Func1
eor Accu2, Func2
eor Accu3, Func3
eor Accu4, Func4 /* Accu = ((V1<<4 ^ V1>>5) + V1) ^ (sum + key[sum&3]) */
add Accu1, V01
adc Accu2, V02
adc Accu3, V03
adc Accu4, V04
movw V01, V11
movw V03, V13
movw V11, Accu1
movw V13, Accu3
/* sum += delta */ /* delta == 0x9E3779B9 */
brtc 6f
ldi C, 0xB9
add Sum1, C
ldi C, 0x79
adc Sum2, C
ldi C, 0x37
adc Sum3, C
ldi C, 0x9E
adc Sum4, C
rjmp 1b
6:
dec r0
breq 7f
rjmp 1b
7:
/* write block back */
; pop r26
; pop r27
st X+, V01
st X+, V02
st X+, V03
st X+, V04
st X+, V11
st X+, V12
st X+, V13
st X+, V14
/* epilog */
pop r28
pop r17
pop r16
pop r15
pop r14
pop r9
pop r8
pop r7
pop r6
pop r5
pop r4
pop r3
pop r2
ret
/*
 * xtea_intro: common entry code and the encryption loop.
 * Expects (see the parameter description of xtea_enc / xtea_dec):
 *   r25:r24 destination pointer, r23:r22 source block, r21:r20 key,
 *   T flag set for encryption, clear for decryption.
 * Execution continues at xtea_dec_start when T is clear; that path is
 * expected to come back to xtea_enc_exit.
 */
xtea_intro:
/* Save 14 registers by reading data addresses 4..17 through X and pushing
 * each byte. NOTE(review): this relies on the register file being visible
 * in data space at addresses 0x00..0x1F so that these are r4..r17 - true
 * for the classic AVR cores targeted here, confirm before porting. */
clr r27
ldi r26, 4
ldi r30, 14
10:
ld r0, X+
push r0
dec r30
brne 10b
/* keep the destination pointer on the stack; r24 is used as scratch later */
push r24
push r25
/* Z = key pointer; must be copied before S0/S1 (r20/r21) are overwritten */
movw r30, r20
/* load block */
movw r26, r22
ld V00, X+
ld V01, X+
ld V02, X+
ld V03, X+
ld V10, X+
ld V11, X+
ld V12, X+
ld V13, X+
/* r0 = round counter (32 rounds); the helper routines do not touch r0 */
ldi r24, 32
mov r0, r24
brtc xtea_dec_start
/* encryption: S starts at 0 */
clr S0
clr S1
movw S2, S0
10:
rcall main1
/* S += 0x9E3779B9, done by subtracting 0x61C88647 (its negation mod 2^32) */
subi S0, 0x47
sbci S1, 0x86
sbci S2, 0xC8
sbci S3, 0x61
rcall main2
dec r0
brne 10b
/* store back */
xtea_enc_exit:
/* X = destination pointer (pushed as r24, r25; popped in reverse order) */
pop r27
pop r26
st X+, V00
st X+, V01
st X+, V02
st X+, V03
st X+, V10
st X+, V11
st X+, V12
st X+, V13
/* restore the 14 saved registers: pop and store with pre-decrement from
 * data address 18 downwards, i.e. to addresses 17..4 */
clr r27
ldi r26, 18
ldi r24, 14
10:
pop r0
st -X, r0
dec r24
brne 10b
ret
/******************************************************************************/
/******************************************************************************/
/******************************************************************************/
/******************************************************************************/
;####################################################################
/* #endif TWO_IN_ONE */
/* #ifdef TWO_IN_ONE */
/* now we use the same base-structure for enc- and decryption
to indicate operation mode we use the highest bit of param3 (16 bit pointer to key),
this is ok, since even the larges atmel today has "only" 8k of ram,
but you shouldn't use this feature while using external ram.
*/
.global xtea_enc
ori r21, 0x80
.global xtea_dec
; == xtea_dec ==
; xtea decrytion function
; param1: 16-bit pointer to destination for decrypted block
; given in r25,r24
; param2: 16-bit pointer to the block (64-bit) which is to derypt
; given in r23,r22
; param3: 16-bit pointer to the key (128-bit)
; given in r21,r20
;
/*
void xtea_dec(uint32_t* dest, uint32_t* v, uint32_t* k) {
uint32_t v0=v[0], v1=v[1], i;
uint32_t sum=0xC6EF3720, delta=0x9E3779B9;
for(i=0; i<32; i++) {
v1 -= ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + k[sum>>11 & 3]);
sum -= delta;
v0 -= ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + k[sum & 3]);
}
dest[0]=v0; dest[1]=v1;
}
*/
xtea_dec:
/* prolog */
push r2
push r3
push r4
push r5
push r6
push r7
push r8
push r9
push r14
push r15
push r16
push r17
push r28
/* load the block */
movw r26, r22 /* Z points to block */
movw r30, r20 /* X points to key */
ld V01, X+
ld V02, X+
ld V03, X+
ld V04, X+
ld V11, X+
ld V12, X+
ld V13, X+
ld V14, X+
movw r26, r24 /* Z points to destination */
ldi Sum1, 32
mov r0, Sum1 /* r1 is cycle-counter */
ldi Sum1, 0x20 /* sum = 0xC6EF3720 */
ldi Sum2, 0x37
ldi Sum3, 0xEF
ldi Sum4, 0xC6
clt
rjmp xtea_intro
xtea_dec_start:
ldi S0, 0x20 /* sum = 0xC6EF3720 */
ldi S1, 0x37
ldi S2, 0xEF
ldi S3, 0xC6
1:
movw Accu1, V01
movw Accu3, V03
ldi C, 4
2: lsl Accu1
rol Accu2
rol Accu3
rol Accu4
dec C
brne 2b /* Accu == V0 << 4 */
10:
rcall main2
subi S0, 0xB9
sbci S1, 0x79
sbci S2, 0x37
sbci S3, 0x9E
rcall main1
movw Func1, V01
movw Func3, V03
ldi C, 5
3: lsr Func4
ror Func3
ror Func2
ror Func1
dec C
brne 3b /* Func == V0 >> 5 */
eor Accu1, Func1
eor Accu2, Func2
eor Accu3, Func3
eor Accu4, Func4
add Accu1, V01
adc Accu2, V02
adc Accu3, V03
adc Accu4, V04 /* Accu == ( (V0<<4)^(V0>>5) ) + V0 */
brts 4f
mov C, Sum2
lsr C
andi C,(0x03 <<2)
set
rjmp 5f
4:
mov C, Sum1 /* calc key offset */
andi C, 0x03
lsl C
lsl C
clt
5:
add r30, C
adc r31, r1
ld Func1, Z
ldd Func2, Z+1
ldd Func3, Z+2
ldd Func4, Z+3 /* Func = key[sum & 3] */
sub r30, C
sbci r31, 0
add Func1, Sum1
adc Func2, Sum2
adc Func3, Sum3
adc Func4, Sum4
eor Accu1, Func1
eor Accu2, Func2
eor Accu3, Func3
eor Accu4, Func4 /* Accu = ((V0<<4 ^ V0>>5) + V0) ^ (sum + key[sum&3]) */
sub V11, Accu1
sbc V12, Accu2
sbc V13, Accu3
sbc V14, Accu4
movw Accu1, V01
movw Accu3, V03
movw V01, V11
movw V03, V13
movw V11, Accu1
movw V13, Accu3
/* sum += delta */ /* delta == 0x9E3779B9 */
brtc 6f
subi Sum1, 0xB9
sbci Sum2, 0x79
sbci Sum3, 0x37
sbci Sum4, 0x9E
rjmp 1b
6:
dec r0
breq 7f
rjmp 1b
7:
/* write block back */
st X+, V01
st X+, V02
st X+, V03
st X+, V04
st X+, V11
st X+, V12
st X+, V13
st X+, V14
/* epilog */
pop r28
pop r17
pop r16
pop r15
pop r14
pop r9
pop r8
pop r7
pop r6
pop r5
pop r4
pop r3
pop r2
ret
/* #endif */
brne 10b
/* store back */
rjmp xtea_enc_exit
;####################################################################
#ifdef TWO_IN_ONE
/* now we use the same base-structure for enc- and decryption
to indicate operation mode we use the highest bit of param3 (16 bit pointer to key),
this is ok, since even the larges atmel today has "only" 8k of ram,
but you shouldn't use this feature while using external ram.
*/
.global xtea_enc
ori r21, 0x80
.global xtea_dec
; == xtea_dec ==
; xtea decrytion function
; param1: 16-bit pointer to destination for decrypted block
; given in r25,r24
; param2: 16-bit pointer to the block (64-bit) which is to derypt
; given in r23,r22
; param3: 16-bit pointer to the key (128-bit)
; given in r21,r20
;
/*
void xtea_dec(uint32_t* dest, uint32_t* v, uint32_t* k) {
uint32_t v0=v[0], v1=v[1], i;
uint32_t sum=0xC6EF3720, delta=0x9E3779B9;
for(i=0; i<32; i++) {
v1 -= ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + k[sum>>11 & 3]);
sum -= delta;
v0 -= ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + k[sum & 3]);
}
dest[0]=v0; dest[1]=v1;
}
*/
xtea_dec:
/* prolog */
push r2
push r3
push r4
push r5
push r6
push r7
push r8
push r9
push r14
push r15
push r16
push r17
push r28
/* set T-bit if we are going to encrypt, clear otherwise */
bst r21, 7
andi r21, 0x7f /* fix r21:r22 to a real addr */
/* load the block */
movw r26, r22 /* Z points to block */
movw r30, r20 /* X points to key */
ld V01, X+
ld V02, X+
ld V03, X+
ld V04, X+
ld V11, X+
ld V12, X+
ld V13, X+
ld V14, X+
movw r26, r24 /* Z points to destination */
ldi Sum1, 32
mov r0, Sum1 /* r1 is cycle-counter */
ldi Sum1, 0x20 /* sum = 0xC6EF3720 */
ldi Sum2, 0x37
ldi Sum3, 0xEF
ldi Sum4, 0xC6
clt
1:
movw Accu1, V01
movw Accu3, V03
ldi C, 4
2: lsl Accu1
rol Accu2
rol Accu3
rol Accu4
dec C
brne 2b /* Accu == V0 << 4 */
movw Func1, V01
movw Func3, V03
ldi C, 5
3: lsr Func4
ror Func3
ror Func2
ror Func1
dec C
brne 3b /* Func == V0 >> 5 */
eor Accu1, Func1
eor Accu2, Func2
eor Accu3, Func3
eor Accu4, Func4
add Accu1, V01
adc Accu2, V02
adc Accu3, V03
adc Accu4, V04 /* Accu == ( (V0<<4)^(V0>>5) ) + V0 */
brts 4f
mov C, Sum2
lsr C
andi C,(0x03 <<2)
set
rjmp 5f
4:
mov C, Sum1 /* calc key offset */
andi C, 0x03
lsl C
lsl C
clt
5:
add r30, C
adc r31, r1
ld Func1, Z
ldd Func2, Z+1
ldd Func3, Z+2
ldd Func4, Z+3 /* Func = key[sum & 3] */
sub r30, C
sbci r31, 0
add Func1, Sum1
adc Func2, Sum2
adc Func3, Sum3
adc Func4, Sum4
eor Accu1, Func1
eor Accu2, Func2
eor Accu3, Func3
eor Accu4, Func4 /* Accu = ((V0<<4 ^ V0>>5) + V0) ^ (sum + key[sum&3]) */
sub V11, Accu1
sbc V12, Accu2
sbc V13, Accu3
sbc V14, Accu4
movw Accu1, V01
movw Accu3, V03
movw V01, V11
movw V03, V13
movw V11, Accu1
movw V13, Accu3
/* sum += delta */ /* delta == 0x9E3779B9 */
brtc 6f
subi Sum1, 0xB9
sbci Sum2, 0x79
sbci Sum3, 0x37
sbci Sum4, 0x9E
rjmp 1b
6:
dec r0
breq 7f
rjmp 1b
7:
/* write block back */
st X+, V01
st X+, V02
st X+, V03
st X+, V04
st X+, V11
st X+, V12
st X+, V13
st X+, V14
/* epilog */
pop r28
pop r17
pop r16
pop r15
pop r14
pop r9
pop r8
pop r7
pop r6
pop r5
pop r4
pop r3
pop r2
ret
#endif

View File

@ -24,7 +24,6 @@
*/
#include <stdint.h>
void xtea_enc(void* dest, const void* v, const void* k) {
uint8_t i;