avr-crypto-lib/ubi1024_asm.S

329 lines
6.3 KiB
ArmAsm

/* ubi1024_asm.S */
/*
This file is part of the AVR-Crypto-Lib.
Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* \author Daniel Otte
* \email daniel.otte@rub.de
* \date 2009-03-16
* \license GPLv3 or later
*/
#include "avr-asm-macros.S"
/******************************************************************************/
/*
void ubi1024_init(ubi1024_ctx_t* ctx, const void* g, uint8_t type){
memset(ctx->tweak, 0, 15);
ctx->tweak[15] = 0x40+type;
memcpy(ctx->g, g, UBI1024_BLOCKSIZE_B);
}
*/
/*
* param ctx: r24:r25
* param g: r22:r23
* param type: r20
*/
/*
 * ubi1024_init -- set up a UBI-1024 context.
 *
 * Register use:
 *   r24:r25  ctx   (in)  -> copied to X, used as write cursor
 *   r22:r23  g     (in)  -> copied to Z, used as read cursor
 *   r20      type  (in)  -> becomes tweak[15], then copy scratch
 *   r21      loop counter
 *   r1       read as the zero byte (avr-gcc keeps r1 == 0; assumed on entry)
 *
 * Layout written through X, starting at ctx:
 *   bytes  0..14  zero
 *   byte   15     type with bit 0x40 set
 *   bytes 16..143 copy of the 128 bytes at g
 */
UBI1024_TWEAK_ZERO_B = 15       /* tweak bytes cleared before the flag byte */
UBI1024_TWEAK_FIRST  = 0x40     /* bit that ubi1024_nextBlock clears again  */
UBI1024_G_SIZE_B     = 128      /* bytes copied from g                      */

.global ubi1024_init
ubi1024_init:
	movw	r26, r24                        /* X = ctx                      */
	movw	r30, r22                        /* Z = g                        */

	/* tweak[0..14] = 0 */
	ldi	r21, UBI1024_TWEAK_ZERO_B
.Lubi1024_init_zero:
	st	X+, r1
	dec	r21
	brne	.Lubi1024_init_zero

	/* tweak[15] = type | 0x40 (same as 0x40+type while bit 6 of type is 0) */
	ori	r20, UBI1024_TWEAK_FIRST
	st	X+, r20

	/* X now points just past the tweak: copy g into the context */
	ldi	r21, UBI1024_G_SIZE_B
.Lubi1024_init_copy:
	ld	r20, Z+
	st	X+, r20
	dec	r21
	brne	.Lubi1024_init_copy
	ret
/******************************************************************************/
/*
void ubi1024_ctx2hash(void* dest, const ubi1024_ctx_t* ctx){
memcpy(dest, ctx->g, UBI1024_BLOCKSIZE_B);
}
*/
/*
* param dest: r24:r25
* param ctx: r22:r23
*/
/*
 * ubi1024_ctx2hash -- copy the 128-byte chaining value out of a context.
 *
 * Register use:
 *   r24:r25  dest (in) -> X, write cursor
 *   r22:r23  ctx  (in) -> Z, advanced by 16 so it addresses ctx->g
 *   r22      reused as byte counter once Z has been loaded
 *   r23      reused as copy scratch
 */
UBI1024_G_OFFSET = 16           /* ctx->g starts after 16 bytes of tweak */
UBI1024_G_SIZE_B = 128          /* bytes copied to dest                  */

.global ubi1024_ctx2hash
ubi1024_ctx2hash:
	movw	r30, r22                        /* Z = ctx                      */
	adiw	r30, UBI1024_G_OFFSET           /* Z = ctx->g                   */
	movw	r26, r24                        /* X = dest                     */
	ldi	r22, UBI1024_G_SIZE_B           /* r22 is free now: use as count */
.Lubi1024_ctx2hash_copy:
	ld	r23, Z+
	st	X+, r23
	dec	r22
	brne	.Lubi1024_ctx2hash_copy
	ret
/******************************************************************************/
/*
void ubi1024_nextBlock(ubi1024_ctx_t* ctx, const void* block){
threefish1024_ctx_t tfctx;
((uint64_t*)(ctx->tweak))[0] += UBI1024_BLOCKSIZE_B;
threefish1024_init(ctx->g, ctx->tweak, &tfctx);
memcpy(ctx->g, block, UBI1024_BLOCKSIZE_B);
threefish1024_enc(ctx->g, &tfctx);
memxor(ctx->g, block, UBI1024_BLOCKSIZE_B);
ctx->tweak[15] &= (uint8_t)~0x40;
}
*/
/*
* param ctx: r24:r25
* param block: r22:r23
*/
/*
 * ubi1024_nextBlock -- absorb one full 128-byte block into the context.
 *
 * Register roles (r2..r7 are saved/restored around the body):
 *   CTX    r2:r3   ctx on entry; re-pointed at ctx->g (ctx+16) before the
 *                  first threefish call and kept there afterwards
 *   BLOCK  r4:r5   block
 *   TFCTX  r6:r7   address of the 160-byte threefish context on the stack
 * Scratch: r20..r26, r30, r31 (plus whatever the stack macros use).
 * r1 is read as zero for the carry propagation.
 */
CTX0 = 2
CTX1 = 3
BLOCK0 = 4
BLOCK1 = 5
TFCTX0 = 6
TFCTX1 = 7
UBI1024_BLOCK_BYTES = 128       /* bytes per block / size of ctx->g      */
UBI1024_G_OFFSET    = 16        /* ctx->g starts after 16 bytes of tweak */
UBI1024_POS_CARRY_B = 11        /* tweak bytes 1..11 receive the carry   */

.global ubi1024_nextBlock
ubi1024_nextBlock:
	/* Reserve the threefish context first, then save r2..r7 below it.
	   The adiw assumes the alloc macro leaves the new stack pointer in Z,
	   so Z+1 is the first reserved byte. */
	stack_alloc_large 20*8
	push_range 2, 7
	adiw	r30, 1
	movw	TFCTX0, r30
	movw	BLOCK0, r22
	movw	CTX0, r24

	/* tweak position += 128: add into byte 0, ripple carry through 1..11.
	   ld/st/dec leave the carry flag alone, so adc sees the right carry. */
	movw	r26, CTX0                       /* X = ctx->tweak               */
	ldi	r25, UBI1024_BLOCK_BYTES
	ld	r24, X
	add	r24, r25
	st	X+, r24
	ldi	r25, UBI1024_POS_CARRY_B
.Lubi1024_nb_carry:
	ld	r24, X
	adc	r24, r1
	st	X+, r24
	dec	r25
	brne	.Lubi1024_nb_carry

	/* threefish1024_init(ctx->g, ctx->tweak, &tfctx) */
	movw	r22, CTX0                       /* arg 2: ctx->tweak            */
	movw	r24, CTX0
	adiw	r24, UBI1024_G_OFFSET           /* arg 1: ctx->g                */
	movw	CTX0, r24                       /* from here on CTX == ctx->g   */
	movw	r20, TFCTX0                     /* arg 3: &tfctx                */
	rcall	threefish1024_init

	/* ctx->g = block */
	movw	r30, BLOCK0
	movw	r26, CTX0
	ldi	r25, UBI1024_BLOCK_BYTES
.Lubi1024_nb_copy:
	ld	r24, Z+
	st	X+, r24
	dec	r25
	brne	.Lubi1024_nb_copy

	/* threefish1024_enc(ctx->g, &tfctx) */
	movw	r22, TFCTX0
	movw	r24, CTX0
	rcall	threefish1024_enc

	/* ctx->g ^= block */
	movw	r30, CTX0
	movw	r26, BLOCK0
	ldi	r25, UBI1024_BLOCK_BYTES
.Lubi1024_nb_xor:
	ld	r24, X+
	ld	r23, Z
	eor	r23, r24
	st	Z+, r23
	dec	r25
	brne	.Lubi1024_nb_xor

	/* clear the 'first' bit: tweak[15] is the byte right before ctx->g */
	movw	r30, CTX0
	sbiw	r30, 1
	ld	r24, Z
	andi	r24, ~0x40
	st	Z, r24
exit:
	/* undo the prologue in reverse order */
	pop_range 2, 7
	stack_free_large2 20*8
	ret
/******************************************************************************/
/*
void ubi1024_lastBlock(ubi1024_ctx_t* ctx, const void* block, uint16_t length_b){
threefish1024_ctx_t tfctx;
while(length_b>UBI1024_BLOCKSIZE){
ubi1024_nextBlock(ctx, block);
block = (uint8_t*)block + UBI1024_BLOCKSIZE_B;
length_b -= UBI1024_BLOCKSIZE;
}
ctx->tweak[15] |= 0x80;
((uint64_t*)(ctx->tweak))[0] += (length_b+7)/8;
if(length_b & 0x07)
ctx->tweak[14] |= 0x80;
threefish1024_init(ctx->g, ctx->tweak, &tfctx);
memset(ctx->g, 0, UBI1024_BLOCKSIZE_B);
memcpy(ctx->g, block, (length_b+7)/8);
if(length_b & 0x07)
ctx->g[(length_b+7)/8-1] |= 0x80>>(length_b&7);
threefish1024_enc(ctx->g, &tfctx);
memxor(ctx->g, block, (length_b+7)/8);
if(length_b & 0x07){
ctx->g[((length_b+7)/8)-1] ^= 0x80>>(length_b&7);
}
}
*/
/*
* param ctx: r24:r25
* param block: r22:r23
* param length_b: r20:r21
*/
/*
 * ubi1024_lastBlock -- absorb the final (possibly partial) message part.
 *
 * length_b is a bit count. Anything beyond one full block (1024 bits) is
 * first fed through ubi1024_nextBlock; the remaining 0..1024 bits are then
 * zero-padded to 128 bytes, encrypted and fed forward.
 *
 * Register roles (r8..r17 are saved/restored around the body):
 *   MASK_B  r8       0 for byte-aligned input, else 0x80 >> (length_b & 7)
 *   LEN_B   r9       number of message bytes in the last block, (length_b+7)/8
 *   TFCTX   r10:r11  address of the 160-byte threefish context on the stack
 *   CTX     r12:r13  ctx; re-pointed at ctx->g (ctx+16) before threefish1024_init
 *   BLOCK   r14:r15  read pointer into the message
 *   LENGTH  r16:r17  remaining bit count (r16+ is required by cpi/subi)
 * r1 is read as zero throughout (assumed to hold 0 on entry and after calls).
 */
MASK_B = 8
LEN_B = 9
TFCTX0 = 10
TFCTX1 = 11
CTX0 = 12
CTX1 = 13
BLOCK0 = 14
BLOCK1 = 15
LENGTH0 = 16
LENGTH1 = 17
.global ubi1024_lastBlock
ubi1024_lastBlock:
/* save working registers and park the three arguments in them */
push_range 8, 17
movw CTX0, r24
movw BLOCK0, r22
movw LENGTH0, r20
/* while the high byte of length_b is >= 5 (length_b >= 1280 bits):
   process one full block, advance block by 128 bytes, length_b -= 1024 */
1: cpi LENGTH1, 5
brlo 2f
movw r24, CTX0
movw r22, BLOCK0
rcall ubi1024_nextBlock
ldi r25, 128
add BLOCK0, r25
adc BLOCK1, r1
subi LENGTH1, 4 /* 4 in the high byte == 1024 bits */
rjmp 1b
/* high byte is now <= 4: one more full block is due only when
   length_b is 1025..1279, i.e. high byte == 4 and low byte != 0 */
2: cpi LENGTH1, 4
brlo 3f
tst LENGTH0
breq 3f /* exactly 1024 bits: handled as the final block */
movw r24, CTX0
movw r22, BLOCK0
rcall ubi1024_nextBlock
ldi r25, 128
add BLOCK0, r25
adc BLOCK1, r1
subi LENGTH1, 4
3: /* final block: here length_b <= 1024 */
/* reserve 20*8 bytes for the threefish context; the adiw assumes the
   alloc macro leaves the new stack pointer in Z, so Z+1 is the first byte */
stack_alloc_large 20*8
adiw r30, 1
movw TFCTX0, r30
/* LEN_B = (length_b + 7) >> 3; the result is at most 128, so the low byte suffices */
movw r24, LENGTH0
adiw r24, 7
lsr r25
ror r24
lsr r25
ror r24
lsr r25
ror r24
mov LEN_B, r24
/* add LEN_B to the tweak position: byte 0 takes the add, bytes 1..11 take
   the carry (12 bytes in total, whereas the C reference above shows a
   uint64_t add). ld/st/dec do not touch the carry flag. */
movw r30, CTX0
ld r24, Z
add r24, LEN_B
st Z+, r24
ldi r25, 11
1: ld r24, Z
adc r24, r1
st Z+, r24
dec r25
brne 1b
/* set 'final' bit: tweak[15] |= 0x80 */
movw r30, CTX0
ldd r24, Z+15
ori r24, 0x80
std Z+15, r24
/* MASK_B stays 0 for byte-aligned input; otherwise set the 'BitPad' bit
   (tweak[14] |= 0x80) and build MASK_B = 0x80 >> (length_b & 7) */
clr MASK_B
mov r24, LENGTH0
andi r24, 0x07
tst r24
breq 4f
ldd r25, Z+14
ori r25, 0x80
std Z+14, r25
ldi r25, 0x80
mov MASK_B, r25
1: lsr MASK_B /* one shift per leftover bit; r24 is 1..7 here */
dec r24
brne 1b
4: /* threefish1024_init(ctx->g, ctx->tweak, &tfctx) */
movw r24, CTX0
adiw r24, 16
movw r22, CTX0
movw CTX0, r24 /* CTX points at ctx->g */
movw r20, TFCTX0
rcall threefish1024_init
/* build the padded block in ctx->g:
   r24 = bytes to copy (LEN_B), r25 = bytes to zero-fill (128 - LEN_B) */
movw r26, BLOCK0
movw r30, CTX0
mov r24, LEN_B
ldi r25, 128
sub r25, LEN_B
tst r24
/* test-at-top loop: breq sees the Z flag of the tst above on the first
   pass and of the dec below on later passes (ld/st leave flags alone) */
1: breq 2f
ld r22, X+
st Z+, r22
dec r24
rjmp 1b
/* bit padding: r22 still holds the last byte copied (MASK_B != 0 implies
   LEN_B >= 1); OR the pad bit in, rewrite that byte, step Z forward again.
   NOTE(review): only the pad bit is set; bits of that byte below the pad
   bit are taken from block unchanged. This appears to rely on the caller
   passing zeros in the unused low bits -- confirm against the Skein spec. */
2: tst MASK_B
breq 29f
or r22, MASK_B
st -Z, r22
adiw r30, 1
/* zero-fill the rest of ctx->g; same test-at-top loop shape as above */
29: tst r25
3: breq 4f
st Z+, r1
dec r25
rjmp 3b
4: /* threefish1024_enc(ctx->g, &tfctx) */
movw r24, CTX0
movw r22, TFCTX0
rcall threefish1024_enc
/* feed-forward: xor the first LEN_B message bytes into ctx->g.
   LEN_B itself is the counter and is consumed here. */
movw r30, CTX0
movw r26, BLOCK0
tst LEN_B
5: breq 6f
ld r22, X+
ld r23, Z
eor r23, r22
st Z+, r23
dec LEN_B
rjmp 5b
/* r23 holds the last byte just written: xor the pad bit into it too.
   NOTE(review): this equals xoring with (byte | MASK_B) only when the
   message byte has the pad-bit position clear -- same caller assumption
   as in the padding step above. */
6: tst MASK_B
breq 7f
eor r23, MASK_B
st -Z, r23
/* epilogue: release the threefish context, then restore r8..r17
   (reverse order of the prologue) */
7: stack_free_large2 20*8
pop_range 8, 17
ret