329 lines
6.3 KiB
ArmAsm
329 lines
6.3 KiB
ArmAsm
/* ubi1024_asm.S */
|
|
/*
|
|
This file is part of the AVR-Crypto-Lib.
|
|
Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
/*
|
|
* \author Daniel Otte
|
|
* \email daniel.otte@rub.de
|
|
* \date 2009-03-16
|
|
* \license GPLv3 or later
|
|
*/
|
|
|
|
#include "avr-asm-macros.S"
|
|
|
|
/******************************************************************************/
|
|
/*
|
|
void ubi1024_init(ubi1024_ctx_t *ctx, const void *g, uint8_t type){
|
|
memset(ctx->tweak, 0, 15);
|
|
ctx->tweak[15] = 0x40+type;
|
|
memcpy(ctx->g, g, UBI1024_BLOCKSIZE_B);
|
|
}
|
|
*/
|
|
/*
|
|
* param ctx: r24:r25
|
|
* param g: r22:r23
|
|
* param type: r20
|
|
*/
|
|
/*
 * ubi1024_init -- set up a UBI-1024 context.
 *
 * in:  r24:r25  ctx   (16 tweak bytes followed by the 128 byte chaining value g)
 *      r22:r23  g     (128 byte initial chaining value to copy in)
 *      r20      type  (block type; 0x40 is OR-ed in before it is stored)
 * clobbers: r20, r21, X (r26:r27), Z (r30:r31), flags
 *
 * r1 is stored as the zero byte (avr-gcc keeps r1 == 0).
 */
UBI1024_INIT_ZERO_BYTES = 15	/* tweak[0..14] are cleared            */
UBI1024_INIT_G_BYTES    = 128	/* size of the chaining value g        */

.global ubi1024_init
ubi1024_init:
	movw r30, r22			/* Z = source:      g                  */
	movw r26, r24			/* X = destination: ctx->tweak         */

	/* memset(ctx->tweak, 0, 15) */
	ldi  r21, UBI1024_INIT_ZERO_BYTES
.Lubi1024_init_zero:
	st   X+, r1
	dec  r21
	brne .Lubi1024_init_zero

	/* ctx->tweak[15] = 0x40 | type */
	sbr  r20, 0x40
	st   X+, r20			/* X now points at ctx->g              */

	/* memcpy(ctx->g, g, 128) */
	ldi  r21, UBI1024_INIT_G_BYTES
.Lubi1024_init_copy:
	ld   r20, Z+
	st   X+, r20
	dec  r21
	brne .Lubi1024_init_copy
	ret
|
|
|
|
/******************************************************************************/
|
|
/*
|
|
void ubi1024_ctx2hash(void *dest, const ubi1024_ctx_t *ctx){
|
|
memcpy(dest, ctx->g, UBI1024_BLOCKSIZE_B);
|
|
}
|
|
*/
|
|
/*
|
|
 * param dest: r24:r25
|
|
* param ctx: r22:r23
|
|
*/
|
|
/*
 * ubi1024_ctx2hash -- copy the 128 byte chaining value out of a context.
 *
 * in:  r24:r25  dest  (receives 128 bytes)
 *      r22:r23  ctx   (g is read starting at byte offset 16)
 * clobbers: r22, r23, X (r26:r27), Z (r30:r31), flags
 */
UBI1024_C2H_G_OFFSET = 16	/* g follows the 16 tweak bytes        */
UBI1024_C2H_G_BYTES  = 128	/* number of bytes copied              */

.global ubi1024_ctx2hash
ubi1024_ctx2hash:
	movw r30, r22			/* Z = ctx                             */
	adiw r30, UBI1024_C2H_G_OFFSET	/* Z = ctx->g                          */
	movw r26, r24			/* X = dest                            */
	ldi  r22, UBI1024_C2H_G_BYTES	/* r22 is free now: byte counter       */
.Lubi1024_ctx2hash_copy:
	ld   r23, Z+
	st   X+, r23
	dec  r22
	brne .Lubi1024_ctx2hash_copy
	ret
|
|
|
|
/******************************************************************************/
|
|
/*
|
|
void ubi1024_nextBlock(ubi1024_ctx_t *ctx, const void *block){
|
|
threefish1024_ctx_t tfctx;
|
|
((uint64_t*)(ctx->tweak))[0] += UBI1024_BLOCKSIZE_B;
|
|
threefish1024_init(ctx->g, ctx->tweak, &tfctx);
|
|
memcpy(ctx->g, block, UBI1024_BLOCKSIZE_B);
|
|
threefish1024_enc(ctx->g, &tfctx);
|
|
memxor(ctx->g, block, UBI1024_BLOCKSIZE_B);
|
|
ctx->tweak[15] &= (uint8_t)~0x40;
|
|
}
|
|
*/
|
|
/*
|
|
* param ctx: r24:r25
|
|
* param block: r22:r23
|
|
*/
|
|
/*
 * ubi1024_nextBlock -- absorb one full 128 byte message block.
 *
 * in:  r24:r25  ctx
 *      r22:r23  block (128 bytes)
 *
 * Register roles (r2..r7 are saved and restored here):
 *   CTX0:CTX1     ctx at first, ctx->g (ctx + 16) once threefish1024_init
 *                 has been set up
 *   BLOCK0:BLOCK1 block
 *   TFCTX0:TFCTX1 address of the 20*8 byte threefish context on the stack
 *
 * The position counter at the start of the tweak is advanced over 12 bytes
 * (one add plus 11 carry steps).
 */
CTX0   = 2
CTX1   = 3
BLOCK0 = 4
BLOCK1 = 5
TFCTX0 = 6
TFCTX1 = 7

UBI1024_NB_BLOCK_BYTES = 128	/* bytes per message block / size of g  */
UBI1024_NB_G_OFFSET    = 16	/* offset of g inside the context       */
UBI1024_NB_CARRY_BYTES = 11	/* tweak bytes 1..11 take the carry     */

.global ubi1024_nextBlock
ubi1024_nextBlock:
	/* reserve the threefish context; the macro comes from avr-asm-macros.S
	 * and Z + 1 is taken as the first byte of the reserved area */
	stack_alloc_large 20*8
	push_range 2, 7
	adiw r30, 1
	movw TFCTX0, r30

	movw BLOCK0, r22
	movw CTX0, r24
	movw r26, r24			/* X = ctx->tweak                       */

	/* tweak position += 128; ld/st/ldi/dec leave the carry untouched */
	ld   r24, X
	ldi  r25, UBI1024_NB_BLOCK_BYTES
	add  r24, r25
	st   X+, r24
	ldi  r25, UBI1024_NB_CARRY_BYTES
.Lubi1024_nb_carry:
	ld   r24, X
	adc  r24, r1			/* r1 == 0: propagate the carry only    */
	st   X+, r24
	dec  r25
	brne .Lubi1024_nb_carry

	/* threefish1024_init(ctx->g, ctx->tweak, &tfctx) */
	movw r22, CTX0			/* arg 2: ctx->tweak (== ctx)           */
	movw r20, TFCTX0		/* arg 3: &tfctx                        */
	movw r24, r22
	adiw r24, UBI1024_NB_G_OFFSET	/* arg 1: ctx->g                        */
	movw CTX0, r24			/* from here on CTX points at ctx->g    */
	rcall threefish1024_init

	/* memcpy(ctx->g, block, 128) */
	movw r30, BLOCK0
	movw r26, CTX0
	ldi  r25, UBI1024_NB_BLOCK_BYTES
.Lubi1024_nb_copy:
	ld   r24, Z+
	st   X+, r24
	dec  r25
	brne .Lubi1024_nb_copy

	/* threefish1024_enc(ctx->g, &tfctx) */
	movw r22, TFCTX0
	movw r24, CTX0
	rcall threefish1024_enc

	/* memxor(ctx->g, block, 128) */
	movw r30, CTX0
	movw r26, BLOCK0
	ldi  r25, UBI1024_NB_BLOCK_BYTES
.Lubi1024_nb_xor:
	ld   r24, X+
	ld   r23, Z
	eor  r23, r24
	st   Z+, r23
	dec  r25
	brne .Lubi1024_nb_xor

	/* ctx->tweak[15] &= ~0x40 (clear the 'first' bit);
	 * tweak[15] is the byte immediately before ctx->g */
	movw r30, CTX0
	ld   r24, -Z
	cbr  r24, 0x40
	st   Z, r24

exit:	/* label kept from the original; nothing in this file branches to it */
	pop_range 2, 7
	stack_free_large2 20*8
	ret
|
|
|
|
/******************************************************************************/
|
|
/*
void ubi1024_lastBlock(ubi1024_ctx_t *ctx, const void *block, uint16_t length_b){
	threefish1024_ctx_t tfctx;
	uint8_t mask, pad;
	while(length_b>UBI1024_BLOCKSIZE){
		ubi1024_nextBlock(ctx, block);
		block = (uint8_t*)block + UBI1024_BLOCKSIZE_B;
		length_b -= UBI1024_BLOCKSIZE;
	}
	ctx->tweak[15] |= 0x80;
	((uint64_t*)(ctx->tweak))[0] += (length_b+7)/8;
	if(length_b & 0x07)
		ctx->tweak[14] |= 0x80;
	threefish1024_init(ctx->g, ctx->tweak, &tfctx);
	memset(ctx->g, 0, UBI1024_BLOCKSIZE_B);
	memcpy(ctx->g, block, (length_b+7)/8);
	if(length_b & 0x07){
		mask = 0x80>>(length_b&7);
		pad  = (ctx->g[(length_b+7)/8-1] & (uint8_t)(-mask)) | mask;
		ctx->g[(length_b+7)/8-1] = pad;
	}
	threefish1024_enc(ctx->g, &tfctx);
	memxor(ctx->g, block, (length_b+7)/8);
	if(length_b & 0x07){
		ctx->g[((length_b+7)/8)-1] ^= ((uint8_t*)block)[((length_b+7)/8)-1] ^ pad;
	}
}
*/
/*
 * param ctx:      r24:r25
 * param block:    r22:r23
 * param length_b: r20:r21
 */
/*
 * Register roles (r8..r17 are saved and restored here):
 *   MASK_B          0 for byte-aligned input, else 0x80 >> (length_b & 7)
 *   LEN_B           (length_b + 7) / 8, number of message bytes in the
 *                   final block (0..128)
 *   TFCTX0:TFCTX1   address of the 20*8 byte threefish context on the stack
 *   CTX0:CTX1       ctx at first, ctx->g (ctx + 16) from threefish1024_init on
 *   BLOCK0:BLOCK1   current position in the message
 *   LENGTH0:LENGTH1 remaining length in bits (kept in r16/r17 so that
 *                   cpi/subi can be used on it)
 *
 * The position counter at the start of the tweak is advanced over 12 bytes
 * (one add plus 11 carry steps).
 *
 * Bit padding: for a partial last byte the bits below the pad bit are
 * cleared and the pad bit is set, and the feed-forward XOR uses exactly this
 * padded byte. Input whose unused bits are already zero gives the same
 * result as a plain "OR in the pad bit / XOR out the pad bit" scheme.
 */
MASK_B = 8
LEN_B = 9
TFCTX0 = 10
TFCTX1 = 11
CTX0 = 12
CTX1 = 13
BLOCK0 = 14
BLOCK1 = 15
LENGTH0 = 16
LENGTH1 = 17

.global ubi1024_lastBlock
ubi1024_lastBlock:
/* run nextBlock for preceding blocks*/
	push_range 8, 17
	movw CTX0, r24
	movw BLOCK0, r22
	movw LENGTH0, r20
	/* high byte >= 5  =>  length_b >= 1280 > 1024: a full block precedes */
1:	cpi LENGTH1, 5
	brlo 2f
	movw r24, CTX0
	movw r22, BLOCK0
	rcall ubi1024_nextBlock
	ldi r25, 128
	add BLOCK0, r25			/* block += 128 bytes                   */
	adc BLOCK1, r1
	subi LENGTH1, 4			/* length_b -= 1024 bits                */
	rjmp 1b
	/* high byte == 4 and low byte != 0  =>  1024 < length_b < 1280 */
2:	cpi LENGTH1, 4
	brlo 3f
	tst LENGTH0
	breq 3f
	movw r24, CTX0
	movw r22, BLOCK0
	rcall ubi1024_nextBlock
	ldi r25, 128
	add BLOCK0, r25
	adc BLOCK1, r1
	subi LENGTH1, 4
3:	/* now the real fun */
	/* reserve the threefish context; the macro comes from avr-asm-macros.S
	 * and Z + 1 is taken as the first byte of the reserved area */
	stack_alloc_large 20*8
	adiw r30, 1
	movw TFCTX0, r30
	/* calculate LEN_B = (length_b + 7) >> 3 */
	movw r24, LENGTH0
	adiw r24, 7
	lsr r25
	ror r24
	lsr r25
	ror r24
	lsr r25
	ror r24
	mov LEN_B, r24
	/* add length to tweak; ld/st/ldi/dec leave the carry untouched */
	movw r30, CTX0
	ld r24, Z
	add r24, LEN_B
	st Z+, r24
	ldi r25, 11
1:	ld r24, Z
	adc r24, r1
	st Z+, r24
	dec r25
	brne 1b
	/* set 'final' bit*/
	movw r30, CTX0
	ldd r24, Z+15
	ori r24, 0x80
	std Z+15, r24
	/* store in MASK_B if we do bit processing and set 'BitPad' bit*/
	clr MASK_B
	mov r24, LENGTH0
	andi r24, 0x07
	tst r24
	breq 4f
	ldd r25, Z+14
	ori r25, 0x80
	std Z+14, r25
	ldi r25, 0x80
	mov MASK_B, r25
1:	lsr MASK_B			/* MASK_B = 0x80 >> (length_b & 7)      */
	dec r24
	brne 1b
4:	/* call threefish1024_init*/
	movw r24, CTX0
	adiw r24, 16
	movw r22, CTX0
	movw CTX0, r24 /* CTX points at ctx->g */
	movw r20, TFCTX0
	rcall threefish1024_init
	/* copy block to ctx->g */
	movw r26, BLOCK0
	movw r30, CTX0
	mov r24, LEN_B			/* r24 = message bytes to copy          */
	ldi r25, 128
	sub r25, LEN_B			/* r25 = zero bytes to append           */
	tst r24
1:	breq 2f
	ld r22, X+
	st Z+, r22
	dec r24
	rjmp 1b
	/* MASK_B != 0 implies LEN_B >= 1, so r22 holds the last message byte */
2:	tst MASK_B
	breq 29f
	mov r23, MASK_B
	neg r23				/* -mask: used bits plus pad-bit position */
	and r22, r23			/* drop the bits below the pad bit      */
	or r22, MASK_B			/* set the pad bit                      */
	st -Z, r22
	adiw r30, 1
29:	tst r25
3:	breq 4f
	st Z+, r1
	dec r25
	rjmp 3b
4:	/* call threefish1024_enc */
	movw r24, CTX0
	movw r22, TFCTX0
	rcall threefish1024_enc
	/* xor block into ctx->g */
	movw r30, CTX0
	movw r26, BLOCK0
	tst LEN_B
5:	breq 6f
	ld r22, X+
	ld r23, Z
	eor r23, r22
	st Z+, r23
	dec LEN_B
	rjmp 5b
	/* partial last byte: the loop stored E ^ raw, but the encrypted
	 * plaintext byte was the padded one, so swap raw for padded */
6:	tst MASK_B
	breq 7f
	eor r23, r22			/* r23 = E (undo the raw byte)          */
	mov r24, MASK_B
	neg r24
	and r22, r24
	or r22, MASK_B			/* r22 = padded byte, as built above    */
	eor r23, r22			/* r23 = E ^ padded                     */
	st -Z, r23

7:	stack_free_large2 20*8
	pop_range 8, 17
	ret
|
|
|
|
|