1571 lines
25 KiB
ArmAsm
1571 lines
25 KiB
ArmAsm
|
/* pi16cipher-asm.S */
|
|||
|
/*
|
|||
|
This file is part of the AVR-Crypto-Lib.
|
|||
|
Copyright (C) 2015 Daniel Otte (bg@nerilex.org)
|
|||
|
|
|||
|
This program is free software: you can redistribute it and/or modify
|
|||
|
it under the terms of the GNU General Public License as published by
|
|||
|
the Free Software Foundation, either version 3 of the License, or
|
|||
|
(at your option) any later version.
|
|||
|
|
|||
|
This program is distributed in the hope that it will be useful,
|
|||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|||
|
GNU General Public License for more details.
|
|||
|
|
|||
|
You should have received a copy of the GNU General Public License
|
|||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||
|
*/
|
|||
|
|
|||
|
#include <avr/io.h>
|
|||
|
#include "avr-asm-macros.S"
|
|||
|
|
|||
|
/*
 * Byte offsets of the context fields. The labels live in the absolute
 * section opened by .struct, so each one evaluates to a plain offset.
 */
.struct 0
ctx:
ctx_state:                              /* offset  0: 4 * 4 words of 2 bytes */
.struct ctx_state + (4 * 4 * 2)
ctx_tag:                                /* offset 32: 4 * 2 words of 2 bytes */
.struct ctx_tag + (4 * 2 * 2)
ctx_ctr:                                /* offset 48: 8 counter bytes */
.struct ctx_ctr + 8
ctx_end:                                /* offset 56: one past the last field */
ctx_size:
|
|||
|
|
|||
|
.text
|
|||
|
/*
|
|||
|
void phi16(
|
|||
|
word_t dest[4],
|
|||
|
const word_t x[4],
|
|||
|
const word_t c[4],
|
|||
|
const uint8_t v[8],
|
|||
|
const uint8_t rot[4])
|
|||
|
{
|
|||
|
word_t sum = 0;
|
|||
|
uint8_t i;
|
|||
|
i = 4;
|
|||
|
do {
|
|||
|
--i;
|
|||
|
sum += x[i];
|
|||
|
} while (i);
|
|||
|
i = 4;
|
|||
|
do {
|
|||
|
--i;
|
|||
|
dest[i] = rotl(pgm_read_word(&c[i]) + sum - x[pgm_read_byte(&v[i])], pgm_read_byte(&rot[i]) );
|
|||
|
} while (i);
|
|||
|
sum = 0;
|
|||
|
i = 4;
|
|||
|
do {
|
|||
|
--i;
|
|||
|
sum ^= dest[i];
|
|||
|
} while (i);
|
|||
|
i = 4;
|
|||
|
do {
|
|||
|
--i;
|
|||
|
dest[i] ^= sum;
|
|||
|
} while (i);
|
|||
|
}
|
|||
|
|
|||
|
*/
|
|||
|
|
|||
|
.global phi16
|
|||
|
|
|||
|
/*
|
|||
|
r24:r25 - destination
|
|||
|
r22:r23 - input
|
|||
|
r20:r21 - constants
|
|||
|
r18:r19 - drop constants (v)
|
|||
|
r16:r17 - rotation constants
|
|||
|
*/
|
|||
|
/*
 * phi16 - mixing step shared by mu16 and ny16 (both are reached via ast16).
 *
 * In:   r24:r25  dest, 4 words in RAM
 *       r22:r23  x, 4 input words in RAM
 *       r20:r21  c, 4 word constants in flash (read with lpm)
 *       r18:r19  v, 4 byte offsets into x in flash (read with lpm)
 *       r16:r17  rot, 4 rotate-left counts in flash (read with lpm)
 * Out:  Y (r28:r29) points at dest[0].
 *
 * Expects r1 == 0. Overwrites r0, r2-r7, r16-r27, X, Y and Z without
 * saving any of them, so the caller has to preserve what it needs
 * (ast16 pushes r2-r7, r16, r17, r28 and r29 before calling).
 */
phi16:
    movw r26, r22               /* X = x */
    movw r28, r24               /* Y = dest */
    movw r6, r16                /* r6:r7 = rot (r16 becomes the loop counter) */

    /* r24:r25 = x[0] + x[1] + x[2] + x[3] */
    ldi r16, 3
    ld r24, X+
    ld r25, X+
1:
    ld r0, X+
    add r24, r0
    ld r0, X+                   /* ld leaves the carry from add intact */
    adc r25, r0
    dec r16
    brne 1b
    sbiw r26, 8                 /* X back at x[0] */
    movw r2, r26                /* r2:r3 = x, base for the indexed reads */

    /* dest[i] = rotl(c[i] + sum - x[v[i]], rot[i]) for i = 0..3 */
    ldi r16, 4
2:
    movw r30, r20
    lpm r22, Z+
    lpm r23, Z+                 /* r22:r23 = c[i] */
    movw r20, r30               /* remember the advanced c pointer */
    add r22, r24
    adc r23, r25                /* + sum */
    movw r30, r18
    lpm r0, Z+                  /* r0 = byte offset of the word to subtract */
    movw r18, r30
    movw r26, r2
    add r26, r0
    adc r27, r1
    ld r4, X+
    ld r5, X+
    sub r22, r4
    sbc r23, r5                 /* - x[v[i]] */
    movw r30, r6
    lpm r0, Z+                  /* r0 = rotation count */
    movw r6, r30
3:
    /* rotate r23:r22 left by one: bit 15 is moved into carry first */
    mov r17, r23
    lsl r17
    rol r22
    rol r23
    dec r0
    brne 3b
    st Y+, r22
    st Y+, r23
    dec r16
    brne 2b

    /* r24:r25 = dest[0] ^ dest[1] ^ dest[2] ^ dest[3] */
    sbiw r28, 8                 /* Y back at dest[0] */
    movw r26, r28
    ldi r16, 3
    ld r24, X+
    ld r25, X+
4:
    ld r0, X+
    eor r24, r0
    ld r0, X+
    eor r25, r0
    dec r16
    brne 4b

    /* dest[i] ^= r24:r25 for i = 0..3 */
    ldi r16, 4
5:
    ld r0, Y
    eor r0, r24
    st Y+, r0
    ld r0, Y
    eor r0, r25
    st Y+, r0
    dec r16
    brne 5b
    sbiw r28, 8                 /* leave Y at dest[0] for the caller */
    ret
|
|||
|
|
|||
|
|
|||
|
/******************************************************************************/
|
|||
|
/*
|
|||
|
static void ny(
|
|||
|
word_t dest[4],
|
|||
|
const word_t x[4])
|
|||
|
{
|
|||
|
phi16(dest, x, ny_const, ny_v_const, ny_rot_const);
|
|||
|
}
|
|||
|
*/
|
|||
|
|
|||
|
/* Flash tables handed to phi16 by the ny16 part of ast16. */
ny16_const:                             /* additive word constants */
    .word 0xD1CC
    .word 0xCAC9
    .word 0xC6C5
    .word 0xC3B8

ny16_v_const:                           /* byte offsets (word index * 2) into x */
    .byte 1 * 2
    .byte 0 * 2
    .byte 3 * 2
    .byte 2 * 2

ny16_rot_const:                         /* rotate-left counts */
    .byte 2
    .byte 5
    .byte 7
    .byte 13
|
|||
|
|
|||
|
/******************************************************************************/
|
|||
|
/*
|
|||
|
static void mu(
|
|||
|
word_t dest[4],
|
|||
|
const word_t x[4])
|
|||
|
{
|
|||
|
phi16(dest, x, mu_const, mu_v_const, mu_rot_const);
|
|||
|
}
|
|||
|
*/
|
|||
|
/* Flash tables handed to phi16 by the mu16 part of ast16. */
mu16_const:                             /* additive word constants */
    .word 0xF0E8
    .word 0xE4E2
    .word 0xE1D8
    .word 0xD4D2

mu16_v_const:                           /* byte offsets (word index * 2) into x */
    .byte 3 * 2
    .byte 2 * 2
    .byte 1 * 2
    .byte 0 * 2

mu16_rot_const:                         /* rotate-left counts */
    .byte 1
    .byte 4
    .byte 9
    .byte 11
|
|||
|
|
|||
|
|
|||
|
.global ast16
|
|||
|
|
|||
|
/*
 * ast16 - combine two 4-word inputs into dest.
 *
 * In:   r24:r25  dest (4 words, RAM)
 *       r22:r23  first input, run through phi16 with the mu16 tables
 *       r20:r21  second input, run through phi16 with the ny16 tables
 * Out:  dest[0] = mu[3] + ny[1]
 *       dest[1] = mu[0] + ny[2]
 *       dest[2] = mu[1] + ny[3]
 *       dest[3] = mu[2] + ny[0]
 *       Z points at dest[0] on return (e1_16 and e2_16 use that).
 *
 * Saves and restores r2-r7, r16, r17, r28 and r29; r0 and r18-r27 are
 * overwritten. Eight bytes of stack hold the ny16 result temporarily.
 */
ast16:
    push_range 2, 7
    push r16
    push r17
    push r28
    push r29
    stack_alloc 8 ; pointer to stack space is stored in Z
    adiw r30, 1                 /* Z = first byte of the 8-byte temporary */
    /* park the arguments of the second phi16 call on the stack */
    push r20
    push r21
    push r30
    push r31
mu16:
    /* phi16(dest, first input, mu16 tables) */
    ldi r16, lo8(mu16_rot_const)
    ldi r17, hi8(mu16_rot_const)
    ldi r18, lo8(mu16_v_const)
    ldi r19, hi8(mu16_v_const)
    ldi r20, lo8(mu16_const)
    ldi r21, hi8(mu16_const)
    rcall phi16
    pop r25
    pop r24                     /* r24:r25 = temporary */
    pop r23
    pop r22                     /* r22:r23 = second input */
    push r28                    /* phi16 left Y at dest; keep it for later */
    push r29
ny16:
    /* phi16(temporary, second input, ny16 tables) */
    ldi r16, lo8(ny16_rot_const)
    ldi r17, hi8(ny16_rot_const)
    ldi r18, lo8(ny16_v_const)
    ldi r19, hi8(ny16_v_const)
    ldi r20, lo8(ny16_const)
    ldi r21, hi8(ny16_const)
    rcall phi16
    pop r31
    pop r30                     /* Z = dest (mu result), Y = temporary (ny result) */

    /* mu[0] is overwritten first, so keep a copy in r16:r17 */
    ldd r16, Z + 0
    ldd r17, Z + 1

    /* dest[0] = mu[3] + ny[1] */
    ldd r18, Z + 3 * 2 + 0
    ldd r19, Z + 3 * 2 + 1
    ldd r20, Y + 1 * 2 + 0
    ldd r21, Y + 1 * 2 + 1
    add r18, r20
    adc r19, r21
    std Z + 0 * 2 + 0, r18
    std Z + 0 * 2 + 1, r19

    /* dest[3] = mu[2] + ny[0] */
    ldd r18, Z + 2 * 2 + 0
    ldd r19, Z + 2 * 2 + 1
    ldd r20, Y + 0 * 2 + 0
    ldd r21, Y + 0 * 2 + 1
    add r18, r20
    adc r19, r21
    std Z + 3 * 2 + 0, r18
    std Z + 3 * 2 + 1, r19

    /* dest[2] = mu[1] + ny[3] */
    ldd r18, Z + 1 * 2 + 0
    ldd r19, Z + 1 * 2 + 1
    ldd r20, Y + 3 * 2 + 0
    ldd r21, Y + 3 * 2 + 1
    add r18, r20
    adc r19, r21
    std Z + 2 * 2 + 0, r18
    std Z + 2 * 2 + 1, r19

    /* dest[1] = saved mu[0] + ny[2] */
    movw r18, r16
    ldd r20, Y + 2 * 2 + 0
    ldd r21, Y + 2 * 2 + 1
    add r18, r20
    adc r19, r21
    std Z + 1 * 2 + 0, r18
    std Z + 1 * 2 + 1, r19

    stack_free 8, reg1 = r26, reg2 = r27
    pop r29
    pop r28
    pop r17
    pop r16
    pop_range 2, 7
    ret
|
|||
|
|
|||
|
/******************************************************************************/
|
|||
|
/*
|
|||
|
void e1_16(
|
|||
|
word_t *dest,
|
|||
|
const word_t c[4],
|
|||
|
const word_t *i )
|
|||
|
{
|
|||
|
uint8_t n = PI_N - 1;
|
|||
|
{
|
|||
|
word_t t[4];
|
|||
|
memcpy_P(t, c, sizeof(word_t) * 4);
|
|||
|
ast16(dest, t, i);
|
|||
|
}
|
|||
|
do {
|
|||
|
i = &i[4];
|
|||
|
ast16(&dest[4], dest, i);
|
|||
|
dest = &dest[4];
|
|||
|
} while (--n);
|
|||
|
}
|
|||
|
*/
|
|||
|
|
|||
|
.global e1_16
|
|||
|
|
|||
|
/*
 * e1_16 - forward chain of four ast16 calls.
 *
 * In:   r24:r25  dest, 4 * 4 words in RAM
 *       r22:r23  c, 4 word constants in flash
 *       r20:r21  i, 4 * 4 input words in RAM
 * Out:  Z points at dest[0].
 *
 * The constants are copied to an 8-byte stack buffer t, then
 *   ast16(dest, t, i) and three times ast16(dest + 4, dest, i + 4)
 * with dest and i advancing by four words each round.
 * Saves r8-r10; relies on ast16 returning with Z = its dest.
 */
e1_16:
    push_range 8, 10
    movw r30, r22               /* Z = c (flash) */
    movw r8, r20                /* r8:r9 = i */
    stack_alloc 8, reg1=r26, reg2=r27
    adiw r26, 1                 /* X = t */
    movw r22, r26               /* second argument of the first ast16 call */
    ldi r18, 8
1:
    lpm r0, Z+                  /* t[] = c[] */
    st X+, r0
    dec r18
    brne 1b
    /* --- */
    ldi r18, 3
    mov r10, r18                /* three chained rounds follow */
    rcall ast16                 /* ast16(dest, t, i) */
2:
    movw r22, r30               /* previous dest becomes the mu16 input */
    adiw r30, 8
    movw r24, r30               /* dest advances by 4 words */
    movw r26, r8
    adiw r26, 8
    movw r20, r26               /* i advances by 4 words */
    movw r8, r26
    rcall ast16
    dec r10
    brne 2b
    sbiw r30, 3 * 4 * 2         /* Z back at dest[0] */
    /* --- */
    stack_free 8, reg1=r26, reg2=r27
    pop_range 8, 10
    ret
|
|||
|
|
|||
|
/******************************************************************************/
|
|||
|
/*
|
|||
|
void e2_16(
|
|||
|
word_t *dest,
|
|||
|
const word_t c[4],
|
|||
|
const word_t *i )
|
|||
|
{
|
|||
|
uint8_t n = PI_N - 1;
|
|||
|
{
|
|||
|
word_t t[4];
|
|||
|
memcpy_P(t, c, sizeof(word_t) * 4);
|
|||
|
ast16(&dest[4 * n], &i[4 * n], t);
|
|||
|
}
|
|||
|
while (n--) {
|
|||
|
ast16(&dest[4 * n], &i[4 * n], &dest[4 * (n + 1)]);
|
|||
|
}
|
|||
|
}
|
|||
|
*/
|
|||
|
|
|||
|
.global e2_16
|
|||
|
|
|||
|
/*
 * e2_16 - backward chain of four ast16 calls.
 *
 * In:   r24:r25  dest, 4 * 4 words in RAM
 *       r22:r23  c, 4 word constants in flash
 *       r20:r21  i, 4 * 4 input words in RAM
 * Out:  Z points at dest[0].
 *
 * The constants are copied to an 8-byte stack buffer t, then
 *   ast16(dest + 12, i + 12, t) and three times
 *   ast16(dest - 4, i - 4, previous dest)
 * with dest and i stepping back by four words each round.
 * Saves r8-r10; relies on ast16 returning with Z = its dest.
 */
e2_16:
    push_range 8, 10
    movw r30, r22               /* Z = c (flash) */
    movw r26, r20
    adiw r26, 24                /* X = &i[12], the last group of 4 words */
    movw r8, r26
    movw r22, r26               /* mu16 input of the first ast16 call */
    stack_alloc 8, reg1 = r26, reg2 = r27
    adiw r26, 1                 /* X = t */
    movw r20, r26               /* ny16 input of the first ast16 call */
    ldi r18, 8
1:
    lpm r0, Z+                  /* t[] = c[] */
    st X+, r0
    dec r18
    brne 1b
    /* --- */
    ldi r18, 3
    mov r10, r18                /* three chained rounds follow */
    adiw r24, 24                /* dest = &dest[12] */
    rcall ast16
2:
    movw r20, r30               /* previous dest becomes the ny16 input */
    sbiw r30, 8
    movw r24, r30               /* dest steps back by 4 words */
    movw r26, r8
    sbiw r26, 8
    movw r22, r26               /* i steps back by 4 words */
    movw r8, r26
    rcall ast16
    dec r10
    brne 2b
    /* --- */
    stack_free 8, reg1 = r26, reg2 = r27
    pop_range 8, 10
    ret
|
|||
|
|
|||
|
|
|||
|
/******************************************************************************/
|
|||
|
/*
|
|||
|
void pi(
|
|||
|
word_t *a )
|
|||
|
{
|
|||
|
uint8_t r = PI_ROUNDS;
|
|||
|
word_t t[4 * 4];
|
|||
|
const word_t *c = (const word_t *)pi_const;
|
|||
|
do {
|
|||
|
e1_16(t, c, a);
|
|||
|
c = &c[4];
|
|||
|
e2_16(a, c, t);
|
|||
|
c = &c[4];
|
|||
|
} while (--r);
|
|||
|
}
|
|||
|
*/
|
|||
|
|
|||
|
/*
 * Constants for pi, kept in flash. Each row of four words feeds one
 * e1_16 or e2_16 call; pi walks the table one row (8 bytes) per call.
 */
PI_CONST:
    .word 0xB4B2, 0xB1AC, 0xAAA9, 0xA6A5    /* row 0 */
    .word 0xA39C, 0x9A99, 0x9695, 0x938E    /* row 1 */
    .word 0x8D8B, 0x8778, 0x7472, 0x716C    /* row 2 */
    .word 0x6A69, 0x6665, 0x635C, 0x5A59    /* row 3 */
    .word 0x5655, 0x534E, 0x4D4B, 0x473C    /* row 4 */
    .word 0x3A39, 0x3635, 0x332E, 0x2D2B    /* row 5 */
    .word 0x271E, 0x1D1B, 0x170F, 0xF0E8    /* row 6 */
    .word 0xE4E2, 0xE1D8, 0xD4D2, 0xD1CC    /* row 7 */
|
|||
|
|
|||
|
|
|||
|
/******************************************************************************/
|
|||
|
/*
|
|||
|
void ctr_trans(
|
|||
|
const PI_CTX *ctx,
|
|||
|
state_t a,
|
|||
|
unsigned long ctr )
|
|||
|
{
|
|||
|
uint64_t t;
|
|||
|
int i;
|
|||
|
if ((void *)ctx->cis != (void *)a) {
|
|||
|
memcpy(a, ctx->cis, sizeof(state_t));
|
|||
|
}
|
|||
|
t = ctx->ctr + ctr;
|
|||
|
for (i = 0; i * PI_WORD_SIZE < 64; ++i) {
|
|||
|
a[0][i] ^= (word_t)t;
|
|||
|
t >>= PI_WORD_SIZE;
|
|||
|
}
|
|||
|
pi((word_t*)a);
|
|||
|
}
|
|||
|
*/
|
|||
|
|
|||
|
.global ctr_trans
|
|||
|
|
|||
|
/*
 * ctr_trans - prepare a working state from the context and a block number.
 *
 * In:   r24:r25  ctx
 *       r22:r23  a, 32-byte state (may be the same address as ctx)
 *       r18-r21  ctr, 32-bit block number
 *
 * Copies the 32 state bytes from ctx to a unless both addresses are
 * equal, XORs the 64-bit sum (ctx counter + ctr) into the first 8 bytes
 * of a, and then FALLS THROUGH into pi with r24:r25 = a. There is no
 * ret here on purpose; pi must directly follow this routine.
 */
ctr_trans:
    push_range 16, 17
    push r28
    push r29
    movw r26, r22               /* X = a */
    movw r30, r24               /* Z = ctx */
    cp r24, r22
    cpc r25, r23
    breq 2f                     /* working in place: nothing to copy */
    ldi r22, 32
1:
    ld r0, Z+
    st X+, r0
    dec r22
    brne 1b
    /* --- */
    sbiw r30, 32                /* rewind both pointers */
    sbiw r26, 32
2:
    /* widen ctr to 64 bits in r16..r23 */
    movw r16, r18
    movw r18, r20
    clr r20
    clr r21
    movw r22, r20
    adiw r30, ctx_ctr ; Z points at lsb of ctr
    ldi r28, 16 ; Y points at r16
    clr r29
    clc
    ldi r25, 8
3:
    ld r0, Y+                   /* next byte of ctr, read through the register file */
    ld r24, Z+
    adc r24, r0                 /* carry survives ld, eor, st and dec */
    ld r0, X
    eor r0, r24
    st X+, r0
    dec r25
    brne 3b
    /* --- */
    sbiw r26, 8
    movw r24, r26               /* argument for pi: a */
    pop r29
    pop r28
    pop_range 16, 17
    /* no rjmp needed: execution continues in pi, which follows directly */
|
|||
|
|
|||
|
/* at the end of pi dest is in Z */
|
|||
|
|
|||
|
.global pi
|
|||
|
/*
 * pi - permutation applied in place to a 16-word state.
 *
 * In:   r24:r25  a, 4 * 4 words in RAM
 * Out:  Z points at a.
 *
 * Runs three rounds of  e1_16(t, c, a); e2_16(a, c + 4, t)  where t is a
 * 32-byte stack buffer and c walks through PI_CONST one row per call.
 * Saves r6, r7, r16, r28 and r29. Relies on e1_16 and e2_16 returning
 * with Z = their dest argument.
 */
pi:
    push r6
    push r7
    push r16
    push r28
    push r29
    stack_alloc 32, reg1 = r28, reg2 = r29
    adiw r28, 1
    movw r6, r28                /* r6:r7 = t */
    /* start one row early: the pointer is advanced before every call */
    ldi r28, lo8(PI_CONST - 8)
    ldi r29, hi8(PI_CONST - 8)
    ldi r16, 3                  /* round counter */
    movw r30, r24               /* Z = a */
1:
    /* e1_16(t, c, a) */
    movw r24, r6
    movw r6, r30                /* remember a for the second half */
    movw r20, r30
    adiw r28, 8
    movw r22, r28
    rcall e1_16

    /* e2_16(a, c, t) - Z holds t now */
    movw r24, r6
    movw r6, r30                /* remember t for the next round */
    movw r20, r30
    adiw r28, 8
    movw r22, r28
    rcall e2_16
    dec r16
    brne 1b
    /* --- */
    stack_free 32, reg1 = r26, reg2 = r27
    pop r29
    pop r28
    pop r16
    pop r7
    pop r6
    ret
|
|||
|
|
|||
|
|
|||
|
/******************************************************************************/
|
|||
|
/*
|
|||
|
void add_tag(
|
|||
|
PI_CTX *ctx,
|
|||
|
state_t a )
|
|||
|
{
|
|||
|
uint8_t i;
|
|||
|
i = 3;
|
|||
|
do {
|
|||
|
ctx->tag[i + 0] += a[0][i];
|
|||
|
ctx->tag[i + 4] += a[2][i];
|
|||
|
} while(i--);
|
|||
|
}
|
|||
|
*/
|
|||
|
|
|||
|
.global add_tag
|
|||
|
/*
 * add_tag - add rows 0 and 2 of a state word-wise into the tag.
 *
 * In:   r24:r25  ctx
 *       r22:r23  a, 4 * 4 words in RAM
 *
 * tag[0..3] += a[0][0..3] and tag[4..7] += a[2][0..3] (16-bit adds).
 * Saves r28 and r29; overwrites r18, r19, r22-r25 and Z.
 */
add_tag:
    push r28
    push r29
    movw r28, r22               /* Y = a */
    movw r30, r24
    adiw r30, ctx_tag           /* Z = ctx->tag */
    ldi r19, 2                  /* two rows */
1:
    ldi r18, 4                  /* four words per row */
2:
    ld r24, Y+
    ld r25, Y+
    ldd r22, Z + 0
    ldd r23, Z + 1
    add r24, r22
    adc r25, r23
    st Z+, r24
    st Z+, r25
    dec r18
    brne 2b
    adiw r28, 8                 /* skip the row in between */
    dec r19
    brne 1b
    /* --- */
    pop r29
    pop r28
    ret
|
|||
|
|
|||
|
|
|||
|
/******************************************************************************/
|
|||
|
/*
|
|||
|
void inject_tag(
|
|||
|
state_t a,
|
|||
|
const word_t x[8] )
|
|||
|
{
|
|||
|
int i;
|
|||
|
for (i = 0; i < 4; ++i) {
|
|||
|
a[0][i] ^= x[i];
|
|||
|
}
|
|||
|
for (; i < 8; ++i) {
|
|||
|
a[2][i - 4] ^= x[i];
|
|||
|
}
|
|||
|
}
|
|||
|
*/
|
|||
|
.global inject_block
|
|||
|
.global inject_tag
|
|||
|
|
|||
|
|
|||
|
/*
 * inject_block / inject_tag - XOR 16 bytes into rows 0 and 2 of a state.
 *
 * In:   r24:r25  a, 32-byte state in RAM
 *       r22:r23  x, 16 source bytes in RAM
 *
 * The first 8 source bytes go into a[0..7], the next 8 into a[16..23].
 * Overwrites r22-r25, X and Z.
 */
inject_block:
inject_tag:
    movw r26, r22               /* X = x */
    movw r30, r24               /* Z = a */
    ldi r23, 2                  /* two rows */
1:
    ldi r22, 8                  /* eight bytes per row */
2:
    ld r24, Z
    ld r25, X+
    eor r24, r25
    st Z+, r24
    dec r22
    brne 2b
    adiw r30, 8                 /* skip the row in between */
    dec r23
    brne 1b
    ret
|
|||
|
|
|||
|
/******************************************************************************/
|
|||
|
/*
|
|||
|
void extract_block(
|
|||
|
void *block,
|
|||
|
state_t a)
|
|||
|
{
|
|||
|
int i;
|
|||
|
for (i = 0; i < 4; ++i) {
|
|||
|
store_word_little(&((word_t *)block)[i], a[0][i]);
|
|||
|
}
|
|||
|
for (; i < 8; ++i) {
|
|||
|
store_word_little(&((word_t *)block)[i], a[2][i - 4]);
|
|||
|
}
|
|||
|
}
|
|||
|
*/
|
|||
|
|
|||
|
.global extract_block
|
|||
|
|
|||
|
/*
 * extract_block - copy rows 0 and 2 of a state into a 16-byte block.
 *
 * In:   r24:r25  block, 16 destination bytes in RAM
 *       r22:r23  a, 32-byte state in RAM
 *
 * block[0..7] = a[0..7], block[8..15] = a[16..23].
 * Overwrites r22-r24, X and Z.
 */
extract_block:
    movw r30, r22               /* Z = a */
    movw r26, r24               /* X = block */
    ldi r23, 2                  /* two rows */
1:
    ldi r22, 8                  /* eight bytes per row */
2:
    ld r24, Z+
    st X+, r24
    dec r22
    brne 2b
    adiw r30, 8                 /* skip the row in between */
    dec r23
    brne 1b
    ret
|
|||
|
|
|||
|
|
|||
|
/******************************************************************************/
|
|||
|
/*
|
|||
|
void replace_block(
|
|||
|
state_t a,
|
|||
|
const void *block )
|
|||
|
{
|
|||
|
word_t x;
|
|||
|
int i;
|
|||
|
for (i = 0; i < 4; ++i) {
|
|||
|
x = load_word_little(&((const word_t *)block)[i]);
|
|||
|
a[0][i] = x;
|
|||
|
}
|
|||
|
for (; i < 8; ++i) {
|
|||
|
x = load_word_little(&((const word_t *)block)[i]);
|
|||
|
a[2][i - 4] = x;
|
|||
|
}
|
|||
|
}
|
|||
|
*/
|
|||
|
/*
|
|||
|
.global replace_block
|
|||
|
|
|||
|
replace_block:
|
|||
|
movw r30, r24
|
|||
|
movw r26, r22
|
|||
|
ldi r23, 2
|
|||
|
1:
|
|||
|
ldi r22, 8
|
|||
|
2:
|
|||
|
ld r24, X+
|
|||
|
st Z+, r24
|
|||
|
dec r22
|
|||
|
brne 2b
|
|||
|
adiw r30, 8
|
|||
|
dec r23
|
|||
|
brne 1b
|
|||
|
ret
|
|||
|
*/
|
|||
|
/******************************************************************************/
|
|||
|
/*
|
|||
|
void inject_last_block(
|
|||
|
state_t a,
|
|||
|
const void *block,
|
|||
|
size_t length_b )
|
|||
|
{
|
|||
|
uint8_t t[PI_RATE_BYTES];
|
|||
|
if (length_b >= PI_RATE_BITS) {
|
|||
|
/ * error * /
|
|||
|
printf("ERROR <%s %s %d>\n", __FILE__, __func__, __LINE__);
|
|||
|
return;
|
|||
|
}
|
|||
|
memset(t, 0, sizeof(t));
|
|||
|
memcpy(t, block, (length_b + 7) / 8);
|
|||
|
t[length_b / 8] |= 1 << (length_b & 7);
|
|||
|
inject_block(a, t);
|
|||
|
}
|
|||
|
*/
|
|||
|
|
|||
|
.global inject_last_block
|
|||
|
|
|||
|
/*
 * inject_last_block - XOR a partial block plus a padding bit into a state.
 *
 * In:   r24:r25  a, 32-byte state in RAM
 *       r22:r23  block, source bytes in RAM
 *       r20      number of source bytes; the callers in this file pass
 *                0..15. r21 is not looked at.
 *
 * The source bytes are XORed into a[0..7] and then a[16..23]; the state
 * byte following the last one gets bit 0 flipped as padding.
 * Overwrites r20, r22-r25, X and Z.
 *
 * With 16 or more bytes the padding position is never reached. The
 * routine used to run off its end into replace_block in that case; it
 * now returns after having XORed 16 bytes without padding.
 *
 * The label "return" is also the exit used by replace_last_block.
 */
inject_last_block:
    movw r30, r24               /* Z = a */
    movw r26, r22               /* X = block */
    ldi r23, 2                  /* two rows */
1:
    ldi r22, 8                  /* eight bytes per row */
2:
    tst r20
    brne 3f
    /* input used up: flip the padding bit and leave */
    ld r24, Z
    ldi r25, 1
    eor r24, r25
    st Z, r24
return:
    ret
3:
    dec r20
    ld r25, X+
    ld r24, Z
    eor r24, r25
    st Z+, r24
    dec r22
    brne 2b
    adiw r30, 8                 /* skip the row in between */
    dec r23
    brne 1b
    /* only reached for lengths of 16 and above; do not fall through */
    ret
|
|||
|
|
|||
|
/******************************************************************************/
|
|||
|
/*
|
|||
|
void replace_last_block(
|
|||
|
state_t a,
|
|||
|
const void *block,
|
|||
|
size_t length_B )
|
|||
|
{
|
|||
|
uint8_t t[PI_RATE_BYTES];
|
|||
|
if (length_B >= PI_RATE_BYTES) {
|
|||
|
/ * error * /
|
|||
|
printf("ERROR <%s %s %d>\n", __FILE__, __func__, __LINE__);
|
|||
|
return;
|
|||
|
}
|
|||
|
extract_block(t, a);
|
|||
|
memcpy(t, block, length_B);
|
|||
|
replace_block(a, t);
|
|||
|
}
|
|||
|
*/
|
|||
|
|
|||
|
.global replace_last_block
|
|||
|
.global replace_block
|
|||
|
|
|||
|
/*
 * replace_block / replace_last_block - overwrite rows 0 and 2 of a state.
 *
 * In:   r24:r25  a, 32-byte state in RAM
 *       r22:r23  block, source bytes in RAM
 *       r20      byte count (replace_last_block only)
 *
 * replace_block loads r20 with 32 and continues in replace_last_block,
 * so its copy always runs the full 16 bytes. Bytes 0..7 of the block go
 * to a[0..7], bytes 8..15 to a[16..23]; copying stops as soon as r20
 * reaches zero. The early exit is the "return" label that sits inside
 * inject_last_block.
 * Overwrites r20, r22-r24, X and Z.
 */
replace_block:
    ldi r20, 32
replace_last_block:
    movw r26, r22               /* X = block */
    movw r30, r24               /* Z = a */
    ldi r23, 2                  /* two rows */
1:
    ldi r22, 8                  /* eight bytes per row */
2:
    tst r20
    breq return                 /* nothing left to copy */
    dec r20
    ld r24, X+
    st Z+, r24
    dec r22
    brne 2b
    adiw r30, 8                 /* skip the row in between */
    dec r23
    brne 1b
    ret
|
|||
|
|
|||
|
/******************************************************************************/
|
|||
|
/*
|
|||
|
int PI_INIT(
|
|||
|
PI_CTX *ctx,
|
|||
|
const void *key,
|
|||
|
size_t key_length_B,
|
|||
|
const void *pmn,
|
|||
|
size_t pmn_length_B)
|
|||
|
{
|
|||
|
int i;
|
|||
|
uint8_t setup_buf[PI_IS_BYTES];
|
|||
|
if (key_length_B + pmn_length_B + 1 > PI_IS_BYTES) {
|
|||
|
return -1;
|
|||
|
}
|
|||
|
memset(ctx->tag, 0, sizeof(ctx->tag));
|
|||
|
memset(setup_buf, 0, sizeof(setup_buf));
|
|||
|
memcpy(setup_buf, key, key_length_B);
|
|||
|
memcpy(&setup_buf[key_length_B], pmn, pmn_length_B);
|
|||
|
setup_buf[key_length_B + pmn_length_B] = 1;
|
|||
|
for (i = 0; i < 16; ++i) {
|
|||
|
ctx->cis[i / 4][i % 4] = load_word_little(&setup_buf[i * PI_WORD_SIZE / 8]);
|
|||
|
}
|
|||
|
pi((word_t*)ctx->cis);
|
|||
|
ctx->ctr = 0;
|
|||
|
for (i = 0; i * PI_WORD_SIZE < 64; ++i) {
|
|||
|
ctx->ctr |= (uint64_t)ctx->cis[1][i] << (i * PI_WORD_SIZE);
|
|||
|
}
|
|||
|
return 0;
|
|||
|
}
|
|||
|
*/
|
|||
|
|
|||
|
.global pi16_init
|
|||
|
|
|||
|
/*
 * pi16_init - set up a context from key and public message number.
 *
 * In:   r24:r25  ctx
 *       r22:r23  key
 *       r20:r21  key length in bytes
 *       r18:r19  pmn (nonce)
 *       r16:r17  pmn length in bytes
 * Out:  r24:r25  0 on success, -1 (0xFFFF) when key length + pmn length
 *                plus the padding byte does not fit into the 32 state
 *                bytes.
 *
 * Fills the state with key || pmn || 0x01 || zeros, clears the tag (the
 * zero fill runs through state and tag in one go), applies pi and copies
 * state bytes 8..15 into the counter.
 *
 * Changes against the earlier version of this routine:
 *  - r17 is used as a scratch counter and is call-saved in the avr-gcc
 *    ABI, so it is pushed and popped now (only r16 was saved before).
 *  - With key length + pmn length == 0 the copy loop is skipped; it used
 *    to wrap its 8-bit counter and copy 256 bytes.
 */
pi16_init:
    movw r26, r20
    add r26, r16
    adc r27, r17                /* X = key_len + nonce_len */
    mov r21, r26                /* r21 = key_len + nonce_len (low byte) */
    sbiw r26, 32
    brmi 1f                     /* sum <= 31: room for the padding byte */
return_error:
    ser r24
    ser r25
    ret
1:
    push r16
    push r17
    ldi r17, 32 + 16 - 1        /* state_size + tag_size - 1 */
    sub r17, r21                /* r17 = rest of state and tag to clear */
    movw r30, r24               /* Z points at ctx->cis */
    movw r26, r22               /* X points at key */
    tst r21
    breq 4f                     /* empty key and nonce: nothing to copy */
3:
    tst r20
    brne 5f
    movw r26, r18               /* key used up: set X to point at nonce */
5:
    dec r20
    ld r0, X+
    st Z+, r0
    dec r21
    brne 3b
    /* --- */
4:
    ldi r21, 1
    st Z+, r21                  /* store padding '1' */
6:
    st Z+, r1                   /* r1 is the zero register */
    dec r17
    brne 6b
    /* --- */
    movw r24, r30
    sbiw r24, 32 + 16           /* Z ran to the end of the tag; back to ctx */
    rcall pi                    /* returns with Z = ctx */
    movw r26, r30
    adiw r26, 32 + 16           /* X points at ctx->ctr */
    adiw r30, 8                 /* Z points at ctx->cis[1][0] */
    ldi r24, 8
7:
    ld r0, Z+
    st X+, r0
    dec r24
    brne 7b                     /* leaves r24 == 0 */
    pop r17
    pop r16
    clr r25                     /* r24:r25 = 0 */
    ret
|
|||
|
|
|||
|
|
|||
|
/******************************************************************************/
|
|||
|
/*
|
|||
|
void PI_PROCESS_AD_BLOCK(
|
|||
|
PI_CTX *ctx,
|
|||
|
const void *ad,
|
|||
|
unsigned long ad_num )
|
|||
|
{
|
|||
|
state_t a;
|
|||
|
ctr_trans(ctx, a, ad_num);
|
|||
|
inject_block(a, ad);
|
|||
|
pi((word_t*)a);
|
|||
|
add_tag(ctx, a);
|
|||
|
}
|
|||
|
*/
|
|||
|
|
|||
|
.global pi16_process_ad_block
|
|||
|
|
|||
|
/*
 * pi16_process_ad_block - absorb one full 16-byte block of associated data.
 *
 * In:   r24:r25  ctx
 *       r22:r23  ad, 16 bytes in RAM
 *       r18-r21  ad_num, 32-bit block number
 *
 * Builds a 32-byte working state a on the stack:
 *   ctr_trans(ctx, a, ad_num)   (this already ends with pi(a))
 *   inject_block(a, ad); pi(a); add_tag(ctx, a)
 * Saves r28 and r29.
 */
pi16_process_ad_block:
    push r28
    push r29
    stack_alloc 32, reg1 = r28, reg2 = r29
    adiw r28, 1                 /* Y = a */
    /* keep ctx and ad for the later calls */
    push r24
    push r25
    push r22
    push r23
    movw r22, r28
    rcall ctr_trans             /* r24 = ctx, r22 = a, r18-r21 = ad_num */
    movw r24, r28
    pop r23
    pop r22                     /* r22:r23 = ad */
    rcall inject_block
    movw r24, r28
    rcall pi
    movw r22, r28
    pop r25
    pop r24                     /* r24:r25 = ctx */
    rcall add_tag
    stack_free 32, reg1 = r28, reg2 = r29
    pop r29
    pop r28
    ret
|
|||
|
|
|||
|
/******************************************************************************/
|
|||
|
/*
|
|||
|
void PI_PROCESS_AD_LAST_BLOCK(
|
|||
|
PI_CTX *ctx,
|
|||
|
const void *ad,
|
|||
|
size_t ad_length_B,
|
|||
|
unsigned long ad_num )
|
|||
|
{
|
|||
|
state_t a;
|
|||
|
while (ad_length_B >= PI_AD_BLOCK_LENGTH_BYTES) {
|
|||
|
PI_PROCESS_AD_BLOCK(ctx, ad, ad_num);
|
|||
|
ad_num++;
|
|||
|
ad_length_B -= PI_AD_BLOCK_LENGTH_BYTES;
|
|||
|
ad = &((uint8_t*)ad)[PI_AD_BLOCK_LENGTH_BYTES];
|
|||
|
}
|
|||
|
|
|||
|
ctr_trans(ctx, a, ad_num);
|
|||
|
inject_last_block(a, ad, ad_length_B);
|
|||
|
pi((word_t*)a);
|
|||
|
add_tag(ctx, a);
|
|||
|
ctx->ctr += ad_num;
|
|||
|
inject_tag(ctx->cis, ctx->tag);
|
|||
|
pi((word_t*)ctx->cis);
|
|||
|
}
|
|||
|
*/
|
|||
|
|
|||
|
.global pi16_process_ad_last_block
|
|||
|
|
|||
|
/*
 * pi16_process_ad_last_block - absorb the remaining associated data.
 *
 * In:   r24:r25  ctx
 *       r22:r23  ad
 *       r20:r21  ad_length_B, number of bytes at ad
 *       r16-r19  ad_num, 32-bit number of the first block
 *
 * Full 16-byte blocks are passed to pi16_process_ad_block, incrementing
 * ad_num each time. The remainder (0..15 bytes) is padded and absorbed
 * through a stack state. Then ad_num is added to the 64-bit counter in
 * ctx, the tag is XORed into the context state and the routine leaves
 * through a tail jump to pi(ctx).
 * Saves r10-r17, r28 and r29.
 */
pi16_process_ad_last_block:
    push_range 10, 17
    push r28
    push r29
    movw r10, r24 ; ctx
    movw r12, r22 ; ad
    movw r14, r16 ; lo16(ad_num)
    movw r16, r18 ; hi16(ad_num)
    movw r28, r20 ; r28:r29 contains ad_length_B
1:
    sbiw r28, 16
    brmi 6f                     /* fewer than 16 bytes left */
    movw r18, r14
    movw r20, r16
    movw r22, r12
    movw r24, r10
    rcall pi16_process_ad_block
    /* ad_num += 1 (r1 is zero, the 1 comes in through the carry) */
    sec
    adc r14, r1
    adc r15, r1
    adc r16, r1
    adc r17, r1
    /* ad += 16 */
    ldi r24, 16
    add r12, r24
    adc r13, r1
    rjmp 1b
    /* --- */
6:
    adiw r28, 16                /* undo the last subtraction: r28 = remainder */
    stack_alloc 32, reg1 = r30, reg2 = r31
    adiw r30, 1
    push r28                    /* keep the remaining length (low byte) */
    movw r28, r30 ; Y points at a (on stack)
    movw r18, r14
    movw r20, r16
    movw r22, r28
    movw r24, r10
    rcall ctr_trans             /* ends with pi(a) */
    movw r24, r28
    movw r22, r12
    clr r21
    pop r20                     /* r20 = remaining length */
    rcall inject_last_block
    movw r24, r28
    rcall pi
    movw r24, r10
    movw r22, r28
    rcall add_tag
    stack_free 32, reg1 = r30, reg2 = r31

    /* ctx->ctr += ad_num, as a 64-bit addition */
    movw r30, r10
    adiw r30, ctx_ctr
    clr r0
    movw r18, r0 ; clear top 4 bytes to have 64-bit ad_num in register-file
    movw r20, r0
    ldi r28, 14 ; Y points to r14 (ad_num)
    clr r29
    ldi r25, 8
    /*
     * No clc here: the first adc uses the carry left by the adiw above,
     * which is clear unless the address computation wrapped. None of the
     * instructions in between change the carry flag.
     */
2:
    ld r24, Y+
    ld r0, Z
    adc r0, r24
    st Z+, r0
    dec r25
    brne 2b

    /* inject_tag(ctx->cis, ctx->tag) */
    sbiw r30, 8 + 16            /* Z went past the counter; back to the tag */
    movw r22, r30
    movw r24, r10
    rcall inject_tag
    movw r24, r10               /* argument for the final pi */

    pop r29
    pop r28
    pop_range 10, 17
    rjmp pi                     /* tail call: pi returns to our caller */
|
|||
|
|
|||
|
/******************************************************************************/
|
|||
|
/*
|
|||
|
void PI_PROCESS_SMN(
|
|||
|
PI_CTX *ctx,
|
|||
|
void *c0,
|
|||
|
const void *smn)
|
|||
|
{
|
|||
|
ctx->ctr++;
|
|||
|
ctr_trans(ctx, ctx->cis, 0);
|
|||
|
inject_block(ctx->cis, smn);
|
|||
|
if (c0) {
|
|||
|
extract_block(c0, ctx->cis);
|
|||
|
}
|
|||
|
pi((word_t*)ctx->cis);
|
|||
|
add_tag(ctx, ctx->cis);
|
|||
|
}
|
|||
|
*/
|
|||
|
|
|||
|
.global pi16_encrypt_smn
|
|||
|
|
|||
|
/*
 * pi16_encrypt_smn / pi16_process_smn - process the secret message number.
 *
 * In:   r24:r25  ctx
 *       r22:r23  output block (16 bytes) or 0 for no output
 *       r20:r21  input block (16 bytes)
 *       T flag   clear = encrypt (set by the pi16_encrypt_smn entry),
 *                set = decrypt (pi16_decrypt_smn enters at
 *                pi16_process_smn)
 *
 * Increments the 64-bit counter, transforms the context state in place
 * (ctr_trans with a == ctx and a block number of 0), XORs the input into
 * it and optionally extracts the output. When decrypting, the input
 * block then replaces the state rows. Finally pi is applied and the
 * routine leaves through a tail jump to add_tag(ctx, ctx).
 * Saves r12-r17.
 */
pi16_encrypt_smn:
    clt
pi16_process_smn:
    push_range 12, 17
    movw r12, r24 ; ctx
    movw r14, r22 ; c0
    movw r16, r20 ; smn

    /* ctx->ctr += 1 (r1 is zero, the 1 comes in through the carry) */
    movw r26, r24
    adiw r26, ctx_ctr
    ldi r18, 8
    sec
1:
    ld r0, X
    adc r0, r1
    st X+, r0
    dec r18
    brne 1b

    /* ctr_trans(ctx, ctx, 0): same address, so the state is not copied */
    movw r22, r24
    clr r0
    movw r20, r0
    movw r18, r0
    rcall ctr_trans

    movw r24, r12
    movw r22, r16
    rcall inject_block

    /* skip the output when the pointer is 0 */
    cp r14, r1
    cpc r15, r1
    breq 4f

    movw r24, r14
    movw r22, r12
    rcall extract_block
4:
    brtc 5f                     /* encrypting: keep the state as it is */
    movw r24, r12
    movw r22, r16
    rcall replace_block
5:
    movw r24, r12
    rcall pi

    movw r22, r12
    movw r24, r12
    pop_range 12, 17
    rjmp add_tag                /* tail call: add_tag returns to our caller */
|
|||
|
|
|||
|
/******************************************************************************/
|
|||
|
/*
|
|||
|
void PI_DECRYPT_SMN(
|
|||
|
PI_CTX *ctx,
|
|||
|
void *smn,
|
|||
|
const void *c0)
|
|||
|
{
|
|||
|
ctx->ctr++;
|
|||
|
ctr_trans(ctx, ctx->cis, 0);
|
|||
|
inject_block(ctx->cis, c0);
|
|||
|
if (smn) {
|
|||
|
extract_block(smn, ctx->cis);
|
|||
|
}
|
|||
|
replace_block(ctx->cis, c0);
|
|||
|
pi((word_t*)ctx->cis);
|
|||
|
add_tag(ctx, ctx->cis);
|
|||
|
}
|
|||
|
*/
|
|||
|
|
|||
|
.global pi16_decrypt_smn
|
|||
|
|
|||
|
/*
 * pi16_decrypt_smn - decrypting variant of pi16_encrypt_smn.
 * Takes the same registers; sets the T flag so that the shared code
 * replaces the state rows with the input block after extraction.
 */
pi16_decrypt_smn:
    set                         /* T = 1 selects decryption */
    rjmp pi16_process_smn
|
|||
|
/*
|
|||
|
push_range 12, 17
|
|||
|
movw r12, r24 ; ctx
|
|||
|
movw r14, r22 ; smn
|
|||
|
movw r16, r20 ; c0
|
|||
|
movw r26, r24
|
|||
|
adiw r26, ctx_ctr
|
|||
|
ldi r18, 8
|
|||
|
sec
|
|||
|
1:
|
|||
|
ld r0, X
|
|||
|
adc r0, r1
|
|||
|
st X+, r0
|
|||
|
dec r18
|
|||
|
brne 1b
|
|||
|
|
|||
|
movw r22, r24
|
|||
|
clr r0
|
|||
|
movw r20, r0
|
|||
|
movw r18, r0
|
|||
|
rcall ctr_trans
|
|||
|
|
|||
|
movw r24, r12
|
|||
|
movw r22, r16
|
|||
|
rcall inject_block
|
|||
|
|
|||
|
cp r14, r1
|
|||
|
cpc r15, r1
|
|||
|
breq 4f
|
|||
|
|
|||
|
movw r24, r14
|
|||
|
movw r22, r12
|
|||
|
rcall extract_block
|
|||
|
|
|||
|
4:
|
|||
|
movw r24, r12
|
|||
|
movw r22, r16
|
|||
|
rcall replace_block
|
|||
|
|
|||
|
movw r24, r12
|
|||
|
rcall pi
|
|||
|
|
|||
|
movw r22, r12
|
|||
|
movw r24, r12
|
|||
|
pop_range 12, 17
|
|||
|
rjmp add_tag
|
|||
|
*/
|
|||
|
|
|||
|
/******************************************************************************/
|
|||
|
/*
|
|||
|
void PI_EXTRACT_TAG(
|
|||
|
PI_CTX *ctx,
|
|||
|
void *dest )
|
|||
|
{
|
|||
|
uint8_t buf[8 * PI_WORD_SIZE / 8];
|
|||
|
int i;
|
|||
|
for (i = 0; i < 8; ++i) {
|
|||
|
store_word_little(&buf[i * PI_WORD_SIZE / 8], ctx->tag[i]);
|
|||
|
}
|
|||
|
memcpy(dest, buf, PI_TAG_BYTES);
|
|||
|
}
|
|||
|
*/
|
|||
|
|
|||
|
.global pi16_extract_tag
|
|||
|
|
|||
|
/*
 * pi16_extract_tag - copy the 16 tag bytes out of the context.
 *
 * In:   r24:r25  ctx
 *       r22:r23  dest, 16 bytes in RAM
 * Overwrites r0, r24, X and Z.
 */
pi16_extract_tag:
    movw r26, r22               /* X = dest */
    movw r30, r24
    adiw r30, ctx_tag           /* Z = ctx->tag */
    ldi r24, 16
1:
    ld r0, Z+
    st X+, r0
    dec r24
    brne 1b
    ret
|
|||
|
|
|||
|
/******************************************************************************/
|
|||
|
/*
|
|||
|
void PI_ENCRYPT_BLOCK(
|
|||
|
PI_CTX *ctx,
|
|||
|
void *dest,
|
|||
|
const void *src,
|
|||
|
unsigned long num )
|
|||
|
{
|
|||
|
state_t a;
|
|||
|
ctr_trans(ctx, a, num);
|
|||
|
inject_block(a, src);
|
|||
|
if (dest) {
|
|||
|
extract_block(dest, a);
|
|||
|
}
|
|||
|
pi((word_t*)a);
|
|||
|
add_tag(ctx, a);
|
|||
|
}
|
|||
|
*/
|
|||
|
.global pi16_encrypt_block
|
|||
|
|
|||
|
/*
 * pi16_encrypt_block / pi16_process_block - process one full 16-byte block.
 *
 * In:   r24:r25  ctx
 *       r22:r23  dest (16 bytes) or 0 for no output
 *       r20:r21  src (16 bytes)
 *       r16-r19  num, 32-bit block number
 *       T flag   clear = encrypt (set by the pi16_encrypt_block entry),
 *                set = decrypt (pi16_decrypt_block enters at
 *                pi16_process_block)
 *
 * Works on a 32-byte stack state a:
 *   ctr_trans(ctx, a, num)   (this already ends with pi(a))
 *   inject_block(a, src); optional extract_block(dest, a);
 *   when decrypting replace_block(a, src); pi(a); add_tag(ctx, a)
 * Saves r8-r17, r28 and r29.
 */
pi16_encrypt_block:
    clt
pi16_process_block:
    push_range 8, 17
    push r28
    push r29
    stack_alloc 32, reg1 = r28, reg2 = r29
    adiw r28, 1                 /* Y = a */
    movw r8, r24 ; ctx
    movw r10, r22 ; dest
    movw r12, r20 ; src

    /* move num from r16-r19 to r18-r21, where ctr_trans expects it */
    movw r22, r28
    movw r20, r18
    movw r18, r16
    rcall ctr_trans

    movw r24, r30               /* pi left Z pointing at a */
    movw r22, r12
    rcall inject_block

    /* skip the output when dest is 0 */
    cp r10, r1
    cpc r11, r1
    breq 4f

    movw r24, r10
    movw r22, r28
    rcall extract_block

4:
    brtc 5f                     /* encrypting: keep the state as it is */
    movw r24, r28
    movw r22, r12
    rcall replace_block
5:
    movw r24, r28
    rcall pi

    movw r22, r28
    movw r24, r8
    rcall add_tag

    stack_free 32, reg1 = r30, reg2 = r31
    pop r29
    pop r28
    pop_range 8, 17
    ret
|
|||
|
|
|||
|
/******************************************************************************/
|
|||
|
/*
|
|||
|
void PI_DECRYPT_BLOCK(
|
|||
|
PI_CTX *ctx,
|
|||
|
void *dest,
|
|||
|
const void *src,
|
|||
|
unsigned long num )
|
|||
|
{
|
|||
|
state_t a;
|
|||
|
ctr_trans(ctx, a, num);
|
|||
|
inject_block(a, src);
|
|||
|
if (dest) {
|
|||
|
extract_block(dest, a);
|
|||
|
}
|
|||
|
replace_block(a, src);
|
|||
|
pi((word_t*)a);
|
|||
|
add_tag(ctx, a);
|
|||
|
}
|
|||
|
*/
|
|||
|
.global pi16_decrypt_block
|
|||
|
|
|||
|
/*
 * pi16_decrypt_block - decrypting variant of pi16_encrypt_block.
 * Takes the same registers; sets the T flag so that the shared code
 * replaces the state rows with the source block after extraction.
 */
pi16_decrypt_block:
    set                         /* T = 1 selects decryption */
    rjmp pi16_process_block
|
|||
|
/*
|
|||
|
push_range 8, 17
|
|||
|
push r28
|
|||
|
push r29
|
|||
|
stack_alloc 32, reg1 = r28, reg2 = r29
|
|||
|
adiw r28, 1
|
|||
|
movw r8, r24 ; ctx
|
|||
|
movw r10, r22 ; dest
|
|||
|
movw r12, r20 ; src
|
|||
|
|
|||
|
movw r22, r28
|
|||
|
movw r20, r18
|
|||
|
movw r18, r16
|
|||
|
rcall ctr_trans
|
|||
|
|
|||
|
movw r24, r30
|
|||
|
movw r22, r12
|
|||
|
rcall inject_block
|
|||
|
|
|||
|
cp r10, r1
|
|||
|
cpc r11, r1
|
|||
|
breq 4f
|
|||
|
|
|||
|
movw r24, r10
|
|||
|
movw r22, r28
|
|||
|
rcall extract_block
|
|||
|
|
|||
|
4:
|
|||
|
movw r24, r28
|
|||
|
movw r22, r12
|
|||
|
rcall replace_block
|
|||
|
|
|||
|
movw r24, r28
|
|||
|
rcall pi
|
|||
|
|
|||
|
movw r22, r28
|
|||
|
movw r24, r8
|
|||
|
rcall add_tag
|
|||
|
|
|||
|
stack_free 32, reg1 = r30, reg2 = r31
|
|||
|
pop r29
|
|||
|
pop r28
|
|||
|
pop_range 8, 17
|
|||
|
ret
|
|||
|
*/
|
|||
|
/******************************************************************************/
|
|||
|
/*
|
|||
|
void PI_ENCRYPT_LAST_BLOCK(
|
|||
|
PI_CTX *ctx,
|
|||
|
void *dest,
|
|||
|
const void *src,
|
|||
|
size_t length_B,
|
|||
|
unsigned long num )
|
|||
|
{
|
|||
|
state_t a;
|
|||
|
while (length_B >= PI_PT_BLOCK_LENGTH_BYTES) {
|
|||
|
PI_ENCRYPT_BLOCK(ctx, dest, src, num);
|
|||
|
num++;
|
|||
|
length_B -= PI_PT_BLOCK_LENGTH_BYTES;
|
|||
|
src = &((uint8_t*)src)[PI_PT_BLOCK_LENGTH_BYTES];
|
|||
|
if (dest) {
|
|||
|
dest = &((uint8_t*)dest)[PI_CT_BLOCK_LENGTH_BYTES];
|
|||
|
}
|
|||
|
}
|
|||
|
ctr_trans(ctx, a, num);
|
|||
|
inject_last_block(a, src, length_B);
|
|||
|
if (dest) {
|
|||
|
uint8_t tmp[PI_PT_BLOCK_LENGTH_BYTES];
|
|||
|
extract_block(tmp, a);
|
|||
|
memcpy(dest, tmp, length_B);
|
|||
|
}
|
|||
|
pi((word_t*)a);
|
|||
|
add_tag(ctx, a);
|
|||
|
}
|
|||
|
*/
|
|||
|
.global pi16_encrypt_last_block
|
|||
|
|
|||
|
/*
 * pi16_encrypt_last_block / pi16_process_last_block - process the rest of
 * a message.
 *
 * In:   r24:r25  ctx
 *       r22:r23  dest or 0 for no output
 *       r20:r21  src
 *       r18:r19  length_B, number of bytes at src
 *       r14-r17  num, 32-bit number of the first block
 *       T flag   clear = encrypt (set by the pi16_encrypt_last_block
 *                entry), set = decrypt (pi16_decrypt_last_block enters at
 *                pi16_process_last_block)
 *
 * Full 16-byte blocks go through pi16_encrypt_block / pi16_decrypt_block,
 * with num, src and (if non-zero) dest advancing per block. The remaining
 * 0..15 bytes are padded into a stack state; when dest is non-zero and
 * the remainder is not empty, that many output bytes are copied out via
 * a 16-byte stack buffer placed behind the state.
 *
 * Saves r4-r17, r28 and r29. r16 and r17 are included because the
 * full-block loop reloads them with the low half of num; they are
 * call-saved in the avr-gcc ABI and were not restored before.
 */
pi16_encrypt_last_block:
    clt
pi16_process_last_block:
    push r28
    push r29
    push_range 4, 17
    movw r4, r24 ; ctx
    movw r6, r22 ; dest
    movw r8, r20 ; src
    movw r10, r18 ; len
    movw r12, r14 ; lo16(num)
    movw r14, r16 ; hi16(num)
    movw r28, r18
1:
    sbiw r28, 16
    brmi 4f                     /* fewer than 16 bytes left */
    movw r24, r4
    movw r22, r6
    movw r20, r8
    movw r18, r14
    movw r16, r12
    brts 2f
    rcall pi16_encrypt_block
    rjmp 3f
2:
    rcall pi16_decrypt_block
3:
    /* num += 1 (r1 is zero, the 1 comes in through the carry) */
    sec
    adc r12, r1
    adc r13, r1
    adc r14, r1
    adc r15, r1
    /* src += 16 */
    ldi r24, 16
    add r8, r24
    adc r9, r1
    /* dest += 16, unless there is no destination */
    cp r6, r1
    cpc r7, r1
    breq 1b
    add r6, r24
    adc r7, r1
    rjmp 1b
4:
    stack_alloc 32 + 16, reg1 = r30, reg2 = r31
    adiw r28, 16                /* undo the last subtraction */
    movw r10, r28               /* r10:r11 = remaining length */
    adiw r30, 1
    movw r28, r30               /* Y = a, output buffer at Y + 32 */

    movw r24, r4
    movw r22, r28
    movw r20, r14
    movw r18, r12
    rcall ctr_trans             /* ends with pi(a) */

    movw r24, r28
    movw r22, r8
    movw r20, r10
    rcall inject_last_block

    /* no output without a destination or without remaining bytes */
    cp r6, r1
    cpc r7, r1
    breq 6f
    tst r10
    breq 6f

    movw r24, r28
    adiw r24, 32
    movw r22, r28
    rcall extract_block         /* buffer = rows 0 and 2 of a */
    movw r30, r28
    adiw r30, 32
    movw r26, r6
    mov r24, r10
5:
    ld r0, Z+                   /* copy only the remaining length to dest */
    st X+, r0
    dec r24
    brne 5b
6:
    brtc 7f                     /* encrypting: keep the state as it is */
    movw r24, r28
    movw r22, r8
    movw r20, r10
    rcall replace_last_block
7:
    movw r24, r28
    rcall pi

    movw r24, r4
    movw r22, r28
    rcall add_tag

    stack_free 32 + 16
    pop_range 4, 17
    pop r29
    pop r28
    ret
|
|||
|
|
|||
|
/******************************************************************************/
|
|||
|
/*
|
|||
|
void PI_DECRYPT_LAST_BLOCK(
|
|||
|
PI_CTX *ctx,
|
|||
|
void *dest,
|
|||
|
const void *src,
|
|||
|
size_t length_B,
|
|||
|
unsigned long num )
|
|||
|
{
|
|||
|
state_t a;
|
|||
|
ctr_trans(ctx, a, num);
|
|||
|
inject_last_block(a, src, length_B);
|
|||
|
if (dest) {
|
|||
|
uint8_t tmp[PI_PT_BLOCK_LENGTH_BYTES];
|
|||
|
extract_block(tmp, a);
|
|||
|
memcpy(dest, tmp, length_B);
|
|||
|
}
|
|||
|
replace_last_block(a, src, length_B);
|
|||
|
pi((word_t*)a);
|
|||
|
add_tag(ctx, a);
|
|||
|
}
|
|||
|
*/
|
|||
|
.global pi16_decrypt_last_block
|
|||
|
|
|||
|
/*
 * pi16_decrypt_last_block - decrypting variant of pi16_encrypt_last_block.
 * Takes the same registers; sets the T flag so that the shared code
 * decrypts full blocks and replaces the state rows with the remaining
 * source bytes.
 */
pi16_decrypt_last_block:
    set                         /* T = 1 selects decryption */
    rjmp pi16_process_last_block
|
|||
|
/*
|
|||
|
push r28
|
|||
|
push r29
|
|||
|
push_range 4, 15
|
|||
|
movw r4, r24 ; ctx
|
|||
|
movw r6, r22 ; dest
|
|||
|
movw r8, r20 ; src
|
|||
|
movw r10, r18 ; len
|
|||
|
movw r12, r14 ; lo16(num)
|
|||
|
movw r14, r16 ; hi16(num)
|
|||
|
movw r28, r18
|
|||
|
1:
|
|||
|
sbiw r28, 16
|
|||
|
brmi 2f
|
|||
|
movw r24, r4
|
|||
|
movw r22, r6
|
|||
|
movw r20, r8
|
|||
|
movw r18, r14
|
|||
|
; movw r16, r16
|
|||
|
rcall pi16_encrypt_block
|
|||
|
|
|||
|
sec
|
|||
|
adc r12, r1
|
|||
|
adc r13, r1
|
|||
|
adc r14, r1
|
|||
|
adc r15, r1
|
|||
|
ldi r24, 16
|
|||
|
add r8, r24
|
|||
|
adc r9, r1
|
|||
|
cp r6, r1
|
|||
|
cpc r7, r1
|
|||
|
breq 1b
|
|||
|
add r6, r24
|
|||
|
adc r7, r1
|
|||
|
rjmp 1b
|
|||
|
2:
|
|||
|
stack_alloc 32 + 16, reg1 = r30, reg2 = r31
|
|||
|
adiw r28, 16
|
|||
|
movw r10, r28
|
|||
|
adiw r30, 1
|
|||
|
movw r28, r30
|
|||
|
|
|||
|
movw r24, r4
|
|||
|
movw r22, r28
|
|||
|
movw r20, r14
|
|||
|
movw r18, r12
|
|||
|
rcall ctr_trans
|
|||
|
|
|||
|
movw r24, r28
|
|||
|
movw r22, r8
|
|||
|
movw r20, r10
|
|||
|
rcall inject_last_block
|
|||
|
|
|||
|
cp r6, r1
|
|||
|
cpc r7, r1
|
|||
|
breq 6f
|
|||
|
tst r10
|
|||
|
breq 6f
|
|||
|
|
|||
|
movw r24, r28
|
|||
|
adiw r24, 32
|
|||
|
movw r22, r28
|
|||
|
rcall extract_block
|
|||
|
movw r30, r28
|
|||
|
adiw r30, 32
|
|||
|
movw r26, r6
|
|||
|
mov r24, r10
|
|||
|
3:
|
|||
|
ld r0, Z+
|
|||
|
st X+, r0
|
|||
|
dec r24
|
|||
|
brne 3b
|
|||
|
6:
|
|||
|
movw r24, r28
|
|||
|
movw r22, r8
|
|||
|
movw r20, r10
|
|||
|
rcall replace_last_block
|
|||
|
|
|||
|
movw r24, r28
|
|||
|
rcall pi
|
|||
|
|
|||
|
movw r24, r4
|
|||
|
movw r22, r28
|
|||
|
rcall add_tag
|
|||
|
|
|||
|
stack_free 32 + 16
|
|||
|
pop_range 4, 15
|
|||
|
pop r29
|
|||
|
pop r28
|
|||
|
ret
|
|||
|
*/
|
|||
|
|
|||
|
|