[keccak-asm] keccak_f1600 in asm

This commit is contained in:
bg 2012-12-26 21:35:01 +01:00
parent 4128060fcc
commit e7925dc3d2
2 changed files with 55 additions and 96 deletions

View File

@ -166,14 +166,18 @@ chi_step:
brne 10b
ret
.global keccak_theta
keccak_theta:
push_range 2, 8
.global keccak_f1600
keccak_f1600:
push_range 2, 9
push r16
push_range 28, 29
stack_alloc_large 200, r26, r27
adiw XL, 1
clr r9
5:
movw r30, r24 ; Z = a
movw r26, r22 ; X = b
ldi r19, 5
10:
@ -350,6 +354,7 @@ keccak_theta:
; Z points at b
movw XL, ZL
movw r4, ZL
adiw XL, 8
adiw ZL, 16
movw YL, r2
@ -366,8 +371,51 @@ keccak_theta:
adiw ZL, 5 * 8
dec r18
brne 10b
/* -- iota -- */
ldi r30, lo8(keccak_rc_comp)
ldi r31, hi8(keccak_rc_comp)
add r30, r9
adc r31, __zero_reg__
lpm r20, Z+
movw YL, r2
ldi r21, 0x80
bst r20, 6
brtc 10f
ldd r22, Y+7
eor r22, r21
std Y+7, r22
10:
bst r20, 5
brtc 10f
ldd r22, Y+3
eor r22, r21
std Y+3, r22
10:
bst r20, 4
brtc 10f
ldd r22, Y+1
eor r22, r21
std Y+1, r22
10:
andi r20, 0x8f
ld r22, Y
eor r22, r20
st Y, r22
inc r9
mov r16, r9
cpi r16, 24
breq 20f
movw r24, YL
movw r26, r4
rjmp 5b
20:
stack_free_large3 200
pop_range 28, 29
pop r16
pop_range 2, 8
pop_range 2, 9
ret

View File

@ -79,7 +79,7 @@ const uint64_t rc[] PROGMEM = {
};
*/
const static uint8_t rc_comp[] PROGMEM = {
const uint8_t keccak_rc_comp[] PROGMEM = {
0x01, 0x92, 0xda, 0x70,
0x9b, 0x21, 0xf1, 0x59,
0x8a, 0x88, 0x39, 0x2a,
@ -104,96 +104,7 @@ const uint8_t keccak_rotate_codes[5][5] PROGMEM = {
{ ROT_CODE(18), ROT_CODE( 2), ROT_CODE(61), ROT_CODE(56), ROT_CODE(14) }
};
void keccak_theta(uint64_t *a, uint64_t *b);
extern const uint8_t rho_pi_idx_table[25] PROGMEM;
/*
 * Run one round on the state `a`.
 *
 *   a    5x5 array of 64-bit lanes, modified in place
 *   rci  index of this round's entry in the compressed table rc_comp
 *
 * keccak_theta() receives the state together with a local 5x5 scratch
 * array. The C versions of rho/pi and chi that once lived in this function
 * are disabled, so those steps are presumably carried out inside
 * keccak_theta() as well -- TODO confirm against its implementation.
 * Only iota is performed here in C.
 */
static inline
void keccak_round(uint64_t a[5][5], uint8_t rci){
    uint64_t scratch[5][5];
    uint8_t *lane0 = (uint8_t*)a;   /* byte view of a[0][0] */
    uint8_t rc;

    /* theta */
    keccak_theta((uint64_t*)a, (uint64_t*)scratch);
#if DEBUG
    cli_putstr_P(PSTR("\r\nAfter theta:"));
    keccak_dump_state(a);
#endif

    /* rho & pi: no C code here any more */
#if DEBUG & 0
    cli_putstr_P(PSTR("\r\n--- after rho & pi ---"));
    keccak_dump_state(a);
#endif

    /* chi: no C code here any more */
#if DEBUG & 0
    cli_putstr_P(PSTR("\r\nAfter chi:"));
    keccak_dump_state(a);
#endif

    /*
     * iota: expand the one-byte compressed round constant and XOR it into
     * lane 0, byte by byte.
     *   bit 6 set -> flip bit 7 of byte 7
     *   bit 5 set -> flip bit 7 of byte 3
     *   bit 4 set -> flip bit 7 of byte 1
     *   bits 7 and 3..0 go into byte 0 unchanged
     */
    rc = pgm_read_byte(&(rc_comp[rci]));
    if(rc & 0x40){
        lane0[7] ^= 0x80;
    }
    if(rc & 0x20){
        lane0[3] ^= 0x80;
    }
    if(rc & 0x10){
        lane0[1] ^= 0x80;
    }
    lane0[0] ^= rc & 0x8F;
#if DEBUG & 0
    cli_putstr_P(PSTR("\r\nAfter iota:"));
    keccak_dump_state(a);
#endif
}
/*
 * Apply the full permutation to the state `a` in place by calling
 * keccak_round() for round indices 0 through 23, in ascending order.
 * With DEBUG enabled, a banner containing the round index is printed
 * before each round.
 */
void keccak_f1600(uint64_t a[5][5]){
    const uint8_t round_count = 24;
    uint8_t round;

    for(round = 0; round < round_count; ++round){
#if DEBUG
        cli_putstr_P(PSTR("\r\n\r\n--- Round "));
        cli_hexdump(&round, 1);
        cli_putstr_P(PSTR(" ---"));
#endif
        keccak_round(a, round);
    }
}
void keccak_f1600(uint64_t a[5][5]);
void keccak_nextBlock(keccak_ctx_t* ctx, const void* block){
memxor(ctx->a, block, ctx->bs);