/* pi16cipher-asm.S */
/*
    This file is part of the AVR-Crypto-Lib.
    Copyright (C) 2015 Daniel Otte (bg@nerilex.org)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

/* NOTE(review): the name of the first header was lost; <avr/io.h> is
 * presumed (it is what the stack macros would need) -- confirm. */
#include <avr/io.h>
#include "avr-asm-macros.S"

/*
 * Byte offsets inside the cipher context:
 *   ctx_state : 4 * 4 words of 2 bytes (internal state, "cis" in the C text)
 *   ctx_tag   : 4 * 2 words of 2 bytes
 *   ctx_ctr   : 8 byte counter
 */
.struct 0
ctx:
ctx_state:
.struct ctx_state + 4 * 4 * 2
ctx_tag:
.struct ctx_tag + 4 * 2 * 2
ctx_ctr:
.struct ctx_ctr + 8
ctx_end:
ctx_size:

.text

/*
void phi16(
        word_t dest[4],
        const word_t x[4],
        const word_t c[4],
        const uint8_t v[8],
        const uint8_t rot[4])
{
    word_t sum = 0;
    uint8_t i;
    i = 4;
    do {
        --i;
        sum += x[i];
    } while (i);
    i = 4;
    do {
        --i;
        dest[i] = rotl(pgm_read_word(&c[i]) + sum - x[pgm_read_byte(&v[i])], pgm_read_byte(&rot[i]) );
    } while (i);
    sum = 0;
    i = 4;
    do {
        --i;
        sum ^= dest[i];
    } while (i);
    i = 4;
    do {
        --i;
        dest[i] ^= sum;
    } while (i);
}
*/

.global phi16
/*
 r24:r25 - destination
 r22:r23 - input
 r20:r21 - constants
 r18:r19 - drop constants (v)
 r16:r17 - rotation constants

 The three constant tables are read with lpm. The v table holds byte
 offsets into the input (index * 2). On return Y points at the destination.
 This routine saves no registers itself; it overwrites r0, r2-r7 and
 r16-r31. ast16 saves r2-r7, r16, r17, r28 and r29 around its calls.
 */
phi16:
    movw r28, r24               ; Y = destination
    movw r26, r22               ; X = input
    movw r6, r16                ; r6:r7 = rotation table
    /* sum (r24:r25) = x[0] + x[1] + x[2] + x[3] */
    ldi r16, 3
    ld r24, X+
    ld r25, X+
1:
    ld r0, X+
    add r24, r0
    ld r0, X+
    adc r25, r0
    dec r16
    brne 1b
    sbiw r26, 8
    movw r2, r26                ; r2:r3 = start of input
/* --- */
    ldi r16, 4
2:
    movw r30, r20
    lpm r22, Z+                 ; r22:r23 = next word constant
    lpm r23, Z+
    movw r20, r30
    add r22, r24                ; + sum
    adc r23, r25
    movw r30, r18
    lpm r0, Z+                  ; r0 = byte offset of the word to drop
    movw r18, r30
    movw r26, r2
    add r26, r0
    adc r27, r1
    ld r4, X+
    ld r5, X+
    sub r22, r4                 ; - x[v]
    sbc r23, r5
    movw r30, r6
    lpm r0, Z+                  ; r0 = rotation amount
    movw r6, r30
5:
    /* rotate r22:r23 left by one bit; r17 only supplies the wrapped msb */
    mov r17, r23
    lsl r17
    rol r22
    rol r23
    dec r0
    brne 5b
/* --- */
    st Y+, r22
    st Y+, r23
    dec r16
    brne 2b
/* --- */
    sbiw r28, 8
    movw r26, r28
    /* r24:r25 = dest[0] ^ dest[1] ^ dest[2] ^ dest[3] */
    ldi r16, 3
    ld r24, X+
    ld r25, X+
1:
    ld r0, X+
    eor r24, r0
    ld r0, X+
    eor r25, r0
    dec r16
    brne 1b
/* --- */
    /* xor that value into every destination word */
    ldi r16, 4
1:
    ld r0, Y
    eor r0, r24
    st Y+, r0
    ld r0, Y
    eor r0, r25
    st Y+, r0
    dec r16
    brne 1b
    sbiw r28, 8                 ; leave Y at the destination
    ret

/******************************************************************************/
/*
static void ny(
        word_t dest[4],
        const word_t x[4])
{
    phi16(dest, x, ny_const, ny_v_const, ny_rot_const);
}
*/

ny16_const:
    .word 0xD1CC, 0xCAC9, 0xC6C5, 0xC3B8
ny16_v_const:
    .byte 1 * 2, 0 * 2, 3 * 2, 2 * 2
ny16_rot_const:
    .byte 2, 5, 7, 13

/******************************************************************************/
/*
static void mu(
        word_t dest[4],
        const word_t x[4])
{
    phi16(dest, x, mu_const, mu_v_const, mu_rot_const);
}
*/

mu16_const:
    .word 0xF0E8, 0xE4E2, 0xE1D8, 0xD4D2
mu16_v_const:
    .byte 3 * 2, 2 * 2, 1 * 2, 0 * 2
mu16_rot_const:
    .byte 1, 4, 9, 11

/*
 ast16
 r24:r25 - dest
 r22:r23 - x
 r20:r21 - y

 Runs phi16 with the mu16 tables on x into dest, then phi16 with the ny16
 tables on y into an 8 byte stack buffer, and finally adds the two results
 word by word (with permuted indices) into dest.
 e1_16, e2_16 and pi rely on Z still holding dest when this returns.
 */
.global ast16
ast16:
    push_range 2, 7
    push r16
    push r17
    push r28
    push r29
    stack_alloc 8
    ; pointer to stack space is stored in Z
    adiw r30, 1
    push r20                    ; keep y ...
    push r21
    push r30                    ; ... and the temp buffer for the second call
    push r31
mu16:
    ldi r16, lo8(mu16_rot_const)
    ldi r17, hi8(mu16_rot_const)
    ldi r18, lo8(mu16_v_const)
    ldi r19, hi8(mu16_v_const)
    ldi r20, lo8(mu16_const)
    ldi r21, hi8(mu16_const)
    rcall phi16
    pop r25                     ; destination = temp buffer
    pop r24
    pop r23                     ; input = y
    pop r22
    push r28                    ; phi16 left Y at dest; keep it
    push r29
ny16:
    ldi r16, lo8(ny16_rot_const)
    ldi r17, hi8(ny16_rot_const)
    ldi r18, lo8(ny16_v_const)
    ldi r19, hi8(ny16_v_const)
    ldi r20, lo8(ny16_const)
    ldi r21, hi8(ny16_const)
    rcall phi16
    pop r31                     ; Z = dest (mu result), Y = temp (ny result)
    pop r30
    ldd r16, Z + 0              ; keep word 0 of the mu result, it is overwritten first
    ldd r17, Z + 1
    ldd r18, Z + 3 * 2 + 0
    ldd r19, Z + 3 * 2 + 1
    ldd r20, Y + 1 * 2 + 0
    ldd r21, Y + 1 * 2 + 1
    add r18, r20
    adc r19, r21
    std Z + 0 * 2 + 0, r18
    std Z + 0 * 2 + 1, r19
    ldd r18, Z + 2 * 2 + 0
    ldd r19, Z + 2 * 2 + 1
    ldd r20, Y + 0 * 2 + 0
    ldd r21, Y + 0 * 2 + 1
    add r18, r20
    adc r19, r21
    std Z + 3 * 2 + 0, r18
    std Z + 3 * 2 + 1, r19
    ldd r18, Z + 1 * 2 + 0
    ldd r19, Z + 1 * 2 + 1
    ldd r20, Y + 3 * 2 + 0
    ldd r21, Y + 3 * 2 + 1
    add r18, r20
    adc r19, r21
    std Z + 2 * 2 + 0, r18
    std Z + 2 * 2 + 1, r19
    movw r18, r16
    ldd r20, Y + 2 * 2 + 0
    ldd r21, Y + 2 * 2 + 1
    add r18, r20
    adc r19, r21
    std Z + 1 * 2 + 0, r18
    std Z + 1 * 2 + 1, r19
    stack_free 8, reg1 = r26, reg2 = r27
    pop r29
    pop r28
    pop r17
    pop r16
    pop_range 2, 7
    ret

/******************************************************************************/
/*
void e1_16(
        word_t *dest,
        const word_t c[4],
        const word_t *i )
{
    uint8_t n = PI_N - 1;
    {
        word_t t[4];
        memcpy_P(t, c, sizeof(word_t) * 4);
        ast16(dest, t, i);
    }
    do {
        i = &i[4];
        ast16(&dest[4], dest, i);
        dest = &dest[4];
    } while (--n);
}
*/
/*
 r24:r25 - dest
 r22:r23 - c (read with lpm)
 r20:r21 - i
 Z is moved back to the original dest before returning.
 */
.global e1_16
e1_16:
    push_range 8, 10
    movw r8, r20                ; r8:r9 = i
    movw r30, r22
    stack_alloc 8, reg1=r26, reg2=r27
    adiw r26, 1
    movw r22, r26               ; x argument = stack copy of c
    ldi r18, 8
1:
    lpm r0, Z+
    st X+, r0
    dec r18
    brne 1b
/* --- */
    ldi r18, 3
    mov r10, r18                ; r10 = remaining rows
    rcall ast16
1:
    movw r22, r30               ; x = row just written
    adiw r30, 8
    movw r24, r30               ; dest = next row
    movw r26, r8
    adiw r26, 8
    movw r20, r26               ; y = next row of i
    movw r8, r26
    rcall ast16
    dec r10
    brne 1b
    sbiw r30, 3 * 4 * 2
/* --- */
    stack_free 8, reg1=r26, reg2=r27
    pop_range 8, 10
    ret

/******************************************************************************/
/*
void e2_16(
        word_t *dest,
        const word_t c[4],
        const word_t *i )
{
    uint8_t n = PI_N - 1;
    {
        word_t t[4];
        memcpy_P(t, c, sizeof(word_t) * 4);
        ast16(&dest[4 * n], &i[4 * n], t);
    }
    while (n--) {
        ast16(&dest[4 * n], &i[4 * n], &dest[4 * (n + 1)]);
    }
}
*/
/*
 r24:r25 - dest
 r22:r23 - c (read with lpm)
 r20:r21 - i
 Rows are processed from the last one down to the first one, so Z ends up
 at the original dest.
 */
.global e2_16
e2_16:
    push_range 8, 10
    movw r30, r22
    movw r26, r20
    adiw r26, 24
    movw r8, r26                ; r8:r9 = last row of i
    movw r22, r26
    stack_alloc 8, reg1 = r26, reg2 = r27
    adiw r26, 1
    movw r20, r26               ; y argument = stack copy of c
    ldi r18, 8
1:
    lpm r0, Z+
    st X+, r0
    dec r18
    brne 1b
/* --- */
    ldi r18, 3
    mov r10, r18
    adiw r24, 24                ; dest = last row
    rcall ast16
1:
    movw r20, r30               ; y = row just written
    sbiw r30, 8
    movw r24, r30               ; dest = previous row
    movw r26, r8
    sbiw r26, 8
    movw r22, r26               ; x = previous row of i
    movw r8, r26
    rcall ast16
    dec r10
    brne 1b
/* --- */
    stack_free 8, reg1 = r26, reg2 = r27
    pop_range 8, 10
    ret

/******************************************************************************/
/*
void pi(
        word_t *a )
{
    uint8_t r = PI_ROUNDS;
    word_t t[4 * 4];
    const word_t *c = (const word_t *)pi_const;
    do {
        e1_16(t, c, a);
        c = &c[4];
        e2_16(a, c, t);
        c = &c[4];
    } while (--r);
}
*/

PI_CONST:
    .word 0xB4B2, 0xB1AC, 0xAAA9, 0xA6A5
    .word 0xA39C, 0x9A99, 0x9695, 0x938E
    .word 0x8D8B, 0x8778, 0x7472, 0x716C
    .word 0x6A69, 0x6665, 0x635C, 0x5A59
    .word 0x5655, 0x534E, 0x4D4B, 0x473C
    .word 0x3A39, 0x3635, 0x332E, 0x2D2B
    .word 0x271E, 0x1D1B, 0x170F, 0xF0E8
    .word 0xE4E2, 0xE1D8, 0xD4D2, 0xD1CC

/******************************************************************************/
/*
void ctr_trans(
        const PI_CTX *ctx,
        state_t a,
        unsigned long ctr )
{
    uint64_t t;
    int i;
    if ((void *)ctx->cis != (void *)a) {
        memcpy(a, ctx->cis, sizeof(state_t));
    }
    t = ctx->ctr + ctr;
    for (i = 0; i * PI_WORD_SIZE < 64; ++i) {
        a[0][i] ^= (word_t)t;
        t >>= PI_WORD_SIZE;
    }
    pi((word_t*)a);
}
*/
/*
 r24:r25 - ctx
 r22:r23 - a
 r18:r21 - ctr
 This routine has no ret of its own: it runs straight into pi below,
 so pi must stay directly behind it.
 */
.global ctr_trans
ctr_trans:
    push_range 16, 17
    push r28
    push r29
    movw r30, r24
    movw r26, r22
    cp r24, r22
    cpc r25, r23
    breq 2f                     ; a is the context state itself, nothing to copy
    ldi r22, 32
1:
    ld r0, Z+
    st X+, r0
    dec r22
    brne 1b
/* --- */
    sbiw r30, 32
    sbiw r26, 32
2:
    /* widen ctr to 64 bit in r16..r23 */
    movw r16, r18
    movw r18, r20
    clr r20
    clr r21
    movw r22, r20
    adiw r30, ctx_ctr           ; Z points at lsb of ctr
    ldi r28, 16                 ; Y points at r16
    clr r29
    clc
    ldi r25, 8
3:
    ld r0, Y+                   ; byte of ctr, read through the register file
    ld r24, Z+
    adc r24, r0                 ; eor and dec below leave the carry untouched
    ld r0, X
    eor r0, r24
    st X+, r0
    dec r25
    brne 3b
/* --- */
    sbiw r26, 8
    movw r24, r26               ; argument for pi: a
    pop r29
    pop r28
    pop_range 16, 17
;   rjmp pi
/* at the end of pi dest is in Z */
/*
 pi
 r24:r25 - a
 Runs three rounds of e1_16 / e2_16, alternating between a and a 32 byte
 stack buffer and advancing through PI_CONST by one row per call.
 */
.global pi
pi:
    push r6
    push r7
    push r16
    push r28
    push r29
    stack_alloc 32, reg1 = r28, reg2 = r29
    adiw r28, 1
    movw r6, r28                ; r6:r7 = temp state on the stack
    ldi r28, lo8(PI_CONST - 8)  ; Y is advanced by 8 before each use
    ldi r29, hi8(PI_CONST - 8)
    ldi r16, 3
    movw r30, r24
1:
    movw r24, r6                ; dest = buffer not written last
    movw r6, r30
    movw r20, r30               ; input = buffer written last
    adiw r28, 8
    movw r22, r28
    rcall e1_16
    movw r24, r6
    movw r6, r30
    movw r20, r30
    adiw r28, 8
    movw r22, r28
    rcall e2_16
    dec r16
    brne 1b
/* --- */
    stack_free 32, reg1 = r26, reg2 = r27
    pop r29
    pop r28
    pop r16
    pop r7
    pop r6
    ret

/******************************************************************************/
/*
void add_tag(
        PI_CTX *ctx,
        state_t a )
{
    uint8_t i;
    i = 3;
    do {
        ctx->tag[i + 0] += a[0][i];
        ctx->tag[i + 4] += a[2][i];
    } while(i--);
}
*/
/*
 r24:r25 - ctx
 r22:r23 - a
 */
.global add_tag
add_tag:
    push r28
    push r29
    movw r30, r24
    adiw r30, ctx_tag
    movw r28, r22
    ldi r19, 2                  ; two rows of a: 0 and 2
1:
    ldi r18, 4                  ; four words per row
2:
    ld r24, Y+
    ld r25, Y+
    ldd r22, Z + 0
    ldd r23, Z + 1
    add r24, r22
    adc r25, r23
    st Z+, r24
    st Z+, r25
    dec r18
    brne 2b
    adiw r28, 8                 ; skip the row in between
    dec r19
    brne 1b
/* --- */
    pop r29
    pop r28
    ret

/******************************************************************************/
/*
void inject_tag(
        state_t a,
        const word_t x[8] )
{
    int i;
    for (i = 0; i < 4; ++i) {
        a[0][i] ^= x[i];
    }
    for (; i < 8; ++i) {
        a[2][i - 4] ^= x[i];
    }
}
*/
/*
 r24:r25 - a
 r22:r23 - x (16 bytes)
 inject_block and inject_tag are two names for the same code.
 */
.global inject_block
.global inject_tag
inject_block:
inject_tag:
    movw r30, r24
    movw r26, r22
    ldi r23, 2
1:
    ldi r22, 8
2:
    ld r24, Z
    ld r25, X+
    eor r24, r25
    st Z+, r24
    dec r22
    brne 2b
    adiw r30, 8                 ; skip the row in between
    dec r23
    brne 1b
    ret

/******************************************************************************/
/*
void extract_block(
        void *block,
        state_t a)
{
    int i;
    for (i = 0; i < 4; ++i) {
        store_word_little(&((word_t *)block)[i], a[0][i]);
    }
    for (; i < 8; ++i) {
        store_word_little(&((word_t *)block)[i], a[2][i - 4]);
    }
}
*/
/*
 r24:r25 - block (16 bytes)
 r22:r23 - a
 */
.global extract_block
extract_block:
    movw r26, r24
    movw r30, r22
    ldi r23, 2
1:
    ldi r22, 8
2:
    ld r24, Z+
    st X+, r24
    dec r22
    brne 2b
    adiw r30, 8                 ; skip the row in between
    dec r23
    brne 1b
    ret

/******************************************************************************/
/*
void replace_block(
        state_t a,
        const void *block )
{
    word_t x;
    int i;
    for (i = 0; i < 4; ++i) {
        x = load_word_little(&((const word_t *)block)[i]);
        a[0][i] = x;
    }
    for (; i < 8; ++i) {
        x = load_word_little(&((const word_t *)block)[i]);
        a[2][i - 4] = x;
    }
}
*/
/* replace_block is implemented further down as an entry point of
 * replace_last_block. */

/******************************************************************************/
/*
void inject_last_block(
        state_t a,
        const void *block,
        size_t length_Bb )
{
    uint8_t t[PI_RATE_BYTES];
    if (length_b >= PI_RATE_BITS) {
        / * error * /
        printf("ERROR <%s %s %d>\n", __FILE__, __func__, __LINE__);
        return;
    }
    memset(t, 0, sizeof(t));
    memcpy(t, block, (length_b + 7) / 8);
    t[length_b / 8] |= 1 << (length_b & 7);
    inject_block(a, t);
}
*/
/*
 r24:r25 - a
 r22:r23 - block
 r20     - length in bytes (only the low byte is looked at)
 Xors the given bytes into rows 0 and 2 of a and then xors a single 1 into
 the byte that follows them.
 */
.global inject_last_block
inject_last_block:
    movw r30, r24
    movw r26, r22
    ldi r23, 2
1:
    ldi r22, 8
2:
    tst r20
    brne 3f
    /* all input bytes used: add the padding and leave */
    ld r24, Z
    ldi r25, 1
    eor r24, r25
    st Z, r24
return:
    ret
3:
    dec r20
    ld r25, X+
    ld r24, Z
    eor r24, r25
    st Z+, r24
    dec r22
    brne 2b
    adiw r30, 8                 ; skip the row in between
    dec r23
    brne 1b
    /* Only reached for a length of 16 or more. Return instead of running
     * into replace_block with overwritten pointer registers. */
    ret

/******************************************************************************/
/*
void replace_last_block(
        state_t a,
        const void *block,
        size_t length_B )
{
    uint8_t t[PI_RATE_BYTES];
    if (length_B >= PI_RATE_BYTES) {
        / * error * /
        printf("ERROR <%s %s %d>\n", __FILE__, __func__, __LINE__);
        return;
    }
    extract_block(t, a);
    memcpy(t, block, length_B);
    replace_block(a, t);
}
*/
/*
 r24:r25 - a
 r22:r23 - block
 r20     - length in bytes (replace_last_block only)
 replace_block enters with a length that is larger than the 16 bytes the
 loops can copy, so all of rows 0 and 2 are overwritten.
 */
.global replace_last_block
.global replace_block
replace_block:
    ldi r20, 32
replace_last_block:
    movw r30, r24
    movw r26, r22
    ldi r23, 2
1:
    ldi r22, 8
2:
    tst r20
    breq return                 ; shares the ret inside inject_last_block
    dec r20
    ld r24, X+
    st Z+, r24
    dec r22
    brne 2b
    adiw r30, 8                 ; skip the row in between
    dec r23
    brne 1b
    ret

/******************************************************************************/
/*
int PI_INIT(
        PI_CTX *ctx,
        const void *key,
        size_t key_length_B,
        const void *pmn,
        size_t pmn_length_B)
{
    int i;
    uint8_t setup_buf[PI_IS_BYTES];
    if (key_length_B + pmn_length_B + 1 > PI_IS_BYTES) {
        return -1;
    }
    memset(ctx->tag, 0, sizeof(ctx->tag));
    memset(setup_buf, 0, sizeof(setup_buf));
    memcpy(setup_buf, key, key_length_B);
    memcpy(&setup_buf[key_length_B], pmn, pmn_length_B);
    setup_buf[key_length_B + pmn_length_B] = 1;
    for (i = 0; i < 16; ++i) {
        ctx->cis[i / 4][i % 4] = load_word_little(&setup_buf[i * PI_WORD_SIZE / 8]);
    }
    pi((word_t*)ctx->cis);
    ctx->ctr = 0;
    for (i = 0; i * PI_WORD_SIZE < 64; ++i) {
        ctx->ctr |= (uint64_t)ctx->cis[1][i] << (i * PI_WORD_SIZE);
    }
    return 0;
}
*/
/*
 r24:r25 - ctx
 r22:r23 - key
 r20:r21 - key_length_B
 r18:r19 - pmn
 r16:r17 - pmn_length_B
 Returns 0 in r24:r25 on success and 0xffff if key and nonce do not fit.
 */
.global pi16_init
pi16_init:
    movw r26, r20
    add r26, r16
    adc r27, r17
    mov r21, r26                ; r21 = key_len + nonce_len
    sbiw r26, 32
    brmi 1f
return_error:
    ser r24
    ser r25
    ret
1:
    push r16
    push r17                    ; r17 is used as a counter below and is call-saved
    ldi r17, 32 + 16 - 1        ; state_size + tag_size - 1
    sub r17, r21                ; r17 = rest of state to clear
    movw r30, r24               ; Z points at ctx->cis
    movw r26, r22               ; X points at key
    tst r21
    breq 7f                     ; no key and no nonce: the loop below would run 256 times
3:
    tst r20
    brne 5f
    movw r26, r18               ; set X to point at nonce
5:
    dec r20
    ld r0, X+
    st Z+, r0
    dec r21
    brne 3b
/* --- */
7:
    ldi r21, 1
    st Z+, r21                  ; store padding '1'
6:
    st Z+, r1                   ; zero the rest of the state and the tag
    dec r17
    brne 6b
/* --- */
    movw r24, r30
    sbiw r24, 32 + 16
    rcall pi
    movw r26, r30
    adiw r26, 32 + 16           ; X points at ctx->ctr
    adiw r30, 8                 ; Z points at ctx->cis[1][0]
    ldi r24, 8
1:
    ld r0, Z+
    st X+, r0
    dec r24
    brne 1b
    pop r17
    pop r16
    clr r25                     ; r24 is already 0 from the loop above
    ret

/******************************************************************************/
/*
void PI_PROCESS_AD_BLOCK(
        PI_CTX *ctx,
        const void *ad,
        unsigned long ad_num )
{
    state_t a;
    ctr_trans(ctx, a, ad_num);
    inject_block(a, ad);
    pi((word_t*)a);
    add_tag(ctx, a);
}
*/
/*
 r24:r25 - ctx
 r22:r23 - ad
 r18:r21 - ad_num
 */
.global pi16_process_ad_block
pi16_process_ad_block:
    push r28
    push r29
    stack_alloc 32, reg1 = r28, reg2 = r29
    adiw r28, 1                 ; Y points at a (on stack)
    push r24
    push r25
    push r22
    push r23
    movw r22, r28
    rcall ctr_trans
    movw r24, r28
    pop r23
    pop r22
    rcall inject_block
    movw r24, r28
    rcall pi
    movw r22, r28
    pop r25
    pop r24
    rcall add_tag
    stack_free 32, reg1 = r28, reg2 = r29
    pop r29
    pop r28
    ret

/******************************************************************************/
/*
void PI_PROCESS_AD_LAST_BLOCK(
        PI_CTX *ctx,
        const void *ad,
        size_t ad_length_B,
        unsigned long ad_num )
{
    state_t a;
    while (ad_length_B >= PI_AD_BLOCK_LENGTH_BYTES) {
        PI_PROCESS_AD_BLOCK(ctx, ad, ad_num);
        ad_num++;
        ad_length_B -= PI_AD_BLOCK_LENGTH_BYTES;
        ad = &((uint8_t*)ad)[PI_AD_BLOCK_LENGTH_BYTES];
    }

    ctr_trans(ctx, a, ad_num);
    inject_last_block(a, ad, ad_length_B);
    pi((word_t*)a);
    add_tag(ctx, a);
    ctx->ctr += ad_num;
    inject_tag(ctx->cis, ctx->tag);
    pi((word_t*)ctx->cis);
}
*/
/*
 r24:r25 - ctx
 r22:r23 - ad
 r20:r21 - ad_length_B
 r16:r19 - ad_num
 */
.global pi16_process_ad_last_block
pi16_process_ad_last_block:
    push_range 10, 17
    push r28
    push r29
    movw r10, r24               ; ctx
    movw r12, r22               ; ad
    movw r14, r16               ; lo16(ad_num)
    movw r16, r18               ; hi16(ad_num)
    movw r28, r20               ; r28:r29 contains ad_length_B
1:
    sbiw r28, 16
    brmi 6f
    movw r18, r14
    movw r20, r16
    movw r22, r12
    movw r24, r10
    rcall pi16_process_ad_block
    ; increment num_counter
    sec
    adc r14, r1
    adc r15, r1
    adc r16, r1
    adc r17, r1
    ldi r24, 16
    add r12, r24
    adc r13, r1
    rjmp 1b
/* --- */
6:
    adiw r28, 16                ; undo the last subtraction: r28 = bytes left
    stack_alloc 32, reg1 = r30, reg2 = r31
    adiw r30, 1
    push r28
    movw r28, r30               ; Y points at a (on stack)
    movw r18, r14
    movw r20, r16
    movw r22, r28
    movw r24, r10
    rcall ctr_trans
    movw r24, r28
    movw r22, r12
    clr r21
    pop r20
    rcall inject_last_block
    movw r24, r28
    rcall pi
    movw r24, r10
    movw r22, r28
    rcall add_tag
    stack_free 32, reg1 = r30, reg2 = r31
    movw r30, r10
    adiw r30, ctx_ctr
    clr r0
    movw r18, r0                ; clear top 4 bytes to have 64-bit ad_num in register-file
    movw r20, r0
    ldi r28, 14                 ; Y points to r14 (ad_num)
    clr r29
    clc                         ; start the addition with a defined carry, as ctr_trans does
    ldi r25, 8
1:
    ld r24, Y+
    ld r0, Z
    adc r0, r24
    st Z+, r0
    dec r25
    brne 1b
    sbiw r30, 8 + 16            ; Z back at ctx->tag
    movw r22, r30
    movw r24, r10
    rcall inject_tag
    movw r24, r10
    pop r29
    pop r28
    pop_range 10, 17
    rjmp pi                     ; pi returns to our caller

/******************************************************************************/
/*
void PI_PROCESS_SMN(
        PI_CTX *ctx,
        void *c0,
        const void *smn)
{
    ctx->ctr++;
    ctr_trans(ctx, ctx->cis, 0);
    inject_block(ctx->cis, smn);
    if (c0) {
        extract_block(c0, ctx->cis);
    }
    pi((word_t*)ctx->cis);
    add_tag(ctx, ctx->cis);
}
*/
/*
 r24:r25 - ctx
 r22:r23 - output (c0 when encrypting, smn when decrypting), may be 0
 r20:r21 - input  (smn when encrypting, c0 when decrypting)
 The T flag selects the mode: clear for encryption, set for decryption.
 In decryption mode the input block additionally replaces the state block.
 */
.global pi16_encrypt_smn
pi16_encrypt_smn:
    clt
pi16_process_smn:
    push_range 12, 17
    movw r12, r24               ; ctx
    movw r14, r22               ; c0
    movw r16, r20               ; smn
    movw r26, r24
    adiw r26, ctx_ctr
    ldi r18, 8
    sec                         ; ctx->ctr++ as an add of carry only
1:
    ld r0, X
    adc r0, r1
    st X+, r0
    dec r18
    brne 1b
    movw r22, r24               ; a = ctx->cis
    clr r0
    movw r20, r0                ; ctr argument = 0
    movw r18, r0
    rcall ctr_trans
    movw r24, r12
    movw r22, r16
    rcall inject_block
    cp r14, r1
    cpc r15, r1
    breq 4f                     ; no output buffer given
    movw r24, r14
    movw r22, r12
    rcall extract_block
4:
    brtc 5f
    movw r24, r12
    movw r22, r16
    rcall replace_block
5:
    movw r24, r12
    rcall pi
    movw r22, r12
    movw r24, r12
    pop_range 12, 17
    rjmp add_tag                ; add_tag returns to our caller

/******************************************************************************/
/*
void PI_DECRYPT_SMN(
        PI_CTX *ctx,
        void *smn,
        const void *c0)
{
    ctx->ctr++;
    ctr_trans(ctx, ctx->cis, 0);
    inject_block(ctx->cis, c0);
    if (smn) {
        extract_block(smn, ctx->cis);
    }
    replace_block(ctx->cis, c0);
    pi((word_t*)ctx->cis);
    add_tag(ctx, ctx->cis);
}
*/

.global pi16_decrypt_smn
pi16_decrypt_smn:
    set
    rjmp pi16_process_smn

/******************************************************************************/
/*
void PI_EXTRACT_TAG(
        PI_CTX *ctx,
        void *dest )
{
    uint8_t buf[8 * PI_WORD_SIZE / 8];
    int i;
    for (i = 0; i < 8; ++i) {
        store_word_little(&buf[i * PI_WORD_SIZE / 8], ctx->tag[i]);
    }
    memcpy(dest, buf, PI_TAG_BYTES);
}
*/
/*
 r24:r25 - ctx
 r22:r23 - dest (16 bytes)
 */
.global pi16_extract_tag
pi16_extract_tag:
    movw r30, r24
    movw r26, r22
    adiw r30, ctx_tag
    ldi r24, 16
1:
    ld r0, Z+
    st X+, r0
    dec r24
    brne 1b
    ret

/******************************************************************************/
/*
void PI_ENCRYPT_BLOCK(
        PI_CTX *ctx,
        void *dest,
        const void *src,
        unsigned long num )
{
    state_t a;
    ctr_trans(ctx, a, num);
    inject_block(a, src);
    if (dest) {
        extract_block(dest, a);
    }
    pi((word_t*)a);
    add_tag(ctx, a);
}
*/
/*
 r24:r25 - ctx
 r22:r23 - dest, may be 0
 r20:r21 - src
 r16:r19 - num
 The T flag selects the mode: clear for encryption, set for decryption.
 */
.global pi16_encrypt_block
pi16_encrypt_block:
    clt
pi16_process_block:
    push_range 8, 17
    push r28
    push r29
    stack_alloc 32, reg1 = r28, reg2 = r29
    adiw r28, 1                 ; Y points at a (on stack)
    movw r8, r24                ; ctx
    movw r10, r22               ; dest
    movw r12, r20               ; src
    movw r22, r28
    movw r20, r18               ; move num from r16..r19 to r18..r21
    movw r18, r16
    rcall ctr_trans
    movw r24, r30               ; pi left Z at a
    movw r22, r12
    rcall inject_block
    cp r10, r1
    cpc r11, r1
    breq 4f                     ; no output buffer given
    movw r24, r10
    movw r22, r28
    rcall extract_block
4:
    brtc 5f
    movw r24, r28
    movw r22, r12
    rcall replace_block
5:
    movw r24, r28
    rcall pi
    movw r22, r28
    movw r24, r8
    rcall add_tag
    stack_free 32, reg1 = r30, reg2 = r31
    pop r29
    pop r28
    pop_range 8, 17
    ret

/******************************************************************************/
/*
void PI_DECRYPT_BLOCK(
        PI_CTX *ctx,
        void *dest,
        const void *src,
        unsigned long num )
{
    state_t a;
    ctr_trans(ctx, a, num);
    inject_block(a, src);
    if (dest) {
        extract_block(dest, a);
    }
    replace_block(a, src);
    pi((word_t*)a);
    add_tag(ctx, a);
}
*/

.global pi16_decrypt_block
pi16_decrypt_block:
    set
    rjmp pi16_process_block

/******************************************************************************/
/*
void PI_ENCRYPT_LAST_BLOCK(
        PI_CTX *ctx,
        void *dest,
        const void *src,
        size_t length_B,
        unsigned long num )
{
    state_t a;
    while (length_B >= PI_PT_BLOCK_LENGTH_BYTES) {
        PI_ENCRYPT_BLOCK(ctx, dest, src, num);
        num++;
        length_B -= PI_PT_BLOCK_LENGTH_BYTES;
        src = &((uint8_t*)src)[PI_PT_BLOCK_LENGTH_BYTES];
        if (dest) {
            dest = &((uint8_t*)dest)[PI_CT_BLOCK_LENGTH_BYTES];
        }
    }
    ctr_trans(ctx, a, num);
    inject_last_block(a, src, length_B);
    if (dest) {
        uint8_t tmp[PI_PT_BLOCK_LENGTH_BYTES];
        extract_block(tmp, a);
        memcpy(dest, tmp, length_B);
    }
    pi((word_t*)a);
    add_tag(ctx, a);
}
*/
/*
 r24:r25 - ctx
 r22:r23 - dest, may be 0
 r20:r21 - src
 r18:r19 - length_B
 r14:r17 - num
 The T flag selects the mode: clear for encryption, set for decryption.
 Both modes first work through all complete 16 byte blocks.
 */
.global pi16_encrypt_last_block
pi16_encrypt_last_block:
    clt
pi16_process_last_block:
    push r28
    push r29
    /* r16:r17 are call-saved and get overwritten when num is staged for
     * the per-block call below, so they belong to the saved range. */
    push_range 4, 17
    movw r4, r24                ; ctx
    movw r6, r22                ; dest
    movw r8, r20                ; src
    movw r10, r18               ; len
    movw r12, r14               ; lo16(num)
    movw r14, r16               ; hi16(num)
    movw r28, r18
1:
    sbiw r28, 16
    brmi 4f
    movw r24, r4
    movw r22, r6
    movw r20, r8
    movw r18, r14
    movw r16, r12
    brts 2f
    rcall pi16_encrypt_block
    rjmp 3f
2:
    rcall pi16_decrypt_block
3:
    sec                         ; num++
    adc r12, r1
    adc r13, r1
    adc r14, r1
    adc r15, r1
    ldi r24, 16
    add r8, r24                 ; src += 16
    adc r9, r1
    cp r6, r1
    cpc r7, r1
    breq 1b                     ; no dest given, keep it 0
    add r6, r24                 ; dest += 16
    adc r7, r1
    rjmp 1b
4:
    /* 32 bytes for a, followed by 16 bytes for the extracted block */
    stack_alloc 32 + 16, reg1 = r30, reg2 = r31
    adiw r28, 16                ; undo the last subtraction
    movw r10, r28               ; r10:r11 = bytes left
    adiw r30, 1
    movw r28, r30               ; Y points at a (on stack)
    movw r24, r4
    movw r22, r28
    movw r20, r14
    movw r18, r12
    rcall ctr_trans
    movw r24, r28
    movw r22, r8
    movw r20, r10
    rcall inject_last_block
    cp r6, r1
    cpc r7, r1
    breq 6f                     ; no output buffer given
    tst r10
    breq 6f                     ; nothing to output
    movw r24, r28
    adiw r24, 32
    movw r22, r28
    rcall extract_block
    movw r30, r28
    adiw r30, 32
    movw r26, r6
    mov r24, r10
3:
    ld r0, Z+
    st X+, r0
    dec r24
    brne 3b
6:
    brtc 7f
    movw r24, r28
    movw r22, r8
    movw r20, r10
    rcall replace_last_block
7:
    movw r24, r28
    rcall pi
    movw r24, r4
    movw r22, r28
    rcall add_tag
    stack_free 32 + 16
    pop_range 4, 17
    pop r29
    pop r28
    ret

/******************************************************************************/
/*
void PI_DECRYPT_LAST_BLOCK(
        PI_CTX *ctx,
        void *dest,
        const void *src,
        size_t length_B,
        unsigned long num )
{
    state_t a;
    ctr_trans(ctx, a, num);
    inject_last_block(a, src, length_B);
    if (dest) {
        uint8_t tmp[PI_PT_BLOCK_LENGTH_BYTES];
        extract_block(tmp, a);
        memcpy(dest, tmp, length_B);
    }
    replace_last_block(a, src, length_B);
    pi((word_t*)a);
    add_tag(ctx, a);
}
*/

.global pi16_decrypt_last_block
pi16_decrypt_last_block:
    set
    rjmp pi16_process_last_block