diff --git a/aes_dec-asm_faster.S b/aes_dec-asm_faster.S index 08d3b97..b400623 100644 --- a/aes_dec-asm_faster.S +++ b/aes_dec-asm_faster.S @@ -10,7 +10,7 @@ This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License @@ -92,13 +92,11 @@ aes_decrypt_core: add r26, r20 adc r27, r1 clt -; ldi CTR, 2 .irp param, ST00, ST01, ST02, ST03, ST10, ST11, ST12, ST13, ST20, ST21, ST22, ST23, ST30, ST31, ST32, ST33 ld \param, Z+ .endr ldi xREDUCER, 0x1b /* load reducer */ - ldi r31, hi8(aes_invsbox) .irp param, ST33, ST32, ST31, ST30, ST23, ST22, ST21, ST20, ST13, ST12, ST11, ST10, ST03, ST02, ST01, ST00 @@ -110,6 +108,7 @@ aes_decrypt_core: brne 2f set 2: + ldi r31, hi8(aes_invsbox) /* substitute and invShift */ .irp param, ST00, ST10, ST20, ST30 mov r30, \param @@ -384,7 +383,6 @@ exit: 3: eor T1, T3 eor ST30, T1 - ldi r31, hi8(aes_invsbox) rjmp 1b .balign 256 diff --git a/avr-makefile.inc b/avr-makefile.inc index e65e585..77cae4d 100644 --- a/avr-makefile.inc +++ b/avr-makefile.inc @@ -1,7 +1,7 @@ MCU_TARGET = atmega644 OPTIMIZE = -Os -PROGRAMMER = stk500 # avr911 +PROGRAMMER = stk500v2 # avr911 DEFS = -D$(call uc, $(MCU_TARGET)) FLASHCMD = avrdude -p $(MCU_TARGET) -P /dev/ttyUSB0 -c $(PROGRAMMER) -U flash:w:# no space at the end #FLASHCMD = avrdude -p $(MCU_TARGET) -c usbasp -U flash:w:# no space at the end diff --git a/mkfiles/serpent-bitslice.mk b/mkfiles/serpent-bitslice.mk index bde2ce4..efd8bf6 100644 --- a/mkfiles/serpent-bitslice.mk +++ b/mkfiles/serpent-bitslice.mk @@ -5,7 +5,7 @@ ALGO_NAME := SERPENT_BITSLICE BLOCK_CIPHERS += $(ALGO_NAME) -$(ALGO_NAME)_OBJ := serpent-asm.o serpent-sboxes-bitslice.o memxor.o +$(ALGO_NAME)_OBJ := serpent-asm.o 
serpent-sboxes-bitslice-asm.o memxor.o $(ALGO_NAME)_TEST_BIN := main-serpent-test.o debug.o uart.o serial-tools.o \ nessie_bc_test.o nessie_common.o cli.o performance_test.o $(ALGO_NAME)_NESSIE_TEST := "nessie" diff --git a/mkfiles/serpent_asm_bitslice.mk b/mkfiles/serpent_asm_bitslice.mk new file mode 100644 index 0000000..ddfc418 --- /dev/null +++ b/mkfiles/serpent_asm_bitslice.mk @@ -0,0 +1,13 @@ +# Makefile for serpent (assembler bitslice s-boxes) +ALGO_NAME := SERPENT_ASM_BITSLICE + +# comment out the following line for removal of serpent from the build process +BLOCK_CIPHERS += $(ALGO_NAME) + + +$(ALGO_NAME)_OBJ := serpent-sboxes-bitslice-asm.o serpent-asm.o memxor.o +$(ALGO_NAME)_TEST_BIN := main-serpent-test.o debug.o uart.o serial-tools.o \ + nessie_bc_test.o nessie_common.o cli.o performance_test.o +$(ALGO_NAME)_NESSIE_TEST := "nessie" +$(ALGO_NAME)_PERFORMANCE_TEST := "performance" + diff --git a/serpent-asm.S b/serpent-asm.S index e920a67..50e2082 100644 --- a/serpent-asm.S +++ b/serpent-asm.S @@ -706,7 +706,7 @@ serpent_dec: movw r24, r14 ldi r22, 31 - rcall inv_sbox128 + call inv_sbox128 movw r24, r14 ldi r20, 16 diff --git a/serpent-sboxes-bitslice-asm.S b/serpent-sboxes-bitslice-asm.S new file mode 100644 index 0000000..be66a11 --- /dev/null +++ b/serpent-sboxes-bitslice-asm.S @@ -0,0 +1,854 @@ +/* serpent-sboxes-bitslice-asm.S */ +/* + This file is part of the Crypto-avr-lib/microcrypt-lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +/* serpent-sboxes-bitslice-asm.S + * a bitsliced implementation of the serpent sboxes + * author: Daniel Otte + * email: daniel.otte@rub.de + * license: GPLv3 + */ + +#include "avr-asm-macros.S" +IN0 = 22 +IN1 = 23 +IN2 = 24 +IN3 = 25 +OUT0 = 18 +OUT1 = 19 +OUT2 = 20 +OUT3 = 21 +T00 = 2 /* NOTE(review): reassigned by the next line, so T00 ends up as r3 and r2 is not used by any alias -- likely a typo */ +T00 = 3 +T01 = 4 +T02 = 5 +T03 = 6 +T04 = 7 +T05 = 8 +T06 = 9 +T07 = 10 +T08 = 11 +T09 = 12 +T10 = 13 +T11 = 14 +T12 = 15 +T13 = 16 +T14 = 17 +T15 = 26 +T16 = 27 +T17 = 0 + +/* S0: 3 8 15 1 10 6 5 11 14 13 4 2 7 0 9 12 */ + +/* depth = 5,7,4,2, Total gates=18 */ +sb0: + mov T00, IN1 + eor T00, IN2 + mov T01, IN0 + or T01, IN3 + mov T02, IN0 + eor T02, IN1 + mov OUT3, T01 + eor OUT3, T00 + mov T04, IN2 + or T04, OUT3 + mov T05, IN0 + eor T05, IN3 + mov T06, IN1 + or T06, IN2 + mov T07, IN3 + and T07, T04 + mov T08, T02 + and T08, T06 + mov OUT2, T08 + eor OUT2, T07 + mov T10, T08 + and T10, OUT2 + mov T11, IN2 + eor T11, IN3 + mov T12, T06 + eor T12, T10 + mov T13, IN1 + and T13, T05 + mov T14, T05 + eor T14, T12 + mov OUT0, T14 + com OUT0 + mov T16, OUT0 + eor T16, T13 + mov OUT1, T11 + eor OUT1, T16 + ret + + +/* InvS0: 13 3 11 0 10 6 5 12 1 14 4 7 15 9 8 2 */ + +/* depth = 8,4,3,6, Total gates=19 */ +sb0_inv: + mov T00, IN2 + eor T00, IN3 + mov T01, IN0 + or T01, IN1 + mov T02, IN1 + or T02, IN2 + mov T03, IN2 + and T03, T00 + mov T04, T01 + eor T04, T00 + mov T05, IN0 + or T05, T03 + mov OUT2, T04 + com OUT2 + mov T07, IN1 + eor T07, IN3 + mov T08, T02 + and T08, T07 + mov T09, IN3 + or T09, OUT2 + mov OUT1, T08 + eor OUT1, T05 + mov T11, IN0 + or T11, T04 + mov T12, OUT1 + eor T12, T11 + mov T13, T02 + eor T13, T09 + mov T14, IN0 + eor T14, IN2 + mov OUT3, T13 + eor OUT3, T12 + mov T16, T04 + and T16, T12 + mov T17, T13 + or T17, T16 + mov OUT0, T14 + eor OUT0, T17 + ret + + +/* S1: 15 12 2 7 9 0 5 10 1 11 14 8 6 13 3 4 */ + +/* depth = 10,7,3,5, Total gates=18 */ +sb1: + mov 
T00, IN0 + or T00, IN3 + mov T01, IN2 + eor T01, IN3 + mov T02, IN1 + com T02 + mov T03, IN0 + eor T03, IN2 + mov T04, IN0 + or T04, T02 + mov T05, IN3 + and T05, T03 + mov T06, T00 + and T06, T01 + mov T07, IN1 + or T07, T05 + mov OUT2, T01 + eor OUT2, T04 + mov T09, T06 + eor T09, T07 + mov T10, T00 + eor T10, T09 + mov T11, OUT2 + eor T11, T10 + mov T12, IN1 + and T12, IN3 + mov OUT3, T09 + com OUT3 + mov OUT1, T12 + eor OUT1, T11 + mov T15, T09 + or T15, OUT1 + mov T16, T04 + and T16, T15 + mov OUT0, IN2 + eor OUT0, T16 + ret + + +/* InvS1: 5 8 2 14 15 6 12 3 11 4 7 9 1 13 10 0 */ + +/* depth = 7,4,5,3, Total gates=18 */ +sb1_inv: + mov T00, IN0 + eor T00, IN1 + mov T01, IN1 + or T01, IN3 + mov T02, IN0 + and T02, IN2 + mov T03, IN2 + eor T03, T01 + mov T04, IN0 + or T04, T03 + mov T05, T00 + and T05, T04 + mov T06, IN3 + or T06, T02 + mov T07, IN1 + eor T07, T05 + mov T08, T06 + eor T08, T05 + mov T09, T03 + or T09, T02 + mov T10, IN3 + and T10, T07 + mov OUT2, T08 + com OUT2 + mov OUT1, T09 + eor OUT1, T10 + mov T13, IN0 + or T13, OUT2 + mov T14, T05 + eor T14, OUT1 + mov OUT3, T00 + eor OUT3, T03 + mov T16, IN2 + eor T16, T14 + mov OUT0, T13 + eor OUT0, T16 + ret + +/* S2: 8 6 7 9 3 12 10 15 13 1 14 4 0 11 5 2 */ + +/* depth = 3,8,11,7, Total gates=16 */ +sb2: + mov T00, IN0 + or T00, IN2 + mov T01, IN0 + eor T01, IN1 + mov T02, IN3 + eor T02, T00 + mov OUT0, T01 + eor OUT0, T02 + mov T04, IN2 + eor T04, OUT0 + mov T05, IN1 + eor T05, T04 + mov T06, IN1 + or T06, T04 + mov T07, T00 + and T07, T05 + mov T08, T02 + eor T08, T06 + mov T09, T01 + or T09, T08 + mov OUT1, T09 + eor OUT1, T07 + mov T11, IN0 + or T11, IN3 + mov T12, T08 + eor T12, OUT1 + mov T13, IN1 + eor T13, T12 + mov OUT3, T08 + com OUT3 + mov OUT2, T11 + eor OUT2, T13 + ret + +/* InvS2: 12 9 15 4 11 14 1 2 0 3 6 13 5 8 10 7 */ + +/* depth = 3,6,8,3, Total gates=18 */ +sb2_inv: + mov T00, IN0 + eor T00, IN3 + mov T01, IN2 + eor T01, IN3 + mov T02, IN0 + and T02, IN2 + mov T03, IN1 + or T03, T01 
+ mov OUT0, T00 + eor OUT0, T03 + mov T05, IN0 + or T05, IN2 + mov T06, IN3 + or T06, OUT0 + mov T07, IN3 + com T07 + mov T08, IN1 + and T08, T05 + mov T09, T07 + or T09, T02 + mov T10, IN1 + and T10, T06 + mov T11, T05 + and T11, T01 + mov OUT3, T08 + eor OUT3, T09 + mov OUT1, T11 + eor OUT1, T10 + mov T14, IN2 + and T14, OUT3 + mov T15, OUT0 + eor T15, OUT1 + mov T16, T09 + eor T16, T14 + mov OUT2, T15 + eor OUT2, T16 + ret + +/* S3: 0 15 11 8 12 9 6 3 13 1 2 4 10 7 5 14 */ + +/* depth = 8,3,5,5, Total gates=18 */ +sb3: + mov T00, IN0 + eor T00, IN2 + mov T01, IN0 + or T01, IN3 + mov T02, IN0 + and T02, IN3 + mov T03, T00 + and T03, T01 + mov T04, IN1 + or T04, T02 + mov T05, IN0 + and T05, IN1 + mov T06, IN3 + eor T06, T03 + mov T07, IN2 + or T07, T05 + mov T08, IN1 + eor T08, T06 + mov T09, IN3 + and T09, T04 + mov T10, T01 + eor T10, T09 + mov OUT3, T07 + eor OUT3, T08 + mov T12, IN3 + or T12, OUT3 + mov T13, IN0 + or T13, T06 + mov T14, IN1 + and T14, T12 + mov OUT2, T07 + eor OUT2, T10 + mov OUT0, T13 + eor OUT0, T14 + mov OUT1, T04 + eor OUT1, T03 + ret + +/* InvS3: 0 9 10 7 11 14 6 13 3 5 12 2 4 8 15 1 */ + +/* depth = 3,6,4,4, Total gates=17 */ +sb3_inv: + mov T00, IN2 + or T00, IN3 + mov T01, IN0 + or T01, IN3 + mov T02, IN2 + eor T02, T01 + mov T03, IN1 + eor T03, T01 + mov T04, IN0 + eor T04, IN3 + mov T05, T03 + and T05, T02 + mov T06, IN1 + and T06, T00 + mov OUT2, T04 + eor OUT2, T05 + mov T08, IN0 + eor T08, T02 + mov OUT0, T06 + eor OUT0, T02 + mov T10, OUT0 + or T10, T04 + mov T11, T08 + and T11, T10 + mov T12, IN0 + and T12, OUT2 + mov T13, T00 + eor T13, T04 + mov OUT1, IN1 + eor OUT1, T11 + mov T15, IN1 + or T15, T12 + mov OUT3, T13 + eor OUT3, T15 + ret + +/* S4: 1 15 8 3 12 0 11 6 2 5 4 10 9 14 7 13 */ + +/* depth = 6,7,5,3, Total gates=19 */ +sb4: + mov T00, IN0 + or T00, IN1 + mov T01, IN1 + or T01, IN2 + mov T02, IN0 + eor T02, T01 + mov T03, IN1 + eor T03, IN3 + mov T04, IN3 + or T04, T02 + mov T05, IN3 + and T05, T00 + mov OUT3, T02 + 
eor OUT3, T05 + mov T07, OUT3 + and T07, T03 + mov T08, T03 + and T08, T04 + mov T09, IN2 + eor T09, T05 + mov T10, IN1 + and T10, IN2 + mov T11, T03 + eor T11, T07 + mov T12, T10 + or T12, T02 + mov T13, T09 + eor T13, T08 + mov T14, IN0 + and T14, T04 + mov T15, T10 + or T15, T11 + mov OUT2, T12 + eor OUT2, T07 + mov OUT1, T14 + eor OUT1, T15 + mov OUT0, T13 + com OUT0 + ret + +/* InvS4: 5 0 8 3 10 9 7 14 2 12 11 6 4 15 13 1 */ + +/* depth = 6,4,7,3, Total gates=17 */ +sb4_inv: + mov T00, IN1 + or T00, IN3 + mov T01, IN2 + or T01, IN3 + mov T02, IN0 + and T02, T00 + mov T03, IN1 + eor T03, T01 + mov T04, IN2 + eor T04, IN3 + mov T05, T02 + com T05 + mov T06, IN0 + and T06, T03 + mov OUT1, T04 + eor OUT1, T06 + mov T08, OUT1 + or T08, T05 + mov T09, IN0 + eor T09, T06 + mov T10, T00 + eor T10, T08 + mov T11, IN3 + eor T11, T03 + mov T12, IN2 + or T12, T09 + mov OUT3, T02 + eor OUT3, T11 + mov T14, IN0 + eor T14, T03 + mov OUT2, T10 + eor OUT2, T12 + mov OUT0, T14 + eor OUT0, T08 + ret + +/* S5: 15 5 2 11 4 10 9 12 0 3 14 8 13 6 7 1 */ + +/* depth = 4,6,8,6, Total gates=17 */ +sb5: + mov T00, IN1 + eor T00, IN3 + mov T01, IN1 + or T01, IN3 + mov T02, IN0 + and T02, T00 + mov T03, IN2 + eor T03, T01 + mov T04, T02 + eor T04, T03 + mov OUT0, T04 + com OUT0 + mov T06, IN0 + eor T06, T00 + mov T07, IN3 + or T07, OUT0 + mov T08, IN1 + or T08, T04 + mov T09, IN3 + eor T09, T07 + mov T10, IN1 + or T10, T06 + mov T11, T02 + or T11, OUT0 + mov T12, T06 + or T12, T09 + mov T13, T00 + eor T13, T10 + mov OUT2, T08 + eor OUT2, T12 + mov OUT1, T06 + eor OUT1, T07 + mov OUT3, T11 + eor OUT3, T13 + ret + +/* InvS5: 8 15 2 9 4 1 13 14 11 6 5 3 7 12 10 0 */ + +/* depth = 4,6,9,7, Total gates=17 */ +sb5_inv: + mov T00, IN0 + and T00, IN3 + mov T01, IN2 + eor T01, T00 + mov T02, IN0 + eor T02, IN3 + mov T03, IN1 + and T03, T01 + mov T04, IN0 + and T04, IN2 + mov OUT0, T02 + eor OUT0, T03 + mov T06, IN0 + and T06, OUT0 + mov T07, T00 + eor T07, OUT0 + mov T08, IN1 + or T08, T04 + mov 
T09, IN1 + com T09 + mov OUT1, T07 + eor OUT1, T08 + mov T11, T09 + or T11, T06 + mov T12, OUT0 + or T12, OUT1 + mov OUT3, T01 + eor OUT3, T11 + mov T14, T01 + eor T14, T12 + mov T15, IN1 + eor T15, IN3 + mov OUT2, T15 + eor OUT2, T14 + ret + +/* S6: 7 2 12 5 8 4 6 11 14 9 1 15 13 3 10 0 */ + +/* depth = 8,3,6,3, Total gates=19 */ +sb6: + mov T00, IN0 + and T00, IN3 + mov T01, IN1 + eor T01, IN2 + mov T02, IN0 + eor T02, IN3 + mov T03, T00 + eor T03, T01 + mov T04, IN1 + or T04, IN2 + mov OUT1, T03 + com OUT1 + mov T06, T02 + and T06, T04 + mov T07, IN1 + and T07, OUT1 + mov T08, IN0 + or T08, IN2 + mov T09, T06 + eor T09, T07 + mov T10, IN1 + or T10, IN3 + mov T11, IN2 + eor T11, T10 + mov T12, T08 + eor T12, T09 + mov OUT2, T12 + com OUT2 + mov T14, OUT1 + and T14, T02 + mov OUT3, T11 + eor OUT3, T06 + mov T16, IN0 + eor T16, IN1 + mov T17, OUT2 + eor T17, T14 + mov OUT0, T16 + eor OUT0, T17 + ret + +/* InvS6: 15 10 1 13 5 3 6 0 4 9 14 7 2 12 8 11 */ + +/* depth = 5,3,8,6, Total gates=19 */ +sb6_inv: + mov T00, IN0 + eor T00, IN2 + mov T01, IN2 + com T01 + mov T02, IN1 + and T02, T00 + mov T03, IN1 + or T03, T01 + mov T04, IN3 + or T04, T02 + mov T05, IN1 + eor T05, IN3 + mov T06, IN0 + and T06, T03 + mov T07, IN0 + or T07, T01 + mov T08, T06 + eor T08, T04 + mov OUT1, T05 + eor OUT1, T07 + mov OUT0, T08 + com OUT0 + mov T11, IN1 + and T11, OUT0 + mov T12, T00 + and T12, T04 + mov T13, T00 + eor T13, T11 + mov T14, T06 + eor T14, T12 + mov T15, IN3 + or T15, T01 + mov T16, IN0 + eor T16, OUT1 + mov OUT3, T16 + eor OUT3, T14 + mov OUT2, T15 + eor OUT2, T13 + ret + +/* S7: 1 13 15 0 14 8 2 11 7 4 12 10 9 3 5 6 */ + +/* depth = 10,7,10,4, Total gates=19 */ +sb7: + mov T00, IN0 + and T00, IN2 + mov T01, IN3 + com T01 + mov T02, IN0 + and T02, T01 + mov T03, IN1 + or T03, T00 + mov T04, IN0 + and T04, IN1 + mov T05, IN2 + eor T05, T03 + mov OUT3, T02 + eor OUT3, T05 + mov T07, IN2 + or T07, OUT3 + mov T08, IN3 + or T08, T04 + mov T09, IN0 + eor T09, T07 + mov T10, T03 
+ and T10, OUT3 + mov OUT1, T08 + eor OUT1, T09 + mov T12, IN1 + eor T12, OUT1 + mov T13, T00 + eor T13, OUT1 + mov T14, IN2 + eor T14, T04 + mov T15, T10 + or T15, T12 + mov T16, T01 + or T16, T13 + mov OUT0, T14 + eor OUT0, T16 + mov OUT2, IN0 + eor OUT2, T15 + ret + +/* InvS7: 3 0 6 13 9 14 15 8 5 12 11 7 10 1 4 2 */ + +/* depth = 9,7,3,3, Total gates=18 */ +sb7_inv: + mov T00, IN0 + and T00, IN1 + mov T01, IN0 + or T01, IN1 + mov T02, IN2 + or T02, T00 + mov T03, IN3 + and T03, T01 + mov OUT3, T02 + eor OUT3, T03 + mov T05, IN1 + eor T05, T03 + mov T06, IN3 + eor T06, OUT3 + mov T07, T06 + com T07 + mov T08, T05 + or T08, T07 + mov T09, IN1 + eor T09, IN3 + mov T10, IN0 + or T10, IN3 + mov OUT1, IN0 + eor OUT1, T08 + mov T12, IN2 + eor T12, T05 + mov T13, IN2 + and T13, T10 + mov T14, IN3 + or T14, OUT1 + mov T15, T00 + or T15, T09 + mov OUT0, T12 + eor OUT0, T14 + mov OUT2, T13 + eor OUT2, T15 + ret + +sf_tab: +.word sb0, sb1, sb2, sb3 +.word sb4, sb5, sb6, sb7 + +sinvf_tab: +.word sb0_inv, sb1_inv, sb2_inv, sb3_inv +.word sb4_inv, sb5_inv, sb6_inv, sb7_inv + +/* +.byte pm_lo8(sb0), pm_hi8(sb0) +.byte pm_lo8(sb1), pm_hi8(sb1) +.byte pm_lo8(sb2), pm_hi8(sb2) +.byte pm_lo8(sb3), pm_hi8(sb3) +.byte pm_lo8(sb4), pm_hi8(sb4) +.byte pm_lo8(sb5), pm_hi8(sb5) +.byte pm_lo8(sb6), pm_hi8(sb6) +.byte pm_lo8(sb7), pm_hi8(sb7) + + +sinvf_tab: +.byte pm_lo8(sb0_inv), pm_hi8(sb0_inv) +.byte pm_lo8(sb1_inv), pm_hi8(sb1_inv) +.byte pm_lo8(sb2_inv), pm_hi8(sb2_inv) +.byte pm_lo8(sb3_inv), pm_hi8(sb3_inv) +.byte pm_lo8(sb4_inv), pm_hi8(sb4_inv) +.byte pm_lo8(sb5_inv), pm_hi8(sb5_inv) +.byte pm_lo8(sb6_inv), pm_hi8(sb6_inv) +.byte pm_lo8(sb7_inv), pm_hi8(sb7_inv) +*/ +/* +void sbox128(void * w, uint8_t box){ + uint8_t i, buffer[16]; + box &= 0x7; + + sb_fpt fp; + fp = (sb_fpt)pgm_read_word(&(sf_tab[box])); + for(i=0; i<4; ++i){ + fp(buffer+i, (uint8_t*)w+i); + } + memcpy(w, buffer, 16); +} +*/ +.global sbox128 +sbox128: + ldi r30, lo8(sf_tab) + ldi r31, hi8(sf_tab) +1: +; clr r1 
+ andi r22, 0x07 /* box &= 7 (eight entries per table) */ + lsl r22 /* r22 = 2*box: byte offset of a .word entry */ + add r30, r22 + adc r31, r1 /* Z += offset; assumes r1 == 0 (avr-gcc zero register) */ + lpm r26, Z+ + lpm r27, Z /* r27:r26 = table entry read from program memory */ + lsr r27 + ror r26 /* halve it: presumably byte address -> word address for icall -- confirm */ + push r28 + push r29 + movw r30, r26 /* Z = s-box routine, target of the icalls below */ + movw r28, r24 /* Y = r25:r24, presumably the w pointer argument */ + push_range 2, 17 /* macro from avr-asm-macros.S; presumably saves r2..r17 -- confirm */ + ldd IN0, Y+0 /* slice 0: bytes 0, 4, 8, 12 */ + ldd IN1, Y+4 + ldd IN2, Y+8 + ldd IN3, Y+12 + icall + std Y+0, OUT0 /* outputs overwrite the same four bytes in place */ + std Y+4, OUT1 + std Y+8, OUT2 + std Y+12, OUT3 + ldd IN0, Y+0+1 /* slice 1: bytes 1, 5, 9, 13 */ + ldd IN1, Y+4+1 + ldd IN2, Y+8+1 + ldd IN3, Y+12+1 + icall + std Y+0+1, OUT0 + std Y+4+1, OUT1 + std Y+8+1, OUT2 + std Y+12+1, OUT3 + ldd IN0, Y+0+2 /* slice 2: bytes 2, 6, 10, 14 */ + ldd IN1, Y+4+2 + ldd IN2, Y+8+2 + ldd IN3, Y+12+2 + icall + std Y+0+2, OUT0 + std Y+4+2, OUT1 + std Y+8+2, OUT2 + std Y+12+2, OUT3 + ldd IN0, Y+0+3 /* slice 3: bytes 3, 7, 11, 15 */ + ldd IN1, Y+4+3 + ldd IN2, Y+8+3 + ldd IN3, Y+12+3 + icall + std Y+0+3, OUT0 + std Y+4+3, OUT1 + std Y+8+3, OUT2 + std Y+12+3, OUT3 + pop_range 2, 17 + pop r29 + pop r28 + ret + +.global inv_sbox128 +inv_sbox128: + ldi r30, lo8(sinvf_tab) + ldi r31, hi8(sinvf_tab) + rjmp 1b /* Z = sinvf_tab; continue at the preceding local label 1 */ +/* +void inv_sbox128(void * w, uint8_t box){ + uint8_t i, buffer[16]; + box &= 0x7; + + sb_fpt fp; + fp = (sb_fpt)pgm_read_word(&(sinvf_tab[box])); + for(i=0; i<4; ++i){ + fp(buffer+i, (uint8_t*)w+i); + } + memcpy(w, buffer, 16); +} +*/ + + + + + + + diff --git a/serpent-sboxes-bitslice.c b/serpent-sboxes-bitslice.c index fe4d668..8d4e80d 100644 --- a/serpent-sboxes-bitslice.c +++ b/serpent-sboxes-bitslice.c @@ -35,406 +35,406 @@ /* depth = 5,7,4,2, Total gates=18 */ static -void sb0(uint8_t* out, const uint8_t* in, uint8_t i){ +void sb0(uint8_t* out, const uint8_t* in){ // (a,b,c,d,w,x,y,z) uint8_t t01, t02, t03, t05, t06, t07, t08, t09, t11, t12, t13, t14, t15, t17; - t01 = in[4*1+i] ^ in[4*2+i]; - t02 = in[4*0+i] | in[4*3+i]; - t03 = in[4*0+i] ^ in[4*1+i]; - out[4*3+i] = t02 ^ t01; - t05 = in[4*2+i] | out[4*3+i]; - t06 = in[4*0+i] ^ in[4*3+i]; - t07 = in[4*1+i] | in[4*2+i]; - t08 = in[4*3+i] & t05; + t01 = in[4*1] ^ in[4*2]; + t02 = in[4*0] | in[4*3]; + t03 = in[4*0] ^ in[4*1]; + out[4*3] = t02 ^ t01; + t05 = in[4*2] | out[4*3]; + t06 = in[4*0] ^ in[4*3]; + t07 = in[4*1] | 
in[4*2]; + t08 = in[4*3] & t05; t09 = t03 & t07; - out[4*2+i] = t09 ^ t08; - t11 = t09 & out[4*2+i]; - t12 = in[4*2+i] ^ in[4*3+i]; + out[4*2] = t09 ^ t08; + t11 = t09 & out[4*2]; + t12 = in[4*2] ^ in[4*3]; t13 = t07 ^ t11; - t14 = in[4*1+i] & t06; + t14 = in[4*1] & t06; t15 = t06 ^ t13; - out[4*0+i] = ~ t15; - t17 = out[4*0+i] ^ t14; - out[4*1+i] = t12 ^ t17; + out[4*0] = ~ t15; + t17 = out[4*0] ^ t14; + out[4*1] = t12 ^ t17; } /* InvS0: 13 3 11 0 10 6 5 12 1 14 4 7 15 9 8 2 */ /* depth = 8,4,3,6, Total gates=19 */ static -void sb0_inv(uint8_t* out, const uint8_t* in, uint8_t i){ +void sb0_inv(uint8_t* out, const uint8_t* in){ uint8_t t02, t03, t04, t05, t06, t08, t09, t10, t12, t13, t14, t15, t17, t18, t01; - t01 = in[4*2+i] ^ in[4*3+i]; - t02 = in[4*0+i] | in[4*1+i]; - t03 = in[4*1+i] | in[4*2+i]; - t04 = in[4*2+i] & t01; + t01 = in[4*2] ^ in[4*3]; + t02 = in[4*0] | in[4*1]; + t03 = in[4*1] | in[4*2]; + t04 = in[4*2] & t01; t05 = t02 ^ t01; - t06 = in[4*0+i] | t04; - out[4*2+i] = ~ t05; - t08 = in[4*1+i] ^ in[4*3+i]; + t06 = in[4*0] | t04; + out[4*2] = ~ t05; + t08 = in[4*1] ^ in[4*3]; t09 = t03 & t08; - t10 = in[4*3+i] | out[4*2+i]; - out[4*1+i] = t09 ^ t06; - t12 = in[4*0+i] | t05; - t13 = out[4*1+i] ^ t12; + t10 = in[4*3] | out[4*2]; + out[4*1] = t09 ^ t06; + t12 = in[4*0] | t05; + t13 = out[4*1] ^ t12; t14 = t03 ^ t10; - t15 = in[4*0+i] ^ in[4*2+i]; - out[4*3+i] = t14 ^ t13; + t15 = in[4*0] ^ in[4*2]; + out[4*3] = t14 ^ t13; t17 = t05 & t13; t18 = t14 | t17; - out[4*0+i] = t15 ^ t18; + out[4*0] = t15 ^ t18; } /* S1: 15 12 2 7 9 0 5 10 1 11 14 8 6 13 3 4 */ /* depth = 10,7,3,5, Total gates=18 */ static -void sb1(uint8_t* out, const uint8_t* in, uint8_t i){ +void sb1(uint8_t* out, const uint8_t* in){ uint8_t t02, t03, t04, t05, t06, t07, t08, t10, t11, t12, t13, t16, t17, t01; - t01 = in[4*0+i] | in[4*3+i]; - t02 = in[4*2+i] ^ in[4*3+i]; - t03 = ~ in[4*1+i]; - t04 = in[4*0+i] ^ in[4*2+i]; - t05 = in[4*0+i] | t03; - t06 = in[4*3+i] & t04; + t01 = in[4*0] | 
in[4*3]; + t02 = in[4*2] ^ in[4*3]; + t03 = ~ in[4*1]; + t04 = in[4*0] ^ in[4*2]; + t05 = in[4*0] | t03; + t06 = in[4*3] & t04; t07 = t01 & t02; - t08 = in[4*1+i] | t06; - out[4*2+i] = t02 ^ t05; + t08 = in[4*1] | t06; + out[4*2] = t02 ^ t05; t10 = t07 ^ t08; t11 = t01 ^ t10; - t12 = out[4*2+i] ^ t11; - t13 = in[4*1+i] & in[4*3+i]; - out[4*3+i] = ~ t10; - out[4*1+i] = t13 ^ t12; - t16 = t10 | out[4*1+i]; + t12 = out[4*2] ^ t11; + t13 = in[4*1] & in[4*3]; + out[4*3] = ~ t10; + out[4*1] = t13 ^ t12; + t16 = t10 | out[4*1]; t17 = t05 & t16; - out[4*0+i] = in[4*2+i] ^ t17; + out[4*0] = in[4*2] ^ t17; } /* InvS1: 5 8 2 14 15 6 12 3 11 4 7 9 1 13 10 0 */ /* depth = 7,4,5,3, Total gates=18 */ -static void sb1_inv(uint8_t* out, const uint8_t* in, uint8_t i){ +static void sb1_inv(uint8_t* out, const uint8_t* in){ uint8_t t02, t03, t04, t05, t06, t07, t08, t09, t10, t11, t14, t15, t17, t01; - t01 = in[4*0+i] ^ in[4*1+i]; - t02 = in[4*1+i] | in[4*3+i]; - t03 = in[4*0+i] & in[4*2+i]; - t04 = in[4*2+i] ^ t02; - t05 = in[4*0+i] | t04; + t01 = in[4*0] ^ in[4*1]; + t02 = in[4*1] | in[4*3]; + t03 = in[4*0] & in[4*2]; + t04 = in[4*2] ^ t02; + t05 = in[4*0] | t04; t06 = t01 & t05; - t07 = in[4*3+i] | t03; - t08 = in[4*1+i] ^ t06; + t07 = in[4*3] | t03; + t08 = in[4*1] ^ t06; t09 = t07 ^ t06; t10 = t04 | t03; - t11 = in[4*3+i] & t08; - out[4*2+i] = ~ t09; - out[4*1+i] = t10 ^ t11; - t14 = in[4*0+i] | out[4*2+i]; - t15 = t06 ^ out[4*1+i]; - out[4*3+i] = t01 ^ t04; - t17 = in[4*2+i] ^ t15; - out[4*0+i] = t14 ^ t17; + t11 = in[4*3] & t08; + out[4*2] = ~ t09; + out[4*1] = t10 ^ t11; + t14 = in[4*0] | out[4*2]; + t15 = t06 ^ out[4*1]; + out[4*3] = t01 ^ t04; + t17 = in[4*2] ^ t15; + out[4*0] = t14 ^ t17; } /* S2: 8 6 7 9 3 12 10 15 13 1 14 4 0 11 5 2 */ /* depth = 3,8,11,7, Total gates=16 */ -static void sb2(uint8_t* out, const uint8_t* in, uint8_t i){ +static void sb2(uint8_t* out, const uint8_t* in){ uint8_t t02, t03, t05, t06, t07, t08, t09, t10, t12, t13, t14, t01; - t01 = in[4*0+i] | 
in[4*2+i]; - t02 = in[4*0+i] ^ in[4*1+i]; - t03 = in[4*3+i] ^ t01; - out[4*0+i] = t02 ^ t03; - t05 = in[4*2+i] ^ out[4*0+i]; - t06 = in[4*1+i] ^ t05; - t07 = in[4*1+i] | t05; + t01 = in[4*0] | in[4*2]; + t02 = in[4*0] ^ in[4*1]; + t03 = in[4*3] ^ t01; + out[4*0] = t02 ^ t03; + t05 = in[4*2] ^ out[4*0]; + t06 = in[4*1] ^ t05; + t07 = in[4*1] | t05; t08 = t01 & t06; t09 = t03 ^ t07; t10 = t02 | t09; - out[4*1+i] = t10 ^ t08; - t12 = in[4*0+i] | in[4*3+i]; - t13 = t09 ^ out[4*1+i]; - t14 = in[4*1+i] ^ t13; - out[4*3+i] = ~ t09; - out[4*2+i] = t12 ^ t14; + out[4*1] = t10 ^ t08; + t12 = in[4*0] | in[4*3]; + t13 = t09 ^ out[4*1]; + t14 = in[4*1] ^ t13; + out[4*3] = ~ t09; + out[4*2] = t12 ^ t14; } /* InvS2: 12 9 15 4 11 14 1 2 0 3 6 13 5 8 10 7 */ /* depth = 3,6,8,3, Total gates=18 */ -static void sb2_inv(uint8_t* out, const uint8_t* in, uint8_t i){ +static void sb2_inv(uint8_t* out, const uint8_t* in){ uint8_t t02, t03, t04, t06, t07, t08, t09, t10, t11, t12, t15, t16, t17, t01; - t01 = in[4*0+i] ^ in[4*3+i]; - t02 = in[4*2+i] ^ in[4*3+i]; - t03 = in[4*0+i] & in[4*2+i]; - t04 = in[4*1+i] | t02; - out[4*0+i] = t01 ^ t04; - t06 = in[4*0+i] | in[4*2+i]; - t07 = in[4*3+i] | out[4*0+i]; - t08 = ~ in[4*3+i]; - t09 = in[4*1+i] & t06; + t01 = in[4*0] ^ in[4*3]; + t02 = in[4*2] ^ in[4*3]; + t03 = in[4*0] & in[4*2]; + t04 = in[4*1] | t02; + out[4*0] = t01 ^ t04; + t06 = in[4*0] | in[4*2]; + t07 = in[4*3] | out[4*0]; + t08 = ~ in[4*3]; + t09 = in[4*1] & t06; t10 = t08 | t03; - t11 = in[4*1+i] & t07; + t11 = in[4*1] & t07; t12 = t06 & t02; - out[4*3+i] = t09 ^ t10; - out[4*1+i] = t12 ^ t11; - t15 = in[4*2+i] & out[4*3+i]; - t16 = out[4*0+i] ^ out[4*1+i]; + out[4*3] = t09 ^ t10; + out[4*1] = t12 ^ t11; + t15 = in[4*2] & out[4*3]; + t16 = out[4*0] ^ out[4*1]; t17 = t10 ^ t15; - out[4*2+i] = t16 ^ t17; + out[4*2] = t16 ^ t17; } /* S3: 0 15 11 8 12 9 6 3 13 1 2 4 10 7 5 14 */ /* depth = 8,3,5,5, Total gates=18 */ -static void sb3(uint8_t* out, const uint8_t* in, uint8_t i){ +static 
void sb3(uint8_t* out, const uint8_t* in){ uint8_t t02, t03, t04, t05, t06, t07, t08, t09, t10, t11, t13, t14, t15, t01; - t01 = in[4*0+i] ^ in[4*2+i]; - t02 = in[4*0+i] | in[4*3+i]; - t03 = in[4*0+i] & in[4*3+i]; + t01 = in[4*0] ^ in[4*2]; + t02 = in[4*0] | in[4*3]; + t03 = in[4*0] & in[4*3]; t04 = t01 & t02; - t05 = in[4*1+i] | t03; - t06 = in[4*0+i] & in[4*1+i]; - t07 = in[4*3+i] ^ t04; - t08 = in[4*2+i] | t06; - t09 = in[4*1+i] ^ t07; - t10 = in[4*3+i] & t05; + t05 = in[4*1] | t03; + t06 = in[4*0] & in[4*1]; + t07 = in[4*3] ^ t04; + t08 = in[4*2] | t06; + t09 = in[4*1] ^ t07; + t10 = in[4*3] & t05; t11 = t02 ^ t10; - out[4*3+i] = t08 ^ t09; - t13 = in[4*3+i] | out[4*3+i]; - t14 = in[4*0+i] | t07; - t15 = in[4*1+i] & t13; - out[4*2+i] = t08 ^ t11; - out[4*0+i] = t14 ^ t15; - out[4*1+i] = t05 ^ t04; + out[4*3] = t08 ^ t09; + t13 = in[4*3] | out[4*3]; + t14 = in[4*0] | t07; + t15 = in[4*1] & t13; + out[4*2] = t08 ^ t11; + out[4*0] = t14 ^ t15; + out[4*1] = t05 ^ t04; } /* InvS3: 0 9 10 7 11 14 6 13 3 5 12 2 4 8 15 1 */ /* depth = 3,6,4,4, Total gates=17 */ -static void sb3_inv(uint8_t* out, const uint8_t* in, uint8_t i){ +static void sb3_inv(uint8_t* out, const uint8_t* in){ uint8_t t02, t03, t04, t05, t06, t07, t09, t11, t12, t13, t14, t16, t01; - t01 = in[4*2+i] | in[4*3+i]; - t02 = in[4*0+i] | in[4*3+i]; - t03 = in[4*2+i] ^ t02; - t04 = in[4*1+i] ^ t02; - t05 = in[4*0+i] ^ in[4*3+i]; + t01 = in[4*2] | in[4*3]; + t02 = in[4*0] | in[4*3]; + t03 = in[4*2] ^ t02; + t04 = in[4*1] ^ t02; + t05 = in[4*0] ^ in[4*3]; t06 = t04 & t03; - t07 = in[4*1+i] & t01; - out[4*2+i] = t05 ^ t06; - t09 = in[4*0+i] ^ t03; - out[4*0+i] = t07 ^ t03; - t11 = out[4*0+i] | t05; + t07 = in[4*1] & t01; + out[4*2] = t05 ^ t06; + t09 = in[4*0] ^ t03; + out[4*0] = t07 ^ t03; + t11 = out[4*0] | t05; t12 = t09 & t11; - t13 = in[4*0+i] & out[4*2+i]; + t13 = in[4*0] & out[4*2]; t14 = t01 ^ t05; - out[4*1+i] = in[4*1+i] ^ t12; - t16 = in[4*1+i] | t13; - out[4*3+i] = t14 ^ t16; + out[4*1] = in[4*1] 
^ t12; + t16 = in[4*1] | t13; + out[4*3] = t14 ^ t16; } /* S4: 1 15 8 3 12 0 11 6 2 5 4 10 9 14 7 13 */ /* depth = 6,7,5,3, Total gates=19 */ -static void sb4(uint8_t* out, const uint8_t* in, uint8_t i){ +static void sb4(uint8_t* out, const uint8_t* in){ uint8_t t02, t03, t04, t05, t06, t08, t09, t10, t11, t12, t13, t14, t15, t16, t01; - t01 = in[4*0+i] | in[4*1+i]; - t02 = in[4*1+i] | in[4*2+i]; - t03 = in[4*0+i] ^ t02; - t04 = in[4*1+i] ^ in[4*3+i]; - t05 = in[4*3+i] | t03; - t06 = in[4*3+i] & t01; - out[4*3+i] = t03 ^ t06; - t08 = out[4*3+i] & t04; + t01 = in[4*0] | in[4*1]; + t02 = in[4*1] | in[4*2]; + t03 = in[4*0] ^ t02; + t04 = in[4*1] ^ in[4*3]; + t05 = in[4*3] | t03; + t06 = in[4*3] & t01; + out[4*3] = t03 ^ t06; + t08 = out[4*3] & t04; t09 = t04 & t05; - t10 = in[4*2+i] ^ t06; - t11 = in[4*1+i] & in[4*2+i]; + t10 = in[4*2] ^ t06; + t11 = in[4*1] & in[4*2]; t12 = t04 ^ t08; t13 = t11 | t03; t14 = t10 ^ t09; - t15 = in[4*0+i] & t05; + t15 = in[4*0] & t05; t16 = t11 | t12; - out[4*2+i] = t13 ^ t08; - out[4*1+i] = t15 ^ t16; - out[4*0+i] = ~ t14; + out[4*2] = t13 ^ t08; + out[4*1] = t15 ^ t16; + out[4*0] = ~ t14; } /* InvS4: 5 0 8 3 10 9 7 14 2 12 11 6 4 15 13 1 */ /* depth = 6,4,7,3, Total gates=17 */ -static void sb4_inv(uint8_t* out, const uint8_t* in, uint8_t i){ +static void sb4_inv(uint8_t* out, const uint8_t* in){ uint8_t t02, t03, t04, t05, t06, t07, t09, t10, t11, t12, t13, t15, t01; - t01 = in[4*1+i] | in[4*3+i]; - t02 = in[4*2+i] | in[4*3+i]; - t03 = in[4*0+i] & t01; - t04 = in[4*1+i] ^ t02; - t05 = in[4*2+i] ^ in[4*3+i]; + t01 = in[4*1] | in[4*3]; + t02 = in[4*2] | in[4*3]; + t03 = in[4*0] & t01; + t04 = in[4*1] ^ t02; + t05 = in[4*2] ^ in[4*3]; t06 = ~ t03; - t07 = in[4*0+i] & t04; - out[4*1+i] = t05 ^ t07; - t09 = out[4*1+i] | t06; - t10 = in[4*0+i] ^ t07; + t07 = in[4*0] & t04; + out[4*1] = t05 ^ t07; + t09 = out[4*1] | t06; + t10 = in[4*0] ^ t07; t11 = t01 ^ t09; - t12 = in[4*3+i] ^ t04; - t13 = in[4*2+i] | t10; - out[4*3+i] = t03 ^ t12; - t15 
= in[4*0+i] ^ t04; - out[4*2+i] = t11 ^ t13; - out[4*0+i] = t15 ^ t09; + t12 = in[4*3] ^ t04; + t13 = in[4*2] | t10; + out[4*3] = t03 ^ t12; + t15 = in[4*0] ^ t04; + out[4*2] = t11 ^ t13; + out[4*0] = t15 ^ t09; } /* S5: 15 5 2 11 4 10 9 12 0 3 14 8 13 6 7 1 */ /* depth = 4,6,8,6, Total gates=17 */ -static void sb5(uint8_t* out, const uint8_t* in, uint8_t i){ +static void sb5(uint8_t* out, const uint8_t* in){ uint8_t t02, t03, t04, t05, t07, t08, t09, t10, t11, t12, t13, t14, t01; - t01 = in[4*1+i] ^ in[4*3+i]; - t02 = in[4*1+i] | in[4*3+i]; - t03 = in[4*0+i] & t01; - t04 = in[4*2+i] ^ t02; + t01 = in[4*1] ^ in[4*3]; + t02 = in[4*1] | in[4*3]; + t03 = in[4*0] & t01; + t04 = in[4*2] ^ t02; t05 = t03 ^ t04; - out[4*0+i] = ~ t05; - t07 = in[4*0+i] ^ t01; - t08 = in[4*3+i] | out[4*0+i]; - t09 = in[4*1+i] | t05; - t10 = in[4*3+i] ^ t08; - t11 = in[4*1+i] | t07; - t12 = t03 | out[4*0+i]; + out[4*0] = ~ t05; + t07 = in[4*0] ^ t01; + t08 = in[4*3] | out[4*0]; + t09 = in[4*1] | t05; + t10 = in[4*3] ^ t08; + t11 = in[4*1] | t07; + t12 = t03 | out[4*0]; t13 = t07 | t10; t14 = t01 ^ t11; - out[4*2+i] = t09 ^ t13; - out[4*1+i] = t07 ^ t08; - out[4*3+i] = t12 ^ t14; + out[4*2] = t09 ^ t13; + out[4*1] = t07 ^ t08; + out[4*3] = t12 ^ t14; } /* InvS5: 8 15 2 9 4 1 13 14 11 6 5 3 7 12 10 0 */ /* depth = 4,6,9,7, Total gates=17 */ -static void sb5_inv(uint8_t* out, const uint8_t* in, uint8_t i){ +static void sb5_inv(uint8_t* out, const uint8_t* in){ uint8_t t02, t03, t04, t05, t07, t08, t09, t10, t12, t13, t15, t16, t01; - t01 = in[4*0+i] & in[4*3+i]; - t02 = in[4*2+i] ^ t01; - t03 = in[4*0+i] ^ in[4*3+i]; - t04 = in[4*1+i] & t02; - t05 = in[4*0+i] & in[4*2+i]; - out[4*0+i] = t03 ^ t04; - t07 = in[4*0+i] & out[4*0+i]; - t08 = t01 ^ out[4*0+i]; - t09 = in[4*1+i] | t05; - t10 = ~ in[4*1+i]; - out[4*1+i] = t08 ^ t09; + t01 = in[4*0] & in[4*3]; + t02 = in[4*2] ^ t01; + t03 = in[4*0] ^ in[4*3]; + t04 = in[4*1] & t02; + t05 = in[4*0] & in[4*2]; + out[4*0] = t03 ^ t04; + t07 = in[4*0] & 
out[4*0]; + t08 = t01 ^ out[4*0]; + t09 = in[4*1] | t05; + t10 = ~ in[4*1]; + out[4*1] = t08 ^ t09; t12 = t10 | t07; - t13 = out[4*0+i] | out[4*1+i]; - out[4*3+i] = t02 ^ t12; + t13 = out[4*0] | out[4*1]; + out[4*3] = t02 ^ t12; t15 = t02 ^ t13; - t16 = in[4*1+i] ^ in[4*3+i]; - out[4*2+i] = t16 ^ t15; + t16 = in[4*1] ^ in[4*3]; + out[4*2] = t16 ^ t15; } /* S6: 7 2 12 5 8 4 6 11 14 9 1 15 13 3 10 0 */ /* depth = 8,3,6,3, Total gates=19 */ -static void sb6(uint8_t* out, const uint8_t* in, uint8_t i){ +static void sb6(uint8_t* out, const uint8_t* in){ uint8_t t02, t03, t04, t05, t07, t08, t09, t10, t11, t12, t13, t15, t17, t18, t01; - t01 = in[4*0+i] & in[4*3+i]; - t02 = in[4*1+i] ^ in[4*2+i]; - t03 = in[4*0+i] ^ in[4*3+i]; + t01 = in[4*0] & in[4*3]; + t02 = in[4*1] ^ in[4*2]; + t03 = in[4*0] ^ in[4*3]; t04 = t01 ^ t02; - t05 = in[4*1+i] | in[4*2+i]; - out[4*1+i] = ~ t04; + t05 = in[4*1] | in[4*2]; + out[4*1] = ~ t04; t07 = t03 & t05; - t08 = in[4*1+i] & out[4*1+i]; - t09 = in[4*0+i] | in[4*2+i]; + t08 = in[4*1] & out[4*1]; + t09 = in[4*0] | in[4*2]; t10 = t07 ^ t08; - t11 = in[4*1+i] | in[4*3+i]; - t12 = in[4*2+i] ^ t11; + t11 = in[4*1] | in[4*3]; + t12 = in[4*2] ^ t11; t13 = t09 ^ t10; - out[4*2+i] = ~ t13; - t15 = out[4*1+i] & t03; - out[4*3+i] = t12 ^ t07; - t17 = in[4*0+i] ^ in[4*1+i]; - t18 = out[4*2+i] ^ t15; - out[4*0+i] = t17 ^ t18; + out[4*2] = ~ t13; + t15 = out[4*1] & t03; + out[4*3] = t12 ^ t07; + t17 = in[4*0] ^ in[4*1]; + t18 = out[4*2] ^ t15; + out[4*0] = t17 ^ t18; } /* InvS6: 15 10 1 13 5 3 6 0 4 9 14 7 2 12 8 11 */ /* depth = 5,3,8,6, Total gates=19 */ -static void sb6_inv(uint8_t* out, const uint8_t* in, uint8_t i){ +static void sb6_inv(uint8_t* out, const uint8_t* in){ uint8_t t02, t03, t04, t05, t06, t07, t08, t09, t12, t13, t14, t15, t16, t17, t01; - t01 = in[4*0+i] ^ in[4*2+i]; - t02 = ~ in[4*2+i]; - t03 = in[4*1+i] & t01; - t04 = in[4*1+i] | t02; - t05 = in[4*3+i] | t03; - t06 = in[4*1+i] ^ in[4*3+i]; - t07 = in[4*0+i] & t04; - t08 = in[4*0+i] 
| t02; + t01 = in[4*0] ^ in[4*2]; + t02 = ~ in[4*2]; + t03 = in[4*1] & t01; + t04 = in[4*1] | t02; + t05 = in[4*3] | t03; + t06 = in[4*1] ^ in[4*3]; + t07 = in[4*0] & t04; + t08 = in[4*0] | t02; t09 = t07 ^ t05; - out[4*1+i] = t06 ^ t08; - out[4*0+i] = ~ t09; - t12 = in[4*1+i] & out[4*0+i]; + out[4*1] = t06 ^ t08; + out[4*0] = ~ t09; + t12 = in[4*1] & out[4*0]; t13 = t01 & t05; t14 = t01 ^ t12; t15 = t07 ^ t13; - t16 = in[4*3+i] | t02; - t17 = in[4*0+i] ^ out[4*1+i]; - out[4*3+i] = t17 ^ t15; - out[4*2+i] = t16 ^ t14; + t16 = in[4*3] | t02; + t17 = in[4*0] ^ out[4*1]; + out[4*3] = t17 ^ t15; + out[4*2] = t16 ^ t14; } /* S7: 1 13 15 0 14 8 2 11 7 4 12 10 9 3 5 6 */ /* depth = 10,7,10,4, Total gates=19 */ -static void sb7(uint8_t* out, const uint8_t* in, uint8_t i){ +static void sb7(uint8_t* out, const uint8_t* in){ uint8_t t02, t03, t04, t05, t06, t08, t09, t10, t11, t13, t14, t15, t16, t17, t01; - t01 = in[4*0+i] & in[4*2+i]; - t02 = ~ in[4*3+i]; - t03 = in[4*0+i] & t02; - t04 = in[4*1+i] | t01; - t05 = in[4*0+i] & in[4*1+i]; - t06 = in[4*2+i] ^ t04; - out[4*3+i] = t03 ^ t06; - t08 = in[4*2+i] | out[4*3+i]; - t09 = in[4*3+i] | t05; - t10 = in[4*0+i] ^ t08; - t11 = t04 & out[4*3+i]; - out[4*1+i] = t09 ^ t10; - t13 = in[4*1+i] ^ out[4*1+i]; - t14 = t01 ^ out[4*1+i]; - t15 = in[4*2+i] ^ t05; + t01 = in[4*0] & in[4*2]; + t02 = ~ in[4*3]; + t03 = in[4*0] & t02; + t04 = in[4*1] | t01; + t05 = in[4*0] & in[4*1]; + t06 = in[4*2] ^ t04; + out[4*3] = t03 ^ t06; + t08 = in[4*2] | out[4*3]; + t09 = in[4*3] | t05; + t10 = in[4*0] ^ t08; + t11 = t04 & out[4*3]; + out[4*1] = t09 ^ t10; + t13 = in[4*1] ^ out[4*1]; + t14 = t01 ^ out[4*1]; + t15 = in[4*2] ^ t05; t16 = t11 | t13; t17 = t02 | t14; - out[4*0+i] = t15 ^ t17; - out[4*2+i] = in[4*0+i] ^ t16; + out[4*0] = t15 ^ t17; + out[4*2] = in[4*0] ^ t16; } /* InvS7: 3 0 6 13 9 14 15 8 5 12 11 7 10 1 4 2 */ /* depth = 9,7,3,3, Total gates=18 */ -static void sb7_inv(uint8_t* out, const uint8_t* in, uint8_t i){ +static void 
sb7_inv(uint8_t* out, const uint8_t* in){ uint8_t t02, t03, t04, t06, t07, t08, t09, t10, t11, t13, t14, t15, t16, t01; - t01 = in[4*0+i] & in[4*1+i]; - t02 = in[4*0+i] | in[4*1+i]; - t03 = in[4*2+i] | t01; - t04 = in[4*3+i] & t02; - out[4*3+i] = t03 ^ t04; - t06 = in[4*1+i] ^ t04; - t07 = in[4*3+i] ^ out[4*3+i]; + t01 = in[4*0] & in[4*1]; + t02 = in[4*0] | in[4*1]; + t03 = in[4*2] | t01; + t04 = in[4*3] & t02; + out[4*3] = t03 ^ t04; + t06 = in[4*1] ^ t04; + t07 = in[4*3] ^ out[4*3]; t08 = ~ t07; t09 = t06 | t08; - t10 = in[4*1+i] ^ in[4*3+i]; - t11 = in[4*0+i] | in[4*3+i]; - out[4*1+i] = in[4*0+i] ^ t09; - t13 = in[4*2+i] ^ t06; - t14 = in[4*2+i] & t11; - t15 = in[4*3+i] | out[4*1+i]; + t10 = in[4*1] ^ in[4*3]; + t11 = in[4*0] | in[4*3]; + out[4*1] = in[4*0] ^ t09; + t13 = in[4*2] ^ t06; + t14 = in[4*2] & t11; + t15 = in[4*3] | out[4*1]; t16 = t01 | t10; - out[4*0+i] = t13 ^ t15; - out[4*2+i] = t14 ^ t16; + out[4*0] = t13 ^ t15; + out[4*2] = t14 ^ t16; } -typedef void(*sb_fpt)(uint8_t*, const uint8_t*, uint8_t i); +typedef void(*sb_fpt)(uint8_t*, const uint8_t*); sb_fpt sf_tab[] PROGMEM = { sb0, sb1, sb2, sb3, @@ -453,7 +453,7 @@ void sbox128(void * w, uint8_t box){ sb_fpt fp; fp = (sb_fpt)pgm_read_word(&(sf_tab[box])); for(i=0; i<4; ++i){ - fp(buffer, (uint8_t*)w, i); + fp(buffer+i, (uint8_t*)w+i); } memcpy(w, buffer, 16); } @@ -465,7 +465,7 @@ void inv_sbox128(void * w, uint8_t box){ sb_fpt fp; fp = (sb_fpt)pgm_read_word(&(sinvf_tab[box])); for(i=0; i<4; ++i){ - fp(buffer, (uint8_t*)w, i); + fp(buffer+i, (uint8_t*)w+i); } memcpy(w, buffer, 16); }