serpent sboxes in asm bitslice

This commit is contained in:
bg 2009-01-26 18:04:30 +00:00
parent 8ef8dfdc72
commit 6918fd7223
7 changed files with 1127 additions and 262 deletions

View File

@ -10,7 +10,7 @@
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
@ -92,13 +92,11 @@ aes_decrypt_core:
add r26, r20
adc r27, r1
clt
; ldi CTR, 2
.irp param, ST00, ST01, ST02, ST03, ST10, ST11, ST12, ST13, ST20, ST21, ST22, ST23, ST30, ST31, ST32, ST33
ld \param, Z+
.endr
ldi xREDUCER, 0x1b /* load reducer */
ldi r31, hi8(aes_invsbox)
.irp param, ST33, ST32, ST31, ST30, ST23, ST22, ST21, ST20, ST13, ST12, ST11, ST10, ST03, ST02, ST01, ST00
@ -110,6 +108,7 @@ aes_decrypt_core:
brne 2f
set
2:
ldi r31, hi8(aes_invsbox)
/* substitute and invShift */
.irp param, ST00, ST10, ST20, ST30
mov r30, \param
@ -384,7 +383,6 @@ exit:
3: eor T1, T3
eor ST30, T1
ldi r31, hi8(aes_invsbox)
rjmp 1b
.balign 256

View File

@ -1,7 +1,7 @@
MCU_TARGET = atmega644
OPTIMIZE = -Os
PROGRAMMER = stk500 # avr911
PROGRAMMER = stk500v2 # avr911
DEFS = -D$(call uc, $(MCU_TARGET))
FLASHCMD = avrdude -p $(MCU_TARGET) -P /dev/ttyUSB0 -c $(PROGRAMMER) -U flash:w:# no space at the end
#FLASHCMD = avrdude -p $(MCU_TARGET) -c usbasp -U flash:w:# no space at the end

View File

@ -5,7 +5,7 @@ ALGO_NAME := SERPENT_BITSLICE
BLOCK_CIPHERS += $(ALGO_NAME)
$(ALGO_NAME)_OBJ := serpent-asm.o serpent-sboxes-bitslice.o memxor.o
$(ALGO_NAME)_OBJ := serpent-asm.o serpent-sboxes-bitslice-asm.o memxor.o
$(ALGO_NAME)_TEST_BIN := main-serpent-test.o debug.o uart.o serial-tools.o \
nessie_bc_test.o nessie_common.o cli.o performance_test.o
$(ALGO_NAME)_NESSIE_TEST := "nessie"

View File

@ -0,0 +1,13 @@
# Makefile for serpent
ALGO_NAME := SERPENT_ASM_BITSLICE
# comment out the following line to remove serpent from the build process
BLOCK_CIPHERS += $(ALGO_NAME)
$(ALGO_NAME)_OBJ := serpent-sboxes-bitslice-asm.o serpent-asm.o memxor.o
$(ALGO_NAME)_TEST_BIN := main-serpent-test.o debug.o uart.o serial-tools.o \
nessie_bc_test.o nessie_common.o cli.o performance_test.o
$(ALGO_NAME)_NESSIE_TEST := "nessie"
$(ALGO_NAME)_PERFORMANCE_TEST := "performance"

View File

@ -706,7 +706,7 @@ serpent_dec:
movw r24, r14
ldi r22, 31
rcall inv_sbox128
call inv_sbox128
movw r24, r14
ldi r20, 16

View File

@ -0,0 +1,854 @@
/* serpent-sboxes-bitslice-asm.S */
/*
This file is part of the Crypto-avr-lib/microcrypt-lib.
Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/* serpent-sboxes-bitslice-asm.S
* a bitsliced implementation of the serpent sboxes
* author: Daniel Otte
* email: daniel.otte@rub.de
* license: GPLv3
*/
#include "avr-asm-macros.S"
/*
 * Register map shared by every S-box routine in this file.
 *   IN0..IN3   (r22..r25) : the four input bytes, only read by the S-boxes
 *   OUT0..OUT3 (r18..r21) : the four output bytes
 *   T00..T17              : scratch registers for intermediate gate values
 */
IN0 = 22
IN1 = 23
IN2 = 24
IN3 = 25
OUT0 = 18
OUT1 = 19
OUT2 = 20
OUT3 = 21
/*
 * T00 used to be assigned 2 and then immediately reassigned 3. Only the
 * last assignment is effective, so the dead "T00 = 2" line is removed.
 * The resulting map is unchanged: T00 is r3 and no symbol maps to r2.
 */
T00 = 3
T01 = 4
T02 = 5
T03 = 6
T04 = 7
T05 = 8
T06 = 9
T07 = 10
T08 = 11
T09 = 12
T10 = 13
T11 = 14
T12 = 15
T13 = 16
T14 = 17
/* T15/T16 occupy r26/r27 and T17 occupies r0 */
T15 = 26
T16 = 27
T17 = 0
/* S0: 3 8 15 1 10 6 5 11 14 13 4 2 7 0 9 12 */
/* depth = 5,7,4,2, Total gates=18 */
/*
 * sb0: gate network for the S-box listed above (per the original comment).
 * Uses only mov/and/or/eor/com, so each bit position of the byte registers
 * is processed independently.
 * Reads IN0..IN3, writes OUT0..OUT3, clobbers the T registers named below.
 */
sb0:
mov T00, IN1
eor T00, IN2 ; T00 = IN1 ^ IN2
mov T01, IN0
or T01, IN3 ; T01 = IN0 | IN3
mov T02, IN0
eor T02, IN1 ; T02 = IN0 ^ IN1
mov OUT3, T01
eor OUT3, T00 ; OUT3 = T01 ^ T00
mov T04, IN2
or T04, OUT3 ; T04 = IN2 | OUT3
mov T05, IN0
eor T05, IN3 ; T05 = IN0 ^ IN3
mov T06, IN1
or T06, IN2 ; T06 = IN1 | IN2
mov T07, IN3
and T07, T04 ; T07 = IN3 & T04
mov T08, T02
and T08, T06 ; T08 = T02 & T06
mov OUT2, T08
eor OUT2, T07 ; OUT2 = T08 ^ T07
mov T10, T08
and T10, OUT2 ; T10 = T08 & OUT2
mov T11, IN2
eor T11, IN3 ; T11 = IN2 ^ IN3
mov T12, T06
eor T12, T10 ; T12 = T06 ^ T10
mov T13, IN1
and T13, T05 ; T13 = IN1 & T05
mov T14, T05
eor T14, T12 ; T14 = T05 ^ T12
mov OUT0, T14
com OUT0 ; OUT0 = ~T14
mov T16, OUT0
eor T16, T13 ; T16 = OUT0 ^ T13
mov OUT1, T11
eor OUT1, T16 ; OUT1 = T11 ^ T16
ret
/* InvS0: 13 3 11 0 10 6 5 12 1 14 4 7 15 9 8 2 */
/* depth = 8,4,3,6, Total gates=19 */
/*
 * sb0_inv: gate network for the inverse S-box listed above (per the
 * original comment). Bitwise operations only, so each bit position is
 * processed independently.
 * Reads IN0..IN3, writes OUT0..OUT3, clobbers the T registers named below
 * (including T17).
 */
sb0_inv:
mov T00, IN2
eor T00, IN3 ; T00 = IN2 ^ IN3
mov T01, IN0
or T01, IN1 ; T01 = IN0 | IN1
mov T02, IN1
or T02, IN2 ; T02 = IN1 | IN2
mov T03, IN2
and T03, T00 ; T03 = IN2 & T00
mov T04, T01
eor T04, T00 ; T04 = T01 ^ T00
mov T05, IN0
or T05, T03 ; T05 = IN0 | T03
mov OUT2, T04
com OUT2 ; OUT2 = ~T04
mov T07, IN1
eor T07, IN3 ; T07 = IN1 ^ IN3
mov T08, T02
and T08, T07 ; T08 = T02 & T07
mov T09, IN3
or T09, OUT2 ; T09 = IN3 | OUT2
mov OUT1, T08
eor OUT1, T05 ; OUT1 = T08 ^ T05
mov T11, IN0
or T11, T04 ; T11 = IN0 | T04
mov T12, OUT1
eor T12, T11 ; T12 = OUT1 ^ T11
mov T13, T02
eor T13, T09 ; T13 = T02 ^ T09
mov T14, IN0
eor T14, IN2 ; T14 = IN0 ^ IN2
mov OUT3, T13
eor OUT3, T12 ; OUT3 = T13 ^ T12
mov T16, T04
and T16, T12 ; T16 = T04 & T12
mov T17, T13
or T17, T16 ; T17 = T13 | T16
mov OUT0, T14
eor OUT0, T17 ; OUT0 = T14 ^ T17
ret
/* S1: 15 12 2 7 9 0 5 10 1 11 14 8 6 13 3 4 */
/* depth = 10,7,3,5, Total gates=18 */
/*
 * sb1: gate network for the S-box listed above (per the original comment).
 * Bitwise operations only, so each bit position is processed independently.
 * Reads IN0..IN3, writes OUT0..OUT3, clobbers the T registers named below.
 */
sb1:
mov T00, IN0
or T00, IN3 ; T00 = IN0 | IN3
mov T01, IN2
eor T01, IN3 ; T01 = IN2 ^ IN3
mov T02, IN1
com T02 ; T02 = ~IN1
mov T03, IN0
eor T03, IN2 ; T03 = IN0 ^ IN2
mov T04, IN0
or T04, T02 ; T04 = IN0 | T02
mov T05, IN3
and T05, T03 ; T05 = IN3 & T03
mov T06, T00
and T06, T01 ; T06 = T00 & T01
mov T07, IN1
or T07, T05 ; T07 = IN1 | T05
mov OUT2, T01
eor OUT2, T04 ; OUT2 = T01 ^ T04
mov T09, T06
eor T09, T07 ; T09 = T06 ^ T07
mov T10, T00
eor T10, T09 ; T10 = T00 ^ T09
mov T11, OUT2
eor T11, T10 ; T11 = OUT2 ^ T10
mov T12, IN1
and T12, IN3 ; T12 = IN1 & IN3
mov OUT3, T09
com OUT3 ; OUT3 = ~T09
mov OUT1, T12
eor OUT1, T11 ; OUT1 = T12 ^ T11
mov T15, T09
or T15, OUT1 ; T15 = T09 | OUT1
mov T16, T04
and T16, T15 ; T16 = T04 & T15
mov OUT0, IN2
eor OUT0, T16 ; OUT0 = IN2 ^ T16
ret
/* InvS1: 5 8 2 14 15 6 12 3 11 4 7 9 1 13 10 0 */
/* depth = 7,4,5,3, Total gates=18 */
/*
 * sb1_inv: gate network for the inverse S-box listed above (per the
 * original comment). Bitwise operations only, so each bit position is
 * processed independently.
 * Reads IN0..IN3, writes OUT0..OUT3, clobbers the T registers named below.
 */
sb1_inv:
mov T00, IN0
eor T00, IN1 ; T00 = IN0 ^ IN1
mov T01, IN1
or T01, IN3 ; T01 = IN1 | IN3
mov T02, IN0
and T02, IN2 ; T02 = IN0 & IN2
mov T03, IN2
eor T03, T01 ; T03 = IN2 ^ T01
mov T04, IN0
or T04, T03 ; T04 = IN0 | T03
mov T05, T00
and T05, T04 ; T05 = T00 & T04
mov T06, IN3
or T06, T02 ; T06 = IN3 | T02
mov T07, IN1
eor T07, T05 ; T07 = IN1 ^ T05
mov T08, T06
eor T08, T05 ; T08 = T06 ^ T05
mov T09, T03
or T09, T02 ; T09 = T03 | T02
mov T10, IN3
and T10, T07 ; T10 = IN3 & T07
mov OUT2, T08
com OUT2 ; OUT2 = ~T08
mov OUT1, T09
eor OUT1, T10 ; OUT1 = T09 ^ T10
mov T13, IN0
or T13, OUT2 ; T13 = IN0 | OUT2
mov T14, T05
eor T14, OUT1 ; T14 = T05 ^ OUT1
mov OUT3, T00
eor OUT3, T03 ; OUT3 = T00 ^ T03
mov T16, IN2
eor T16, T14 ; T16 = IN2 ^ T14
mov OUT0, T13
eor OUT0, T16 ; OUT0 = T13 ^ T16
ret
/* S2: 8 6 7 9 3 12 10 15 13 1 14 4 0 11 5 2 */
/* depth = 3,8,11,7, Total gates=16 */
/*
 * sb2: gate network for the S-box listed above (per the original comment).
 * Bitwise operations only, so each bit position is processed independently.
 * Reads IN0..IN3, writes OUT0..OUT3, clobbers the T registers named below.
 */
sb2:
mov T00, IN0
or T00, IN2 ; T00 = IN0 | IN2
mov T01, IN0
eor T01, IN1 ; T01 = IN0 ^ IN1
mov T02, IN3
eor T02, T00 ; T02 = IN3 ^ T00
mov OUT0, T01
eor OUT0, T02 ; OUT0 = T01 ^ T02
mov T04, IN2
eor T04, OUT0 ; T04 = IN2 ^ OUT0
mov T05, IN1
eor T05, T04 ; T05 = IN1 ^ T04
mov T06, IN1
or T06, T04 ; T06 = IN1 | T04
mov T07, T00
and T07, T05 ; T07 = T00 & T05
mov T08, T02
eor T08, T06 ; T08 = T02 ^ T06
mov T09, T01
or T09, T08 ; T09 = T01 | T08
mov OUT1, T09
eor OUT1, T07 ; OUT1 = T09 ^ T07
mov T11, IN0
or T11, IN3 ; T11 = IN0 | IN3
mov T12, T08
eor T12, OUT1 ; T12 = T08 ^ OUT1
mov T13, IN1
eor T13, T12 ; T13 = IN1 ^ T12
mov OUT3, T08
com OUT3 ; OUT3 = ~T08
mov OUT2, T11
eor OUT2, T13 ; OUT2 = T11 ^ T13
ret
/* InvS2: 12 9 15 4 11 14 1 2 0 3 6 13 5 8 10 7 */
/* depth = 3,6,8,3, Total gates=18 */
/*
 * sb2_inv: gate network for the inverse S-box listed above (per the
 * original comment). Bitwise operations only, so each bit position is
 * processed independently.
 * Reads IN0..IN3, writes OUT0..OUT3, clobbers the T registers named below.
 */
sb2_inv:
mov T00, IN0
eor T00, IN3 ; T00 = IN0 ^ IN3
mov T01, IN2
eor T01, IN3 ; T01 = IN2 ^ IN3
mov T02, IN0
and T02, IN2 ; T02 = IN0 & IN2
mov T03, IN1
or T03, T01 ; T03 = IN1 | T01
mov OUT0, T00
eor OUT0, T03 ; OUT0 = T00 ^ T03
mov T05, IN0
or T05, IN2 ; T05 = IN0 | IN2
mov T06, IN3
or T06, OUT0 ; T06 = IN3 | OUT0
mov T07, IN3
com T07 ; T07 = ~IN3
mov T08, IN1
and T08, T05 ; T08 = IN1 & T05
mov T09, T07
or T09, T02 ; T09 = T07 | T02
mov T10, IN1
and T10, T06 ; T10 = IN1 & T06
mov T11, T05
and T11, T01 ; T11 = T05 & T01
mov OUT3, T08
eor OUT3, T09 ; OUT3 = T08 ^ T09
mov OUT1, T11
eor OUT1, T10 ; OUT1 = T11 ^ T10
mov T14, IN2
and T14, OUT3 ; T14 = IN2 & OUT3
mov T15, OUT0
eor T15, OUT1 ; T15 = OUT0 ^ OUT1
mov T16, T09
eor T16, T14 ; T16 = T09 ^ T14
mov OUT2, T15
eor OUT2, T16 ; OUT2 = T15 ^ T16
ret
/* S3: 0 15 11 8 12 9 6 3 13 1 2 4 10 7 5 14 */
/* depth = 8,3,5,5, Total gates=18 */
/*
 * sb3: gate network for the S-box listed above (per the original comment).
 * Bitwise operations only, so each bit position is processed independently.
 * Reads IN0..IN3, writes OUT0..OUT3, clobbers the T registers named below.
 */
sb3:
mov T00, IN0
eor T00, IN2 ; T00 = IN0 ^ IN2
mov T01, IN0
or T01, IN3 ; T01 = IN0 | IN3
mov T02, IN0
and T02, IN3 ; T02 = IN0 & IN3
mov T03, T00
and T03, T01 ; T03 = T00 & T01
mov T04, IN1
or T04, T02 ; T04 = IN1 | T02
mov T05, IN0
and T05, IN1 ; T05 = IN0 & IN1
mov T06, IN3
eor T06, T03 ; T06 = IN3 ^ T03
mov T07, IN2
or T07, T05 ; T07 = IN2 | T05
mov T08, IN1
eor T08, T06 ; T08 = IN1 ^ T06
mov T09, IN3
and T09, T04 ; T09 = IN3 & T04
mov T10, T01
eor T10, T09 ; T10 = T01 ^ T09
mov OUT3, T07
eor OUT3, T08 ; OUT3 = T07 ^ T08
mov T12, IN3
or T12, OUT3 ; T12 = IN3 | OUT3
mov T13, IN0
or T13, T06 ; T13 = IN0 | T06
mov T14, IN1
and T14, T12 ; T14 = IN1 & T12
mov OUT2, T07
eor OUT2, T10 ; OUT2 = T07 ^ T10
mov OUT0, T13
eor OUT0, T14 ; OUT0 = T13 ^ T14
mov OUT1, T04
eor OUT1, T03 ; OUT1 = T04 ^ T03
ret
/* InvS3: 0 9 10 7 11 14 6 13 3 5 12 2 4 8 15 1 */
/* depth = 3,6,4,4, Total gates=17 */
/*
 * sb3_inv: gate network for the inverse S-box listed above (per the
 * original comment). Bitwise operations only, so each bit position is
 * processed independently.
 * Reads IN0..IN3, writes OUT0..OUT3, clobbers the T registers named below.
 */
sb3_inv:
mov T00, IN2
or T00, IN3 ; T00 = IN2 | IN3
mov T01, IN0
or T01, IN3 ; T01 = IN0 | IN3
mov T02, IN2
eor T02, T01 ; T02 = IN2 ^ T01
mov T03, IN1
eor T03, T01 ; T03 = IN1 ^ T01
mov T04, IN0
eor T04, IN3 ; T04 = IN0 ^ IN3
mov T05, T03
and T05, T02 ; T05 = T03 & T02
mov T06, IN1
and T06, T00 ; T06 = IN1 & T00
mov OUT2, T04
eor OUT2, T05 ; OUT2 = T04 ^ T05
mov T08, IN0
eor T08, T02 ; T08 = IN0 ^ T02
mov OUT0, T06
eor OUT0, T02 ; OUT0 = T06 ^ T02
mov T10, OUT0
or T10, T04 ; T10 = OUT0 | T04
mov T11, T08
and T11, T10 ; T11 = T08 & T10
mov T12, IN0
and T12, OUT2 ; T12 = IN0 & OUT2
mov T13, T00
eor T13, T04 ; T13 = T00 ^ T04
mov OUT1, IN1
eor OUT1, T11 ; OUT1 = IN1 ^ T11
mov T15, IN1
or T15, T12 ; T15 = IN1 | T12
mov OUT3, T13
eor OUT3, T15 ; OUT3 = T13 ^ T15
ret
/* S4: 1 15 8 3 12 0 11 6 2 5 4 10 9 14 7 13 */
/* depth = 6,7,5,3, Total gates=19 */
/*
 * sb4: gate network for the S-box listed above (per the original comment).
 * Bitwise operations only, so each bit position is processed independently.
 * Reads IN0..IN3, writes OUT0..OUT3, clobbers the T registers named below.
 */
sb4:
mov T00, IN0
or T00, IN1 ; T00 = IN0 | IN1
mov T01, IN1
or T01, IN2 ; T01 = IN1 | IN2
mov T02, IN0
eor T02, T01 ; T02 = IN0 ^ T01
mov T03, IN1
eor T03, IN3 ; T03 = IN1 ^ IN3
mov T04, IN3
or T04, T02 ; T04 = IN3 | T02
mov T05, IN3
and T05, T00 ; T05 = IN3 & T00
mov OUT3, T02
eor OUT3, T05 ; OUT3 = T02 ^ T05
mov T07, OUT3
and T07, T03 ; T07 = OUT3 & T03
mov T08, T03
and T08, T04 ; T08 = T03 & T04
mov T09, IN2
eor T09, T05 ; T09 = IN2 ^ T05
mov T10, IN1
and T10, IN2 ; T10 = IN1 & IN2
mov T11, T03
eor T11, T07 ; T11 = T03 ^ T07
mov T12, T10
or T12, T02 ; T12 = T10 | T02
mov T13, T09
eor T13, T08 ; T13 = T09 ^ T08
mov T14, IN0
and T14, T04 ; T14 = IN0 & T04
mov T15, T10
or T15, T11 ; T15 = T10 | T11
mov OUT2, T12
eor OUT2, T07 ; OUT2 = T12 ^ T07
mov OUT1, T14
eor OUT1, T15 ; OUT1 = T14 ^ T15
mov OUT0, T13
com OUT0 ; OUT0 = ~T13
ret
/* InvS4: 5 0 8 3 10 9 7 14 2 12 11 6 4 15 13 1 */
/* depth = 6,4,7,3, Total gates=17 */
/*
 * sb4_inv: gate network for the inverse S-box listed above (per the
 * original comment). Bitwise operations only, so each bit position is
 * processed independently.
 * Reads IN0..IN3, writes OUT0..OUT3, clobbers the T registers named below.
 */
sb4_inv:
mov T00, IN1
or T00, IN3 ; T00 = IN1 | IN3
mov T01, IN2
or T01, IN3 ; T01 = IN2 | IN3
mov T02, IN0
and T02, T00 ; T02 = IN0 & T00
mov T03, IN1
eor T03, T01 ; T03 = IN1 ^ T01
mov T04, IN2
eor T04, IN3 ; T04 = IN2 ^ IN3
mov T05, T02
com T05 ; T05 = ~T02
mov T06, IN0
and T06, T03 ; T06 = IN0 & T03
mov OUT1, T04
eor OUT1, T06 ; OUT1 = T04 ^ T06
mov T08, OUT1
or T08, T05 ; T08 = OUT1 | T05
mov T09, IN0
eor T09, T06 ; T09 = IN0 ^ T06
mov T10, T00
eor T10, T08 ; T10 = T00 ^ T08
mov T11, IN3
eor T11, T03 ; T11 = IN3 ^ T03
mov T12, IN2
or T12, T09 ; T12 = IN2 | T09
mov OUT3, T02
eor OUT3, T11 ; OUT3 = T02 ^ T11
mov T14, IN0
eor T14, T03 ; T14 = IN0 ^ T03
mov OUT2, T10
eor OUT2, T12 ; OUT2 = T10 ^ T12
mov OUT0, T14
eor OUT0, T08 ; OUT0 = T14 ^ T08
ret
/* S5: 15 5 2 11 4 10 9 12 0 3 14 8 13 6 7 1 */
/* depth = 4,6,8,6, Total gates=17 */
/*
 * sb5: gate network for the S-box listed above (per the original comment).
 * Bitwise operations only, so each bit position is processed independently.
 * Reads IN0..IN3, writes OUT0..OUT3, clobbers the T registers named below.
 */
sb5:
mov T00, IN1
eor T00, IN3 ; T00 = IN1 ^ IN3
mov T01, IN1
or T01, IN3 ; T01 = IN1 | IN3
mov T02, IN0
and T02, T00 ; T02 = IN0 & T00
mov T03, IN2
eor T03, T01 ; T03 = IN2 ^ T01
mov T04, T02
eor T04, T03 ; T04 = T02 ^ T03
mov OUT0, T04
com OUT0 ; OUT0 = ~T04
mov T06, IN0
eor T06, T00 ; T06 = IN0 ^ T00
mov T07, IN3
or T07, OUT0 ; T07 = IN3 | OUT0
mov T08, IN1
or T08, T04 ; T08 = IN1 | T04
mov T09, IN3
eor T09, T07 ; T09 = IN3 ^ T07
mov T10, IN1
or T10, T06 ; T10 = IN1 | T06
mov T11, T02
or T11, OUT0 ; T11 = T02 | OUT0
mov T12, T06
or T12, T09 ; T12 = T06 | T09
mov T13, T00
eor T13, T10 ; T13 = T00 ^ T10
mov OUT2, T08
eor OUT2, T12 ; OUT2 = T08 ^ T12
mov OUT1, T06
eor OUT1, T07 ; OUT1 = T06 ^ T07
mov OUT3, T11
eor OUT3, T13 ; OUT3 = T11 ^ T13
ret
/* InvS5: 8 15 2 9 4 1 13 14 11 6 5 3 7 12 10 0 */
/* depth = 4,6,9,7, Total gates=17 */
/*
 * sb5_inv: gate network for the inverse S-box listed above (per the
 * original comment). Bitwise operations only, so each bit position is
 * processed independently.
 * Reads IN0..IN3, writes OUT0..OUT3, clobbers the T registers named below.
 */
sb5_inv:
mov T00, IN0
and T00, IN3 ; T00 = IN0 & IN3
mov T01, IN2
eor T01, T00 ; T01 = IN2 ^ T00
mov T02, IN0
eor T02, IN3 ; T02 = IN0 ^ IN3
mov T03, IN1
and T03, T01 ; T03 = IN1 & T01
mov T04, IN0
and T04, IN2 ; T04 = IN0 & IN2
mov OUT0, T02
eor OUT0, T03 ; OUT0 = T02 ^ T03
mov T06, IN0
and T06, OUT0 ; T06 = IN0 & OUT0
mov T07, T00
eor T07, OUT0 ; T07 = T00 ^ OUT0
mov T08, IN1
or T08, T04 ; T08 = IN1 | T04
mov T09, IN1
com T09 ; T09 = ~IN1
mov OUT1, T07
eor OUT1, T08 ; OUT1 = T07 ^ T08
mov T11, T09
or T11, T06 ; T11 = T09 | T06
mov T12, OUT0
or T12, OUT1 ; T12 = OUT0 | OUT1
mov OUT3, T01
eor OUT3, T11 ; OUT3 = T01 ^ T11
mov T14, T01
eor T14, T12 ; T14 = T01 ^ T12
mov T15, IN1
eor T15, IN3 ; T15 = IN1 ^ IN3
mov OUT2, T15
eor OUT2, T14 ; OUT2 = T15 ^ T14
ret
/* S6: 7 2 12 5 8 4 6 11 14 9 1 15 13 3 10 0 */
/* depth = 8,3,6,3, Total gates=19 */
/*
 * sb6: gate network for the S-box listed above (per the original comment).
 * Bitwise operations only, so each bit position is processed independently.
 * Reads IN0..IN3, writes OUT0..OUT3, clobbers the T registers named below
 * (including T17).
 */
sb6:
mov T00, IN0
and T00, IN3 ; T00 = IN0 & IN3
mov T01, IN1
eor T01, IN2 ; T01 = IN1 ^ IN2
mov T02, IN0
eor T02, IN3 ; T02 = IN0 ^ IN3
mov T03, T00
eor T03, T01 ; T03 = T00 ^ T01
mov T04, IN1
or T04, IN2 ; T04 = IN1 | IN2
mov OUT1, T03
com OUT1 ; OUT1 = ~T03
mov T06, T02
and T06, T04 ; T06 = T02 & T04
mov T07, IN1
and T07, OUT1 ; T07 = IN1 & OUT1
mov T08, IN0
or T08, IN2 ; T08 = IN0 | IN2
mov T09, T06
eor T09, T07 ; T09 = T06 ^ T07
mov T10, IN1
or T10, IN3 ; T10 = IN1 | IN3
mov T11, IN2
eor T11, T10 ; T11 = IN2 ^ T10
mov T12, T08
eor T12, T09 ; T12 = T08 ^ T09
mov OUT2, T12
com OUT2 ; OUT2 = ~T12
mov T14, OUT1
and T14, T02 ; T14 = OUT1 & T02
mov OUT3, T11
eor OUT3, T06 ; OUT3 = T11 ^ T06
mov T16, IN0
eor T16, IN1 ; T16 = IN0 ^ IN1
mov T17, OUT2
eor T17, T14 ; T17 = OUT2 ^ T14
mov OUT0, T16
eor OUT0, T17 ; OUT0 = T16 ^ T17
ret
/* InvS6: 15 10 1 13 5 3 6 0 4 9 14 7 2 12 8 11 */
/* depth = 5,3,8,6, Total gates=19 */
/*
 * sb6_inv: gate network for the inverse S-box listed above (per the
 * original comment). Bitwise operations only, so each bit position is
 * processed independently.
 * Reads IN0..IN3, writes OUT0..OUT3, clobbers the T registers named below.
 */
sb6_inv:
mov T00, IN0
eor T00, IN2 ; T00 = IN0 ^ IN2
mov T01, IN2
com T01 ; T01 = ~IN2
mov T02, IN1
and T02, T00 ; T02 = IN1 & T00
mov T03, IN1
or T03, T01 ; T03 = IN1 | T01
mov T04, IN3
or T04, T02 ; T04 = IN3 | T02
mov T05, IN1
eor T05, IN3 ; T05 = IN1 ^ IN3
mov T06, IN0
and T06, T03 ; T06 = IN0 & T03
mov T07, IN0
or T07, T01 ; T07 = IN0 | T01
mov T08, T06
eor T08, T04 ; T08 = T06 ^ T04
mov OUT1, T05
eor OUT1, T07 ; OUT1 = T05 ^ T07
mov OUT0, T08
com OUT0 ; OUT0 = ~T08
mov T11, IN1
and T11, OUT0 ; T11 = IN1 & OUT0
mov T12, T00
and T12, T04 ; T12 = T00 & T04
mov T13, T00
eor T13, T11 ; T13 = T00 ^ T11
mov T14, T06
eor T14, T12 ; T14 = T06 ^ T12
mov T15, IN3
or T15, T01 ; T15 = IN3 | T01
mov T16, IN0
eor T16, OUT1 ; T16 = IN0 ^ OUT1
mov OUT3, T16
eor OUT3, T14 ; OUT3 = T16 ^ T14
mov OUT2, T15
eor OUT2, T13 ; OUT2 = T15 ^ T13
ret
/* S7: 1 13 15 0 14 8 2 11 7 4 12 10 9 3 5 6 */
/* depth = 10,7,10,4, Total gates=19 */
/*
 * sb7: gate network for the S-box listed above (per the original comment).
 * Bitwise operations only, so each bit position is processed independently.
 * Reads IN0..IN3, writes OUT0..OUT3, clobbers the T registers named below.
 */
sb7:
mov T00, IN0
and T00, IN2 ; T00 = IN0 & IN2
mov T01, IN3
com T01 ; T01 = ~IN3
mov T02, IN0
and T02, T01 ; T02 = IN0 & T01
mov T03, IN1
or T03, T00 ; T03 = IN1 | T00
mov T04, IN0
and T04, IN1 ; T04 = IN0 & IN1
mov T05, IN2
eor T05, T03 ; T05 = IN2 ^ T03
mov OUT3, T02
eor OUT3, T05 ; OUT3 = T02 ^ T05
mov T07, IN2
or T07, OUT3 ; T07 = IN2 | OUT3
mov T08, IN3
or T08, T04 ; T08 = IN3 | T04
mov T09, IN0
eor T09, T07 ; T09 = IN0 ^ T07
mov T10, T03
and T10, OUT3 ; T10 = T03 & OUT3
mov OUT1, T08
eor OUT1, T09 ; OUT1 = T08 ^ T09
mov T12, IN1
eor T12, OUT1 ; T12 = IN1 ^ OUT1
mov T13, T00
eor T13, OUT1 ; T13 = T00 ^ OUT1
mov T14, IN2
eor T14, T04 ; T14 = IN2 ^ T04
mov T15, T10
or T15, T12 ; T15 = T10 | T12
mov T16, T01
or T16, T13 ; T16 = T01 | T13
mov OUT0, T14
eor OUT0, T16 ; OUT0 = T14 ^ T16
mov OUT2, IN0
eor OUT2, T15 ; OUT2 = IN0 ^ T15
ret
/* InvS7: 3 0 6 13 9 14 15 8 5 12 11 7 10 1 4 2 */
/* depth = 9,7,3,3, Total gates=18 */
/*
 * sb7_inv: gate network for the inverse S-box listed above (per the
 * original comment). Bitwise operations only, so each bit position is
 * processed independently.
 * Reads IN0..IN3, writes OUT0..OUT3, clobbers the T registers named below.
 */
sb7_inv:
mov T00, IN0
and T00, IN1 ; T00 = IN0 & IN1
mov T01, IN0
or T01, IN1 ; T01 = IN0 | IN1
mov T02, IN2
or T02, T00 ; T02 = IN2 | T00
mov T03, IN3
and T03, T01 ; T03 = IN3 & T01
mov OUT3, T02
eor OUT3, T03 ; OUT3 = T02 ^ T03
mov T05, IN1
eor T05, T03 ; T05 = IN1 ^ T03
mov T06, IN3
eor T06, OUT3 ; T06 = IN3 ^ OUT3
mov T07, T06
com T07 ; T07 = ~T06
mov T08, T05
or T08, T07 ; T08 = T05 | T07
mov T09, IN1
eor T09, IN3 ; T09 = IN1 ^ IN3
mov T10, IN0
or T10, IN3 ; T10 = IN0 | IN3
mov OUT1, IN0
eor OUT1, T08 ; OUT1 = IN0 ^ T08
mov T12, IN2
eor T12, T05 ; T12 = IN2 ^ T05
mov T13, IN2
and T13, T10 ; T13 = IN2 & T10
mov T14, IN3
or T14, OUT1 ; T14 = IN3 | OUT1
mov T15, T00
or T15, T09 ; T15 = T00 | T09
mov OUT0, T12
eor OUT0, T14 ; OUT0 = T12 ^ T14
mov OUT2, T13
eor OUT2, T15 ; OUT2 = T13 ^ T15
ret
/*
 * Dispatch table of the eight forward S-box routines, two bytes per entry.
 * sbox128 indexes it with (box & 7) * 2 and reads the entry with lpm.
 * NOTE(review): sbox128 shifts the loaded value right by one bit before
 * icall, presumably because .word stores a byte address while icall needs
 * a word address; confirm against the assembler documentation.
 */
sf_tab:
.word sb0, sb1, sb2, sb3
.word sb4, sb5, sb6, sb7
/*
 * Dispatch table of the eight inverse S-box routines, same layout as
 * sf_tab; inv_sbox128 loads its address into Z.
 */
sinvf_tab:
.word sb0_inv, sb1_inv, sb2_inv, sb3_inv
.word sb4_inv, sb5_inv, sb6_inv, sb7_inv
/*
.byte pm_lo8(sb0), pm_hi8(sb0)
.byte pm_lo8(sb1), pm_hi8(sb1)
.byte pm_lo8(sb2), pm_hi8(sb2)
.byte pm_lo8(sb3), pm_hi8(sb3)
.byte pm_lo8(sb4), pm_hi8(sb4)
.byte pm_lo8(sb5), pm_hi8(sb5)
.byte pm_lo8(sb6), pm_hi8(sb6)
.byte pm_lo8(sb7), pm_hi8(sb7)
sinvf_tab:
.byte pm_lo8(sb0_inv), pm_hi8(sb0_inv)
.byte pm_lo8(sb1_inv), pm_hi8(sb1_inv)
.byte pm_lo8(sb2_inv), pm_hi8(sb2_inv)
.byte pm_lo8(sb3_inv), pm_hi8(sb3_inv)
.byte pm_lo8(sb4_inv), pm_hi8(sb4_inv)
.byte pm_lo8(sb5_inv), pm_hi8(sb5_inv)
.byte pm_lo8(sb6_inv), pm_hi8(sb6_inv)
.byte pm_lo8(sb7_inv), pm_hi8(sb7_inv)
*/
/*
void sbox128(void * w, uint8_t box){
uint8_t i, buffer[16];
box &= 0x7;
sb_fpt fp;
fp = (sb_fpt)pgm_read_word(&(sf_tab[box]));
for(i=0; i<4; ++i){
fp(buffer+i, (uint8_t*)w+i);
}
memcpy(w, buffer, 16);
}
*/
.global sbox128
/*
 * void sbox128(void *w, uint8_t box)
 *
 * Applies forward S-box number (box & 7) in place to the 16-byte block at w.
 *
 * In:     r25:r24 = w, r22 = box
 * Uses:   r1 as zero for the carry add (not cleared here, see the
 *         commented-out clr in the original)
 * Saves:  r28, r29 and r2..r17 on the stack and restores them before ret
 * Clobb:  r0, r18..r27, r30, r31, flags
 *
 * The local label "1" is a second entry point: inv_sbox128 loads Z with the
 * inverse table and jumps here, so everything after it is shared.
 */
sbox128:
	ldi r30, lo8(sf_tab)
	ldi r31, hi8(sf_tab)
1:
	/* keep the caller's Y and move the block pointer into it */
	push r28
	push r29
	movw r28, r24
	/* Z += 2 * (box & 7): address of the selected table entry */
	andi r22, 0x07
	lsl r22
	add r30, r22
	adc r31, r1
	/* fetch the 16-bit entry from flash and halve it for icall */
	lpm r26, Z+
	lpm r27, Z
	lsr r27
	ror r26
	movw r30, r26
	push_range 2, 17
	/*
	 * One pass per byte column 0..3: load bytes at offsets col, col+4,
	 * col+8 and col+12, run the selected S-box, store the four results back
	 * to the same offsets. Each pass reads before it writes and touches only
	 * its own column, so no bounce buffer is needed.
	 */
.irp col, 0, 1, 2, 3
	ldd IN0, Y+0+\col
	ldd IN1, Y+4+\col
	ldd IN2, Y+8+\col
	ldd IN3, Y+12+\col
	icall
	std Y+0+\col, OUT0
	std Y+4+\col, OUT1
	std Y+8+\col, OUT2
	std Y+12+\col, OUT3
.endr
	pop_range 2, 17
	pop r29
	pop r28
	ret
.global inv_sbox128
/*
 * inv_sbox128: same register interface as sbox128, but selects from the
 * inverse table. It only points Z at sinvf_tab and then jumps backwards to
 * the nearest preceding local label "1", which lies inside sbox128, so the
 * lookup, the per-byte loop and the final ret are shared with sbox128.
 */
inv_sbox128:
ldi r30, lo8(sinvf_tab)
ldi r31, hi8(sinvf_tab)
rjmp 1b
/*
void inv_sbox128(void * w, uint8_t box){
uint8_t i, buffer[16];
box &= 0x7;
sb_fpt fp;
fp = (sb_fpt)pgm_read_word(&(sinvf_tab[box]));
for(i=0; i<4; ++i){
fp(buffer+i, (uint8_t*)w+i);
}
memcpy(w, buffer, 16);
}
*/

View File

@ -35,406 +35,406 @@
/* depth = 5,7,4,2, Total gates=18 */
static
void sb0(uint8_t* out, const uint8_t* in, uint8_t i){
void sb0(uint8_t* out, const uint8_t* in){
// (a,b,c,d,w,x,y,z)
uint8_t t01, t02, t03, t05, t06, t07, t08, t09, t11, t12, t13, t14, t15, t17;
t01 = in[4*1+i] ^ in[4*2+i];
t02 = in[4*0+i] | in[4*3+i];
t03 = in[4*0+i] ^ in[4*1+i];
out[4*3+i] = t02 ^ t01;
t05 = in[4*2+i] | out[4*3+i];
t06 = in[4*0+i] ^ in[4*3+i];
t07 = in[4*1+i] | in[4*2+i];
t08 = in[4*3+i] & t05;
t01 = in[4*1] ^ in[4*2];
t02 = in[4*0] | in[4*3];
t03 = in[4*0] ^ in[4*1];
out[4*3] = t02 ^ t01;
t05 = in[4*2] | out[4*3];
t06 = in[4*0] ^ in[4*3];
t07 = in[4*1] | in[4*2];
t08 = in[4*3] & t05;
t09 = t03 & t07;
out[4*2+i] = t09 ^ t08;
t11 = t09 & out[4*2+i];
t12 = in[4*2+i] ^ in[4*3+i];
out[4*2] = t09 ^ t08;
t11 = t09 & out[4*2];
t12 = in[4*2] ^ in[4*3];
t13 = t07 ^ t11;
t14 = in[4*1+i] & t06;
t14 = in[4*1] & t06;
t15 = t06 ^ t13;
out[4*0+i] = ~ t15;
t17 = out[4*0+i] ^ t14;
out[4*1+i] = t12 ^ t17;
out[4*0] = ~ t15;
t17 = out[4*0] ^ t14;
out[4*1] = t12 ^ t17;
}
/* InvS0: 13 3 11 0 10 6 5 12 1 14 4 7 15 9 8 2 */
/* depth = 8,4,3,6, Total gates=19 */
static
void sb0_inv(uint8_t* out, const uint8_t* in, uint8_t i){
void sb0_inv(uint8_t* out, const uint8_t* in){
uint8_t t02, t03, t04, t05, t06, t08, t09, t10, t12, t13, t14, t15, t17, t18, t01;
t01 = in[4*2+i] ^ in[4*3+i];
t02 = in[4*0+i] | in[4*1+i];
t03 = in[4*1+i] | in[4*2+i];
t04 = in[4*2+i] & t01;
t01 = in[4*2] ^ in[4*3];
t02 = in[4*0] | in[4*1];
t03 = in[4*1] | in[4*2];
t04 = in[4*2] & t01;
t05 = t02 ^ t01;
t06 = in[4*0+i] | t04;
out[4*2+i] = ~ t05;
t08 = in[4*1+i] ^ in[4*3+i];
t06 = in[4*0] | t04;
out[4*2] = ~ t05;
t08 = in[4*1] ^ in[4*3];
t09 = t03 & t08;
t10 = in[4*3+i] | out[4*2+i];
out[4*1+i] = t09 ^ t06;
t12 = in[4*0+i] | t05;
t13 = out[4*1+i] ^ t12;
t10 = in[4*3] | out[4*2];
out[4*1] = t09 ^ t06;
t12 = in[4*0] | t05;
t13 = out[4*1] ^ t12;
t14 = t03 ^ t10;
t15 = in[4*0+i] ^ in[4*2+i];
out[4*3+i] = t14 ^ t13;
t15 = in[4*0] ^ in[4*2];
out[4*3] = t14 ^ t13;
t17 = t05 & t13;
t18 = t14 | t17;
out[4*0+i] = t15 ^ t18;
out[4*0] = t15 ^ t18;
}
/* S1: 15 12 2 7 9 0 5 10 1 11 14 8 6 13 3 4 */
/* depth = 10,7,3,5, Total gates=18 */
static
void sb1(uint8_t* out, const uint8_t* in, uint8_t i){
void sb1(uint8_t* out, const uint8_t* in){
uint8_t t02, t03, t04, t05, t06, t07, t08, t10, t11, t12, t13, t16, t17, t01;
t01 = in[4*0+i] | in[4*3+i];
t02 = in[4*2+i] ^ in[4*3+i];
t03 = ~ in[4*1+i];
t04 = in[4*0+i] ^ in[4*2+i];
t05 = in[4*0+i] | t03;
t06 = in[4*3+i] & t04;
t01 = in[4*0] | in[4*3];
t02 = in[4*2] ^ in[4*3];
t03 = ~ in[4*1];
t04 = in[4*0] ^ in[4*2];
t05 = in[4*0] | t03;
t06 = in[4*3] & t04;
t07 = t01 & t02;
t08 = in[4*1+i] | t06;
out[4*2+i] = t02 ^ t05;
t08 = in[4*1] | t06;
out[4*2] = t02 ^ t05;
t10 = t07 ^ t08;
t11 = t01 ^ t10;
t12 = out[4*2+i] ^ t11;
t13 = in[4*1+i] & in[4*3+i];
out[4*3+i] = ~ t10;
out[4*1+i] = t13 ^ t12;
t16 = t10 | out[4*1+i];
t12 = out[4*2] ^ t11;
t13 = in[4*1] & in[4*3];
out[4*3] = ~ t10;
out[4*1] = t13 ^ t12;
t16 = t10 | out[4*1];
t17 = t05 & t16;
out[4*0+i] = in[4*2+i] ^ t17;
out[4*0] = in[4*2] ^ t17;
}
/* InvS1: 5 8 2 14 15 6 12 3 11 4 7 9 1 13 10 0 */
/* depth = 7,4,5,3, Total gates=18 */
static void sb1_inv(uint8_t* out, const uint8_t* in, uint8_t i){
static void sb1_inv(uint8_t* out, const uint8_t* in){
uint8_t t02, t03, t04, t05, t06, t07, t08, t09, t10, t11, t14, t15, t17, t01;
t01 = in[4*0+i] ^ in[4*1+i];
t02 = in[4*1+i] | in[4*3+i];
t03 = in[4*0+i] & in[4*2+i];
t04 = in[4*2+i] ^ t02;
t05 = in[4*0+i] | t04;
t01 = in[4*0] ^ in[4*1];
t02 = in[4*1] | in[4*3];
t03 = in[4*0] & in[4*2];
t04 = in[4*2] ^ t02;
t05 = in[4*0] | t04;
t06 = t01 & t05;
t07 = in[4*3+i] | t03;
t08 = in[4*1+i] ^ t06;
t07 = in[4*3] | t03;
t08 = in[4*1] ^ t06;
t09 = t07 ^ t06;
t10 = t04 | t03;
t11 = in[4*3+i] & t08;
out[4*2+i] = ~ t09;
out[4*1+i] = t10 ^ t11;
t14 = in[4*0+i] | out[4*2+i];
t15 = t06 ^ out[4*1+i];
out[4*3+i] = t01 ^ t04;
t17 = in[4*2+i] ^ t15;
out[4*0+i] = t14 ^ t17;
t11 = in[4*3] & t08;
out[4*2] = ~ t09;
out[4*1] = t10 ^ t11;
t14 = in[4*0] | out[4*2];
t15 = t06 ^ out[4*1];
out[4*3] = t01 ^ t04;
t17 = in[4*2] ^ t15;
out[4*0] = t14 ^ t17;
}
/* S2: 8 6 7 9 3 12 10 15 13 1 14 4 0 11 5 2 */
/* depth = 3,8,11,7, Total gates=16 */
static void sb2(uint8_t* out, const uint8_t* in, uint8_t i){
static void sb2(uint8_t* out, const uint8_t* in){
uint8_t t02, t03, t05, t06, t07, t08, t09, t10, t12, t13, t14, t01;
t01 = in[4*0+i] | in[4*2+i];
t02 = in[4*0+i] ^ in[4*1+i];
t03 = in[4*3+i] ^ t01;
out[4*0+i] = t02 ^ t03;
t05 = in[4*2+i] ^ out[4*0+i];
t06 = in[4*1+i] ^ t05;
t07 = in[4*1+i] | t05;
t01 = in[4*0] | in[4*2];
t02 = in[4*0] ^ in[4*1];
t03 = in[4*3] ^ t01;
out[4*0] = t02 ^ t03;
t05 = in[4*2] ^ out[4*0];
t06 = in[4*1] ^ t05;
t07 = in[4*1] | t05;
t08 = t01 & t06;
t09 = t03 ^ t07;
t10 = t02 | t09;
out[4*1+i] = t10 ^ t08;
t12 = in[4*0+i] | in[4*3+i];
t13 = t09 ^ out[4*1+i];
t14 = in[4*1+i] ^ t13;
out[4*3+i] = ~ t09;
out[4*2+i] = t12 ^ t14;
out[4*1] = t10 ^ t08;
t12 = in[4*0] | in[4*3];
t13 = t09 ^ out[4*1];
t14 = in[4*1] ^ t13;
out[4*3] = ~ t09;
out[4*2] = t12 ^ t14;
}
/* InvS2: 12 9 15 4 11 14 1 2 0 3 6 13 5 8 10 7 */
/* depth = 3,6,8,3, Total gates=18 */
static void sb2_inv(uint8_t* out, const uint8_t* in, uint8_t i){
static void sb2_inv(uint8_t* out, const uint8_t* in){
uint8_t t02, t03, t04, t06, t07, t08, t09, t10, t11, t12, t15, t16, t17, t01;
t01 = in[4*0+i] ^ in[4*3+i];
t02 = in[4*2+i] ^ in[4*3+i];
t03 = in[4*0+i] & in[4*2+i];
t04 = in[4*1+i] | t02;
out[4*0+i] = t01 ^ t04;
t06 = in[4*0+i] | in[4*2+i];
t07 = in[4*3+i] | out[4*0+i];
t08 = ~ in[4*3+i];
t09 = in[4*1+i] & t06;
t01 = in[4*0] ^ in[4*3];
t02 = in[4*2] ^ in[4*3];
t03 = in[4*0] & in[4*2];
t04 = in[4*1] | t02;
out[4*0] = t01 ^ t04;
t06 = in[4*0] | in[4*2];
t07 = in[4*3] | out[4*0];
t08 = ~ in[4*3];
t09 = in[4*1] & t06;
t10 = t08 | t03;
t11 = in[4*1+i] & t07;
t11 = in[4*1] & t07;
t12 = t06 & t02;
out[4*3+i] = t09 ^ t10;
out[4*1+i] = t12 ^ t11;
t15 = in[4*2+i] & out[4*3+i];
t16 = out[4*0+i] ^ out[4*1+i];
out[4*3] = t09 ^ t10;
out[4*1] = t12 ^ t11;
t15 = in[4*2] & out[4*3];
t16 = out[4*0] ^ out[4*1];
t17 = t10 ^ t15;
out[4*2+i] = t16 ^ t17;
out[4*2] = t16 ^ t17;
}
/* S3: 0 15 11 8 12 9 6 3 13 1 2 4 10 7 5 14 */
/* depth = 8,3,5,5, Total gates=18 */
static void sb3(uint8_t* out, const uint8_t* in, uint8_t i){
static void sb3(uint8_t* out, const uint8_t* in){
uint8_t t02, t03, t04, t05, t06, t07, t08, t09, t10, t11, t13, t14, t15, t01;
t01 = in[4*0+i] ^ in[4*2+i];
t02 = in[4*0+i] | in[4*3+i];
t03 = in[4*0+i] & in[4*3+i];
t01 = in[4*0] ^ in[4*2];
t02 = in[4*0] | in[4*3];
t03 = in[4*0] & in[4*3];
t04 = t01 & t02;
t05 = in[4*1+i] | t03;
t06 = in[4*0+i] & in[4*1+i];
t07 = in[4*3+i] ^ t04;
t08 = in[4*2+i] | t06;
t09 = in[4*1+i] ^ t07;
t10 = in[4*3+i] & t05;
t05 = in[4*1] | t03;
t06 = in[4*0] & in[4*1];
t07 = in[4*3] ^ t04;
t08 = in[4*2] | t06;
t09 = in[4*1] ^ t07;
t10 = in[4*3] & t05;
t11 = t02 ^ t10;
out[4*3+i] = t08 ^ t09;
t13 = in[4*3+i] | out[4*3+i];
t14 = in[4*0+i] | t07;
t15 = in[4*1+i] & t13;
out[4*2+i] = t08 ^ t11;
out[4*0+i] = t14 ^ t15;
out[4*1+i] = t05 ^ t04;
out[4*3] = t08 ^ t09;
t13 = in[4*3] | out[4*3];
t14 = in[4*0] | t07;
t15 = in[4*1] & t13;
out[4*2] = t08 ^ t11;
out[4*0] = t14 ^ t15;
out[4*1] = t05 ^ t04;
}
/* InvS3: 0 9 10 7 11 14 6 13 3 5 12 2 4 8 15 1 */
/* depth = 3,6,4,4, Total gates=17 */
static void sb3_inv(uint8_t* out, const uint8_t* in, uint8_t i){
static void sb3_inv(uint8_t* out, const uint8_t* in){
uint8_t t02, t03, t04, t05, t06, t07, t09, t11, t12, t13, t14, t16, t01;
t01 = in[4*2+i] | in[4*3+i];
t02 = in[4*0+i] | in[4*3+i];
t03 = in[4*2+i] ^ t02;
t04 = in[4*1+i] ^ t02;
t05 = in[4*0+i] ^ in[4*3+i];
t01 = in[4*2] | in[4*3];
t02 = in[4*0] | in[4*3];
t03 = in[4*2] ^ t02;
t04 = in[4*1] ^ t02;
t05 = in[4*0] ^ in[4*3];
t06 = t04 & t03;
t07 = in[4*1+i] & t01;
out[4*2+i] = t05 ^ t06;
t09 = in[4*0+i] ^ t03;
out[4*0+i] = t07 ^ t03;
t11 = out[4*0+i] | t05;
t07 = in[4*1] & t01;
out[4*2] = t05 ^ t06;
t09 = in[4*0] ^ t03;
out[4*0] = t07 ^ t03;
t11 = out[4*0] | t05;
t12 = t09 & t11;
t13 = in[4*0+i] & out[4*2+i];
t13 = in[4*0] & out[4*2];
t14 = t01 ^ t05;
out[4*1+i] = in[4*1+i] ^ t12;
t16 = in[4*1+i] | t13;
out[4*3+i] = t14 ^ t16;
out[4*1] = in[4*1] ^ t12;
t16 = in[4*1] | t13;
out[4*3] = t14 ^ t16;
}
/* S4: 1 15 8 3 12 0 11 6 2 5 4 10 9 14 7 13 */
/* depth = 6,7,5,3, Total gates=19 */
static void sb4(uint8_t* out, const uint8_t* in, uint8_t i){
static void sb4(uint8_t* out, const uint8_t* in){
uint8_t t02, t03, t04, t05, t06, t08, t09, t10, t11, t12, t13, t14, t15, t16, t01;
t01 = in[4*0+i] | in[4*1+i];
t02 = in[4*1+i] | in[4*2+i];
t03 = in[4*0+i] ^ t02;
t04 = in[4*1+i] ^ in[4*3+i];
t05 = in[4*3+i] | t03;
t06 = in[4*3+i] & t01;
out[4*3+i] = t03 ^ t06;
t08 = out[4*3+i] & t04;
t01 = in[4*0] | in[4*1];
t02 = in[4*1] | in[4*2];
t03 = in[4*0] ^ t02;
t04 = in[4*1] ^ in[4*3];
t05 = in[4*3] | t03;
t06 = in[4*3] & t01;
out[4*3] = t03 ^ t06;
t08 = out[4*3] & t04;
t09 = t04 & t05;
t10 = in[4*2+i] ^ t06;
t11 = in[4*1+i] & in[4*2+i];
t10 = in[4*2] ^ t06;
t11 = in[4*1] & in[4*2];
t12 = t04 ^ t08;
t13 = t11 | t03;
t14 = t10 ^ t09;
t15 = in[4*0+i] & t05;
t15 = in[4*0] & t05;
t16 = t11 | t12;
out[4*2+i] = t13 ^ t08;
out[4*1+i] = t15 ^ t16;
out[4*0+i] = ~ t14;
out[4*2] = t13 ^ t08;
out[4*1] = t15 ^ t16;
out[4*0] = ~ t14;
}
/* InvS4: 5 0 8 3 10 9 7 14 2 12 11 6 4 15 13 1 */
/* depth = 6,4,7,3, Total gates=17 */
static void sb4_inv(uint8_t* out, const uint8_t* in, uint8_t i){
static void sb4_inv(uint8_t* out, const uint8_t* in){
uint8_t t02, t03, t04, t05, t06, t07, t09, t10, t11, t12, t13, t15, t01;
t01 = in[4*1+i] | in[4*3+i];
t02 = in[4*2+i] | in[4*3+i];
t03 = in[4*0+i] & t01;
t04 = in[4*1+i] ^ t02;
t05 = in[4*2+i] ^ in[4*3+i];
t01 = in[4*1] | in[4*3];
t02 = in[4*2] | in[4*3];
t03 = in[4*0] & t01;
t04 = in[4*1] ^ t02;
t05 = in[4*2] ^ in[4*3];
t06 = ~ t03;
t07 = in[4*0+i] & t04;
out[4*1+i] = t05 ^ t07;
t09 = out[4*1+i] | t06;
t10 = in[4*0+i] ^ t07;
t07 = in[4*0] & t04;
out[4*1] = t05 ^ t07;
t09 = out[4*1] | t06;
t10 = in[4*0] ^ t07;
t11 = t01 ^ t09;
t12 = in[4*3+i] ^ t04;
t13 = in[4*2+i] | t10;
out[4*3+i] = t03 ^ t12;
t15 = in[4*0+i] ^ t04;
out[4*2+i] = t11 ^ t13;
out[4*0+i] = t15 ^ t09;
t12 = in[4*3] ^ t04;
t13 = in[4*2] | t10;
out[4*3] = t03 ^ t12;
t15 = in[4*0] ^ t04;
out[4*2] = t11 ^ t13;
out[4*0] = t15 ^ t09;
}
/* S5: 15 5 2 11 4 10 9 12 0 3 14 8 13 6 7 1 */
/* depth = 4,6,8,6, Total gates=17 */
static void sb5(uint8_t* out, const uint8_t* in, uint8_t i){
static void sb5(uint8_t* out, const uint8_t* in){
uint8_t t02, t03, t04, t05, t07, t08, t09, t10, t11, t12, t13, t14, t01;
t01 = in[4*1+i] ^ in[4*3+i];
t02 = in[4*1+i] | in[4*3+i];
t03 = in[4*0+i] & t01;
t04 = in[4*2+i] ^ t02;
t01 = in[4*1] ^ in[4*3];
t02 = in[4*1] | in[4*3];
t03 = in[4*0] & t01;
t04 = in[4*2] ^ t02;
t05 = t03 ^ t04;
out[4*0+i] = ~ t05;
t07 = in[4*0+i] ^ t01;
t08 = in[4*3+i] | out[4*0+i];
t09 = in[4*1+i] | t05;
t10 = in[4*3+i] ^ t08;
t11 = in[4*1+i] | t07;
t12 = t03 | out[4*0+i];
out[4*0] = ~ t05;
t07 = in[4*0] ^ t01;
t08 = in[4*3] | out[4*0];
t09 = in[4*1] | t05;
t10 = in[4*3] ^ t08;
t11 = in[4*1] | t07;
t12 = t03 | out[4*0];
t13 = t07 | t10;
t14 = t01 ^ t11;
out[4*2+i] = t09 ^ t13;
out[4*1+i] = t07 ^ t08;
out[4*3+i] = t12 ^ t14;
out[4*2] = t09 ^ t13;
out[4*1] = t07 ^ t08;
out[4*3] = t12 ^ t14;
}
/* InvS5: 8 15 2 9 4 1 13 14 11 6 5 3 7 12 10 0 */
/* depth = 4,6,9,7, Total gates=17 */
static void sb5_inv(uint8_t* out, const uint8_t* in, uint8_t i){
static void sb5_inv(uint8_t* out, const uint8_t* in){
uint8_t t02, t03, t04, t05, t07, t08, t09, t10, t12, t13, t15, t16, t01;
t01 = in[4*0+i] & in[4*3+i];
t02 = in[4*2+i] ^ t01;
t03 = in[4*0+i] ^ in[4*3+i];
t04 = in[4*1+i] & t02;
t05 = in[4*0+i] & in[4*2+i];
out[4*0+i] = t03 ^ t04;
t07 = in[4*0+i] & out[4*0+i];
t08 = t01 ^ out[4*0+i];
t09 = in[4*1+i] | t05;
t10 = ~ in[4*1+i];
out[4*1+i] = t08 ^ t09;
t01 = in[4*0] & in[4*3];
t02 = in[4*2] ^ t01;
t03 = in[4*0] ^ in[4*3];
t04 = in[4*1] & t02;
t05 = in[4*0] & in[4*2];
out[4*0] = t03 ^ t04;
t07 = in[4*0] & out[4*0];
t08 = t01 ^ out[4*0];
t09 = in[4*1] | t05;
t10 = ~ in[4*1];
out[4*1] = t08 ^ t09;
t12 = t10 | t07;
t13 = out[4*0+i] | out[4*1+i];
out[4*3+i] = t02 ^ t12;
t13 = out[4*0] | out[4*1];
out[4*3] = t02 ^ t12;
t15 = t02 ^ t13;
t16 = in[4*1+i] ^ in[4*3+i];
out[4*2+i] = t16 ^ t15;
t16 = in[4*1] ^ in[4*3];
out[4*2] = t16 ^ t15;
}
/* S6: 7 2 12 5 8 4 6 11 14 9 1 15 13 3 10 0 */
/* depth = 8,3,6,3, Total gates=19 */
static void sb6(uint8_t* out, const uint8_t* in, uint8_t i){
static void sb6(uint8_t* out, const uint8_t* in){
uint8_t t02, t03, t04, t05, t07, t08, t09, t10, t11, t12, t13, t15, t17, t18, t01;
t01 = in[4*0+i] & in[4*3+i];
t02 = in[4*1+i] ^ in[4*2+i];
t03 = in[4*0+i] ^ in[4*3+i];
t01 = in[4*0] & in[4*3];
t02 = in[4*1] ^ in[4*2];
t03 = in[4*0] ^ in[4*3];
t04 = t01 ^ t02;
t05 = in[4*1+i] | in[4*2+i];
out[4*1+i] = ~ t04;
t05 = in[4*1] | in[4*2];
out[4*1] = ~ t04;
t07 = t03 & t05;
t08 = in[4*1+i] & out[4*1+i];
t09 = in[4*0+i] | in[4*2+i];
t08 = in[4*1] & out[4*1];
t09 = in[4*0] | in[4*2];
t10 = t07 ^ t08;
t11 = in[4*1+i] | in[4*3+i];
t12 = in[4*2+i] ^ t11;
t11 = in[4*1] | in[4*3];
t12 = in[4*2] ^ t11;
t13 = t09 ^ t10;
out[4*2+i] = ~ t13;
t15 = out[4*1+i] & t03;
out[4*3+i] = t12 ^ t07;
t17 = in[4*0+i] ^ in[4*1+i];
t18 = out[4*2+i] ^ t15;
out[4*0+i] = t17 ^ t18;
out[4*2] = ~ t13;
t15 = out[4*1] & t03;
out[4*3] = t12 ^ t07;
t17 = in[4*0] ^ in[4*1];
t18 = out[4*2] ^ t15;
out[4*0] = t17 ^ t18;
}
/* InvS6: 15 10 1 13 5 3 6 0 4 9 14 7 2 12 8 11 */
/* depth = 5,3,8,6, Total gates=19 */
static void sb6_inv(uint8_t* out, const uint8_t* in, uint8_t i){
static void sb6_inv(uint8_t* out, const uint8_t* in){
uint8_t t02, t03, t04, t05, t06, t07, t08, t09, t12, t13, t14, t15, t16, t17, t01;
t01 = in[4*0+i] ^ in[4*2+i];
t02 = ~ in[4*2+i];
t03 = in[4*1+i] & t01;
t04 = in[4*1+i] | t02;
t05 = in[4*3+i] | t03;
t06 = in[4*1+i] ^ in[4*3+i];
t07 = in[4*0+i] & t04;
t08 = in[4*0+i] | t02;
t01 = in[4*0] ^ in[4*2];
t02 = ~ in[4*2];
t03 = in[4*1] & t01;
t04 = in[4*1] | t02;
t05 = in[4*3] | t03;
t06 = in[4*1] ^ in[4*3];
t07 = in[4*0] & t04;
t08 = in[4*0] | t02;
t09 = t07 ^ t05;
out[4*1+i] = t06 ^ t08;
out[4*0+i] = ~ t09;
t12 = in[4*1+i] & out[4*0+i];
out[4*1] = t06 ^ t08;
out[4*0] = ~ t09;
t12 = in[4*1] & out[4*0];
t13 = t01 & t05;
t14 = t01 ^ t12;
t15 = t07 ^ t13;
t16 = in[4*3+i] | t02;
t17 = in[4*0+i] ^ out[4*1+i];
out[4*3+i] = t17 ^ t15;
out[4*2+i] = t16 ^ t14;
t16 = in[4*3] | t02;
t17 = in[4*0] ^ out[4*1];
out[4*3] = t17 ^ t15;
out[4*2] = t16 ^ t14;
}
/* S7: 1 13 15 0 14 8 2 11 7 4 12 10 9 3 5 6 */
/* depth = 10,7,10,4, Total gates=19 */
static void sb7(uint8_t* out, const uint8_t* in, uint8_t i){
static void sb7(uint8_t* out, const uint8_t* in){
uint8_t t02, t03, t04, t05, t06, t08, t09, t10, t11, t13, t14, t15, t16, t17, t01;
t01 = in[4*0+i] & in[4*2+i];
t02 = ~ in[4*3+i];
t03 = in[4*0+i] & t02;
t04 = in[4*1+i] | t01;
t05 = in[4*0+i] & in[4*1+i];
t06 = in[4*2+i] ^ t04;
out[4*3+i] = t03 ^ t06;
t08 = in[4*2+i] | out[4*3+i];
t09 = in[4*3+i] | t05;
t10 = in[4*0+i] ^ t08;
t11 = t04 & out[4*3+i];
out[4*1+i] = t09 ^ t10;
t13 = in[4*1+i] ^ out[4*1+i];
t14 = t01 ^ out[4*1+i];
t15 = in[4*2+i] ^ t05;
t01 = in[4*0] & in[4*2];
t02 = ~ in[4*3];
t03 = in[4*0] & t02;
t04 = in[4*1] | t01;
t05 = in[4*0] & in[4*1];
t06 = in[4*2] ^ t04;
out[4*3] = t03 ^ t06;
t08 = in[4*2] | out[4*3];
t09 = in[4*3] | t05;
t10 = in[4*0] ^ t08;
t11 = t04 & out[4*3];
out[4*1] = t09 ^ t10;
t13 = in[4*1] ^ out[4*1];
t14 = t01 ^ out[4*1];
t15 = in[4*2] ^ t05;
t16 = t11 | t13;
t17 = t02 | t14;
out[4*0+i] = t15 ^ t17;
out[4*2+i] = in[4*0+i] ^ t16;
out[4*0] = t15 ^ t17;
out[4*2] = in[4*0] ^ t16;
}
/* InvS7: 3 0 6 13 9 14 15 8 5 12 11 7 10 1 4 2 */
/* depth = 9,7,3,3, Total gates=18 */
static void sb7_inv(uint8_t* out, const uint8_t* in, uint8_t i){
static void sb7_inv(uint8_t* out, const uint8_t* in){
uint8_t t02, t03, t04, t06, t07, t08, t09, t10, t11, t13, t14, t15, t16, t01;
t01 = in[4*0+i] & in[4*1+i];
t02 = in[4*0+i] | in[4*1+i];
t03 = in[4*2+i] | t01;
t04 = in[4*3+i] & t02;
out[4*3+i] = t03 ^ t04;
t06 = in[4*1+i] ^ t04;
t07 = in[4*3+i] ^ out[4*3+i];
t01 = in[4*0] & in[4*1];
t02 = in[4*0] | in[4*1];
t03 = in[4*2] | t01;
t04 = in[4*3] & t02;
out[4*3] = t03 ^ t04;
t06 = in[4*1] ^ t04;
t07 = in[4*3] ^ out[4*3];
t08 = ~ t07;
t09 = t06 | t08;
t10 = in[4*1+i] ^ in[4*3+i];
t11 = in[4*0+i] | in[4*3+i];
out[4*1+i] = in[4*0+i] ^ t09;
t13 = in[4*2+i] ^ t06;
t14 = in[4*2+i] & t11;
t15 = in[4*3+i] | out[4*1+i];
t10 = in[4*1] ^ in[4*3];
t11 = in[4*0] | in[4*3];
out[4*1] = in[4*0] ^ t09;
t13 = in[4*2] ^ t06;
t14 = in[4*2] & t11;
t15 = in[4*3] | out[4*1];
t16 = t01 | t10;
out[4*0+i] = t13 ^ t15;
out[4*2+i] = t14 ^ t16;
out[4*0] = t13 ^ t15;
out[4*2] = t14 ^ t16;
}
typedef void(*sb_fpt)(uint8_t*, const uint8_t*, uint8_t i);
typedef void(*sb_fpt)(uint8_t*, const uint8_t*);
sb_fpt sf_tab[] PROGMEM = {
sb0, sb1, sb2, sb3,
@ -453,7 +453,7 @@ void sbox128(void * w, uint8_t box){
sb_fpt fp;
fp = (sb_fpt)pgm_read_word(&(sf_tab[box]));
for(i=0; i<4; ++i){
fp(buffer, (uint8_t*)w, i);
fp(buffer+i, (uint8_t*)w+i);
}
memcpy(w, buffer, 16);
}
@ -465,7 +465,7 @@ void inv_sbox128(void * w, uint8_t box){
sb_fpt fp;
fp = (sb_fpt)pgm_read_word(&(sinvf_tab[box]));
for(i=0; i<4; ++i){
fp(buffer, (uint8_t*)w, i);
fp(buffer+i, (uint8_t*)w+i);
}
memcpy(w, buffer, 16);
}