# autogen f1 function for BMW
=begin
This file is part of the ARM-Crypto-Lib.
Copyright (C) 2006-2010 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
=end
# C prologue of the generated output: a license comment followed by the
# opening of bmw_small_f1() and the declarations of its local variables.
# The function body is left open here; the text is printed verbatim by the
# script before the generated statements.
# NOTE(review): the heredoc opener and the beginning of the C comment were
# lost in this copy of the file; the comment text below is reconstructed from
# the license notice at the top of this script -- confirm against upstream.
header = <<EOF
/*
    This file is part of the ARM-Crypto-Lib.
    Copyright (C) 2006-2010 Daniel Otte (daniel.otte@rub.de)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
static inline
void bmw_small_f1(uint32_t* q, const uint32_t* m, const uint32_t* h){
uint32_t even, odd;
uint32x4_t qq16, qq20, qq24, qq28;
uint32x4_t qm0, qm1, qm2;
uint32x4_t qk={0x55555550UL, 0x5aaaaaa5UL, 0x5ffffffaUL, 0x6555554fUL};
uint32x4_t qkadd={0x15555554UL, 0x15555554UL, 0x15555554UL, 0x15555554UL};
uint32x2_t dtmp0;
uint32x4x2_t q2tmp0, q2tmp1;
EOF
footer = <>1)|q[%2d]);\n", i+16, (i%2==0)?"even":"odd ", i+14, i+14)
s += sprintf(" qm0 = veorq_u32(vshlq_u32(q2tmp0.val[1],(int32x4_t){ 3, 7, 13, 16}),\n" \
" vshlq_u32(q2tmp0.val[1],(int32x4_t){-29,-25,-19,-16}));\n")
s += sprintf(" qm1 = veorq_u32(vshlq_u32(q2tmp1.val[1],(int32x4_t){ 19, 23, 27, 0}),\n" \
" vshlq_u32(q2tmp1.val[1],(int32x4_t){-13, -9, -5, -2}));\n")
s += sprintf(" qm1 = vaddq_u32(qm1, qm0);\n")
s += sprintf(" dtmp0 = vadd_u32(vget_high_u32(qm1), vget_low_u32(qm1));\n")
s += sprintf(" q[%2d] += vget_lane_u32(dtmp0, 0) + vget_lane_u32(dtmp0, 1);\n", i+16)
return s
end
# Driver: write the generated C source to stdout in order --
#   1. the fixed prologue,
#   2. gen_addElement for indices 16, 20, 24 and 28,
#   3. gen_expand_1 for indices 0 and 1,
#   4. gen_expand_2 (second argument fixed at 2) for indices 2 through 15,
#   5. the fixed epilogue.
puts header
16.step(28, 4) do |base|
  puts gen_addElement(base)
end
0.upto(1) do |idx|
  puts gen_expand_1(idx)
end
2.upto(15) do |idx|
  puts gen_expand_2(idx, 2)
end
puts footer