small update
This commit is contained in:
parent
c079867a33
commit
33a2839546
|
@ -118,7 +118,8 @@ def print_reg_map(map, regs, length, indent=0)
|
||||||
if map[x][r]==nil
|
if map[x][r]==nil
|
||||||
print ' '
|
print ' '
|
||||||
else
|
else
|
||||||
print map[x][r]
|
print map[x][r] if map[x][r].class == String
|
||||||
|
print map[x][r].to_s(36) if map[x][r].class == Fixnum
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
print "\n"
|
print "\n"
|
||||||
|
@ -195,7 +196,8 @@ def bits_set_simple(x)
|
||||||
end
|
end
|
||||||
|
|
||||||
def init_bitcount_lut
|
def init_bitcount_lut
|
||||||
(0..(2**8-1)).each {|x| $bitcount_lut[x] = bits_set_simple(x)}
|
(0..(2**4-1)).each {|x| $bitcount_lut[x] = bits_set_simple(x)}
|
||||||
|
((2**4)..(2**8-1)).each {|x| $bitcount_lut[x] = bits_set(x, 4)}
|
||||||
((2**8)..(2**16-1)).each {|x| $bitcount_lut[x] = bits_set(x, 8)}
|
((2**8)..(2**16-1)).each {|x| $bitcount_lut[x] = bits_set(x, 8)}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -221,7 +223,7 @@ def decode_word(word)
|
||||||
return r
|
return r
|
||||||
end
|
end
|
||||||
|
|
||||||
def generate_c_code(fout, func, optimizations=[], reg_map=[])
|
def generate_c_code(fout, func, optimizations=[], reg_map=[], use_map=[])
|
||||||
out_interval = 3
|
out_interval = 3
|
||||||
out_modulus = 16
|
out_modulus = 16
|
||||||
out_idx = 0
|
out_idx = 0
|
||||||
|
@ -259,10 +261,10 @@ def generate_c_code(fout, func, optimizations=[], reg_map=[])
|
||||||
opt_table[step] << [sign_a, sign_b, sign_out, reg_name, reg_a, reg_b, set, free]
|
opt_table[step] << [sign_a, sign_b, sign_out, reg_name, reg_a, reg_b, set, free]
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
puts 'DBG: '+opt_table.inspect
|
|
||||||
(0..(func.length-1)).each do |i|
|
(0..(func.length-1)).each do |i|
|
||||||
fout.printf("q[%2d] = ", out_idx)
|
fout.printf("q[%2d] = ", out_idx)
|
||||||
out_idx = (out_idx+out_interval)%out_modulus
|
out_idx = (out_idx+out_interval)%out_modulus
|
||||||
|
use_map << Array.new
|
||||||
func[i].each do |j|
|
func[i].each do |j|
|
||||||
skip = 0
|
skip = 0
|
||||||
if opt_table[i]
|
if opt_table[i]
|
||||||
|
@ -271,12 +273,15 @@ def generate_c_code(fout, func, optimizations=[], reg_map=[])
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
fout.printf("%st[%2d] ", j[0].chr, j[1..-1].to_i) if skip==0
|
fout.printf("%st[%2d] ", j[0].chr, j[1..-1].to_i) if skip==0
|
||||||
|
use_map[-1] << j[1..-1].to_i if skip==0
|
||||||
end
|
end
|
||||||
if opt_table[i]
|
if opt_table[i]
|
||||||
opt_table[i].each do |opt|
|
opt_table[i].each do |opt|
|
||||||
fout.print(opt[2]+'('+opt[3])
|
fout.print(opt[2]+'('+opt[3])
|
||||||
if opt[6]
|
if opt[6]
|
||||||
fout.printf('=%st[%2d]%st[%2d]',opt[0],opt[4].to_i,opt[1],opt[5].to_i)
|
fout.printf('=%st[%2d]%st[%2d]',opt[0],opt[4].to_i,opt[1],opt[5].to_i)
|
||||||
|
use_map[-1] << opt[4].to_i
|
||||||
|
use_map[-1] << opt[5].to_i
|
||||||
end
|
end
|
||||||
fout.print(') ')
|
fout.print(') ')
|
||||||
end
|
end
|
||||||
|
@ -285,6 +290,67 @@ def generate_c_code(fout, func, optimizations=[], reg_map=[])
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
class Array
  # Returns the index of the largest non-nil element.
  #
  # nil entries are skipped. When several elements share the maximum, the
  # index of the first one is returned (strict '<' comparison).
  #
  # Returns nil for an empty array, and 0 for a non-empty array that holds
  # no non-nil element (the same default the search starts from).
  def find_max_index
    return nil if self.length==0
    maxidx = 0
    max = nil
    # Walk over (value, index) pairs; the element value must never be used
    # as an index into the array.
    self.each_with_index do |value, idx|
      next if value==nil
      if max==nil || max<value
        maxidx = idx
        max = value
      end
    end
    return maxidx
  end

end
|
||||||
|
|
||||||
|
# Simulates keeping values in a fixed number of registers over a sequence of
# steps and counts how many times a value has to be loaded.
#
# use_map       - use_map[step] is the list of value indices used in that step.
# use_locations - use_locations[v] is the list of steps in which value v is
#                 used, ordered so that the earliest step is the LAST element
#                 (descending order). It is not modified.
# regs          - number of registers available (must be positive).
# steps         - number of steps to simulate.
#
# For every use that is not already resident in a register a load is counted
# and a register is chosen in this order:
#   1. the first empty register,
#   2. the first register whose value has no further use,
#   3. the register whose value has the most distant next use.
#
# Returns [loads, reg_map], where reg_map[step] is the register contents
# (array of value indices or nil) after the step was processed.
#
# Raises ArgumentError if regs is not positive.
def calculate_load_pressure(use_map, use_locations, regs, steps)
  raise ArgumentError, 'regs must be positive' unless regs>0
  loads = 0
  # Work on a private copy: the caller reuses the same lists for several
  # register counts, so they must not be consumed here.
  pending = use_locations.collect {|uses| uses.dup}
  reg_map = Array.new(steps) { Array.new(regs) }
  (0..(steps-1)).each do |step|
    # Drop every use that lies in the past so that [-1] is the next use.
    pending.each do |uses|
      uses.pop while uses[-1] && uses[-1]<step
    end
    # Registers keep their contents from the previous step.
    reg_map[step] = reg_map[step-1].clone if step>0
    use_map[step].each do |entry|
      # Already resident: no load necessary.
      next if reg_map[step].find_index(entry)!=nil
      loads += 1
      slot = reg_map[step].find_index(nil)
      if slot==nil
        # No empty register; every slot holds a value, look at their next uses.
        next_use = reg_map[step].collect {|held| pending[held][-1]}
        slot = next_use.find_index(nil)
        # All held values are used again: evict the one needed last
        # (first such register on ties).
        slot = next_use.index(next_use.max) if slot==nil
      end
      reg_map[step][slot] = entry
    end
  end
  return loads, reg_map
end
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
|
||||||
(0..15).each do |i|
|
(0..15).each do |i|
|
||||||
(0..3). each do |j|
|
(0..3). each do |j|
|
||||||
((j+1)..4).each do |k|
|
((j+1)..4).each do |k|
|
||||||
|
@ -355,25 +421,31 @@ puts "initializing bitcount table..."
|
||||||
init_bitcount_lut
|
init_bitcount_lut
|
||||||
|
|
||||||
puts "collision free combinations:"
|
puts "collision free combinations:"
|
||||||
max = 0
|
puts "(from cache)"
|
||||||
combinations = Array.new
|
combinations = [354997, 94005, 93877]
|
||||||
percent = 0
|
if combinations==nil
|
||||||
percent_step =(2**dublets.length-1)/10000.0
|
max = 0
|
||||||
next_step = (2**dublets.length-1)
|
combinations = Array.new
|
||||||
puts ''
|
percent = 0
|
||||||
(2**dublets.length-1).downto(0) do |x|
|
percent_step =(2**dublets.length-1)/10000.0
|
||||||
if(x<=next_step)
|
next_step = (2**dublets.length-1)
|
||||||
print "\x1b[s "+sprintf("%5.2f%%", percent/100.0)+"\x1b[u"
|
puts ''
|
||||||
percent += 1
|
(2**dublets.length-1).downto(0) do |x|
|
||||||
next_step -= percent_step
|
if(x<=next_step)
|
||||||
end
|
print "\x1b[s "+sprintf("%5.2f%%", percent/100.0)+"\x1b[u"
|
||||||
if check_collision(x, collision_lut) == false
|
percent += 1
|
||||||
if bits_set(x)>= max
|
next_step -= percent_step
|
||||||
combinations = Array.new if bits_set(x)>max
|
end
|
||||||
combinations << x
|
if check_collision(x, collision_lut) == false
|
||||||
max = bits_set(x)
|
if bits_set(x)>= max
|
||||||
|
combinations = Array.new if bits_set(x)>max
|
||||||
|
combinations << x
|
||||||
|
max = bits_set(x)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
puts 'DBG: combinations: '+combinations.inspect
|
||||||
end
|
end
|
||||||
|
|
||||||
combinations.each do |c|
|
combinations.each do |c|
|
||||||
|
@ -383,4 +455,33 @@ combinations.each do |c|
|
||||||
end
|
end
|
||||||
steps = word_to_steps(combinations[-1], dublets)
|
steps = word_to_steps(combinations[-1], dublets)
|
||||||
regs, reg_map = reg_map(steps, f0_def.length)
|
regs, reg_map = reg_map(steps, f0_def.length)
|
||||||
generate_c_code(STDOUT, f0_def,steps, reg_map)
|
use_map = []
|
||||||
|
generate_c_code(STDOUT, f0_def,steps, reg_map, use_map)
|
||||||
|
puts 'DBG: '
|
||||||
|
use_map.each do |q|
|
||||||
|
print "\t[ "
|
||||||
|
print q.collect {|v| v.to_s(16)}.join(', ')
|
||||||
|
print " ]\n"
|
||||||
|
end
|
||||||
|
reg_use_locations = Array.new(f0_def.length)
|
||||||
|
(0..(reg_use_locations.length-1)).each{|x| reg_use_locations[x] = Array.new}
|
||||||
|
|
||||||
|
(0..(f0_def.length-1)).each do |i|
|
||||||
|
use_map[i].each do |x|
|
||||||
|
reg_use_locations[x] << i
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
reg_use_locations.each{|x| x.reverse!}
|
||||||
|
#puts 'DBG: '+reg_use_locations.inspect
|
||||||
|
#puts 'DBG: (16 regs) '+calculate_load_pressure(use_map, reg_use_locations, 16, 16).inspect
|
||||||
|
#puts 'DBG: ( 8 regs) '+calculate_load_pressure(use_map, reg_use_locations, 8, 16).inspect
|
||||||
|
(4..16).each do |regs|
|
||||||
|
p,m = calculate_load_pressure(use_map, reg_use_locations, regs, 16)
|
||||||
|
puts "=#{regs} registers="
|
||||||
|
puts " load pressure: " +p.to_s
|
||||||
|
puts " map: "
|
||||||
|
print_reg_map(m, regs, 16, 4)
|
||||||
|
# puts "DBG: reg_map: "+m.inspect
|
||||||
|
# puts "DBG: use_map: "+use_map.inspect
|
||||||
|
end
|
||||||
|
|
|
@ -104,7 +104,7 @@
|
||||||
#define R64_6(x) (ROTR64((x), 21))
|
#define R64_6(x) (ROTR64((x), 21))
|
||||||
#define R64_7(x) (ROTR64((x), 11))
|
#define R64_7(x) (ROTR64((x), 11))
|
||||||
|
|
||||||
#include "f1_autogen_large.c"
|
#include "f1_autogen_large.i"
|
||||||
|
|
||||||
static inline
|
static inline
|
||||||
void bmw_large_f0(uint64_t* q, uint64_t* h, const uint64_t* m){
|
void bmw_large_f0(uint64_t* q, uint64_t* h, const uint64_t* m){
|
||||||
|
|
|
@ -107,7 +107,7 @@
|
||||||
#define R32_7(x) (ROTR32((x), 5))
|
#define R32_7(x) (ROTR32((x), 5))
|
||||||
|
|
||||||
|
|
||||||
#include "f1_autogen.c"
|
#include "f1_autogen.i"
|
||||||
|
|
||||||
static inline
|
static inline
|
||||||
void bmw_small_f0(uint32_t* q, uint32_t* h, const uint32_t* m){
|
void bmw_small_f0(uint32_t* q, uint32_t* h, const uint32_t* m){
|
||||||
|
@ -150,39 +150,39 @@ void bmw_small_f0(uint32_t* q, uint32_t* h, const uint32_t* m){
|
||||||
q[13] = (t[ 2] + t[ 4] + t[ 7] + t[10] + t[11]);
|
q[13] = (t[ 2] + t[ 4] + t[ 7] + t[10] + t[11]);
|
||||||
*/
|
*/
|
||||||
q[ 0] = +t[ 5] +t[10] +t[13] +(tr1=-t[ 7]+t[14]) ;
|
q[ 0] = +t[ 5] +t[10] +t[13] +(tr1=-t[ 7]+t[14]) ;
|
||||||
|
q[ 0] = S32_0(q[ 0]) + h[ 1];
|
||||||
q[ 3] = +t[ 8] +t[13] +t[ 0] +(tr2=-t[ 1]-t[10]) ;
|
q[ 3] = +t[ 8] +t[13] +t[ 0] +(tr2=-t[ 1]-t[10]) ;
|
||||||
|
q[ 3] = S32_3(q[ 3]) + h[ 4];
|
||||||
q[ 6] = -t[11] +t[13] -t[ 0] -t[ 3] +t[ 4] ;
|
q[ 6] = -t[11] +t[13] -t[ 0] -t[ 3] +t[ 4] ;
|
||||||
|
q[ 6] = S32_1(q[ 6]) + h[ 7];
|
||||||
q[ 9] = +t[ 0] +(tr0=-t[ 3]+t[ 6]) +(tr1) ;
|
q[ 9] = +t[ 0] +(tr0=-t[ 3]+t[ 6]) +(tr1) ;
|
||||||
|
q[ 9] = S32_4(q[ 9]) + h[10];
|
||||||
q[12] = -t[ 9] -(tr0) -(tr2) ;
|
q[12] = -t[ 9] -(tr0) -(tr2) ;
|
||||||
|
q[12] = S32_2(q[12]) + h[13];
|
||||||
q[15] = -t[ 4] +(tr0=-t[ 9]+t[12]) +(tr1=-t[ 6]+t[13]) ;
|
q[15] = -t[ 4] +(tr0=-t[ 9]+t[12]) +(tr1=-t[ 6]+t[13]) ;
|
||||||
|
q[15] = S32_0(q[15]) + h[ 0];
|
||||||
q[ 2] = +t[ 7] +t[15] +t[ 0] -(tr0) ;
|
q[ 2] = +t[ 7] +t[15] +t[ 0] -(tr0) ;
|
||||||
|
q[ 2] = S32_2(q[ 2]) + h[ 3];
|
||||||
q[ 5] = +t[10] +(tr0=-t[ 2]+t[15]) +(tr2=+t[ 3]-t[12]) ;
|
q[ 5] = +t[10] +(tr0=-t[ 2]+t[15]) +(tr2=+t[ 3]-t[12]) ;
|
||||||
|
q[ 5] = S32_0(q[ 5]) + h[ 6];
|
||||||
q[ 8] = -t[ 5] -(tr0) +(tr1) ;
|
q[ 8] = -t[ 5] -(tr0) +(tr1) ;
|
||||||
|
q[ 8] = S32_3(q[ 8]) + h[ 9];
|
||||||
q[11] = -t[ 0] -t[ 2] +t[ 9] +(tr0=-t[ 5]+t[ 8]) ;
|
q[11] = -t[ 0] -t[ 2] +t[ 9] +(tr0=-t[ 5]+t[ 8]) ;
|
||||||
|
q[11] = S32_1(q[11]) + h[12];
|
||||||
q[14] = -t[11] +(tr0) +(tr2) ;
|
q[14] = -t[11] +(tr0) +(tr2) ;
|
||||||
|
q[14] = S32_4(q[14]) + h[15];
|
||||||
q[ 1] = +t[ 6] +(tr0=+t[11]+t[14]) +(tr1=-t[ 8]-t[15]) ;
|
q[ 1] = +t[ 6] +(tr0=+t[11]+t[14]) +(tr1=-t[ 8]-t[15]) ;
|
||||||
|
q[ 1] = S32_1(q[ 1]) + h[ 2];
|
||||||
q[ 4] = +t[ 9] +t[ 1] +t[ 2] -(tr0) ;
|
q[ 4] = +t[ 9] +t[ 1] +t[ 2] -(tr0) ;
|
||||||
|
q[ 4] = S32_4(q[ 4]) + h[ 5];
|
||||||
q[ 7] = -t[12] -t[14] +t[ 1] -t[ 4] -t[ 5] ;
|
q[ 7] = -t[12] -t[14] +t[ 1] -t[ 4] -t[ 5] ;
|
||||||
|
q[ 7] = S32_2(q[ 7]) + h[ 8];
|
||||||
q[10] = -t[ 1] +(tr0=-t[ 4]-t[ 7]) -(tr1) ;
|
q[10] = -t[ 1] +(tr0=-t[ 4]-t[ 7]) -(tr1) ;
|
||||||
|
q[10] = S32_0(q[10]) + h[11];
|
||||||
q[13] = +t[ 2] +t[10] +t[11] -(tr0) ;
|
q[13] = +t[ 2] +t[10] +t[11] -(tr0) ;
|
||||||
|
q[13] = S32_3(q[13]) + h[14];
|
||||||
|
|
||||||
dump_x(q, 16, 'W');
|
dump_x(q, 16, 'W');
|
||||||
q[ 0] = S32_0(q[ 0]) + h[ 1];
|
|
||||||
q[ 1] = S32_1(q[ 1]) + h[ 2];
|
|
||||||
q[ 2] = S32_2(q[ 2]) + h[ 3];
|
|
||||||
q[ 3] = S32_3(q[ 3]) + h[ 4];
|
|
||||||
q[ 4] = S32_4(q[ 4]) + h[ 5];
|
|
||||||
q[ 5] = S32_0(q[ 5]) + h[ 6];
|
|
||||||
q[ 6] = S32_1(q[ 6]) + h[ 7];
|
|
||||||
q[ 7] = S32_2(q[ 7]) + h[ 8];
|
|
||||||
q[ 8] = S32_3(q[ 8]) + h[ 9];
|
|
||||||
q[ 9] = S32_4(q[ 9]) + h[10];
|
|
||||||
q[10] = S32_0(q[10]) + h[11];
|
|
||||||
q[11] = S32_1(q[11]) + h[12];
|
|
||||||
q[12] = S32_2(q[12]) + h[13];
|
|
||||||
q[13] = S32_3(q[13]) + h[14];
|
|
||||||
q[14] = S32_4(q[14]) + h[15];
|
|
||||||
q[15] = S32_0(q[15]) + h[ 0];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline
|
static inline
|
||||||
|
|
|
@ -29,7 +29,6 @@
|
||||||
#include "ubi.h"
|
#include "ubi.h"
|
||||||
#include "skein.h"
|
#include "skein.h"
|
||||||
|
|
||||||
#include "cli.h"
|
|
||||||
|
|
||||||
void skein256_init(skein256_ctx_t* ctx, uint16_t outsize_b){
|
void skein256_init(skein256_ctx_t* ctx, uint16_t outsize_b){
|
||||||
skein_config_t conf;
|
skein_config_t conf;
|
||||||
|
|
Loading…
Reference in New Issue