# analyze_f0.rb =begin This file is part of the ARM-Crypto-Lib. Copyright (C) 2006-2010 Daniel Otte (daniel.otte@rub.de) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . =end =begin q[ 0] = (+ h[ 5] - h[ 7] + h[10] + h[13] + h[14]); q[ 3] = (+ h[ 8] - h[10] + h[13] + h[ 0] - h[ 1]); q[ 6] = (- h[11] + h[13] - h[ 0] - h[ 3] + h[ 4]); q[ 9] = (+ h[14] + h[ 0] - h[ 3] + h[ 6] - h[ 7]); q[12] = (+ h[ 1] + h[ 3] - h[ 6] - h[ 9] + h[10]); q[15] = (- h[ 4] - h[ 6] - h[ 9] + h[12] + h[13]); q[ 2] = (+ h[ 7] + h[ 9] - h[12] + h[15] + h[ 0]); q[ 5] = (+ h[10] - h[12] + h[15] - h[ 2] + h[ 3]); q[ 8] = (+ h[13] - h[15] + h[ 2] - h[ 5] - h[ 6]); q[11] = (- h[ 0] - h[ 2] - h[ 5] + h[ 8] + h[ 9]); q[14] = (+ h[ 3] - h[ 5] + h[ 8] - h[11] - h[12]); q[ 1] = (+ h[ 6] - h[ 8] + h[11] + h[14] - h[15]); q[ 4] = (+ h[ 9] - h[11] - h[14] + h[ 1] + h[ 2]); q[ 7] = (- h[12] - h[14] + h[ 1] - h[ 4] - h[ 5]); q[10] = (+ h[15] - h[ 1] - h[ 4] - h[ 7] + h[ 8]); q[13] = (+ h[ 2] + h[ 4] + h[ 7] + h[10] + h[11]); =end f0_def = [ [ '+5', '-7', '+10', '+13', '+14'], [ '+8', '-10', '+13', '+0', '-1'], [ '-11', '+13', '-0', '-3', '+4'], [ '+14', '+0', '-3', '+6', '-7'], [ '+1', '+3', '-6', '-9', '+10'], [ '-4', '-6', '-9', '+12', '+13'], [ '+7', '+9', '-12', '+15', '+0'], [ '+10', '-12', '+15', '-2', '+3'], [ '+13', '-15', '+2', '-5', '-6'], [ '-0', '-2', '-5', '+8', '+9'], [ '+3', '-5', '+8', '-11', '-12'], [ '+6', '-8', '+11', '+14', '-15'], [ '+9', '-11', '-14', '+1', '+2'], [ '-12', '-14', '+1', '-4', '-5'], [ '+15', '-1', '-4', '-7', '+8'], [ '+2', '+4', '+7', '+10', '+11'] ] $stat=Hash.new $stat_location=Hash.new def set_stat(s1,s2,i) if s2.to_i.absmax_reg end return max_reg, map end def word_to_steps(word, list) steps=Array.new idx=0 while(word!=0) if(word&1==1) steps << list[idx] end word >>= 1 idx += 1 end return steps end def print_collision_map(collisions, length) print ' ' (0..(length-1)).each {|x| print ('A'[0]+x).chr+' '} (0..(length-1)).each do |y| print "\n"+('A'[0]+y).chr+' ' (0..(length-1)).each do |x| if(collisions.find_index(('A'[0]+x).chr+('A'[0]+y).chr)) or (collisions.find_index(('A'[0]+y).chr+('A'[0]+x).chr)) print('x ') else print(' ') end end end print("\n") end def check_collision(word, lut) (0..(lut.length-1)).each do |i| if word&(1<>=1 end return r end def init_bitcount_lut (0..(2**4-1)).each {|x| $bitcount_lut[x] = bits_set_simple(x)} ((2**4)..(2**8-1)).each {|x| $bitcount_lut[x] = bits_set(x, 4)} ((2**8)..(2**16-1)).each {|x| $bitcount_lut[x] = bits_set(x, 8)} end def bits_set(x, length=16) r=0 while(x!=0) r += $bitcount_lut[x&(2**length-1)] x >>= length end return r end def decode_word(word) idx='A' r = '' while(word!=0) if(word&1==1) r += idx end word >>= 1 idx = (idx[0]+1).chr end return r end def generate_c_code(fout, func, optimizations=[], reg_map=[], use_map=[]) out_interval = 3 out_modulus = 16 out_idx = 0 opt_table = Array.new optimizations.each do |opt| opt_steps = opt[2].split(',') opt_steps.collect! {|x| x.to_i} opt_steps.each do |step| reg_a = opt[0].split(',')[0] reg_b = opt[0].split(',')[1] sign_a = '+' if func[step].find_index('+'+reg_a) sign_a = '-' if func[step].find_index('-'+reg_a) sign_b = '+' if func[step].find_index('+'+reg_b) sign_b = '-' if func[step].find_index('-'+reg_b) set = false free = false if step==opt_steps[0] sign_out='+' set=true else i=0 while opt_table[opt_steps[0]][i][4]!=reg_a || opt_table[opt_steps[0]][i][5]!=reg_b i+=1 end if(sign_a==opt_table[opt_steps[0]][i][0]) sign_out='+' else sign_out='-' end end free = true if step==opt_steps[-1] reg_number = reg_map[step].find_index(opt[3]) reg_name = sprintf("tr%d", reg_number) opt_table[step] = Array.new if opt_table[step]==nil opt_table[step] << [sign_a, sign_b, sign_out, reg_name, reg_a, reg_b, set, free] end end (0..(func.length-1)).each do |i| fout.printf("q[%2d] = ", out_idx) out_idx = (out_idx+out_interval)%out_modulus use_map << Array.new func[i].each do |j| skip = 0 if opt_table[i] opt_table[i].each do |opt| skip = 1 if opt[4]==j[1..-1] or opt[5]==j[1..-1] end end fout.printf("%st[%2d] ", j[0].chr, j[1..-1].to_i) if skip==0 use_map[-1] << j[1..-1].to_i if skip==0 end if opt_table[i] opt_table[i].each do |opt| fout.print(opt[2]+'('+opt[3]) if opt[6] fout.printf('=%st[%2d]%st[%2d]',opt[0],opt[4].to_i,opt[1],opt[5].to_i) use_map[-1] << opt[4].to_i use_map[-1] << opt[5].to_i end fout.print(') ') end end fout.print(";\n") end end class Array def find_max_index return nil if self.length==0 maxidx=0 max=self[0] self.each do |i| if(self[i]!=nil && max0 #(0..(regs-1)).each {|i| reg_map[step][i] = reg_map[step-1][i]} use_map[step].each do |entry| # print 'DBG: step='+step.to_s+' entry='+entry.to_s found = reg_map[step].find_index(entry) if found!=nil # print ' (direct)' reg_map[step][found] = entry local_use_map[found] = 1 else loads += 1 if t0=reg_map[step].find_index(nil) # print ' (found unsused slot)' reg_map[step][t0] = entry local_use_map[t0] = 1 else # find a register to clear a = reg_map[step].collect {|e| use_locations[e][-1]} if t1 = a.find_index(nil) # print ' (found not further used slot)' reg_map[step][t1] = entry else # print ' (reassigned slot)' reg_map[step][a.find_max_index] = entry end end end # print "\n" end # puts 'DBG: map part ('+step.to_s+'): '+reg_map[step].inspect end return loads, reg_map end ################################################################################ (0..15).each do |i| (0..3). each do |j| ((j+1)..4).each do |k| set_stat(f0_def[i][j], f0_def[i][k], i) end end end dublets = Array.new $stat.each_pair do |key,value| if value[0]+value[3]>1 || value[1]+value[2]>1 # puts key+": \t"+value.inspect+": \t"+$stat_location[key] dublets << [key, value, $stat_location[key]] end end dublets.sort! do |x,y| t = x[2].split(',') p = t[1].to_i - t[0].to_i t = y[2].split(',') q = t[1].to_i - t[0].to_i if (p!=q) (p-q) else (x[2].split(',')[0].to_i-y[2].split(',')[0].to_i) end end idx='A' dublets.each {|e| e << idx; idx=(idx[0]+1).chr} puts "useful combinations:" dublets.each {|e| puts e[3]+': '+e[0]+' '*(5-e[0].length)+" \t"+e[1].inspect+" \t" +e[2]} collisions = Array.new puts "searching for collisions: " (0..(dublets.length-2)).each do |i| ((i+1)..(dublets.length-1)).each do |j| if collision(dublets[i], dublets[j]) print '*' collisions << dublets[i][3]+dublets[j][3] else print '.' end end end puts '' #puts "collisions: " #puts collisions.join(',') #puts "collision-map: " #print_collision_map(collisions, dublets.length) collision_lut = Array.new(dublets.length, 0) (0..(dublets.length-1)).each do |x| (0..(dublets.length-1)).each do |y| if collisions.find_index(('A'[0]+x).chr+('A'[0]+y).chr) or collisions.find_index(('A'[0]+y).chr+('A'[0]+x).chr) collision_lut[x] |= 1<= max combinations = Array.new if bits_set(x)>max combinations << x max = bits_set(x) end end end puts 'DBG: combinations: '+combinations.inspect end combinations.each do |c| regs, reg_map = reg_map(word_to_steps(c, dublets), f0_def.length) puts bits_set(c).to_s+': '+decode_word(c)+' ( '+(regs+1).to_s+' registers)' print_reg_map(reg_map, regs+1, f0_def.length, 4) end steps = word_to_steps(combinations[-1], dublets) regs, reg_map = reg_map(steps, f0_def.length) use_map = [] generate_c_code(STDOUT, f0_def,steps, reg_map, use_map) puts 'DBG: ' use_map.each do |q| print "\t[ " print q.collect {|v| v.to_s(16)}.join(', ') print " ]\n" end reg_use_locations = Array.new(f0_def.length) (0..(reg_use_locations.length-1)).each{|x| reg_use_locations[x] = Array.new} (0..(f0_def.length-1)).each do |i| use_map[i].each do |x| reg_use_locations[x] << i end end reg_use_locations.each{|x| x.reverse!} #puts 'DBG: '+reg_use_locations.inspect #puts 'DBG: (16 regs) '+calculate_load_pressure(use_map, reg_use_locations, 16, 16).inspect #puts 'DBG: ( 8 regs) '+calculate_load_pressure(use_map, reg_use_locations, 8, 16).inspect (4..16).each do |regs| p,m = calculate_load_pressure(use_map, reg_use_locations, regs, 16) puts "=#{regs} registers=" puts " load pressure: " +p.to_s puts " map: " print_reg_map(m, regs, 16, 4) # puts "DBG: reg_map: "+m.inspect # puts "DBG: use_map: "+use_map.inspect end