from operator import itemgetter from scipy.stats import rv_discrete import numpy as np import sys, math from joblib import Parallel, delayed import random #verbose=True verbose=False delta = 0.1 map2threshold = 0 ni = {} zi = {} table = [] # rows: 0 - empty, 1...s; columns 0,1...s table_ids = ['F00'] ignore = [] tests = [] faults = [] non_triv_faults = [] Map1 = {} # for every j, which i has [i,j][?] = 0 Map2 = {} # for every j, which i has [i,j][0] < .5 def compute_num_samples(mu1, mu2): if mu1[2] != 0 or mu2[2] != 0: raise ZeroDivisionError p = mu1[0] return math.log(delta)/(1+0.5*math.log(p*(1-p), 2)) + 1 choosetable = {} def choose(n, k): """ A fast way to calculate binomial coefficients by Andrew Dalke (contrib). """ if str((n,k)) in choosetable: return choosetable[str((n,k))] ret = 0 if 0 <= k <= n: ntok = 1 ktok = 1 for t in xrange(1, min(k, n - k) + 1): ntok *= n ktok *= t n -= 1 ret = ntok // ktok else: ret = 0 choosetable[str((n,k))] = ret return ret def partial_prob(n,mu): b = mu[0] c = mu[1] if b+c == 0: return 0 s = 0 print "(%d,%s) " % (n,mu), try: num1 = 1/(1 + c/b) num2 = 1/(1 + b/c) for a in range((n/2 + 1),n+1): s = s + choose(n,a)*(num1**a)*(num2**(n-a)) if s >= 1-delta: return s if verbose: print "partial_prob(%d,%s)=%lf with b=%lf c=%lf, denom=%lf" % (n,mu,s,b,c,(b+c)**n) return s except (ZeroDivisionError, OverflowError): return 0 def parse_file(file): fault_cnt = 0 ignore_index = [] with open(file) as f: for line in f: line = line.strip() line_parts = line.split('='); prefix = line_parts[0].split(':') if float(prefix[1]) - 0.5 == 0: ignore.append(prefix[0]) ignore_index.append(fault_cnt) continue tuples = line_parts[1].split('|') fault_triples = [eval(t) for t in tuples] for t in fault_triples: if (t[0] + t[1] + t[2] - 1.0) != 0.0: print "Exception ", line, " for ", t[0], t[1], t[2], " , sum=", t[0] + t[1] + t[2] table.append(fault_triples) table_ids.append(prefix[0]) if verbose: print "=== Fault-%s mapped to Test-%d ===" % (prefix[0], fault_cnt) fault_cnt += 1 if len(ignore_index) > 0: if verbose: print "************** Modified Table ****************" # also remove columns Fi for i in ignore for t in table: for i in ignore_index: t.pop(i+1) if verbose: print t if verbose: print "**********************************************" # add empty row in beginning for easy indexing table.insert(0,[0]*(len(table)+1)) return (table, ignore) def init(f): global num_gates, table, ignore global tests, faults, non_triv_faults (table, ignore) = parse_file(f) num_gates = len(table) - 1 # adjust extra row tests = range(1,num_gates+1) faults = range(0, num_gates+1) non_triv_faults = range(1, num_gates+1) def read_ni(filename): with open(filename) as cache_file: line = cache_file.readline().strip() length = int(line) for i in range(length): line = cache_file.readline().strip() parts = line.split(':') ni[int(parts[0])] = eval(parts[1]) line = cache_file.readline().strip() length = int(line) for i in range(length): line = cache_file.readline().strip() parts = line.split(':') Map1[int(parts[0])] = eval(parts[1]) def compute_ni(table): global ni ni = {} cache = {} for r in tests: print "Computing ni for test ",r Map1[r] = [] mu1 = table[r][0] mu2 = table[r][r] if mu1 == mu2: print r raise ZeroDivisionError num1 = 0 if mu1 == (1,0,0) or mu1 == (0,1,0): num = 1 else: num = int(compute_num_samples(mu1, mu2)) m1 = 0 m2 = 0 for f in non_triv_faults: #print " m1-m2 computation with: ", table[r][f] if 1 > table[r][f][2] > 0: m1 = int(max(m1, math.log(delta)/math.log(1-table[r][f][2]))) elif table[r][f][2] == 1: m1 = int(max(m1,1)) p = table[r][f][0] if p > 0.5: m2 = int(max(m2, 2*p*math.log(1/delta)/((p-0.5)*(p-0.5)))) print " num=%d, m1=%d, m2=%d" % (num, m1, m2) ''' num1 = 1 mu_2_min = 1 mu_1_max = 0 for f in non_triv_faults: print f, if table[r][f][0] > 0: if num >= 100000: continue #d1 = TVD(table[r][f], mu1) #d2 = TVD(table[r][f], mu2) #if verbose: print r,f, d1, d2 #if (d1 > d2): # print " Analysis1 - Need to perform 3rd-component test ",f #else: # print " Analysis1 - NO NEED to perform 3rd-component test ",f # check if num samples are enough p = 0 if (num,table[r][f]) in cache: p = cache[(num,table[r][f])] else: p = partial_prob(num, table[r][f]) cache[(num,table[r][f])] = p if p >= 1 - delta: #pass # do nothing, num samples are enough to reduce false hits if verbose: print " Analysis2 - NO NEED to perform 3rd-component test ",f else: # find minimum non-zero third-component among dist which are closer to [r][r] than [r][0] if verbose: print " Analysis2 - Need to perform 3rd-component test for fault-%d (false hit prob = %lf)" % (f,p) mu_2_min = min(mu_2_min, table[r][f][2]) mu_1_max = max(mu_1_max, table[r][f][1]) print "" if mu_2_min < 1: num1 = math.log(delta)/math.log(1-mu_2_min) if mu_1_max > 0: if mu_1_max == 1: print "always wrong suspect !!!" else: num1 = max(num1, math.log(delta)/math.log(1/mu_1_max)) ''' ni[r] = min(50000,max(num, m1, m2)) cache = {} for f in non_triv_faults: Map1[f] = [] Map2[f] = [] for r in tests: if table[r][f][2] == 0: Map1[f].append(r) if table[r][f][0] > 0.5: Map2[f].append(r) if verbose: print " ** Map1[%d]: %s, Map2[%d]: %s" % (f, Map1[f], f, Map2[f]) def DKL(q, p): d = 0.0 for c in [0,1,2]: if q[c] == 0: pass #add 0 elif p[c] == 0: raise ValueError('invalid DKL(%s || %s)' % (q,p)) else: d = d + q[c]*math.log((q[c]/p[c]),2) return d def TVD(p,q): d = 0.0 for c in [0,1,2]: d = d + abs(p[c]-q[c]) return d elements = np.array([0,1,2]) def sample_mc(distr, num_samples): probabilities = np.array(distr) samples = np.random.choice(elements, num_samples, p=list(probabilities)) sample_distr = (np.count_nonzero(samples == 0), np.count_nonzero(samples == 1), np.count_nonzero(samples == 2)) sample_distr = [sample_distr[i]*1.0/num_samples for i in [0,1,2]] s = [] if num_samples < 20: s = samples if verbose: print "%d samples from %s is %s [%s]" % (num_samples, distr, sample_distr, s) return sample_distr def compute_tau(C,i): distr = table[i][C] tau = sample_mc(distr, ni[i]) return tau def get_suspects(C): suspects = [] reject = [] if verbose: print "tau[2] not zero for ", for i in tests: tau = compute_tau(C,i) if tau[2] != 0: if verbose: print i, for f in Map1[i]: # faults which have table[i][f][?]=0 reject.append(f) else: if table[i][i][1] == 1: if tau[1] == 1: suspects.append(i) else: d0 = DKL(tau, table[i][0]) di = DKL(tau, table[i][i]) if di < d0: suspects.append(i) if verbose: print '' reject = list(set(reject)) if verbose: print "Suspects for %d = %s, Rejects = %s" % (C, suspects, reject) new_suspects = [s for s in suspects if s not in reject] if len(new_suspects) < len(suspects): if verbose: print " ==> New suspects after pruning: %s" % new_suspects print " ==> Pruning: %s to %s" % (suspects, new_suspects) return new_suspects def get_suspects2(C): suspects = [] tau = {} for i in tests: tau[i] = compute_tau(C,i) if verbose: print ("%d: C0 = %s, Actual = %s, Cj = %s, C%d = %s" % (i, table[i][0], tau[i], table[i][C], i, table[i][i])) if tau[i][2] > 0: continue try: d0 = DKL(tau[i], table[i][0]) except ValueError: d0 = -1 # tau not close to [i,0] try: di = DKL(tau[i], table[i][i]) except ValueError: di = -1 # tau not close to [i,i] if d0 == -1 and di == -1: if verbose: print "Impossible" pass # tau impossible from (1,0,0) or (0,1,0) elif di == -1 or (d0 != -1 and di > d0): if verbose: print "No" pass # not at all close to [i,i] or somewhat closer to [i,0] than to [i,i] else: if verbose: print "Yes" suspects.append(i) if verbose: print "----- Refinement with suspects = %s -----" % suspects reject = [] for j in suspects: if verbose: print " Can fault be ", j for i in Map1[j]: if tau[i][2] != 0: print "Map1 rejection" #print " Rejecting %d for i=%d tau=%s, mu[i,j]=%s (test Map1)" % (j, i, tau[i], table[i][j]) reject.append(j) break incorrect = 0 for i in Map2[j]: if tau[i][0] < 0.5: if verbose: print " Maybe rejecting %d for i=%d tau=%s, mu[i,j]=%s (test Map2)" % (j, i, tau[i], table[i][j]) incorrect = incorrect + 1 if incorrect > map2threshold: if verbose: print " Rejecting ",j reject.append(j) if verbose: print " and reject = ", reject new_suspects = [s for s in suspects if s not in reject] if len(new_suspects) < len(suspects): if verbose: print " ==> New suspects after pruning: %s" % new_suspects return new_suspects def print_stats(f, num_iter, susp_size, missed): if len(susp_size[f]) == 0: print "******** EXCEPTION **********" print "~~~~~~~ RESULT ~~~~~~~~" avg_susp_size = -1 if num_iter != missed[f]: avg_susp_size = (1.0/ (num_iter - missed[f]))*reduce(lambda x,y:x+y, susp_size[f], 0) susp_size_array = np.array(susp_size[f]) if len(susp_size_array) > 0: median_susp_size = np.median(susp_size_array) max_susp_size = max(susp_size_array) third_q = np.percentile(susp_size_array, 75) single_susp = np.count_nonzero(susp_size_array == 1) else: median_susp_size = -1#np.median(susp_size_array) max_susp_size = -1#max(susp_size_array) third_q = -1#np.percentile(susp_size_array, 75) single_susp = -1#np.count_nonzero(susp_size_array == 1) if num_iter > missed[f]: correct = single_susp*100.0/(num_iter-missed[f]) else: correct = 0 print "Fault: %d (%d samples), Total = %d, missed = %d (%.2f %%), avg susp size = %.2f, median susp size = %d, max = %d, 3rd q=%d, number of times correct = %d (%.2f%%)" % \ (f, ni[f], num_iter, missed[f], missed[f]*100.0/num_iter, avg_susp_size, median_susp_size, max_susp_size, third_q, single_susp, correct) print "~~~~~~~~~~~~~~~~~~" def diagnose(f, num_iter): missed_f = 0 susp_size_f = [] print "****** Computing SUSP for Fault-",table_ids[f] for iter in range(num_iter): #if iter % 100 > 0: # print ".", #else: # print "\n====> Iteration: ", iter susp = get_suspects2(f) if verbose: print "---> suspected faults: %s" % susp if f not in susp: missed_f = missed_f + 1 else: susp_size_f.append(len(susp)) print "\n" return (f, missed_f, susp_size_f) def main(num_iter): global susp_size, missed if len(sys.argv) == 2: sys.argv.append(2) elif len(sys.argv) < 3: print "Error. python cluster.py " sys.exit(1) init(sys.argv[1]) #duplicates = remove_duplicates(table) if sys.argv[2] == '1': read_ni('ni_' + sys.argv[1]) else: compute_ni(table) if sys.argv[2] == '0': with open('ni_' + sys.argv[1], 'w') as cache_file: cache_file.write('%d\n' % len(ni)) for (k,v) in ni.items(): cache_file.write('%s:%s\n' % (k,v)) cache_file.write('%d\n' % len(Map1)) for (k,v) in Map1.items(): cache_file.write('%s:%s\n' % (k,v)) print ni map2threshold = num_gates*delta*(1+math.sqrt(3*math.log(1/delta)/(num_gates*delta))) print " Testing %d faults. %d faults undetectable from fault-free: %s" % (num_gates+1, len(ignore), ignore) susp_size = {} missed = {} #(f,m,s) = diagnose(7,100) #print f,m,s #sys.exit(1) test_list = non_triv_faults #num_iter=10 ret = Parallel(n_jobs=3, verbose=2)(delayed(diagnose)(f, num_iter) for f in test_list) for (f, m, s) in ret: missed[f] = m susp_size[f] = s for f in test_list: #diagnose(f,num_iter,susp_size,missed) print_stats(f, num_iter, susp_size, missed) #print "----------- RESULTS ------------" #for f in non_triv_faults: #print_stats(f) main(10000)