# Compare the difference in perfect_cc (CC_TO_PERFECT) with the other scoring criteria for solution/inverse pairs.
"""
DIR MAD/aep/run_020208_scoring
Solution 2 AutoSol_run_1_ Z_SCORE: 431.8692 CC_TO_PERFECT: 0.6591
 NAME CC RFACTOR SKEW FOM NCS_OVERLAP
 RAW 0.6905 0.314 0.4314 0.78 1.077
 ZSCORE 100.3708 165.0859 96.964 15.6 53.8485
Solution 4 AutoSol_run_1_ Z_SCORE: 19.5832 CC_TO_PERFECT: 0.0365
 NAME CC RFACTOR SKEW FOM NCS_OVERLAP
 RAW 0.267 0.5769 0.0037 0.78 0.0
 ZSCORE 0.772 2.3374 0.8738 15.6 0.0
Solution 6 AutoSol_run_1_ Z_SCORE: 434.4561 CC_TO_PERFECT: 0.6591
 NAME CC RFACTOR SKEW FOM NCS_OVERLAP
 RAW 0.6923 0.3106 0.4314 0.78 1.078
 ZSCORE 100.7941 167.1953 96.9677 15.6 53.8991
"""
import sys,os
#
verbose=True
tolerance=0.05
file="hand.dat"
print "Analyzing data in ",file
all_data_raw={}
all_data_z_score={}
all_data_overall_z={}
all_data_perfect_cc={}
all_data_inverse={}
score_type_list=[]
run_data={}
run_name=None
solution_name=None
solution_data={}

solution_name=None
types=None
raw_scores=None
z_scores=None

for line in open(file).readlines():
  if not line: continue
  if not line.split(): continue
  if line.split()[0]=="DIR" and len(line.split())>1:
     solution_name=None
     run_name=line.split()[1]
     #print "New run: ",run_name
     run_data_raw={}
     all_data_raw[run_name]=run_data_raw
     run_data_z_score={}
     all_data_z_score[run_name]=run_data_z_score
     run_data_overall_z={}
     all_data_overall_z[run_name]=run_data_overall_z
     run_data_perfect_cc={}
     all_data_perfect_cc[run_name]=run_data_perfect_cc
     run_data_inverse={}
     all_data_inverse[run_name]=run_data_inverse

  elif  line.split()[0]=="Solution" and len(line.split())>1:
     score_type_list=[]
     raw_scores=None
     z_scores=None
     solution_name=line.split()[1]
     try:
       solution_z_score=float(line.split()[4])
       solution_perfect_cc=float(line.split()[6])
     except:
       solution_z_score=None
       solution_perfect_cc=None
     try:
       solution_inverse=line.split()[8]
     except:
       solution_inverse=None

     #print "new solution: ",solution_name 
  elif  line.split()[0]=="NAME" and len(line.split())>1:
     types=line.split()[1:]
     if not score_type_list or len(score_type_list)<len(types):
          score_type_list=types
          print "SCORE TYPE LIST: ",score_type_list
  elif  line.split()[0]=="RAW" and len(line.split())>1:
     raw_scores=line.split()[1:]
  elif  line.split()[0]=="ZSCORE" and len(line.split())>1:
     z_scores=line.split()[1:]

  if solution_name and types and raw_scores and z_scores and\
       solution_z_score and solution_perfect_cc and solution_inverse: # new solution
     solution_data_raw={}
     solution_data_z_score={}
     for type,raw_score,z_score in zip(types,raw_scores,z_scores):
       solution_data_raw[type]=raw_score
       solution_data_z_score[type]=z_score
     run_data_raw[solution_name]=solution_data_raw
     run_data_z_score[solution_name]=solution_data_z_score
     run_data_overall_z[solution_name]=solution_z_score
     run_data_perfect_cc[solution_name]=solution_perfect_cc
     run_data_inverse[solution_name]=solution_inverse
     solution_name=None
     types=None
     raw_scores=None
     z_scores=None

# analyze the data now:

run_list=all_data_raw.keys()
print "Total of ",len(run_list),"runs to analyze"

raw_out_2a=open('raw_2a.list','w')
raw_out_2=open('raw_2.list','w')
raw_out=open('raw.list','w')
zscore_out=open('zscore.list','w')
print >>raw_out_2a,"run solution inverse perfect_cc perfect_cc_inverse",
print >>raw_out_2,"run solution inverse perfect_cc-perfect_cc_inverse",
print >>raw_out, "run solution inverse perfect_cc perfect_cc_inverse z z_inverse",
print >>zscore_out, "run solution inverse perfect_cc perfect_cc_inverse z z_inverse",
for score_type in score_type_list:
  print >> raw_out_2a, score_type+" "+score_type+"_inverse",
  print >> raw_out_2, score_type+"-"+score_type+"_inverse",
  print >> raw_out, score_type,score_type,
  print >> zscore_out, score_type,score_type,
print >>raw_out_2a
print >>raw_out_2
print >>raw_out
print >>zscore_out

same={}
correct={}
incorrect={}
for score_type in score_type_list:
  same[score_type]=0
  correct[score_type]=0
  incorrect[score_type]=0
for run in run_list:
  if verbose:print "\nRUN ",run
  solution_data_raw=all_data_raw[run]
  solution_data_z_score=all_data_z_score[run]
  solution_data_overall_z=all_data_overall_z[run]
  solution_data_perfect_cc=all_data_perfect_cc[run]
  solution_data_inverse=all_data_inverse[run]
  solution_list=solution_data_raw.keys()
  used=[]
  for solution_test in solution_list:
    solution=solution_test
    inverse=solution_data_inverse[solution]
    if solution in used or inverse in used: continue
    used.append(solution)
    used.append(inverse)

    perfect_cc=solution_data_perfect_cc[solution]
    try:
     perfect_cc_inverse=solution_data_perfect_cc[inverse]
    except:
     continue # no inverse available
    if perfect_cc_inverse > perfect_cc:
      save=solution
      solution=inverse
      inverse=save
      perfect_cc=solution_data_perfect_cc[solution]
      perfect_cc_inverse=solution_data_perfect_cc[inverse]

    #if perfect_cc-perfect_cc_inverse < 0.1: 
       #if verbose:
        #print "\nSKIPPING: SOLUTION ",solution," INVERSE: ",inverse,"Z:",overall_z," CC:",perfect_cc
       #continue
    data_raw=solution_data_raw[solution]
    data_z_score=solution_data_z_score[solution]
    overall_z=solution_data_overall_z[solution]

    data_raw_inverse=solution_data_raw[inverse]
    data_z_score_inverse=solution_data_z_score[inverse]
    overall_z_inverse=solution_data_overall_z[inverse]

    delta=float(data_raw['FOM'])-float(data_raw_inverse['FOM'])
    if delta<-0.01 or delta> 0.01: 
      print "ZZ FOM: ",solution
      continue
    
    if verbose:print "\nSOLUTION ",solution," INVERSE: ",inverse,"Z:",overall_z," CC:",perfect_cc
    print >>raw_out_2a, run,solution,inverse,perfect_cc,perfect_cc_inverse,
    print >>raw_out_2, run,solution,inverse,perfect_cc-perfect_cc_inverse,
    print >>raw_out, run,solution,inverse,perfect_cc,perfect_cc_inverse,overall_z,overall_z_inverse,
    print >>zscore_out, run,solution,inverse,perfect_cc,perfect_cc_inverse,overall_z,overall_z_inverse,
    
    for score_type in score_type_list:
       try:
         raw=float(data_raw[score_type])
         raw_inverse=float(data_raw_inverse[score_type])
       except: 
         raw=0.0
         raw_inverse=0.0
       try:
         delta=raw-raw_inverse
       except:
         delta=0.

       try:
         z_score=float(data_z_score[score_type])
         z_score_inverse=float(data_z_score_inverse[score_type])
       except: 
         z_score=0.0
         z_score_inverse=0.0
       try:
         delta_z=z_score-z_score_inverse
       except:
         delta_z=0.

       delta_perf=perfect_cc-perfect_cc_inverse
       if delta_perf<0: delta_perf=-1.*delta_perf
       if delta_perf>tolerance:  # worth talking about
         if raw>raw_inverse:
           correct[score_type]+=1 
         elif raw<raw_inverse:
           incorrect[score_type]+=1 
         else:
           same[score_type]+=1 
       else:
         same[score_type]+=1 
       if verbose:print score_type,raw,raw_inverse,z_score,z_score_inverse
       print >>raw_out_2a, raw,raw_inverse,
       print >>raw_out_2, raw-raw_inverse,
       print >>zscore_out, z_score,z_score_inverse,
       print >>raw_out, raw,raw_inverse,
    print >>raw_out_2a
    print >>raw_out_2
    print >>raw_out
    print >>zscore_out
for score_type in score_type_list:
  total=correct[score_type]+incorrect[score_type]
  if total >0:
    corr=100.*float(correct[score_type])/float(total) 
  else: corr=0.5
  print "Fraction correct choices: %s %6.2f %i %i %i" %(score_type,corr,correct[score_type],incorrect[score_type] ,same[score_type])
