00001 package crowdUser; 00002 00003 import java.sql.Connection; 00004 import java.sql.DriverManager; 00005 import java.sql.ResultSet; 00006 import java.sql.Statement; 00007 import java.util.*; 00008 00009 import ipeirotis.gal.scripts.DawidSkene; 00010 import ipeirotis.gal.scripts.Labeling; 00011 00012 00013 00025 public class Refiner { 00026 00027 00028 // Fields 00029 00030 00034 private String[] columnsCSV; 00038 private Integer columnAxis; 00042 private Integer columnMedia1; 00046 private Integer columnMedia2; 00050 private Integer columnWorkerId; 00054 private Integer columnAnswer; 00059 private Map<String,Integer> compScores ; 00063 private List<String[]> data; 00064 00065 00066 00067 // Constructors 00068 00078 public Refiner (List<String> fields) 00079 { 00080 this.data = new ArrayList <String[]> (); 00081 getFields(fields); 00082 } 00083 00084 00085 // Methods 00086 00087 00088 00104 private String signature(String axis, String idMedia1, String idMedia2) 00105 { 00106 String sig = axis + "__" + idMedia1 + "__" + idMedia2; 00107 return sig; 00108 } 00109 00110 00122 private void getFields(List<String> csvFields) 00123 { 00124 this.columnsCSV = csvFields.toArray(new String[0]) ; 00125 Integer rank = 0; 00126 for (String field : csvFields) 00127 { 00128 if (field.equals("idMedia1") ) 00129 this.columnMedia1 = rank ; 00130 else if (field.equals("idMedia2") ) 00131 this.columnMedia2 = rank ; 00132 else if (field.equals("_worker_id") ) 00133 this.columnWorkerId = rank ; 00134 else if (field.equals("axis") ) 00135 this.columnAxis = rank ; 00136 else if (field.length() > 25 ) 00137 this.columnAnswer = rank ; 00138 rank ++ ; 00139 } 00140 } 00141 00142 00143 00153 public void csv2List( String csvFile ) 00154 { 00155 00156 this.data.clear(); 00157 try 00158 { 00159 // connecting to the .csv "database" 00160 Class.forName("org.relique.jdbc.csv.CsvDriver"); 00161 Connection conCSV = DriverManager.getConnection("jdbc:relique:csv:" + "../data/HITresults/"); 00162 Statement stmt = conCSV.createStatement(); 00163 // creating and sending the query 00164 String queryCSV = "SELECT * FROM " + csvFile; 00165 ResultSet rs = stmt.executeQuery(queryCSV); 00166 // using the results 00167 while (rs.next()) 00168 { 00169 List<String> request = new ArrayList<String>(); 00170 for (int i=1; i<this.columnsCSV.length+1; i++) 00171 request.add(rs.getString(i)); 00172 this.data.add(request.toArray(new String[0])); 00173 } 00174 // closing everything 00175 rs.close(); 00176 stmt.close(); 00177 conCSV.close(); 00178 } 00179 catch(Exception e) { e.printStackTrace(); } 00180 } 00181 00182 00199 public void getanotherlabel( ) 00200 { 00201 // step 1: creation of the "files" 00202 String inputFile = generateInputFile() ; 00203 String correctFile = generateCorrectFile() ; 00204 String costFile = "1\t1\t0\n2\t2\t0\n1\t2\t1\n2\t1\t1" ; 00205 Integer iterations = 10 ; 00206 HashMap<String,String> posterior_voting = new HashMap <String,String>() ; 00207 List<String[]> newData = new ArrayList <String[]> () ; 00208 00209 // step 2: get another label is called if their is enough HIT results (here, 10). 00210 if (this.data.size() > 10) 00211 try 00212 { 00213 // the content of this block is roughly a copy-pasta from "get another label" main method. 00214 String[] lines_input = inputFile.split("\n"); 00215 Vector<Labeling> labelings = DawidSkene.loadLabels(lines_input); 00216 String[] lines_correct = correctFile.split("\n"); 00217 Vector<Labeling> correct = DawidSkene.loadLabels(lines_correct); 00218 String[] lines_cost = costFile.split("\n") ; 00219 Vector<Labeling> costs = DawidSkene.loadLabels(lines_cost); 00220 DawidSkene ds = new DawidSkene(labelings, correct, costs); 00221 ds.estimate(iterations); 00222 ds.updateAnnotatorCosts(); 00223 posterior_voting = ds.getMajorityVote(); 00224 } 00225 catch (Exception e) { e.printStackTrace(); } 00226 00227 // step 2 (again): if their is not enough HIT results, raw results from the "correctFile" String are used directly. 00228 else 00229 { 00230 for (int i=0; i<this.data.size(); i++) 00231 { 00232 String[] dataLine = this.data.get(i); 00233 String sig = signature(dataLine[this.columnAxis], dataLine[this.columnMedia1], dataLine[this.columnMedia2]) ; 00234 if ( this.compScores.get(sig) > 0) 00235 posterior_voting.put(sig, "1"); 00236 else 00237 posterior_voting.put(sig, "2"); 00238 } 00239 } 00240 00241 // Step 3: The refined results are put in the "data" attribute 00242 for (int i=0; i<this.data.size(); i++) 00243 { 00244 List<String> newEntry = new ArrayList <String> () ; 00245 String sigComparisonStudied = signature( this.data.get(i)[this.columnAxis] , 00246 this.data.get(i)[this.columnMedia1] , this.data.get(i)[this.columnMedia2]); 00247 newEntry.add(this.data.get(i)[this.columnAxis]); 00248 newEntry.add(this.data.get(i)[this.columnMedia1]); 00249 newEntry.add(this.data.get(i)[this.columnMedia2]); 00250 newEntry.add ( posterior_voting.get(sigComparisonStudied) ) ; 00251 newData.add(newEntry.toArray(new String[0])); 00252 } 00253 this.data = newData ; 00254 } 00255 00256 00257 00265 private String generateCorrectFile() 00266 { 00267 String correctFile = "" ; 00268 try 00269 { 00270 String line; 00271 for (int i=0; i<this.data.size(); i++) 00272 { 00273 String[] dataLine = this.data.get(i); 00274 String sig = signature(dataLine[this.columnAxis], dataLine[this.columnMedia1], dataLine[this.columnMedia2]) ; 00275 line = dataLine[this.columnWorkerId] + "\t" + sig + "\t" ; 00276 if ( this.compScores.get(sig) > 0) 00277 line += "1"; 00278 else 00279 line += "2"; 00280 correctFile += line+"\n"; 00281 } 00282 } 00283 catch (Exception e) { e.printStackTrace(); } 00284 return correctFile ; 00285 } 00286 00287 00304 private String generateInputFile() 00305 { 00306 String inputFile = ""; 00307 this.compScores = new Hashtable <String,Integer> () ; 00308 try{ 00309 String line; 00310 for (int i=0; i<this.data.size(); i++) 00311 { 00312 String[] dataLine = this.data.get(i); 00313 String sig = signature(dataLine[this.columnAxis], dataLine[this.columnMedia1], dataLine[this.columnMedia2]) ; 00314 00315 // storing the results for future retrieval 00316 if (dataLine[this.columnAnswer].equals("Media 1")) 00317 if (this.compScores.containsKey(sig)) 00318 this.compScores.put(sig, this.compScores.get(sig) + 1) ; 00319 else 00320 this.compScores.put(sig, 1); 00321 else 00322 if (this.compScores.containsKey(sig)) 00323 this.compScores.put(sig, this.compScores.get(sig) - 1) ; 00324 else 00325 this.compScores.put(sig, -1); 00326 00327 // writing the "input file" 00328 line = dataLine[this.columnWorkerId] + "\t" + sig + "\t" ; 00329 if (dataLine[this.columnAnswer].equals("Media 1")) 00330 line += "1"; 00331 else 00332 line += "2"; 00333 inputFile += line + "\n"; 00334 } 00335 } 00336 catch (Exception e) { e.printStackTrace(); } 00337 return inputFile ; 00338 } 00339 00340 00345 public List<String[]> getData() 00346 { 00347 return this.data ; 00348 } 00349 }