0.8
Sorting media using crowdsourcing.   
Doxygen
LIRIS

Refiner.java

Go to the documentation of this file.
00001 package crowdUser;
00002 
00003 import java.sql.Connection;
00004 import java.sql.DriverManager;
00005 import java.sql.ResultSet;
00006 import java.sql.Statement;
00007 import java.util.*;
00008 
00009 import ipeirotis.gal.scripts.DawidSkene;
00010 import ipeirotis.gal.scripts.Labeling;
00011 
00012 
00013 
00025 public class Refiner {
00026 
00027   00028   // Fields
00029   00030 
00034   private String[] columnsCSV;
00038   private Integer columnAxis;
00042   private Integer columnMedia1;
00046   private Integer columnMedia2;
00050   private Integer columnWorkerId;
00054   private Integer columnAnswer;
00059   private Map<String,Integer> compScores ;
00063   private List<String[]> data;
00064 
00065   
00066   00067   // Constructors
00068   00078   public Refiner (List<String> fields)
00079   {
00080         this.data = new ArrayList <String[]> ();
00081         getFields(fields);
00082   }
00083   
00084   00085   // Methods
00086   00087   
00088   
00104   private String signature(String axis, String idMedia1, String idMedia2)
00105   {
00106         String sig =  axis + "__" + idMedia1 + "__" + idMedia2;
00107         return sig;
00108   }
00109   
00110 
00122   private void getFields(List<String> csvFields)
00123   {
00124        this.columnsCSV = csvFields.toArray(new String[0]) ;
00125        Integer rank = 0;
00126        for (String field : csvFields)
00127             {
00128               if (field.equals("idMedia1") )
00129                     this.columnMedia1 = rank ;
00130               else if (field.equals("idMedia2") )
00131                     this.columnMedia2 = rank ;
00132               else if (field.equals("_worker_id") )
00133                     this.columnWorkerId = rank ;
00134               else if (field.equals("axis") )
00135                     this.columnAxis = rank ;
00136               else if (field.length() > 25 )
00137                     this.columnAnswer = rank ;
00138               rank ++ ;
00139             }
00140   }
00141 
00142   
00143  
00153   public void csv2List( String csvFile )
00154   {
00155 
00156       this.data.clear();
00157         try
00158         {
00159               // connecting to the .csv "database"
00160               Class.forName("org.relique.jdbc.csv.CsvDriver");
00161             Connection conCSV = DriverManager.getConnection("jdbc:relique:csv:" + "../data/HITresults/");
00162             Statement stmt = conCSV.createStatement();
00163             // creating and sending the query
00164             String queryCSV = "SELECT * FROM " + csvFile;
00165             ResultSet rs = stmt.executeQuery(queryCSV);
00166             // using the results
00167             while (rs.next())
00168             {
00169               List<String> request = new ArrayList<String>();
00170               for (int i=1; i<this.columnsCSV.length+1; i++)
00171                     request.add(rs.getString(i));
00172               this.data.add(request.toArray(new String[0]));
00173             }
00174               // closing everything
00175               rs.close();
00176               stmt.close();
00177               conCSV.close();
00178         }
00179         catch(Exception e)  { e.printStackTrace(); }
00180   }
00181 
00182   
00199   public void getanotherlabel(  )
00200   {
00201         // step 1: creation of the "files"
00202         String inputFile = generateInputFile() ;
00203         String correctFile = generateCorrectFile() ;
00204         String costFile = "1\t1\t0\n2\t2\t0\n1\t2\t1\n2\t1\t1" ;
00205         Integer iterations = 10 ;
00206         HashMap<String,String> posterior_voting = new HashMap <String,String>() ;
00207         List<String[]> newData = new ArrayList <String[]> () ;
00208         
00209         // step 2: get another label is called if their is enough HIT results (here, 10).
00210         if (this.data.size() > 10)
00211               try
00212               {   
00213                     // the content of this block is roughly a copy-pasta from "get another label" main method.
00214                     String[] lines_input = inputFile.split("\n");
00215                     Vector<Labeling> labelings = DawidSkene.loadLabels(lines_input);
00216                     String[] lines_correct = correctFile.split("\n");
00217                     Vector<Labeling> correct = DawidSkene.loadLabels(lines_correct);
00218                     String[] lines_cost = costFile.split("\n") ; 
00219                     Vector<Labeling> costs = DawidSkene.loadLabels(lines_cost);
00220                     DawidSkene ds = new DawidSkene(labelings, correct, costs);
00221                     ds.estimate(iterations);
00222                     ds.updateAnnotatorCosts();
00223                     posterior_voting = ds.getMajorityVote();
00224               }
00225               catch (Exception e) { e.printStackTrace(); }
00226 
00227         // step 2 (again): if their is not enough HIT results, raw results from the "correctFile" String are used directly.
00228         else
00229         {
00230               for (int i=0; i<this.data.size(); i++)
00231               {
00232                     String[] dataLine = this.data.get(i);
00233                     String sig = signature(dataLine[this.columnAxis], dataLine[this.columnMedia1], dataLine[this.columnMedia2]) ;
00234                     if (  this.compScores.get(sig) > 0)
00235                           posterior_voting.put(sig, "1");
00236                     else
00237                           posterior_voting.put(sig, "2");
00238               }     
00239         }
00240         
00241         // Step 3: The refined results are put in the "data" attribute
00242         for (int i=0; i<this.data.size(); i++)
00243         {
00244               List<String> newEntry = new ArrayList <String> () ;
00245               String sigComparisonStudied = signature( this.data.get(i)[this.columnAxis] , 
00246                               this.data.get(i)[this.columnMedia1] , this.data.get(i)[this.columnMedia2]);
00247               newEntry.add(this.data.get(i)[this.columnAxis]);
00248               newEntry.add(this.data.get(i)[this.columnMedia1]);
00249               newEntry.add(this.data.get(i)[this.columnMedia2]);
00250               newEntry.add ( posterior_voting.get(sigComparisonStudied) ) ;
00251               newData.add(newEntry.toArray(new String[0]));
00252         }
00253         this.data = newData ;  
00254   }
00255 
00256 
00257   
00265   private String generateCorrectFile()
00266   {
00267         String correctFile = "" ;
00268         try
00269         {
00270               String line;
00271               for (int i=0; i<this.data.size(); i++)
00272               {
00273                     String[] dataLine = this.data.get(i);
00274                     String sig = signature(dataLine[this.columnAxis], dataLine[this.columnMedia1], dataLine[this.columnMedia2]) ;
00275                     line = dataLine[this.columnWorkerId] + "\t" + sig + "\t" ;
00276                     if (  this.compScores.get(sig) > 0)
00277                           line += "1";
00278                     else
00279                           line += "2";
00280                     correctFile += line+"\n";  
00281               }
00282         }
00283         catch (Exception e) { e.printStackTrace(); }
00284         return correctFile ;
00285   }
00286 
00287 
00304   private String generateInputFile()
00305   {
00306         String inputFile = "";
00307         this.compScores = new Hashtable <String,Integer> () ;
00308         try{
00309               String line;
00310               for (int i=0; i<this.data.size(); i++)
00311               {
00312                     String[] dataLine = this.data.get(i);
00313                     String sig = signature(dataLine[this.columnAxis], dataLine[this.columnMedia1], dataLine[this.columnMedia2]) ;
00314                   
00315                     // storing the results for future retrieval
00316                     if (dataLine[this.columnAnswer].equals("Media 1"))
00317                           if (this.compScores.containsKey(sig))
00318                                 this.compScores.put(sig, this.compScores.get(sig) + 1) ;
00319                           else
00320                                 this.compScores.put(sig, 1);
00321                     else
00322                           if (this.compScores.containsKey(sig))
00323                                 this.compScores.put(sig, this.compScores.get(sig) - 1) ;
00324                           else
00325                                 this.compScores.put(sig, -1);
00326                     
00327                     // writing the "input file"
00328                     line = dataLine[this.columnWorkerId] + "\t" + sig + "\t" ;
00329                     if (dataLine[this.columnAnswer].equals("Media 1"))
00330                           line += "1";
00331                     else
00332                           line += "2";
00333                     inputFile += line + "\n";
00334               }
00335         }
00336         catch (Exception e) { e.printStackTrace(); }
00337         return inputFile ;
00338   }
00339 
00340 
00345   public List<String[]> getData()
00346   {
00347         return this.data ;
00348   }
00349 }
 All Classes Namespaces Files Functions Variables