List of usage examples for java.util TreeMap size
int size
To view the source code for java.util TreeMap size.
Click Source Link
From source file:Main.java
public static void main(String[] args) { TreeMap<String, String> treeMap = new TreeMap<String, String>(); System.out.println("Size of TreeMap : " + treeMap.size()); treeMap.put("1", "One"); treeMap.put("2", "Two"); treeMap.put("3", "Three"); System.out.println("Size of TreeMap after addition : " + treeMap.size()); treeMap.remove("2"); System.out.println("Size of TreeMap after removal : " + treeMap.size()); }
From source file:Main.java
public static void main(String[] args) { TreeMap<String, String> treeMap = new TreeMap<String, String>(); treeMap.put("1", "One"); treeMap.put("2", "Two"); treeMap.put("3", "Three"); treeMap.clear();/*from w w w . j a va2 s.c o m*/ System.out.println(treeMap.size()); }
From source file:Main.java
public static void main(String[] args) { TreeMap<Integer, String> treemap = new TreeMap<Integer, String>(); // populating tree map treemap.put(2, "two"); treemap.put(1, "one"); treemap.put(3, "three"); treemap.put(6, "six"); treemap.put(5, "from java2s.com"); // getting size of the map System.out.println("Size of the map: " + treemap.size()); }
From source file:module.entities.UsernameChecker.CheckOpengovUsernames.java
/** * @param args the command line arguments *///from www . jav a2s.c o m public static void main(String[] args) throws SQLException, IOException { // args = new String[1]; // args[0] = "searchConf.txt"; Date d = new Date(); long milTime = d.getTime(); long execStart = System.nanoTime(); Timestamp startTime = new Timestamp(milTime); long lStartTime; long lEndTime = 0; int status_id = 1; JSONObject obj = new JSONObject(); if (args.length != 1) { System.out.println("None or too many argument parameters where defined! " + "\nPlease provide ONLY the configuration file name as the only argument."); } else { try { configFile = args[0]; initLexicons(); Database.init(); lStartTime = System.currentTimeMillis(); System.out.println("Opengov username identification process started at: " + startTime); usernameCheckerId = Database.LogUsernameChecker(lStartTime); TreeMap<Integer, String> OpenGovUsernames = Database.GetOpenGovUsers(); HashSet<ReportEntry> report_names = new HashSet<>(); if (OpenGovUsernames.size() > 0) { for (int userID : OpenGovUsernames.keySet()) { String DBusername = Normalizer .normalize(OpenGovUsernames.get(userID).toUpperCase(locale), Normalizer.Form.NFD) .replaceAll("\\p{M}", ""); String username = ""; int type; String[] splitUsername = DBusername.split(" "); if (checkNameInLexicons(splitUsername)) { for (String splText : splitUsername) { username += splText + " "; } type = 1; } else if (checkOrgInLexicons(splitUsername)) { for (String splText : splitUsername) { username += splText + " "; } type = 2; } else { username = DBusername; type = -1; } ReportEntry cerEntry = new ReportEntry(userID, username.trim(), type); report_names.add(cerEntry); } status_id = 2; obj.put("message", "Opengov username checker finished with no errors"); obj.put("details", ""); Database.UpdateOpengovUsersReportName(report_names); lEndTime = System.currentTimeMillis(); } else { status_id = 2; obj.put("message", "Opengov username checker finished with no errors"); obj.put("details", "No usernames needed to be checked"); lEndTime = System.currentTimeMillis(); } } catch (Exception ex) { System.err.println(ex.getMessage()); status_id = 3; obj.put("message", "Opengov username checker encountered an error"); obj.put("details", ex.getMessage().toString()); lEndTime = System.currentTimeMillis(); } } long execEnd = System.nanoTime(); long executionTime = (execEnd - execStart); System.out.println("Total process time: " + (((executionTime / 1000000) / 1000) / 60) + " minutes."); Database.UpdateLogUsernameChecker(lEndTime, status_id, usernameCheckerId, obj); Database.closeConnection(); }
From source file:cosmos.example.BuildingPermitsExample.java
public static void main(String[] args) throws Exception { BuildingPermitsExample example = new BuildingPermitsExample(); new JCommander(example, args); File inputFile = new File(example.fileName); Preconditions.checkArgument(inputFile.exists() && inputFile.isFile() && inputFile.canRead(), "Expected " + example.fileName + " to be a readable file"); String zookeepers;//from ww w . java 2 s . c om String instanceName; Connector connector; MiniAccumuloCluster mac = null; File macDir = null; // Use the MiniAccumuloCluster is requested if (example.useMiniAccumuloCluster) { macDir = Files.createTempDir(); String password = "password"; MiniAccumuloConfig config = new MiniAccumuloConfig(macDir, password); config.setNumTservers(1); mac = new MiniAccumuloCluster(config); mac.start(); zookeepers = mac.getZooKeepers(); instanceName = mac.getInstanceName(); ZooKeeperInstance instance = new ZooKeeperInstance(instanceName, zookeepers); connector = instance.getConnector("root", new PasswordToken(password)); } else { // Otherwise connect to a running instance zookeepers = example.zookeepers; instanceName = example.instanceName; ZooKeeperInstance instance = new ZooKeeperInstance(instanceName, zookeepers); connector = instance.getConnector(example.username, new PasswordToken(example.password)); } // Instantiate an instance of Cosmos Cosmos cosmos = new CosmosImpl(zookeepers); // Create a definition for the data we want to load Store id = Store.create(connector, new Authorizations(), AscendingIndexIdentitySet.create()); // Register the definition with Cosmos so it can track its progress. cosmos.register(id); // Load all of the data from our inputFile LoadBuildingPermits loader = new LoadBuildingPermits(cosmos, id, inputFile); loader.run(); // Finalize the SortableResult which will prevent future writes to the data set cosmos.finalize(id); // Flush the ingest traces to the backend so we can see the results; id.sendTraces(); // Get back the Set of Columns that we've ingested. Set<Column> schema = Sets.newHashSet(cosmos.columns(id)); log.debug("\nColumns: " + schema); Iterator<Column> iter = schema.iterator(); while (iter.hasNext()) { Column c = iter.next(); // Remove the internal ID field and columns that begin with CONTRACTOR_ if (c.equals(LoadBuildingPermits.ID) || c.name().startsWith("CONTRACTOR_")) { iter.remove(); } } Iterable<Index> indices = Iterables.transform(schema, new Function<Column, Index>() { @Override public Index apply(Column col) { return Index.define(col); } }); // Ensure that we have locality groups set as we expect log.info("Ensure locality groups are set"); id.optimizeIndices(indices); // Compact down the data for this SortableResult log.info("Issuing compaction for relevant data"); id.consolidate(); final int numTopValues = 10; // Walk through each column in the result set for (Column c : schema) { Stopwatch sw = new Stopwatch(); sw.start(); // Get the number of times we've seen each value in a given column CloseableIterable<Entry<RecordValue<?>, Long>> groupingsInColumn = cosmos.groupResults(id, c); log.info(c.name() + ":"); // Iterate over the counts, collecting the top N values in each column TreeMap<Long, RecordValue<?>> topValues = Maps.newTreeMap(); for (Entry<RecordValue<?>, Long> entry : groupingsInColumn) { if (topValues.size() == numTopValues) { Entry<Long, RecordValue<?>> least = topValues.pollFirstEntry(); if (least.getKey() < entry.getValue()) { topValues.put(entry.getValue(), entry.getKey()); } else { topValues.put(least.getKey(), least.getValue()); } } else if (topValues.size() < numTopValues) { topValues.put(entry.getValue(), entry.getKey()); } } for (Long key : topValues.descendingKeySet()) { log.info(topValues.get(key).value() + " occurred " + key + " times"); } sw.stop(); log.info("Took " + sw.toString() + " to run query.\n"); } log.info("Deleting records"); // Delete the records we've ingested if (!example.useMiniAccumuloCluster) { // Because I'm lazy and don't want to wait around to run the BatchDeleter when we're just going // to rm -rf the directory in a few secs. cosmos.delete(id); } // And shut down Cosmos cosmos.close(); log.info("Cosmos stopped"); // If we were using MAC, also stop that if (example.useMiniAccumuloCluster && null != mac) { mac.stop(); if (null != macDir) { FileUtils.deleteDirectory(macDir); } } }
From source file:io.apiman.tools.i18n.TemplateScanner.java
public static void main(String[] args) throws IOException { if (args == null || args.length != 1) { System.out.println("Template directory not provided (no path provided)."); System.exit(1);//from w w w . j a v a2s.c om } File templateDir = new File(args[0]); if (!templateDir.isDirectory()) { System.out.println("Template directory not provided (provided path is not a directory)."); System.exit(1); } if (!new File(templateDir, "dash.html").isFile()) { System.out.println("Template directory not provided (dash.html not found)."); System.exit(1); } File outputDir = new File(templateDir, "../../../../../../tools/i18n/target"); if (!outputDir.isDirectory()) { System.out.println("Output directory not found: " + outputDir); System.exit(1); } File outputFile = new File(outputDir, "scanner-messages.properties"); if (outputFile.isFile() && !outputFile.delete()) { System.out.println("Couldn't delete the old messages.properties: " + outputFile); System.exit(1); } System.out.println("Starting scan."); System.out.println("Scanning template directory: " + templateDir.getAbsolutePath()); String[] extensions = { "html", "include" }; Collection<File> files = FileUtils.listFiles(templateDir, extensions, true); TreeMap<String, String> strings = new TreeMap<>(); for (File file : files) { System.out.println("\tScanning file: " + file); scanFile(file, strings); } outputMessages(strings, outputFile); System.out.println("Scan complete. Scanned " + files.size() + " files and discovered " + strings.size() + " translation strings."); }
From source file:eval.dataset.ParseWikiLog.java
public static void main(String[] ss) throws FileNotFoundException, ParserConfigurationException, IOException { FileInputStream fin = new FileInputStream("data/enwiki-20151201-pages-logging.xml.gz"); GzipCompressorInputStream gzIn = new GzipCompressorInputStream(fin); InputStreamReader reader = new InputStreamReader(gzIn); BufferedReader br = new BufferedReader(reader); PrintWriter pw = new PrintWriter(new FileWriter("data/user_page.txt")); pw.println(/*from w w w .ja va2 s . c o m*/ "#list of user names and pages that they have edited, deleted or created. These info are mined from logitems of enwiki-20150304-pages-logging.xml.gz"); TreeMap<String, Set<String>> userPageList = new TreeMap(); TreeSet<String> pageList = new TreeSet(); int counterEntry = 0; String currentUser = null; String currentPage = null; try { for (String line = br.readLine(); line != null; line = br.readLine()) { if (line.trim().equals("</logitem>")) { counterEntry++; if (currentUser != null && currentPage != null) { updateMap(userPageList, currentUser, currentPage); pw.println(currentUser + "\t" + currentPage); pageList.add(currentPage); } currentUser = null; currentPage = null; } else if (line.trim().startsWith("<username>")) { currentUser = line.trim().split(">")[1].split("<")[0].replace(" ", "_"); } else if (line.trim().startsWith("<logtitle>")) { String content = line.trim().split(">")[1].split("<")[0]; if (content.split(":").length == 1) { currentPage = content.replace(" ", "_"); } } } } catch (IOException ex) { Logger.getLogger(ParseWikiLog.class.getName()).log(Level.SEVERE, null, ex); } pw.println("#analysed " + counterEntry + " entries of wikipesia log file"); pw.println("#gathered a list of unique user of size " + userPageList.size()); pw.println("#gathered a list of pages of size " + pageList.size()); pw.close(); gzIn.close(); PrintWriter pwUser = new PrintWriter(new FileWriter("data/user_list_page_edited.txt")); pwUser.println( "#list of unique users and pages that they have edited, extracted from logitems of enwiki-20150304-pages-logging.xml.gz"); for (String user : userPageList.keySet()) { pwUser.print(user); Set<String> getList = userPageList.get(user); for (String page : getList) { pwUser.print("\t" + page); } pwUser.println(); } pwUser.close(); PrintWriter pwPage = new PrintWriter(new FileWriter("data/all_pages.txt")); pwPage.println("#list of the unique pages that are extracted from enwiki-20150304-pages-logging.xml.gz"); for (String page : pageList) { pwPage.println(page); } pwPage.close(); System.out.println("#analysed " + counterEntry + " entries of wikipesia log file"); System.out.println("#gathered a list of unique user of size " + userPageList.size()); System.out.println("#gathered a list of pages of size " + pageList.size()); }
From source file:net.massbank.validator.RecordValidator.java
public static void main(String[] args) { RequestDummy request;//from w ww.j a v a2 s . co m PrintStream out = System.out; Options lvOptions = new Options(); lvOptions.addOption("h", "help", false, "show this help."); lvOptions.addOption("r", "recdata", true, "points to the recdata directory containing massbank records. Reads all *.txt files in there."); CommandLineParser lvParser = new BasicParser(); CommandLine lvCmd = null; try { lvCmd = lvParser.parse(lvOptions, args); if (lvCmd.hasOption('h')) { printHelp(lvOptions); return; } } catch (org.apache.commons.cli.ParseException pvException) { System.out.println(pvException.getMessage()); } String recDataPath = lvCmd.getOptionValue("recdata"); // --------------------------------------------- // ???? // --------------------------------------------- final String baseUrl = MassBankEnv.get(MassBankEnv.KEY_BASE_URL); final String dbRootPath = "./"; final String dbHostName = MassBankEnv.get(MassBankEnv.KEY_DB_HOST_NAME); final String tomcatTmpPath = "."; final String tmpPath = (new File(tomcatTmpPath + sdf.format(new Date()))).getPath() + File.separator; GetConfig conf = new GetConfig(baseUrl); int recVersion = 2; String selDbName = ""; Object up = null; // Was: file Upload boolean isResult = true; String upFileName = ""; boolean upResult = false; DatabaseAccess db = null; try { // ---------------------------------------------------- // ??? // ---------------------------------------------------- // if (FileUpload.isMultipartContent(request)) { // (new File(tmpPath)).mkdir(); // String os = System.getProperty("os.name"); // if (os.indexOf("Windows") == -1) { // isResult = FileUtil.changeMode("777", tmpPath); // if (!isResult) { // out.println(msgErr("[" + tmpPath // + "] chmod failed.")); // return; // } // } // up = new FileUpload(request, tmpPath); // } // ---------------------------------------------------- // ?DB???? // ---------------------------------------------------- List<String> dbNameList = Arrays.asList(conf.getDbName()); ArrayList<String> dbNames = new ArrayList<String>(); dbNames.add(""); File[] dbDirs = (new File(dbRootPath)).listFiles(); if (dbDirs != null) { for (File dbDir : dbDirs) { if (dbDir.isDirectory()) { int pos = dbDir.getName().lastIndexOf("\\"); String dbDirName = dbDir.getName().substring(pos + 1); pos = dbDirName.lastIndexOf("/"); dbDirName = dbDirName.substring(pos + 1); if (dbNameList.contains(dbDirName)) { // DB???massbank.conf???DB???? dbNames.add(dbDirName); } } } } if (dbDirs == null || dbNames.size() == 0) { out.println(msgErr("[" + dbRootPath + "] directory not exist.")); return; } Collections.sort(dbNames); // ---------------------------------------------------- // ? // ---------------------------------------------------- // if (FileUpload.isMultipartContent(request)) { // HashMap<String, String[]> reqParamMap = new HashMap<String, // String[]>(); // reqParamMap = up.getRequestParam(); // if (reqParamMap != null) { // for (Map.Entry<String, String[]> req : reqParamMap // .entrySet()) { // if (req.getKey().equals("ver")) { // try { // recVersion = Integer // .parseInt(req.getValue()[0]); // } catch (NumberFormatException nfe) { // } // } else if (req.getKey().equals("db")) { // selDbName = req.getValue()[0]; // } // } // } // } else { // if (request.getParameter("ver") != null) { // try { // recVersion = Integer.parseInt(request // .getParameter("ver")); // } catch (NumberFormatException nfe) { // } // } // selDbName = request.getParameter("db"); // } // if (selDbName == null || selDbName.equals("") // || !dbNames.contains(selDbName)) { // selDbName = dbNames.get(0); // } // --------------------------------------------- // // --------------------------------------------- out.println("Database: "); for (int i = 0; i < dbNames.size(); i++) { String dbName = dbNames.get(i); out.print("dbName"); if (dbName.equals(selDbName)) { out.print(" selected"); } if (i == 0) { out.println("------------------"); } else { out.println(dbName); } } out.println("Record Version : "); out.println(recVersion); out.println("Record Archive :"); // --------------------------------------------- // // --------------------------------------------- // HashMap<String, Boolean> upFileMap = up.doUpload(); // if (upFileMap != null) { // for (Map.Entry<String, Boolean> e : upFileMap.entrySet()) { // upFileName = e.getKey(); // upResult = e.getValue(); // break; // } // if (upFileName.equals("")) { // out.println(msgErr("please select file.")); // isResult = false; // } else if (!upResult) { // out.println(msgErr("[" + upFileName // + "] upload failed.")); // isResult = false; // } else if (!upFileName.endsWith(ZIP_EXTENSION) // && !upFileName.endsWith(MSBK_EXTENSION)) { // out.println(msgErr("please select [" // + UPLOAD_RECDATA_ZIP // + "] or [" // + UPLOAD_RECDATA_MSBK + "].")); // up.deleteFile(upFileName); // isResult = false; // } // } else { // out.println(msgErr("server error.")); // isResult = false; // } // up.deleteFileItem(); // if (!isResult) { // return; // } // --------------------------------------------- // ??? // --------------------------------------------- // final String upFilePath = (new File(tmpPath + File.separator // + upFileName)).getPath(); // isResult = FileUtil.unZip(upFilePath, tmpPath); // if (!isResult) { // out.println(msgErr("[" // + upFileName // + "] extraction failed. possibility of time-out.")); // return; // } // --------------------------------------------- // ?? // --------------------------------------------- final String recPath = (new File(dbRootPath + File.separator + selDbName)).getPath(); File tmpRecDir = new File(recDataPath); if (!tmpRecDir.isDirectory()) { tmpRecDir.mkdirs(); } // --------------------------------------------- // ??? // --------------------------------------------- // data? // final String recDataPath = (new File(tmpPath + File.separator // + RECDATA_DIR_NAME)).getPath() // + File.separator; // // if (!(new File(recDataPath)).isDirectory()) { // if (upFileName.endsWith(ZIP_EXTENSION)) { // out.println(msgErr("[" // + RECDATA_DIR_NAME // + "] directory is not included in the up-loading file.")); // } else if (upFileName.endsWith(MSBK_EXTENSION)) { // out.println(msgErr("The uploaded file is not record data.")); // } // return; // } // --------------------------------------------- // DB // --------------------------------------------- // db = new DatabaseAccess(dbHostName, selDbName); // isResult = db.open(); // if (!isResult) { // db.close(); // out.println(msgErr("not connect to database.")); // return; // } // --------------------------------------------- // ?? // --------------------------------------------- TreeMap<String, String> resultMap = validationRecord(db, out, recDataPath, recPath, recVersion); if (resultMap.size() == 0) { return; } // --------------------------------------------- // ? // --------------------------------------------- isResult = dispResult(out, resultMap); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { if (db != null) { db.close(); } File tmpDir = new File(tmpPath); if (tmpDir.exists()) { FileUtil.removeDir(tmpDir.getPath()); } } }
From source file:org.grouplens.lenskit.eval.metrics.predict.LastFmEvaluation.java
public static void main(String[] args) { String csvFile = "/home/user/Desktop/LastFM_exps/newresultssets/LastFM_1_of_5_20140613-1938.csv"; // String csvFile = "/home/user/Desktop/LastFM_exps/reresultset1of5fold/LastFM_1_of_5_20140606-0338.csv"; BufferedReader br = null;/*from ww w . j a v a 2 s .co m*/ String sCurrentLine = ""; String cvsSplitBy = ","; try { TreeMap<Integer, ArrayList<Double>> golden = new TreeMap<Integer, ArrayList<Double>>(); TreeMap<Integer, ArrayList<Double>> methodA = new TreeMap<Integer, ArrayList<Double>>(); TreeMap<Integer, ArrayList<Double>> methodB = new TreeMap<Integer, ArrayList<Double>>(); TreeMap<Integer, ArrayList<Double>> methodC = new TreeMap<Integer, ArrayList<Double>>(); br = new BufferedReader(new FileReader(csvFile)); // String header = br.readLine(); // System.out.println(header); while ((sCurrentLine = br.readLine()) != null) { String[] cols = sCurrentLine.split(cvsSplitBy); int userId = Integer.parseInt(cols[0]); double goldenValue = Double.parseDouble(cols[5]); double methodAValue = Double.parseDouble(cols[2]); double methodBValue = Double.parseDouble(cols[3]); double methodCValue = Double.parseDouble(cols[4]); //Actual Values TreeMap creation if (!golden.containsKey(userId)) { golden.put(userId, new ArrayList<Double>()); golden.get(userId).add(goldenValue); } else { golden.get(userId).add(goldenValue); } //Method A TreeMap creation if (!methodA.containsKey(userId)) { methodA.put(userId, new ArrayList<Double>()); methodA.get(userId).add(methodAValue); } else { methodA.get(userId).add(methodAValue); } //Method B TreeMap creation if (!methodB.containsKey(userId)) { methodB.put(userId, new ArrayList<Double>()); methodB.get(userId).add(methodBValue); } else { methodB.get(userId).add(methodBValue); } //Method C TreeMap creation if (!methodC.containsKey(userId)) { methodC.put(userId, new ArrayList<Double>()); methodC.get(userId).add(methodCValue); } else { methodC.get(userId).add(methodCValue); } } uniqueUsers = golden.size(); System.out.println("To synolo twn monadikwn xristwn sto dataset einai: " + uniqueUsers); nusers = 0; sumPearsonA = 0; sumPearsonB = 0; sumPearsonC = 0; for (Integer i : golden.keySet()) { Double[] ADoubles = new Double[methodA.get(i).size()]; methodA.get(i).toArray(ADoubles); Double[] BDoubles = new Double[methodB.get(i).size()]; methodB.get(i).toArray(BDoubles); Double[] CDoubles = new Double[methodC.get(i).size()]; methodC.get(i).toArray(CDoubles); Double[] goldenDoubles = new Double[golden.get(i).size()]; golden.get(i).toArray(goldenDoubles); if (goldenDoubles.length > 1) { corA = p.correlation(ArrayUtils.toPrimitive(ADoubles), ArrayUtils.toPrimitive(goldenDoubles)); corB = p.correlation(ArrayUtils.toPrimitive(BDoubles), ArrayUtils.toPrimitive(goldenDoubles)); corC = p.correlation(ArrayUtils.toPrimitive(CDoubles), ArrayUtils.toPrimitive(goldenDoubles)); if (Double.isNaN(corA)) { corA = 1; } if (Double.isNaN(corB)) { corB = 1; } if (Double.isNaN(corC)) { corC = 1; } sumPearsonA += corA; sumPearsonB += corB; sumPearsonC += corC; nusers++; // for (int arrayCounter = 0; arrayCounter < goldenDoubles.length; arrayCounter++) { // // sumA += ADoubles[arrayCounter]; // sumB += BDoubles[arrayCounter]; // sumC += CDoubles[arrayCounter]; // sumGolden += goldenDoubles[arrayCounter]; // // } // // avgA = sumA / ADoubles.length; // avgB = sumB / BDoubles.length; // avgC = sumC / CDoubles.length; // avgGolden = sumGolden / goldenDoubles.length; // System.out.println(sumPearsonA); // System.out.println(sumPearsonB); // System.out.println(sumPearsonC); // System.out.println(nusers); // System.out.println("----------------------------------"); // sumGolden = 0; sumA = 0; sumB = 0; sumC = 0; } else { System.out.println("User has only ONE rating!!!!! "); System.out.println("----------------------------------"); } } avgA = sumPearsonA / nusers; avgB = sumPearsonB / nusers; avgC = sumPearsonC / nusers; System.out.println(avgA); System.out.println(avgB); System.out.println(avgC); System.out.println("----------------------------------"); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if (br != null) { try { br.close(); } catch (IOException e) { e.printStackTrace(); } } } System.out.println("Done"); }
From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step8GoldDataAggregator.java
public static void main(String[] args) throws Exception { String inputDir = args[0] + "/"; // output dir File outputDir = new File(args[1]); File turkersConfidence = new File(args[2]); if (outputDir.exists()) { outputDir.delete();//from w w w. jav a 2 s .c o m } outputDir.mkdir(); List<String> annotatorsIDs = new ArrayList<>(); // for (File f : FileUtils.listFiles(new File(inputDir), new String[] { "xml" }, false)) { // QueryResultContainer queryResultContainer = QueryResultContainer // .fromXML(FileUtils.readFileToString(f, "utf-8")); // for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) { // for (QueryResultContainer.MTurkRelevanceVote relevanceVote : rankedResults.mTurkRelevanceVotes) { // if (!annotatorsIDs.contains(relevanceVote.turkID)) // annotatorsIDs.add(relevanceVote.turkID); // } // } // } HashMap<String, Integer> countVotesForATurker = new HashMap<>(); // creates temporary file with format for mace // Hashmap annotations: key is the id of a document and a sentence // Value is an array votes[] of turkers decisions: true or false (relevant or not) // the length of this array equals the number of annotators in List<String> annotatorsIDs. // If an annotator worked on the task his decision is written in the array otherwise the value is NULL // key: queryID + clueWebID + sentenceID // value: true and false annotations TreeMap<String, Annotations> annotations = new TreeMap<>(); for (File f : FileUtils.listFiles(new File(inputDir), new String[] { "xml" }, false)) { QueryResultContainer queryResultContainer = QueryResultContainer .fromXML(FileUtils.readFileToString(f, "utf-8")); System.out.println("Reading " + f.getName()); for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) { String documentID = rankedResults.clueWebID; for (QueryResultContainer.MTurkRelevanceVote relevanceVote : rankedResults.mTurkRelevanceVotes) { Integer turkerID; if (!annotatorsIDs.contains(relevanceVote.turkID)) { annotatorsIDs.add(relevanceVote.turkID); turkerID = annotatorsIDs.size() - 1; } else { turkerID = annotatorsIDs.indexOf(relevanceVote.turkID); } Integer count = countVotesForATurker.get(relevanceVote.turkID); if (count == null) { count = 0; } count++; countVotesForATurker.put(relevanceVote.turkID, count); String id; List<Integer> trueVotes; List<Integer> falseVotes; for (QueryResultContainer.SingleSentenceRelevanceVote singleSentenceRelevanceVote : relevanceVote.singleSentenceRelevanceVotes) if (!"".equals(singleSentenceRelevanceVote.sentenceID)) { id = f.getName() + "_" + documentID + "_" + singleSentenceRelevanceVote.sentenceID; Annotations turkerVotes = annotations.get(id); if (turkerVotes == null) { trueVotes = new ArrayList<>(); falseVotes = new ArrayList<>(); turkerVotes = new Annotations(trueVotes, falseVotes); } trueVotes = turkerVotes.trueAnnotations; falseVotes = turkerVotes.falseAnnotations; if ("true".equals(singleSentenceRelevanceVote.relevant)) { // votes[turkerID] = true; trueVotes.add(turkerID); } else if ("false".equals(singleSentenceRelevanceVote.relevant)) { // votes[turkerID] = false; falseVotes.add(turkerID); } else { throw new IllegalStateException("Annotation value of sentence " + singleSentenceRelevanceVote.sentenceID + " in " + rankedResults.clueWebID + " equals " + singleSentenceRelevanceVote.relevant); } try { int allVotesCount = trueVotes.size() + falseVotes.size(); if (allVotesCount > 5) { System.err.println(id + " doesn't have 5 annotators: true: " + trueVotes.size() + " false: " + falseVotes.size()); // nasty hack, we're gonna strip some data; true votes first /* we can't do that, it breaks something down the line int toRemove = allVotesCount - 5; if (trueVotes.size() >= toRemove) { trueVotes = trueVotes .subList(0, trueVotes.size() - toRemove); } else if ( falseVotes.size() >= toRemove) { falseVotes = falseVotes .subList(0, trueVotes.size() - toRemove); } */ System.err.println("Adjusted: " + id + " doesn't have 5 annotators: true: " + trueVotes.size() + " false: " + falseVotes.size()); } } catch (IllegalStateException e) { e.printStackTrace(); } turkerVotes.trueAnnotations = trueVotes; turkerVotes.falseAnnotations = falseVotes; annotations.put(id, turkerVotes); } else { throw new IllegalStateException( "Empty Sentence ID in " + f.getName() + " for turker " + turkerID); } } } } File tmp = printHashMap(annotations, annotatorsIDs.size()); String file = TEMP_DIR + "/" + tmp.getName(); MACE.main(new String[] { "--prefix", file }); //gets the keys of the documents and sentences ArrayList<String> lines = (ArrayList<String>) FileUtils.readLines(new File(file + ".prediction")); int i = 0; TreeMap<String, TreeMap<String, ArrayList<HashMap<String, String>>>> ids = new TreeMap<>(); ArrayList<HashMap<String, String>> sentences; if (lines.size() != annotations.size()) { throw new IllegalStateException( "The size of prediction file is " + lines.size() + "but expected " + annotations.size()); } for (Map.Entry entry : annotations.entrySet()) { //1001.xml_clueweb12-1905wb-13-07360_8783 String key = (String) entry.getKey(); String[] IDs = key.split("_"); if (IDs.length > 2) { String queryID = IDs[0]; String clueWebID = IDs[1]; String sentenceID = IDs[2]; TreeMap<String, ArrayList<HashMap<String, String>>> clueWebIDs = ids.get(queryID); if (clueWebIDs == null) { clueWebIDs = new TreeMap<>(); } sentences = clueWebIDs.get(clueWebID); if (sentences == null) { sentences = new ArrayList<>(); } HashMap<String, String> sentence = new HashMap<>(); sentence.put(sentenceID, lines.get(i)); sentences.add(sentence); clueWebIDs.put(clueWebID, sentences); ids.put(queryID, clueWebIDs); } else { throw new IllegalStateException("Wrong ID " + key); } i++; } for (Map.Entry entry : ids.entrySet()) { TreeMap<Integer, String> value = (TreeMap<Integer, String>) entry.getValue(); String queryID = (String) entry.getKey(); QueryResultContainer queryResultContainer = QueryResultContainer .fromXML(FileUtils.readFileToString(new File(inputDir, queryID), "utf-8")); for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) { for (Map.Entry val : value.entrySet()) { String clueWebID = (String) val.getKey(); if (clueWebID.equals(rankedResults.clueWebID)) { List<QueryResultContainer.SingleSentenceRelevanceVote> goldEstimatedLabels = new ArrayList<>(); List<QueryResultContainer.SingleSentenceRelevanceVote> turkersVotes = new ArrayList<>(); int size = 0; int hitSize = 0; String hitID = ""; for (QueryResultContainer.MTurkRelevanceVote vote : rankedResults.mTurkRelevanceVotes) { if (!hitID.equals(vote.hitID)) { hitID = vote.hitID; hitSize = vote.singleSentenceRelevanceVotes.size(); size = size + hitSize; turkersVotes.addAll(vote.singleSentenceRelevanceVotes); } else { if (vote.singleSentenceRelevanceVotes.size() != hitSize) { hitSize = vote.singleSentenceRelevanceVotes.size(); size = size + hitSize; turkersVotes.addAll(vote.singleSentenceRelevanceVotes); } } } ArrayList<HashMap<String, String>> sentenceList = (ArrayList<HashMap<String, String>>) val .getValue(); if (sentenceList.size() != turkersVotes.size()) { try { throw new IllegalStateException("Expected size of annotations is " + turkersVotes.size() + "but found " + sentenceList.size() + " for document " + rankedResults.clueWebID + " in " + queryID); } catch (IllegalStateException ex) { ex.printStackTrace(); } } for (QueryResultContainer.SingleSentenceRelevanceVote s : turkersVotes) { String valSentence = null; for (HashMap<String, String> anno : sentenceList) { if (anno.keySet().contains(s.sentenceID)) { valSentence = anno.get(s.sentenceID); } } QueryResultContainer.SingleSentenceRelevanceVote singleSentenceVote = new QueryResultContainer.SingleSentenceRelevanceVote(); singleSentenceVote.sentenceID = s.sentenceID; if (("false").equals(valSentence)) { singleSentenceVote.relevant = "false"; } else if (("true").equals(valSentence)) { singleSentenceVote.relevant = "true"; } else { throw new IllegalStateException("Annotation value of sentence " + singleSentenceVote.sentenceID + " equals " + val.getValue()); } goldEstimatedLabels.add(singleSentenceVote); } rankedResults.goldEstimatedLabels = goldEstimatedLabels; } } } File outputFile = new File(outputDir, queryID); FileUtils.writeStringToFile(outputFile, queryResultContainer.toXML(), "utf-8"); System.out.println("Finished " + outputFile); } ArrayList<String> annotators = (ArrayList<String>) FileUtils.readLines(new File(file + ".competence")); FileWriter fileWriter; StringBuilder sb = new StringBuilder(); for (int j = 0; j < annotatorsIDs.size(); j++) { String[] s = annotators.get(0).split("\t"); Float score = Float.parseFloat(s[j]); String turkerID = annotatorsIDs.get(j); System.out.println(turkerID + " " + score + " " + countVotesForATurker.get(turkerID)); sb.append(turkerID).append(" ").append(score).append(" ").append(countVotesForATurker.get(turkerID)) .append("\n"); } fileWriter = new FileWriter(turkersConfidence); fileWriter.append(sb.toString()); fileWriter.close(); }