List of usage examples for java.util.Map.containsKey
boolean containsKey(Object key);
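containsKey returns true if the map contains a mapping for the given key, which is not the same as the key mapping to a non-null value. A minimal sketch of the basic behavior (the map contents are illustrative):

import java.util.HashMap;
import java.util.Map;

public class ContainsKeyBasics {
    public static void main(String[] args) {
        Map<String, Integer> ages = new HashMap<>();
        ages.put("alice", 30);
        ages.put("bob", null); // HashMap permits null values

        System.out.println(ages.containsKey("alice")); // true
        System.out.println(ages.containsKey("bob"));   // true, even though the value is null
        System.out.println(ages.get("bob"));           // null, indistinguishable from a missing key
        System.out.println(ages.containsKey("carol")); // false
    }
}

This is why containsKey, rather than a get-null check, is the right test when null values may be stored. The examples below show the method in real command-line tools.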
From source file:net.ontopia.topicmaps.cmdlineutils.rdbms.RDBMSIndexTool.java
public static void main(String[] argv) throws Exception {
    // Initialize logging
    CmdlineUtils.initializeLogging();

    // Register logging options
    CmdlineOptions options = new CmdlineOptions("RDBMSIndexTool", argv);
    CmdlineUtils.registerLoggingOptions(options);

    // Parse command line options
    try {
        options.parse();
    } catch (CmdlineOptions.OptionsException e) {
        System.err.println("Error: " + e.getMessage());
        System.exit(1);
    }

    // Get command line arguments
    String[] args = options.getArguments();
    if (args.length != 1) {
        usage();
        System.exit(3);
    }

    // load database schema project
    ClassLoader cloader = RDBMSIndexTool.class.getClassLoader();
    InputStream istream = cloader.getResourceAsStream("net/ontopia/topicmaps/impl/rdbms/config/schema.xml");
    Project dbp = DatabaseProjectReader.loadProject(istream);

    // open database connection
    String propfile = args[0];
    ConnectionFactoryIF cf = new DefaultConnectionFactory(PropertyUtils.loadProperties(new File(propfile)),
            true);
    Connection conn = cf.requestConnection();
    try {
        DatabaseMetaData dbm = conn.getMetaData();
        boolean downcase = dbm.storesLowerCaseIdentifiers();

        Map extra_indexes = new TreeMap();
        Map missing_indexes = new TreeMap();

        Iterator tables = dbp.getTables().iterator();
        while (tables.hasNext()) {
            Table table = (Table) tables.next();
            String table_name = (downcase ? table.getName().toLowerCase() : table.getName());
            //! System.out.println("T :" + table_name);

            // get primary keys from database
            Map pkeys = getPrimaryKeys(table_name, dbm);
            // get indexes from database
            Map indexes = getIndexes(table_name, dbm);

            Map dindexes = new HashMap();
            if (table.getPrimaryKeys() != null) {
                String pkey = table_name + '(' + StringUtils.join(table.getPrimaryKeys(), ',') + ')';
                if (!pkeys.containsKey(pkey))
                    System.out.println("PKM: " + pkey);
            }

            Iterator iter = table.getIndexes().iterator();
            while (iter.hasNext()) {
                Index index = (Index) iter.next();
                String i = table_name + '(' + StringUtils.join(index.getColumns(), ',') + ')';
                String index_name = (downcase ? index.getName().toLowerCase() : index.getName());
                dindexes.put(i, index_name);
            }

            Set extra = new HashSet(indexes.keySet());
            extra.removeAll(dindexes.keySet());
            extra.removeAll(pkeys.keySet());
            if (!extra.isEmpty()) {
                Iterator i = extra.iterator();
                while (i.hasNext()) {
                    Object k = i.next();
                    extra_indexes.put(k, indexes.get(k));
                }
            }

            Set missing = new HashSet(dindexes.keySet());
            missing.addAll(pkeys.keySet());
            missing.removeAll(indexes.keySet());
            if (!missing.isEmpty()) {
                Iterator i = missing.iterator();
                while (i.hasNext()) {
                    Object k = i.next();
                    missing_indexes.put(k, dindexes.get(k));
                }
            }
        }

        if (!extra_indexes.isEmpty())
            System.out.println("/* --- Extra indexes ----------------------------------------- */");
        Iterator eiter = extra_indexes.keySet().iterator();
        while (eiter.hasNext()) {
            Object k = eiter.next();
            System.out.println("drop index " + extra_indexes.get(k) + "; /* " + k + " */");
        }

        if (!missing_indexes.isEmpty())
            System.out.println("/* --- Missing indexes---------------------------------------- */");
        Iterator miter = missing_indexes.keySet().iterator();
        while (miter.hasNext()) {
            Object k = miter.next();
            System.out.println("create index " + missing_indexes.get(k) + " on " + k + ";");
        }
    } finally {
        conn.rollback();
        conn.close();
    }
}
From source file:de.tudarmstadt.ukp.experiments.argumentation.convincingness.sampling.Step5bGoldLabelStatistics.java
@SuppressWarnings("unchecked")
public static void main(String[] args) throws Exception {
    String inputDir = args[0];
    Collection<File> files = IOHelper.listXmlFiles(new File(inputDir));

    int totalPairsWithReasonSameAsGold = 0;
    DescriptiveStatistics ds = new DescriptiveStatistics();
    DescriptiveStatistics statsPerTopic = new DescriptiveStatistics();
    Map<String, Integer> goldDataDistribution = new HashMap<>();
    int totalGoldReasonTokens = 0;

    for (File file : files) {
        List<AnnotatedArgumentPair> argumentPairs = (List<AnnotatedArgumentPair>) XStreamTools.getXStream()
                .fromXML(file);

        int pairsPerTopicCounter = 0;

        for (AnnotatedArgumentPair annotatedArgumentPair : argumentPairs) {
            String goldLabel = annotatedArgumentPair.getGoldLabel();
            int sameInOnePair = 0;

            if (goldLabel != null) {
                if (!goldDataDistribution.containsKey(goldLabel)) {
                    goldDataDistribution.put(goldLabel, 0);
                }
                goldDataDistribution.put(goldLabel, goldDataDistribution.get(goldLabel) + 1);

                // get gold reason statistics
                for (AnnotatedArgumentPair.MTurkAssignment assignment : annotatedArgumentPair.mTurkAssignments) {
                    String label = assignment.getValue();
                    if (goldLabel.equals(label)) {
                        sameInOnePair++;
                        totalGoldReasonTokens += assignment.getReason().split("\\W+").length;
                    }
                }

                pairsPerTopicCounter++;
            }

            ds.addValue(sameInOnePair);
            totalPairsWithReasonSameAsGold += sameInOnePair;
        }

        statsPerTopic.addValue(pairsPerTopicCounter);
    }

    System.out.println("Total reasons matching gold " + totalPairsWithReasonSameAsGold);
    System.out.println(ds);

    int totalPairs = 0;
    for (Integer pairs : goldDataDistribution.values()) {
        totalPairs += pairs;
    }

    System.out.println(goldDataDistribution);
    System.out.println(goldDataDistribution.values());
    System.out.println("Total pairs: " + totalPairs);
    System.out.println("Stats per topic: " + statsPerTopic);
    System.out.println("Total gold reason tokens: " + totalGoldReasonTokens);
}
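The example above uses the classic pre-Java-8 counting idiom: test containsKey, seed a zero if absent, then increment. On Java 8+ the same tally can be written as a one-liner with Map.merge. A minimal equivalent sketch (variable names taken from the example, the label value is illustrative):

import java.util.HashMap;
import java.util.Map;

public class CountingIdiomDemo {
    public static void main(String[] args) {
        Map<String, Integer> goldDataDistribution = new HashMap<>();
        String goldLabel = "a1"; // illustrative label

        // classic idiom, as used in the example above
        if (!goldDataDistribution.containsKey(goldLabel)) {
            goldDataDistribution.put(goldLabel, 0);
        }
        goldDataDistribution.put(goldLabel, goldDataDistribution.get(goldLabel) + 1);

        // equivalent one-liner on Java 8+: insert 1 if absent, otherwise add 1
        goldDataDistribution.merge(goldLabel, 1, Integer::sum);

        System.out.println(goldDataDistribution); // {a1=2}
    }
}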
From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step7CollectMTurkResults.java
public static void main(String[] args) throws Exception {
    // input dir - list of xml query containers
    // /home/user-ukp/research/data/dip/wp1-documents/step4-boiler-plate/
    File inputDir = new File(args[0] + "/");

    // MTurk result file

    // output dir
    File outputDir = new File(args[2]);
    if (!outputDir.exists()) {
        outputDir.mkdirs();
    }

    // Folder with success files
    File mturkSuccessDir = new File(args[1]);

    Collection<File> files = FileUtils.listFiles(mturkSuccessDir, new String[] { "result" }, false);
    if (files.isEmpty()) {
        throw new IllegalArgumentException("Input folder is empty. " + mturkSuccessDir);
    }

    HashMap<String, List<MTurkAnnotation>> mturkAnnotations = new HashMap<>();

    // parsing all CSV files
    for (File mturkCSVResultFile : files) {
        System.out.println("Parsing " + mturkCSVResultFile.getName());

        MTurkOutputReader outputReader = new MTurkOutputReader(
                new HashSet<>(Arrays.asList("annotation", "workerid")), mturkCSVResultFile);

        // for fixing broken data input: for each hit, collect all sentence IDs
        Map<String, SortedSet<String>> hitSentences = new HashMap<>();

        // first iteration: collect the sentences
        for (Map<String, String> record : outputReader) {
            String hitID = record.get("hitid");
            if (!hitSentences.containsKey(hitID)) {
                hitSentences.put(hitID, new TreeSet<>());
            }

            String relevantSentences = record.get("Answer.relevant_sentences");
            String irrelevantSentences = record.get("Answer.irrelevant_sentences");

            if (relevantSentences != null) {
                hitSentences.get(hitID).addAll(Arrays.asList(relevantSentences.split(",")));
            }

            if (irrelevantSentences != null) {
                hitSentences.get(hitID).addAll(Arrays.asList(irrelevantSentences.split(",")));
            }
        }

        // and now second iteration
        for (Map<String, String> record : outputReader) {
            String hitID = record.get("hitid");
            String annotatorID = record.get("workerid");
            String acceptTime = record.get("assignmentaccepttime");
            String submitTime = record.get("assignmentsubmittime");
            String relevantSentences = record.get("Answer.relevant_sentences");
            String irrelevantSentences = record.get("Answer.irrelevant_sentences");
            String reject = record.get("reject");
            String[] filename;
            String comment;
            String clueWeb;
            String[] relevant = {};
            String[] irrelevant = {};

            filename = record.get("annotation").split("_");
            String fileXml = filename[0];
            clueWeb = filename[1].trim();
            comment = record.get("Answer.comment");

            if (relevantSentences != null) {
                relevant = relevantSentences.split(",");
            }

            if (irrelevantSentences != null) {
                irrelevant = irrelevantSentences.split(",");
            }

            // sanitizing data: if both relevant and irrelevant are empty, that's a bug
            // we're gonna look up all sentences from this HIT and treat this assignment
            // as if there were only irrelevant ones
            if (relevant.length == 0 && irrelevant.length == 0) {
                SortedSet<String> strings = hitSentences.get(hitID);
                irrelevant = new String[strings.size()];
                strings.toArray(irrelevant);
            }

            if (reject != null) {
                System.out.println(" HIT " + hitID + " annotated by " + annotatorID + " was rejected ");
            } else {
                /*
                // relevant sentences is a comma-delimited string,
                // this regular expression is rather strange
                // it must contain digits, it might be that there is only one space or a comma or some other char
                // digits are the sentence ids. if relevant sentences do not contain digits then it is wrong
                if (relevantSentences.matches("^\\D*$") && irrelevantSentences.matches("^\\D*$")) {
                    try {
                        throw new IllegalStateException("No annotations found for HIT " + hitID + " in "
                                + fileXml + " for document " + clueWeb);
                    } catch (IllegalStateException ex) {
                        ex.printStackTrace();
                    }
                }
                */
                MTurkAnnotation mturkAnnotation;
                try {
                    mturkAnnotation = new MTurkAnnotation(hitID, annotatorID, acceptTime, submitTime, comment,
                            clueWeb, relevant, irrelevant);
                } catch (IllegalArgumentException ex) {
                    throw new IllegalArgumentException("Record: " + record, ex);
                }

                List<MTurkAnnotation> listOfAnnotations = mturkAnnotations.get(fileXml);

                if (listOfAnnotations == null) {
                    listOfAnnotations = new ArrayList<>();
                }
                listOfAnnotations.add(mturkAnnotation);
                mturkAnnotations.put(fileXml, listOfAnnotations);
            }
        }
        // parser.close();
    }

    // Debugging: output number of HITs of a query
    System.out.println("Accepted HITs for a query:");
    for (Map.Entry e : mturkAnnotations.entrySet()) {
        ArrayList<MTurkAnnotation> a = (ArrayList<MTurkAnnotation>) e.getValue();
        System.out.println(e.getKey() + " " + a.size());
    }

    for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) {
        QueryResultContainer queryResultContainer = QueryResultContainer
                .fromXML(FileUtils.readFileToString(f, "utf-8"));

        String fileName = f.getName();
        List<MTurkAnnotation> listOfAnnotations = mturkAnnotations.get(fileName);

        if (listOfAnnotations == null || listOfAnnotations.isEmpty()) {
            throw new IllegalStateException("No annotations for " + f.getName());
        }

        for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) {
            for (MTurkAnnotation mtAnnotation : listOfAnnotations) {
                String clueWeb = mtAnnotation.clueWeb;
                if (rankedResults.clueWebID.equals(clueWeb)) {
                    List<QueryResultContainer.MTurkRelevanceVote> mTurkRelevanceVotes = rankedResults.mTurkRelevanceVotes;
                    QueryResultContainer.MTurkRelevanceVote relevanceVote = new QueryResultContainer.MTurkRelevanceVote();
                    String annotatorID = mtAnnotation.annotatorID;
                    String hitID = mtAnnotation.hitID;
                    String acceptTime = mtAnnotation.acceptTime;
                    String submitTime = mtAnnotation.submitTime;
                    String comment = mtAnnotation.comment;
                    String[] relevant = mtAnnotation.relevant;
                    String[] irrelevant = mtAnnotation.irrelevant;
                    relevanceVote.turkID = annotatorID.trim();
                    relevanceVote.hitID = hitID.trim();
                    relevanceVote.acceptTime = acceptTime.trim();
                    relevanceVote.submitTime = submitTime.trim();
                    relevanceVote.comment = comment != null ? comment.trim() : null;

                    if (relevant.length == 0 && irrelevant.length == 0) {
                        try {
                            throw new IllegalStateException("the length of the annotations is 0"
                                    + rankedResults.clueWebID + " for HIT " + relevanceVote.hitID);
                        } catch (IllegalStateException e) {
                            e.printStackTrace();
                        }
                    }

                    for (String r : relevant) {
                        String sentenceId = r.trim();
                        if (!sentenceId.isEmpty() && sentenceId.matches("\\d+")) {
                            QueryResultContainer.SingleSentenceRelevanceVote singleSentenceVote = new QueryResultContainer.SingleSentenceRelevanceVote();
                            singleSentenceVote.sentenceID = sentenceId;
                            singleSentenceVote.relevant = "true";
                            relevanceVote.singleSentenceRelevanceVotes.add(singleSentenceVote);
                        }
                    }

                    for (String r : irrelevant) {
                        String sentenceId = r.trim();
                        if (!sentenceId.isEmpty() && sentenceId.matches("\\d+")) {
                            QueryResultContainer.SingleSentenceRelevanceVote singleSentenceVote = new QueryResultContainer.SingleSentenceRelevanceVote();
                            singleSentenceVote.sentenceID = sentenceId;
                            singleSentenceVote.relevant = "false";
                            relevanceVote.singleSentenceRelevanceVotes.add(singleSentenceVote);
                        }
                    }

                    mTurkRelevanceVotes.add(relevanceVote);
                }
            }
        }

        File outputFile = new File(outputDir, f.getName());
        FileUtils.writeStringToFile(outputFile, queryResultContainer.toXML(), "utf-8");
        System.out.println("Finished " + outputFile);
    }
}
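A recurring pattern in the example above is guarding a lazily created collection value with containsKey (the hitSentences map). On Java 8+, Map.computeIfAbsent folds the check and the insertion into one call. A minimal sketch reusing the example's names (the HIT ID and sentence list are illustrative):

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;

public class ComputeIfAbsentDemo {
    public static void main(String[] args) {
        Map<String, SortedSet<String>> hitSentences = new HashMap<>();
        String hitID = "HIT123";            // illustrative values
        String relevantSentences = "3,1,2";

        // classic guard, as in the example above
        if (!hitSentences.containsKey(hitID)) {
            hitSentences.put(hitID, new TreeSet<>());
        }
        hitSentences.get(hitID).addAll(Arrays.asList(relevantSentences.split(",")));

        // equivalent on Java 8+: the TreeSet is created only when the key is absent
        hitSentences.computeIfAbsent(hitID, k -> new TreeSet<>())
                .addAll(Arrays.asList(relevantSentences.split(",")));

        System.out.println(hitSentences); // {HIT123=[1, 2, 3]}
    }
}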
From source file:com.act.lcms.db.io.PrintConstructInfo.java
public static void main(String[] args) throws Exception {
    Options opts = new Options();
    for (Option.Builder b : OPTION_BUILDERS) {
        opts.addOption(b.build());
    }

    CommandLine cl = null;
    try {
        CommandLineParser parser = new DefaultParser();
        cl = parser.parse(opts, args);
    } catch (ParseException e) {
        System.err.format("Argument parsing failed: %s\n", e.getMessage());
        HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null,
                true);
        System.exit(1);
    }

    if (cl.hasOption("help")) {
        HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null,
                true);
        return;
    }

    File lcmsDir = new File(cl.getOptionValue(OPTION_DIRECTORY));
    if (!lcmsDir.isDirectory()) {
        System.err.format("File at %s is not a directory\n", lcmsDir.getAbsolutePath());
        HELP_FORMATTER.printHelp(LoadPlateCompositionIntoDB.class.getCanonicalName(), HELP_MESSAGE, opts, null,
                true);
        System.exit(1);
    }

    try (DB db = DB.openDBFromCLI(cl)) {
        System.out.print("Loading/updating LCMS scan files into DB\n");
        ScanFile.insertOrUpdateScanFilesInDirectory(db, lcmsDir);

        String construct = cl.getOptionValue(OPTION_CONSTRUCT);
        List<LCMSWell> lcmsWells = LCMSWell.getInstance().getByConstructID(db, construct);
        Collections.sort(lcmsWells, new Comparator<LCMSWell>() {
            @Override
            public int compare(LCMSWell o1, LCMSWell o2) {
                return o1.getId().compareTo(o2.getId());
            }
        });

        Set<String> uniqueMSIDs = new HashSet<>();
        Map<Integer, Plate> platesById = new HashMap<>();

        System.out.format("\n\n-- Construct %s --\n\n", construct);

        List<ChemicalAssociatedWithPathway> pathwayChems = ChemicalAssociatedWithPathway.getInstance()
                .getChemicalsAssociatedWithPathwayByConstructId(db, construct);
        System.out.print("Chemicals associated with pathway:\n");
        System.out.format("  %-8s%-15s%-45s\n", "index", "kind", "chemical");
        for (ChemicalAssociatedWithPathway chem : pathwayChems) {
            System.out.format("  %-8d%-15s%-45s\n", chem.getIndex(), chem.getKind(), chem.getChemical());
        }

        System.out.print("\nLCMS wells:\n");
        System.out.format("  %-15s%-6s%-15s%-15s%-15s\n", "barcode", "well", "msid", "fed", "lcms_count");
        for (LCMSWell well : lcmsWells) {
            uniqueMSIDs.add(well.getMsid());

            Plate p = platesById.get(well.getPlateId());
            if (p == null) {
                // TODO: migrate Plate to be a subclass of BaseDBModel.
                p = Plate.getPlateById(db, well.getPlateId());
                platesById.put(p.getId(), p);
            }

            String chem = well.getChemical();
            List<ScanFile> scanFiles = ScanFile.getScanFileByPlateIDRowAndColumn(db, p.getId(),
                    well.getPlateRow(), well.getPlateColumn());

            System.out.format("  %-15s%-6s%-15s%-15s%-15d\n", p.getBarcode(), well.getCoordinatesString(),
                    well.getMsid(), chem == null || chem.isEmpty() ? "--" : chem, scanFiles.size());
            System.out.flush();
        }

        List<Integer> plateIds = Arrays.asList(platesById.keySet().toArray(new Integer[platesById.size()]));
        Collections.sort(plateIds);
        System.out.print("\nAppears in plates:\n");
        for (Integer id : plateIds) {
            Plate p = platesById.get(id);
            System.out.format("  %s: %s\n", p.getBarcode(), p.getName());
        }

        List<String> msids = Arrays.asList(uniqueMSIDs.toArray(new String[uniqueMSIDs.size()]));
        Collections.sort(msids);
        System.out.format("\nMSIDS: %s\n", StringUtils.join(msids, ", "));

        Set<String> availableNegativeControls = new HashSet<>();
        for (Map.Entry<Integer, Plate> entry : platesById.entrySet()) {
            List<LCMSWell> wells = LCMSWell.getInstance().getByPlateId(db, entry.getKey());
            for (LCMSWell well : wells) {
                if (!construct.equals(well.getComposition())) {
                    availableNegativeControls.add(well.getComposition());
                }
            }
        }

        // Print available standards for each step w/ plate barcodes and coordinates.
        System.out.format("\nAvailable Standards:\n");
        Map<Integer, Plate> plateCache = new HashMap<>();
        for (ChemicalAssociatedWithPathway chem : pathwayChems) {
            List<StandardWell> matchingWells = StandardWell.getInstance().getStandardWellsByChemical(db,
                    chem.getChemical());
            for (StandardWell well : matchingWells) {
                if (!plateCache.containsKey(well.getPlateId())) {
                    Plate p = Plate.getPlateById(db, well.getPlateId());
                    plateCache.put(p.getId(), p);
                }
            }

            Map<Integer, List<StandardWell>> standardWellsByPlateId = new HashMap<>();
            for (StandardWell well : matchingWells) {
                List<StandardWell> plateWells = standardWellsByPlateId.get(well.getPlateId());
                if (plateWells == null) {
                    plateWells = new ArrayList<>();
                    standardWellsByPlateId.put(well.getPlateId(), plateWells);
                }
                plateWells.add(well);
            }

            List<Pair<String, Integer>> plateBarcodes = new ArrayList<>(plateCache.size());
            for (Plate p : plateCache.values()) {
                if (p.getBarcode() == null) {
                    plateBarcodes.add(Pair.of("(no barcode)", p.getId()));
                } else {
                    plateBarcodes.add(Pair.of(p.getBarcode(), p.getId()));
                }
            }
            Collections.sort(plateBarcodes);

            System.out.format("  %s:\n", chem.getChemical());
            for (Pair<String, Integer> barcodePair : plateBarcodes) {
                // TODO: hoist this whole sorting/translation step into a utility class.
                List<StandardWell> wells = standardWellsByPlateId.get(barcodePair.getRight());
                if (wells == null) {
                    // Don't print plates that don't apply to this chemical,
                    // which can happen because we're caching the plates.
                    continue;
                }
                Collections.sort(wells, new Comparator<StandardWell>() {
                    @Override
                    public int compare(StandardWell o1, StandardWell o2) {
                        int c = o1.getPlateRow().compareTo(o2.getPlateRow());
                        if (c != 0)
                            return c;
                        return o1.getPlateColumn().compareTo(o2.getPlateColumn());
                    }
                });
                List<String> descriptions = new ArrayList<>(wells.size());
                for (StandardWell well : wells) {
                    descriptions.add(String.format("%s in %s%s", well.getCoordinatesString(), well.getMedia(),
                            well.getConcentration() == null ? ""
                                    : String.format(" c. %f", well.getConcentration())));
                }
                System.out.format("    %s: %s\n", barcodePair.getLeft(), StringUtils.join(descriptions, ", "));
            }
        }

        List<String> negativeControlStrains = Arrays
                .asList(availableNegativeControls.toArray(new String[availableNegativeControls.size()]));
        Collections.sort(negativeControlStrains);
        System.out.format("\nAvailable negative controls: %s\n", StringUtils.join(negativeControlStrains, ","));
        System.out.print("\n----------\n");
        System.out.print("\n\n");
    }
}
From source file:mase.MaseEvolve.java
public static void main(String[] args) throws Exception {
    File outDir = getOutDir(args);
    boolean force = Arrays.asList(args).contains(FORCE);
    if (!outDir.exists()) {
        outDir.mkdirs();
    } else if (!force) {
        System.out.println("Folder already exists: " + outDir.getAbsolutePath() + ". Waiting 5 sec.");
        try {
            Thread.sleep(5000);
        } catch (InterruptedException ex) {
        }
    }

    // Get config file
    Map<String, String> pars = readParams(args);

    // Copy config to outdir
    try {
        File rawConfig = writeConfig(args, pars, outDir, false);
        File destiny = new File(outDir, DEFAULT_CONFIG);
        destiny.delete();
        FileUtils.moveFile(rawConfig, new File(outDir, DEFAULT_CONFIG));
    } catch (Exception ex) {
        ex.printStackTrace();
    }

    // JBOT INTEGRATION: copy jbot config file to the outdir
    // Does nothing when jbot is not used
    if (pars.containsKey("problem.jbot-config")) {
        File jbot = new File(pars.get("problem.jbot-config"));
        FileUtils.copyFile(jbot, new File(outDir, jbot.getName()));
    }

    // Write config to system temp file
    File config = writeConfig(args, pars, outDir, true);
    // Launch
    launchExperiment(config);
}
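The jbot lookup above pairs containsKey with get, which performs two hash lookups. When the map never stores null values, a single get followed by a null check is equivalent and avoids the second lookup. A minimal sketch (the map entry is illustrative; the copy step is replaced by a print):

import java.util.HashMap;
import java.util.Map;

public class OptionalKeyLookupDemo {
    public static void main(String[] args) {
        Map<String, String> pars = new HashMap<>();
        pars.put("problem.jbot-config", "jbot.conf"); // illustrative entry

        // two lookups: containsKey, then get (as in the example above)
        if (pars.containsKey("problem.jbot-config")) {
            System.out.println("copy " + pars.get("problem.jbot-config"));
        }

        // one lookup; equivalent as long as null values are never stored
        String jbotConfig = pars.get("problem.jbot-config");
        if (jbotConfig != null) {
            System.out.println("copy " + jbotConfig);
        }
    }
}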
From source file:de.mycrobase.jcloudapp.Main.java
public static void main(String[] args) throws Exception {
    try {
        // borrowed from https://github.com/cmur2/gloudapp
        ImageNormal = ImageIO.read(Main.class.getResourceAsStream("gloudapp.png"));
        ImageWorking = ImageIO.read(Main.class.getResourceAsStream("gloudapp_working.png"));
        IconNormal = new ImageIcon(ImageNormal.getScaledInstance(64, 64, Image.SCALE_SMOOTH));
        IconLarge = new ImageIcon(ImageNormal);
    } catch (IOException ex) {
        showErrorDialog("Could not load images!\n" + ex);
        System.exit(1);
    }

    URI serviceUrl = URI.create("http://my.cl.ly:80/");

    File storage = getStorageFile();
    if (storage.exists() && storage.isFile()) {
        Yaml yaml = new Yaml();
        try {
            Map<String, String> m = (Map<String, String>) yaml.load(new FileInputStream(storage));
            // avoid NPE by implicitly assuming the presence of a service URL
            if (m.containsKey("service_url")) {
                serviceUrl = URI.create(m.get("service_url"));
            }
        } catch (IOException ex) {
            showErrorDialog("Loading settings from .cloudapp-cli failed: " + ex);
            System.exit(1);
        }
    }

    Main app = new Main(serviceUrl);
    app.login(args);
    app.run();
}
From source file:com.termmed.sampling.ConceptsWithMoreThanThreeRoleGroups.java
/**
 * The main method.
 *
 * @param args the arguments
 * @throws Exception the exception
 */
public static void main(String[] args) throws Exception {
    System.out.println("Starting...");
    Map<String, Set<String>> groupsMap = new HashMap<String, Set<String>>();
    File relsFile = new File(
            "/Users/alo/Downloads/SnomedCT_RF2Release_INT_20160131-1/Snapshot/Terminology/sct2_Relationship_Snapshot_INT_20160131.txt");
    BufferedReader br2 = new BufferedReader(new FileReader(relsFile));
    String line2;
    int count2 = 0;
    while ((line2 = br2.readLine()) != null) {
        // process the line.
        count2++;
        if (count2 % 10000 == 0) {
            //System.out.println(count2);
        }
        List<String> columns = Arrays.asList(line2.split("\t", -1));
        // column index 6 (the relationship group) requires at least 7 columns
        if (columns.size() > 6) {
            if (columns.get(2).equals("1") && !columns.get(6).equals("0")) {
                if (!groupsMap.containsKey(columns.get(4))) {
                    groupsMap.put(columns.get(4), new HashSet<String>());
                }
                groupsMap.get(columns.get(4)).add(columns.get(6));
            }
        }
    }
    br2.close();
    System.out.println("Relationship groups loaded");

    Gson gson = new Gson();
    System.out.println("Reading JSON 1");
    File crossoverFile1 = new File("/Users/alo/Downloads/crossover_role_to_group.json");
    String contents = FileUtils.readFileToString(crossoverFile1, "utf-8");
    Type collectionType = new TypeToken<Collection<ControlResultLine>>() {
    }.getType();
    List<ControlResultLine> lineObject = gson.fromJson(contents, collectionType);
    Set<String> crossovers1 = new HashSet<String>();
    for (ControlResultLine loopResult : lineObject) {
        crossovers1.add(loopResult.conceptId);
    }
    System.out.println("Crossovers 1 loaded, " + lineObject.size() + " Objects");

    System.out.println("Reading JSON 2");
    File crossoverFile2 = new File("/Users/alo/Downloads/crossover_group_to_group.json");
    String contents2 = FileUtils.readFileToString(crossoverFile2, "utf-8");
    List<ControlResultLine> lineObject2 = gson.fromJson(contents2, collectionType);
    Set<String> crossovers2 = new HashSet<String>();
    for (ControlResultLine loopResult : lineObject2) {
        crossovers2.add(loopResult.conceptId);
    }
    System.out.println("Crossovers 2 loaded, " + lineObject2.size() + " Objects");

    Set<String> foundConcepts = new HashSet<String>();
    int count3 = 0;
    BufferedWriter writer = new BufferedWriter(
            new FileWriter(new File("ConceptsWithMoreThanThreeRoleGroups.csv")));
    for (String loopConcept : groupsMap.keySet()) {
        if (groupsMap.get(loopConcept).size() > 3) {
            writer.write(loopConcept);
            writer.newLine();
            foundConcepts.add(loopConcept);
            count3++;
        }
    }
    writer.close();
    System.out.println("Found " + foundConcepts.size() + " concepts");

    int countCrossover1 = 0;
    for (String loopConcept : foundConcepts) {
        if (crossovers1.contains(loopConcept)) {
            countCrossover1++;
        }
    }
    System.out.println(countCrossover1 + " are present in crossover_role_to_group");

    int countCrossover2 = 0;
    for (String loopConcept : foundConcepts) {
        if (crossovers2.contains(loopConcept)) {
            countCrossover2++;
        }
    }
    System.out.println(countCrossover2 + " are present in crossover_group_to_group");
    System.out.println("Done");
}
From source file:de.tudarmstadt.ukp.experiments.argumentation.convincingness.sampling.Step4MTurkOutputCollector.java
@SuppressWarnings("unchecked")
public static void main(String[] args) throws Exception {
    String inputDirWithArgumentPairs = args[0];

    File[] resultFiles;

    if (args[1].contains("*")) {
        File path = new File(args[1]);
        File directory = path.getParentFile();
        String regex = path.getName().replaceAll("\\*", "");

        List<File> files = new ArrayList<>(FileUtils.listFiles(directory, new String[] { regex }, false));
        resultFiles = new File[files.size()];
        for (int i = 0; i < files.size(); i++) {
            resultFiles[i] = files.get(i);
        }
    } else {
        // result file is a comma-separated list of CSV files from MTurk
        String[] split = args[1].split(",");
        resultFiles = new File[split.length];
        for (int i = 0; i < split.length; i++) {
            resultFiles[i] = new File(split[i]);
        }
    }

    File outputDir = new File(args[2]);
    if (!outputDir.exists()) {
        if (!outputDir.mkdirs()) {
            throw new IOException("Cannot create directory " + outputDir);
        }
    }

    // error if output folder not empty to prevent any confusion by mixing files
    if (!FileUtils.listFiles(outputDir, null, false).isEmpty()) {
        throw new IllegalArgumentException("Output dir " + outputDir + " is not empty");
    }

    // collected assignments with empty reason for rejections
    Set<String> assignmentsWithEmptyReason = new HashSet<>();

    // parse with first line as header
    MTurkOutputReader mTurkOutputReader = new MTurkOutputReader(resultFiles);

    Collection<File> files = FileUtils.listFiles(new File(inputDirWithArgumentPairs), new String[] { "xml" },
            false);

    if (files.isEmpty()) {
        throw new IOException("No xml files found in " + inputDirWithArgumentPairs);
    }

    // statistics: how many hits with how many assignments ; hit ID / assignments
    Map<String, Map<String, Integer>> assignmentsPerHits = new HashMap<>();

    // collect accept/reject statistics
    for (Map<String, String> record : mTurkOutputReader) {
        boolean wasRejected = "Rejected".equals(record.get("assignmentstatus"));
        String hitID = record.get("hitid");
        String hitTypeId = record.get("hittypeid");

        if (!wasRejected) {
            // update statistics
            if (!assignmentsPerHits.containsKey(hitTypeId)) {
                assignmentsPerHits.put(hitTypeId, new HashMap<String, Integer>());
            }
            if (!assignmentsPerHits.get(hitTypeId).containsKey(hitID)) {
                assignmentsPerHits.get(hitTypeId).put(hitID, 0);
            }
            assignmentsPerHits.get(hitTypeId).put(hitID, assignmentsPerHits.get(hitTypeId).get(hitID) + 1);
        }
    }

    // statistics: how many hits with how many assignments ; hit ID / assignments
    Map<String, Integer> approvedAssignmentsPerHit = new HashMap<>();
    Map<String, Integer> rejectedAssignmentsPerHit = new HashMap<>();

    // collect accept/reject statistics
    for (Map<String, String> record : mTurkOutputReader) {
        boolean approved = "Approved".equals(record.get("assignmentstatus"));
        boolean rejected = "Rejected".equals(record.get("assignmentstatus"));
        String hitID = record.get("hitid");

        if (approved) {
            // update statistics
            if (!approvedAssignmentsPerHit.containsKey(hitID)) {
                approvedAssignmentsPerHit.put(hitID, 0);
            }
            approvedAssignmentsPerHit.put(hitID, approvedAssignmentsPerHit.get(hitID) + 1);
        } else if (rejected) {
            // update statistics
            if (!rejectedAssignmentsPerHit.containsKey(hitID)) {
                rejectedAssignmentsPerHit.put(hitID, 0);
            }
            rejectedAssignmentsPerHit.put(hitID, rejectedAssignmentsPerHit.get(hitID) + 1);
        } else {
            throw new IllegalStateException(
                    "Unknown state: " + record.get("assignmentstatus") + " HITID: " + hitID);
        }
    }

    // System.out.println("Approved: " + approvedAssignmentsPerHit);
    // System.out.println("Rejected: " + rejectedAssignmentsPerHit);
    System.out.println("Approved (values): " + new HashSet<>(approvedAssignmentsPerHit.values()));
    System.out.println("Rejected (values): " + new HashSet<>(rejectedAssignmentsPerHit.values()));

    // rejection statistics
    int totalRejected = 0;
    for (Map.Entry<String, Integer> rejectionEntry : rejectedAssignmentsPerHit.entrySet()) {
        totalRejected += rejectionEntry.getValue();
    }

    System.out.println("Total rejections: " + totalRejected);

    /*
    // generate .success files for adding more annotations
    for (File resultFile : resultFiles) {
        String hitTypeID = mTurkOutputReader.getHitTypeIdForFile().get(resultFile);
        // assignments for that hittypeid (= file)
        Map<String, Integer> assignments = assignmentsPerHits.get(hitTypeID);
        prepareUpdateHITsFiles(assignments, hitTypeID, resultFile);
    }
    */

    int totalSavedPairs = 0;

    // load all previously prepared argument pairs
    for (File file : files) {
        List<ArgumentPair> argumentPairs = (List<ArgumentPair>) XStreamTools.getXStream().fromXML(file);

        List<AnnotatedArgumentPair> annotatedArgumentPairs = new ArrayList<>();

        for (ArgumentPair argumentPair : argumentPairs) {
            AnnotatedArgumentPair annotatedArgumentPair = new AnnotatedArgumentPair(argumentPair);

            // is there such an answer?
            String key = "Answer." + argumentPair.getId();

            // iterate only if there is such column to save time
            if (mTurkOutputReader.getColumnNames().contains(key)) {
                // now find the results
                for (Map<String, String> record : mTurkOutputReader) {
                    if (record.containsKey(key)) {
                        // extract the values
                        AnnotatedArgumentPair.MTurkAssignment assignment = new AnnotatedArgumentPair.MTurkAssignment();

                        boolean wasRejected = "Rejected".equals(record.get("assignmentstatus"));

                        // only non-rejected (if required)
                        if (!wasRejected) {
                            String hitID = record.get("hitid");
                            String workerID = record.get("workerid");
                            String assignmentId = record.get("assignmentid");
                            try {
                                assignment.setAssignmentAcceptTime(
                                        DATE_FORMAT.parse(record.get("assignmentaccepttime")));
                                assignment.setAssignmentSubmitTime(
                                        DATE_FORMAT.parse(record.get("assignmentsubmittime")));
                                assignment.setHitComment(record.get("Answer.feedback"));
                                assignment.setHitID(hitID);
                                assignment.setTurkID(workerID);
                                assignment.setAssignmentId(assignmentId);

                                // and answer specific fields
                                String valueRaw = record.get(key);

                                // so far the label has had format aXXX_aYYY_a1, aXXX_aYYY_a2,
                                // or aXXX_aYYY_equal; strip now only true label
                                String label = valueRaw.split("_")[2];

                                assignment.setValue(label);
                                String reason = record.get(key + "_reason");

                                // missing reason
                                if (reason == null) {
                                    assignmentsWithEmptyReason.add(assignmentId);
                                } else {
                                    assignment.setReason(reason);

                                    // get worker's stance
                                    String stanceRaw = record.get(key + "_stance");
                                    if (stanceRaw != null) {
                                        // parse stance
                                        String stance = stanceRaw.split("_stance_")[1];
                                        assignment.setWorkerStance(stance);
                                    }

                                    // we take maximal 5 assignments
                                    Collections.sort(annotatedArgumentPair.mTurkAssignments,
                                            new Comparator<AnnotatedArgumentPair.MTurkAssignment>() {
                                                @Override
                                                public int compare(AnnotatedArgumentPair.MTurkAssignment o1,
                                                        AnnotatedArgumentPair.MTurkAssignment o2) {
                                                    return o1.getAssignmentAcceptTime()
                                                            .compareTo(o2.getAssignmentAcceptTime());
                                                }
                                            });

                                    if (annotatedArgumentPair.mTurkAssignments
                                            .size() < MAXIMUM_ASSIGNMENTS_PER_HIT) {
                                        annotatedArgumentPair.mTurkAssignments.add(assignment);
                                    }
                                }
                            } catch (IllegalArgumentException | NullPointerException ex) {
                                System.err.println("Malformed annotations for HIT " + hitID + ", worker "
                                        + workerID + ", assignment " + assignmentId + "; " + ex.getMessage()
                                        + ", full record: " + record);
                            }
                        }
                    }
                }
            }

            // and if there are some annotations, add it to the result set
            if (!annotatedArgumentPair.mTurkAssignments.isEmpty()) {
                annotatedArgumentPairs.add(annotatedArgumentPair);
            }
        }

        if (!annotatedArgumentPairs.isEmpty()) {
            File outputFile = new File(outputDir, file.getName());
            XStreamTools.toXML(annotatedArgumentPairs, outputFile);

            System.out.println("Saved " + annotatedArgumentPairs.size() + " annotated pairs to " + outputFile);
            totalSavedPairs += annotatedArgumentPairs.size();
        }
    }

    System.out.println("Total saved " + totalSavedPairs + " pairs");

    // print assignments with empty reasons
    if (!assignmentsWithEmptyReason.isEmpty()) {
        System.out.println(
                "== Assignments with empty reason:\nassignmentIdToReject\tassignmentIdToRejectComment");
        for (String assignmentId : assignmentsWithEmptyReason) {
            System.out.println(
                    assignmentId + "\t\"Dear worker, you did not fill the required field with a reason.\"");
        }
    }
}
From source file:de.tudarmstadt.ukp.experiments.argumentation.convincingness.sampling.Step7aLearningDataProducer.java
@SuppressWarnings("unchecked")
public static void main(String[] args) throws IOException {
    String inputDir = args[0];
    File outputDir = new File(args[1]);

    if (!outputDir.exists()) {
        outputDir.mkdirs();
    }

    Collection<File> files = IOHelper.listXmlFiles(new File(inputDir));

    // for generating ConvArgStrict use this
    String prefix = "no-eq_DescendingScoreArgumentPairListSorter";

    // for oversampling using graph transitivity properties use this
    // String prefix = "generated_no-eq_AscendingScoreArgumentPairListSorter";

    Iterator<File> iterator = files.iterator();
    while (iterator.hasNext()) {
        File file = iterator.next();

        if (!file.getName().startsWith(prefix)) {
            iterator.remove();
        }
    }

    int totalGoldPairsCounter = 0;

    Map<String, Integer> goldDataDistribution = new HashMap<>();

    DescriptiveStatistics statsPerTopic = new DescriptiveStatistics();

    int totalPairsWithReasonSameAsGold = 0;

    DescriptiveStatistics ds = new DescriptiveStatistics();

    for (File file : files) {
        List<AnnotatedArgumentPair> argumentPairs = (List<AnnotatedArgumentPair>) XStreamTools.getXStream()
                .fromXML(file);

        int pairsPerTopicCounter = 0;

        String name = file.getName().replaceAll(prefix, "").replaceAll("\\.xml", "");

        PrintWriter pw = new PrintWriter(new File(outputDir, name + ".csv"), "utf-8");

        pw.println("#id\tlabel\ta1\ta2");

        for (AnnotatedArgumentPair argumentPair : argumentPairs) {
            String goldLabel = argumentPair.getGoldLabel();

            if (!goldDataDistribution.containsKey(goldLabel)) {
                goldDataDistribution.put(goldLabel, 0);
            }
            goldDataDistribution.put(goldLabel, goldDataDistribution.get(goldLabel) + 1);

            pw.printf(Locale.ENGLISH, "%s\t%s\t%s\t%s%n", argumentPair.getId(), goldLabel,
                    multipleParagraphsToSingleLine(argumentPair.getArg1().getText()),
                    multipleParagraphsToSingleLine(argumentPair.getArg2().getText()));

            pairsPerTopicCounter++;

            int sameInOnePair = 0;

            // get gold reason statistics
            for (AnnotatedArgumentPair.MTurkAssignment assignment : argumentPair.mTurkAssignments) {
                String label = assignment.getValue();

                if (goldLabel.equals(label)) {
                    sameInOnePair++;
                }
            }

            ds.addValue(sameInOnePair);
            totalPairsWithReasonSameAsGold += sameInOnePair;
        }

        totalGoldPairsCounter += pairsPerTopicCounter;
        statsPerTopic.addValue(pairsPerTopicCounter);

        pw.close();
    }

    System.out.println("Total reasons matching gold " + totalPairsWithReasonSameAsGold);
    System.out.println(ds);
    System.out.println("Total gold pairs: " + totalGoldPairsCounter);
    System.out.println(statsPerTopic);

    int totalPairs = 0;
    for (Integer pairs : goldDataDistribution.values()) {
        totalPairs += pairs;
    }

    System.out.println("Total pairs: " + totalPairs);
    System.out.println(goldDataDistribution);
}
From source file:com.act.biointerpretation.ProductExtractor.java
public static void main(String[] args) throws Exception {
    CLIUtil cliUtil = new CLIUtil(ProductExtractor.class, HELP_MESSAGE, OPTION_BUILDERS);
    CommandLine cl = cliUtil.parseCommandLine(args);

    String orgPrefix = cl.getOptionValue(OPTION_ORGANISM_PREFIX);
    LOGGER.info("Using organism prefix %s", orgPrefix);

    MongoDB db = new MongoDB(DEFAULT_DB_HOST, DEFAULT_DB_PORT, cl.getOptionValue(OPTION_DB_NAME));

    Map<Long, String> validOrganisms = new TreeMap<>();
    DBIterator orgIter = db.getDbIteratorOverOrgs();
    Organism o = null;
    while ((o = db.getNextOrganism(orgIter)) != null) {
        if (!o.getName().isEmpty() && o.getName().startsWith(orgPrefix)) {
            validOrganisms.put(o.getUUID(), o.getName());
        }
    }

    LOGGER.info("Found %d valid organisms", validOrganisms.size());

    Set<Long> productIds = new TreeSet<>(); // Use something with implicit ordering we can traverse in order.
    DBIterator reactionIterator = db.getIteratorOverReactions();
    Reaction r;
    while ((r = db.getNextReaction(reactionIterator)) != null) {
        Set<JSONObject> proteins = r.getProteinData();
        boolean valid = false;
        for (JSONObject j : proteins) {
            if (j.has("organism") && validOrganisms.containsKey(j.getLong("organism"))) {
                valid = true;
                break;
            } else if (j.has("organisms")) {
                JSONArray organisms = j.getJSONArray("organisms");
                for (int i = 0; i < organisms.length(); i++) {
                    if (validOrganisms.containsKey(organisms.getLong(i))) {
                        valid = true;
                        break;
                    }
                }
            }
        }

        if (valid) {
            for (Long id : r.getProducts()) {
                productIds.add(id);
            }
            for (Long id : r.getProductCofactors()) {
                productIds.add(id);
            }
        }
    }

    LOGGER.info("Found %d valid product ids for '%s'", productIds.size(), orgPrefix);
    PrintWriter writer = cl.hasOption(OPTION_OUTPUT_FILE)
            ? new PrintWriter(new FileWriter(cl.getOptionValue(OPTION_OUTPUT_FILE)))
            : new PrintWriter(System.out);

    for (Long id : productIds) {
        Chemical c = db.getChemicalFromChemicalUUID(id);
        String inchi = c.getInChI();
        if (inchi.startsWith("InChI=") && !inchi.startsWith("InChI=/FAKE")) {
            writer.println(inchi);
        }
    }

    if (cl.hasOption(OPTION_OUTPUT_FILE)) {
        writer.close();
    }

    LOGGER.info("Done.");
}