List of usage examples for java.util.HashMap HashMap()
public HashMap()
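Every example on this page builds its map with this no-argument constructor, which creates an empty HashMap with the default initial capacity (16) and load factor (0.75). A minimal sketch of the shared pattern, with purely illustrative keys and values:

import java.util.HashMap;
import java.util.Map;

public class HashMapConstructorExample {
    public static void main(String[] args) {
        // new HashMap<>() starts empty and grows as entries are added
        Map<String, String> connectParams = new HashMap<>();
        connectParams.put("endpoint", "http://example.com/api"); // illustrative values
        connectParams.put("recordsPerPage", "50");
        System.out.println(connectParams.get("recordsPerPage")); // prints 50
    }
}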
From source file:org.ala.harvester.FlickrHarvester.java
/**
 * Main method for testing this particular Harvester
 *
 * @param args
 */
public static void main(String[] args) throws Exception {
    String[] locations = { "classpath*:spring.xml" };
    ApplicationContext context = new ClassPathXmlApplicationContext(locations);
    FlickrHarvester h = new FlickrHarvester();
    Repository r = (Repository) context.getBean("repository");
    h.setDocumentMapper(new FlickrDocumentMapper());
    h.setRepository(r);
    // set the connection params
    Map<String, String> connectParams = new HashMap<String, String>();
    connectParams.put("endpoint",
            "http://api.flickr.com/services/rest/?method=flickr.photos.search&api_key=08f5318120189e9d12669465c0113351&page=1");
    // connectParams.put("eolGroupId", "806927@N20");
    connectParams.put("eolGroupId", "22545712@N05");
    connectParams.put("flickrRestBaseUrl", "http://api.flickr.com/services/rest");
    connectParams.put("flickrApiKey", "08f5318120189e9d12669465c0113351");
    connectParams.put("recordsPerPage", "50");
    h.setConnectionParams(connectParams);
    h.start(1106); //1013 is the ID for the data source flickr
}
From source file:DIA_Umpire_Quant.DIA_Umpire_IntLibSearch.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws FileNotFoundException, IOException, Exception {
    System.out.println(
            "=================================================================================================");
    System.out.println("DIA-Umpire targeted re-extraction analysis using internal library (version: "
            + UmpireInfo.GetInstance().Version + ")");
    if (args.length != 1) {
        System.out.println(
                "command format error, the correct format should be : java -jar -Xmx10G DIA_Umpire_IntLibSearch.jar diaumpire_module.params");
        return;
    }
    try {
        // logging setup is best-effort; failures here do not abort the run
        ConsoleLogger.SetConsoleLogger(Level.INFO);
        ConsoleLogger.SetFileLogger(Level.DEBUG,
                FilenameUtils.getFullPath(args[0]) + "diaumpire_intlibsearch.log");
    } catch (Exception e) {
    }

    Logger.getRootLogger().info("Version: " + UmpireInfo.GetInstance().Version);
    Logger.getRootLogger().info("Parameter file:" + args[0]);

    BufferedReader reader = new BufferedReader(new FileReader(args[0]));
    String line = "";
    String WorkFolder = "";
    int NoCPUs = 2;

    String InternalLibID = "";

    float ProbThreshold = 0.99f;
    float RTWindow_Int = -1f;
    float Freq = 0f;
    int TopNFrag = 6;

    TandemParam tandemPara = new TandemParam(DBSearchParam.SearchInstrumentType.TOF5600);
    HashMap<String, File> AssignFiles = new HashMap<>();

    //<editor-fold defaultstate="collapsed" desc="Reading parameter file">
    while ((line = reader.readLine()) != null) {
        line = line.trim();
        Logger.getRootLogger().info(line);
        if (!"".equals(line) && !line.startsWith("#")) {
            //System.out.println(line);
            if (line.equals("==File list begin")) {
                do {
                    line = reader.readLine();
                    line = line.trim();
                    if (line.equals("==File list end")) {
                        continue;
                    } else if (!"".equals(line)) {
                        File newfile = new File(line);
                        if (newfile.exists()) {
                            AssignFiles.put(newfile.getAbsolutePath(), newfile);
                        } else {
                            Logger.getRootLogger().info("File: " + newfile + " does not exist.");
                        }
                    }
                } while (!line.equals("==File list end"));
            }
            if (line.split("=").length < 2) {
                continue;
            }
            String type = line.split("=")[0].trim();
            String value = line.split("=")[1].trim();
            switch (type) {
                case "Path": {
                    WorkFolder = value;
                    break;
                }
                case "path": {
                    WorkFolder = value;
                    break;
                }
                case "Thread": {
                    NoCPUs = Integer.parseInt(value);
                    break;
                }
                case "InternalLibID": {
                    InternalLibID = value;
                    break;
                }
                case "RTWindow_Int": {
                    RTWindow_Int = Float.parseFloat(value);
                    break;
                }
                case "ProbThreshold": {
                    ProbThreshold = Float.parseFloat(value);
                    break;
                }
                case "TopNFrag": {
                    TopNFrag = Integer.parseInt(value);
                    break;
                }
                case "Freq": {
                    Freq = Float.parseFloat(value);
                    break;
                }
                case "Fasta": {
                    tandemPara.FastaPath = value;
                    break;
                }
            }
        }
    }
    //</editor-fold>

    // Initialize PTM manager using compomics library
    PTMManager.GetInstance();

    // Check if the fasta file can be found
    if (!new File(tandemPara.FastaPath).exists()) {
        Logger.getRootLogger().info("Fasta file :" + tandemPara.FastaPath
                + " cannot be found, the process will be terminated, please check.");
        System.exit(1);
    }

    // Generate DIA file list
    ArrayList<DIAPack> FileList = new ArrayList<>();
    try {
        File folder = new File(WorkFolder);
        if (!folder.exists()) {
            Logger.getRootLogger().info("The path : " + WorkFolder + " cannot be found.");
            System.exit(1);
        }
        for (final File fileEntry : folder.listFiles()) {
            if (fileEntry.isFile()
                    && (fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzxml")
                            | fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzml"))
                    && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q1.mzxml")
                    && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q2.mzxml")
                    && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) {
                AssignFiles.put(fileEntry.getAbsolutePath(), fileEntry);
            }
            if (fileEntry.isDirectory()) {
                for (final File fileEntry2 : fileEntry.listFiles()) {
                    if (fileEntry2.isFile()
                            && (fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzxml")
                                    | fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzml"))
                            && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q1.mzxml")
                            && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q2.mzxml")
                            && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) {
                        AssignFiles.put(fileEntry2.getAbsolutePath(), fileEntry2);
                    }
                }
            }
        }
        Logger.getRootLogger().info("No. of files assigned :" + AssignFiles.size());
        for (File fileEntry : AssignFiles.values()) {
            Logger.getRootLogger().info(fileEntry.getAbsolutePath());
        }

        for (File fileEntry : AssignFiles.values()) {
            String mzXMLFile = fileEntry.getAbsolutePath();
            if (mzXMLFile.toLowerCase().endsWith(".mzxml") | mzXMLFile.toLowerCase().endsWith(".mzml")) {
                DIAPack DiaFile = new DIAPack(mzXMLFile, NoCPUs);
                Logger.getRootLogger().info(
                        "=================================================================================================");
                Logger.getRootLogger().info("Processing " + mzXMLFile);
                if (!DiaFile.LoadDIASetting()) {
                    Logger.getRootLogger().info("Loading DIA setting failed, job is incomplete");
                    System.exit(1);
                }
                if (!DiaFile.LoadParams()) {
                    Logger.getRootLogger().info("Loading parameters failed, job is incomplete");
                    System.exit(1);
                }
                Logger.getRootLogger().info("Loading identification results " + mzXMLFile + "....");

                // If the serialization file for the ID file existed
                if (DiaFile.ReadSerializedLCMSID()) {
                    DiaFile.IDsummary.ReduceMemoryUsage();
                    DiaFile.IDsummary.FastaPath = tandemPara.FastaPath;
                    FileList.add(DiaFile);
                }
            }
        }

        //<editor-fold defaultstate="collapsed" desc="Targeted re-extraction using internal library">
        Logger.getRootLogger().info(
                "=================================================================================================");
        if (FileList.size() > 1) {
            Logger.getRootLogger().info("Targeted re-extraction using internal library");
            FragmentLibManager libManager = FragmentLibManager.ReadFragmentLibSerialization(WorkFolder,
                    InternalLibID);
            if (libManager == null) {
                Logger.getRootLogger().info("Building internal spectral library");
                libManager = new FragmentLibManager(InternalLibID);
                ArrayList<LCMSID> LCMSIDList = new ArrayList<>();
                for (DIAPack dia : FileList) {
                    LCMSIDList.add(dia.IDsummary);
                }
                libManager.ImportFragLibTopFrag(LCMSIDList, Freq, TopNFrag);
                libManager.WriteFragmentLibSerialization(WorkFolder);
            }
            libManager.ReduceMemoryUsage();

            Logger.getRootLogger()
                    .info("Building retention time prediction model and generate candidate peptide list");
            for (int i = 0; i < FileList.size(); i++) {
                FileList.get(i).IDsummary.ClearMappedPep();
            }
            for (int i = 0; i < FileList.size(); i++) {
                for (int j = i + 1; j < FileList.size(); j++) {
                    RTAlignedPepIonMapping alignment = new RTAlignedPepIonMapping(WorkFolder,
                            FileList.get(i).GetParameter(), FileList.get(i).IDsummary,
                            FileList.get(j).IDsummary);
                    alignment.GenerateModel();
                    alignment.GenerateMappedPepIon();
                }
                FileList.get(i).ExportID();
                FileList.get(i).IDsummary = null;
            }

            Logger.getRootLogger().info("Targeted matching........");
            for (DIAPack diafile : FileList) {
                if (diafile.IDsummary == null) {
                    diafile.ReadSerializedLCMSID();
                }
                if (!diafile.IDsummary.GetMappedPepIonList().isEmpty()) {
                    diafile.UseMappedIon = true;
                    diafile.FilterMappedIonByProb = false;
                    diafile.BuildStructure();
                    diafile.MS1FeatureMap.ReadPeakCluster();
                    diafile.MS1FeatureMap.ClearMonoisotopicPeakOfCluster();
                    diafile.GenerateMassCalibrationRTMap();
                    diafile.TargetedExtractionQuant(false, libManager, ProbThreshold, RTWindow_Int);
                    diafile.MS1FeatureMap.ClearAllPeaks();
                    diafile.IDsummary.ReduceMemoryUsage();
                    diafile.IDsummary.RemoveLowProbMappedIon(ProbThreshold);
                    diafile.ExportID();
                    Logger.getRootLogger().info("Peptide ions: " + diafile.IDsummary.GetPepIonList().size()
                            + " Mapped ions: " + diafile.IDsummary.GetMappedPepIonList().size());
                    diafile.ClearStructure();
                }
                diafile.IDsummary = null;
                System.gc();
            }
            Logger.getRootLogger().info(
                    "=================================================================================================");
        }
        //</editor-fold>

        Logger.getRootLogger().info("Job done");
        Logger.getRootLogger().info(
                "=================================================================================================");
    } catch (Exception e) {
        Logger.getRootLogger().error(ExceptionUtils.getStackTrace(e));
        throw e;
    }
}
From source file:io.kahu.hawaii.util.call.example.Example2.java
public static final void main(String[] args) throws Exception {
    DOMConfigurator.configure(Example2.class.getResource("/log4j.xml").getFile());

    RestServer server = null;
    ExecutorRepository executorRepository = null;
    try {
        /*
         * Create our rest server with a 'ClientResource'.
         */
        server = new RestServer(SERVER_PORT);
        server.addResource(ClientResource.class);
        server.start();

        /*
         * START of generic setup
         */
        // Create a log manager (purpose and explanation out of scope for this example).
        LogManager logManager = new DefaultLogManager(new LogManagerConfiguration(new LoggingConfiguration()));

        // Create an executor with core pool size 1, max pool size 2 and a work queue of size 2.
        // Threads 'outside the core pool' that are still idle after one minute will get cleaned up.
        HawaiiExecutorImpl executor = new HawaiiExecutorImpl(ExecutorRepository.DEFAULT_EXECUTOR_NAME, 1, 2, 2,
                new TimeOut(1, TimeUnit.MINUTES), logManager);

        // Create a second executor with the same configuration, dedicated to the 'crm' system.
        HawaiiExecutorImpl executor2 = new HawaiiExecutorImpl("crm", 1, 2, 2,
                new TimeOut(1, TimeUnit.MINUTES), logManager);

        // Create the repository that holds all executors.
        executorRepository = new ExecutorRepository(logManager);
        executorRepository.add(executor);
        executorRepository.add(executor2);

        Map<String, String> defaultExecutors = new HashMap<>();
        defaultExecutors.put("crm", "crm");
        executorRepository.setDefaultExecutors(defaultExecutors);

        // Create a new request dispatcher.
        RequestDispatcher requestDispatcher = new RequestDispatcher(executorRepository, logManager);
        /*
         * END of generic setup
         */

        /*
         * Setup the request (builder).
         */
        HttpRequestContext<Person> context = new HttpRequestContext<>(HttpMethod.GET,
                "http://localhost:" + SERVER_PORT, "/client/{client-id}", "crm", "get_client_by_id",
                new TimeOut(10, TimeUnit.SECONDS));
        CallLogger callLogger = new CallLoggerImpl<>(logManager, new HttpRequestLogger(),
                new JsonPayloadResponseLogger<Person>());
        RequestPrototype<HttpResponse, Person> prototype = new RequestPrototype(requestDispatcher, context,
                new GetCustomerByIdResponseHandler(), callLogger);
        HttpRequestBuilder<Person> getPersonByIdRequest = new HttpRequestBuilder<>(prototype);

        /*
         * Use the request (builder).
         */
        Request<Person> request = getPersonByIdRequest.newInstance().withPathVariables("10").build();
        Person person = request.execute().get();
        System.err.println("CLIENT - Got client '" + person.getName() + "' with id '" + person.getId() + "'.");
    } finally {
        // guard against the case where server construction failed and the reference is still null
        if (server != null) {
            server.stop();
        }
        if (executorRepository != null) {
            executorRepository.stop();
        }
    }
}
From source file:gov.nih.nci.ncicb.tcga.dcc.dam.util.TempClinicalDataLoader.java
public static void main(String[] args) {
    // first get the db connection properties
    String url = urlSet.get(args[1]);
    String user = args[2];
    String word = args[3];

    // make sure we have the Oracle driver somewhere
    try {
        Class.forName("oracle.jdbc.OracleDriver");
        Class.forName("org.postgresql.Driver");
    } catch (Exception x) {
        System.out.println("Unable to load the driver class!");
        System.exit(0);
    }
    // connect to the database
    try {
        dbConnection = DriverManager.getConnection(url, user, word);
        ClinicalBean.setDBConnection(dbConnection);
    } catch (SQLException x) {
        x.printStackTrace();
        System.exit(1);
    }

    final String xmlList = args[0];
    BufferedReader br = null;
    try {
        final Map<String, String> clinicalFiles = new HashMap<String, String>();
        final Map<String, String> biospecimenFiles = new HashMap<String, String>();
        final Map<String, String> fullFiles = new HashMap<String, String>();

        //noinspection IOResourceOpenedButNotSafelyClosed
        br = new BufferedReader(new FileReader(xmlList));
        // read the file list to get all the files to load
        while (br.ready()) {
            final String[] in = br.readLine().split("\\t");
            String xmlfile = in[0];
            String archive = in[1];
            if (xmlfile.contains("_clinical")) {
                clinicalFiles.put(xmlfile, archive);
            } else if (xmlfile.contains("_biospecimen")) {
                biospecimenFiles.put(xmlfile, archive);
            } else {
                fullFiles.put(xmlfile, archive);
            }
        }

        Date dateAdded = Calendar.getInstance().getTime();

        // NOTE!!! This deletes all data before the load starts, assuming we are re-loading everything.
        // A better way would be to figure out what has changed and load that, or to be able to load
        // multiple versions of the data in the schema.
        emptyClinicalTables(user);

        // load any "full" files first -- in case some archives aren't split yet
        for (final String file : fullFiles.keySet()) {
            String archive = fullFiles.get(file);
            System.out.println("Full file " + file + " in " + archive);
            // need to re-instantiate the disease-specific beans for each file
            createDiseaseSpecificBeans(xmlList);
            String disease = getDiseaseName(archive);
            processFullXmlFile(file, archive, disease, dateAdded);
            // memory leak or something... have to commit and close all connections and re-get the
            // connection after each file to keep from using too much heap space. This troubles me,
            // but I have never had the time to figure out why it happens.
            resetConnections(url, user, word);
        }

        // now process all clinical files, and insert patients and clinical data
        for (final String clinicalFile : clinicalFiles.keySet()) {
            createDiseaseSpecificBeans(xmlList);
            String archive = clinicalFiles.get(clinicalFile);
            System.out.println("Clinical file " + clinicalFile + " in " + archive);
            String disease = getDiseaseName(archive);
            processClinicalXmlFile(clinicalFile, archive, disease, dateAdded);
            resetConnections(url, user, word);
        }

        // now process biospecimen files
        for (final String biospecimenFile : biospecimenFiles.keySet()) {
            createDiseaseSpecificBeans(xmlList);
            String archive = biospecimenFiles.get(biospecimenFile);
            String disease = getDiseaseName(archive);
            System.out.println("Biospecimen file " + biospecimenFile);
            processBiospecimenXmlFile(biospecimenFile, archive, disease, dateAdded);
            resetConnections(url, user, word);
        }

        // this sets relationships between these clinical tables and data browser tables, since we
        // delete and reload every time
        setForeignKeys();
        dbConnection.commit();
        dbConnection.close();
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
    } finally {
        IOUtils.closeQuietly(br);
    }
}
From source file:com.blackboard.WebdavBulkDeleterClient.java
public static void main(String[] args) {
    if (System.getProperty("log4j.configuration") != null) {
        PropertyConfigurator.configure(System.getProperty("log4j.configuration"));
    } else {
        BasicConfigurator.configure();
    }

    // Perform command line parsing in as friendly a way as possible. Could be improved.
    CommandLineParser parser = new PosixParser();
    Options options = new Options();

    // Use a map to store our options and loop over it to check and parse options via
    // addAllOptions() and verifyOptions() below
    Map<String, String> optionsAvailable = new HashMap<String, String>();
    optionsAvailable.put("deletion-list", "The file containing the list of courses to delete");
    optionsAvailable.put("user", "User with deletion privileges, usually bbsupport");
    optionsAvailable.put("password", "Password - ensure you escape any shell characters");
    optionsAvailable.put("url", "The Learn URL - usually https://example.com/bbcswebdav/courses");

    options = addAllOptions(options, optionsAvailable);
    options.addOption(OptionBuilder.withLongOpt("no-verify-ssl").withDescription("Don't verify SSL")
            .hasArg(false).create());

    CommandLine line = null;
    try {
        line = parser.parse(options, args);
        verifyOptions(line, optionsAvailable);
    } catch (ParseException e) {
        // Detailed reason will be printed by verifyOptions above
        logger.fatal("Incorrect options specified, exiting...");
        System.exit(1);
    }

    Scanner scanner = null;
    try {
        scanner = new Scanner(new File(line.getOptionValue("deletion-list")));
    } catch (FileNotFoundException e) {
        logger.fatal("Cannot open file : " + e.getLocalizedMessage());
        System.exit(1);
    }

    // By default we verify SSL certs
    boolean verifyCertStatus = true;
    if (line.hasOption("no-verify-ssl")) {
        verifyCertStatus = false;
    }

    // Loop through the deletion list and delete courses if they exist.
    LearnServer instance;
    try {
        logger.debug("Attempting to open connection");
        instance = new LearnServer(line.getOptionValue("user"), line.getOptionValue("password"),
                line.getOptionValue("url"), verifyCertStatus);
        String currentCourse = null;
        logger.debug("Connection open");
        while (scanner.hasNextLine()) {
            currentCourse = scanner.nextLine();
            if (instance.exists(currentCourse)) {
                try {
                    instance.deleteCourse(currentCourse);
                    logger.info("Processing " + currentCourse + " : Result - Deletion Successful");
                } catch (IOException ioe) {
                    logger.error("Processing " + currentCourse + " : Result - Could not Delete ("
                            + ioe.getLocalizedMessage() + ")");
                }
            } else {
                logger.info("Processing " + currentCourse + " : Result - Course does not exist");
            }
        }
    } catch (IllegalArgumentException e) {
        logger.fatal(e.getLocalizedMessage());
        System.exit(1);
    } catch (IOException ioe) {
        logger.debug(ioe);
        logger.fatal(ioe.getMessage());
    }
}
From source file:de.tudarmstadt.ukp.experiments.argumentation.convincingness.sampling.Step4MTurkOutputCollector.java
@SuppressWarnings("unchecked")
public static void main(String[] args) throws Exception {
    String inputDirWithArgumentPairs = args[0];

    File[] resultFiles;
    if (args[1].contains("*")) {
        File path = new File(args[1]);
        File directory = path.getParentFile();
        String regex = path.getName().replaceAll("\\*", "");

        List<File> files = new ArrayList<>(FileUtils.listFiles(directory, new String[] { regex }, false));
        resultFiles = new File[files.size()];
        for (int i = 0; i < files.size(); i++) {
            resultFiles[i] = files.get(i);
        }
    } else {
        // result file is a comma-separated list of CSV files from MTurk
        String[] split = args[1].split(",");
        resultFiles = new File[split.length];
        for (int i = 0; i < split.length; i++) {
            resultFiles[i] = new File(split[i]);
        }
    }

    File outputDir = new File(args[2]);
    if (!outputDir.exists()) {
        if (!outputDir.mkdirs()) {
            throw new IOException("Cannot create directory " + outputDir);
        }
    }

    // error if output folder not empty to prevent any confusion by mixing files
    if (!FileUtils.listFiles(outputDir, null, false).isEmpty()) {
        throw new IllegalArgumentException("Output dir " + outputDir + " is not empty");
    }

    // collected assignments with empty reason for rejections
    Set<String> assignmentsWithEmptyReason = new HashSet<>();

    // parse with first line as header
    MTurkOutputReader mTurkOutputReader = new MTurkOutputReader(resultFiles);

    Collection<File> files = FileUtils.listFiles(new File(inputDirWithArgumentPairs), new String[] { "xml" },
            false);

    if (files.isEmpty()) {
        throw new IOException("No xml files found in " + inputDirWithArgumentPairs);
    }

    // statistics: how many hits with how many assignments ; hit ID / assignments
    Map<String, Map<String, Integer>> assignmentsPerHits = new HashMap<>();

    // collect accept/reject statistics
    for (Map<String, String> record : mTurkOutputReader) {
        boolean wasRejected = "Rejected".equals(record.get("assignmentstatus"));
        String hitID = record.get("hitid");
        String hitTypeId = record.get("hittypeid");

        if (!wasRejected) {
            // update statistics
            if (!assignmentsPerHits.containsKey(hitTypeId)) {
                assignmentsPerHits.put(hitTypeId, new HashMap<String, Integer>());
            }
            if (!assignmentsPerHits.get(hitTypeId).containsKey(hitID)) {
                assignmentsPerHits.get(hitTypeId).put(hitID, 0);
            }
            assignmentsPerHits.get(hitTypeId).put(hitID, assignmentsPerHits.get(hitTypeId).get(hitID) + 1);
        }
    }

    // statistics: how many hits with how many assignments ; hit ID / assignments
    Map<String, Integer> approvedAssignmentsPerHit = new HashMap<>();
    Map<String, Integer> rejectedAssignmentsPerHit = new HashMap<>();

    // collect accept/reject statistics
    for (Map<String, String> record : mTurkOutputReader) {
        boolean approved = "Approved".equals(record.get("assignmentstatus"));
        boolean rejected = "Rejected".equals(record.get("assignmentstatus"));
        String hitID = record.get("hitid");

        if (approved) {
            // update statistics
            if (!approvedAssignmentsPerHit.containsKey(hitID)) {
                approvedAssignmentsPerHit.put(hitID, 0);
            }
            approvedAssignmentsPerHit.put(hitID, approvedAssignmentsPerHit.get(hitID) + 1);
        } else if (rejected) {
            // update statistics
            if (!rejectedAssignmentsPerHit.containsKey(hitID)) {
                rejectedAssignmentsPerHit.put(hitID, 0);
            }
            rejectedAssignmentsPerHit.put(hitID, rejectedAssignmentsPerHit.get(hitID) + 1);
        } else {
            throw new IllegalStateException(
                    "Unknown state: " + record.get("assignmentstatus") + " HITID: " + hitID);
        }
    }

    // System.out.println("Approved: " + approvedAssignmentsPerHit);
    // System.out.println("Rejected: " + rejectedAssignmentsPerHit);
    System.out.println("Approved (values): " + new HashSet<>(approvedAssignmentsPerHit.values()));
    System.out.println("Rejected (values): " + new HashSet<>(rejectedAssignmentsPerHit.values()));

    // rejection statistics
    int totalRejected = 0;
    for (Map.Entry<String, Integer> rejectionEntry : rejectedAssignmentsPerHit.entrySet()) {
        totalRejected += rejectionEntry.getValue();
    }
    System.out.println("Total rejections: " + totalRejected);

    /*
    // generate .success files for adding more annotations
    for (File resultFile : resultFiles) {
        String hitTypeID = mTurkOutputReader.getHitTypeIdForFile().get(resultFile);
        // assignments for that hittypeid (= file)
        Map<String, Integer> assignments = assignmentsPerHits.get(hitTypeID);
        prepareUpdateHITsFiles(assignments, hitTypeID, resultFile);
    }
    */

    int totalSavedPairs = 0;

    // load all previously prepared argument pairs
    for (File file : files) {
        List<ArgumentPair> argumentPairs = (List<ArgumentPair>) XStreamTools.getXStream().fromXML(file);

        List<AnnotatedArgumentPair> annotatedArgumentPairs = new ArrayList<>();

        for (ArgumentPair argumentPair : argumentPairs) {
            AnnotatedArgumentPair annotatedArgumentPair = new AnnotatedArgumentPair(argumentPair);

            // is there such an answer?
            String key = "Answer." + argumentPair.getId();

            // iterate only if there is such column to save time
            if (mTurkOutputReader.getColumnNames().contains(key)) {
                // now find the results
                for (Map<String, String> record : mTurkOutputReader) {
                    if (record.containsKey(key)) {
                        // extract the values
                        AnnotatedArgumentPair.MTurkAssignment assignment = new AnnotatedArgumentPair.MTurkAssignment();

                        boolean wasRejected = "Rejected".equals(record.get("assignmentstatus"));

                        // only non-rejected (if required)
                        if (!wasRejected) {
                            String hitID = record.get("hitid");
                            String workerID = record.get("workerid");
                            String assignmentId = record.get("assignmentid");
                            try {
                                assignment.setAssignmentAcceptTime(
                                        DATE_FORMAT.parse(record.get("assignmentaccepttime")));
                                assignment.setAssignmentSubmitTime(
                                        DATE_FORMAT.parse(record.get("assignmentsubmittime")));
                                assignment.setHitComment(record.get("Answer.feedback"));
                                assignment.setHitID(hitID);
                                assignment.setTurkID(workerID);
                                assignment.setAssignmentId(assignmentId);

                                // and answer specific fields
                                String valueRaw = record.get(key);

                                // so far the label has had format aXXX_aYYY_a1, aXXX_aYYY_a2, or aXXX_aYYY_equal
                                // strip now only true label
                                String label = valueRaw.split("_")[2];

                                assignment.setValue(label);
                                String reason = record.get(key + "_reason");

                                // missing reason
                                if (reason == null) {
                                    assignmentsWithEmptyReason.add(assignmentId);
                                } else {
                                    assignment.setReason(reason);

                                    // get worker's stance
                                    String stanceRaw = record.get(key + "_stance");
                                    if (stanceRaw != null) {
                                        // parse stance
                                        String stance = stanceRaw.split("_stance_")[1];
                                        assignment.setWorkerStance(stance);
                                    }

                                    // we take maximal 5 assignments
                                    Collections.sort(annotatedArgumentPair.mTurkAssignments,
                                            new Comparator<AnnotatedArgumentPair.MTurkAssignment>() {
                                                @Override
                                                public int compare(AnnotatedArgumentPair.MTurkAssignment o1,
                                                        AnnotatedArgumentPair.MTurkAssignment o2) {
                                                    return o1.getAssignmentAcceptTime()
                                                            .compareTo(o2.getAssignmentAcceptTime());
                                                }
                                            });

                                    if (annotatedArgumentPair.mTurkAssignments
                                            .size() < MAXIMUM_ASSIGNMENTS_PER_HIT) {
                                        annotatedArgumentPair.mTurkAssignments.add(assignment);
                                    }
                                }
                            } catch (IllegalArgumentException | NullPointerException ex) {
                                System.err.println("Malformed annotations for HIT " + hitID + ", worker "
                                        + workerID + ", assignment " + assignmentId + "; " + ex.getMessage()
                                        + ", full record: " + record);
                            }
                        }
                    }
                }
            }

            // and if there are some annotations, add it to the result set
            if (!annotatedArgumentPair.mTurkAssignments.isEmpty()) {
                annotatedArgumentPairs.add(annotatedArgumentPair);
            }
        }

        if (!annotatedArgumentPairs.isEmpty()) {
            File outputFile = new File(outputDir, file.getName());
            XStreamTools.toXML(annotatedArgumentPairs, outputFile);

            System.out.println("Saved " + annotatedArgumentPairs.size() + " annotated pairs to " + outputFile);
            totalSavedPairs += annotatedArgumentPairs.size();
        }
    }
    System.out.println("Total saved " + totalSavedPairs + " pairs");

    // print assignments with empty reasons
    if (!assignmentsWithEmptyReason.isEmpty()) {
        System.out.println(
                "== Assignments with empty reason:\nassignmentIdToReject\tassignmentIdToRejectComment");
        for (String assignmentId : assignmentsWithEmptyReason) {
            System.out.println(
                    assignmentId + "\t\"Dear worker, you did not fill the required field with a reason.\"");
        }
    }
}
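The counting idiom in the example above (a containsKey check followed by put(get(key) + 1)) predates Java 8; on Java 8 and later the same per-key tally can be written in a single call with Map.merge. A minimal sketch of that alternative, not part of the original source, with hit IDs invented for illustration:

import java.util.HashMap;
import java.util.Map;

public class MergeCountSketch {
    public static void main(String[] args) {
        Map<String, Integer> approvedAssignmentsPerHit = new HashMap<>();
        // merge() inserts 1 for an absent key, otherwise combines the old count with 1 via Integer::sum
        for (String hitID : new String[] { "HIT1", "HIT2", "HIT1" }) {
            approvedAssignmentsPerHit.merge(hitID, 1, Integer::sum);
        }
        System.out.println(approvedAssignmentsPerHit); // {HIT1=2, HIT2=1}
    }
}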
From source file:org.ala.harvester.WaissHarvester.java
/**
 * Main method for testing this particular Harvester
 *
 * @param args
 */
public static void main(String[] args) throws Exception {
    String[] locations = { "classpath*:spring.xml" };
    ApplicationContext context = new ClassPathXmlApplicationContext(locations);
    WaissHarvester h = new WaissHarvester();
    Repository r = (Repository) context.getBean("repository");
    h.setRepository(r);
    // set the connection params
    Map<String, String> connectParams = new HashMap<String, String>();
    connectParams.put("endpoint", "http://www.museum.wa.gov.au/waiss/pages/image.htm");
    h.setConnectionParams(connectParams);
    h.start(WAISS_INFOSOURCE_ID);
}
From source file:com.da.daum.DaumCafeParser.java
public static void main(String[] args) throws IOException {
    DaumCafeParser parser = new DaumCafeParser();
    String listBody = "(?)^*~.txt";
    listBody = listBody.replaceAll("\\*", "").replaceAll("\\*", "");
    System.out.println(listBody);
    // FileUtils.writeStringToFile(new File(file), listBody, "utf-8");

    //File file = new File("C:\\TEMP\\daum\\user\\Lak_view_.txt");
    File file = new File("C:\\TEMP\\daum\\user\\Lak_list_1.txt");
    listBody = FileUtils.readFileToString(file, "utf-8");
    Map pageMap = new HashMap();
    parser.setDaumListVoList(listBody, pageMap);
    //parser.setDaumView(listBody);
}
From source file:com.example.geomesa.kafka.KafkaListener.java
public static void main(String[] args) throws Exception {
    // read command line args for a connection to Kafka
    CommandLineParser parser = new BasicParser();
    Options options = getCommonRequiredOptions();
    CommandLine cmd = parser.parse(options, args);

    // create the consumer KafkaDataStore object
    Map<String, String> dsConf = getKafkaDataStoreConf(cmd);
    DataStore consumerDS = DataStoreFinder.getDataStore(dsConf);

    // verify that we got back our KafkaDataStore object properly
    if (consumerDS == null) {
        throw new Exception("Null consumer KafkaDataStore");
    }

    Map<String, FeatureListener> listeners = new HashMap<>();

    try {
        for (String typeName : consumerDS.getTypeNames()) {
            System.out.println("Registering a feature listener for type " + typeName + ".");
            FeatureListener listener = new FeatureListener() {
                @Override
                public void changed(FeatureEvent featureEvent) {
                    System.out.println("Received FeatureEvent from layer " + typeName + " of Type: "
                            + featureEvent.getType());
                    if (featureEvent.getType() == FeatureEvent.Type.CHANGED
                            && featureEvent instanceof KafkaFeatureChanged) {
                        printFeature(((KafkaFeatureChanged) featureEvent).feature());
                    } else if (featureEvent.getType() == FeatureEvent.Type.REMOVED) {
                        System.out.println("Received Delete for filter: " + featureEvent.getFilter());
                    }
                }
            };
            consumerDS.getFeatureSource(typeName).addFeatureListener(listener);
            listeners.put(typeName, listener);
        }

        while (true) {
            // Wait for user to terminate with ctrl-C.
        }
    } finally {
        for (Entry<String, FeatureListener> entry : listeners.entrySet()) {
            consumerDS.getFeatureSource(entry.getKey()).removeFeatureListener(entry.getValue());
        }
        consumerDS.dispose();
    }
}
From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step6HITPreparator.java
public static void main(String[] args) throws Exception {
    // input dir - list of xml query containers
    // step5-linguistic-annotation/
    System.err.println("Starting step 6 HIT Preparation");
    File inputDir = new File(args[0]);

    // output dir
    File outputDir = new File(args[1]);
    if (outputDir.exists()) {
        outputDir.delete();
    }
    outputDir.mkdir();

    List<String> queries = new ArrayList<>();

    // iterate over query containers
    int countClueWeb = 0;
    int countSentence = 0;
    for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) {
        QueryResultContainer queryResultContainer = QueryResultContainer
                .fromXML(FileUtils.readFileToString(f, "utf-8"));
        if (queries.contains(f.getName()) || queries.size() == 0) {
            // groups contain only non-empty documents
            Map<Integer, List<QueryResultContainer.SingleRankedResult>> groups = new HashMap<>();

            // split to groups according to number of sentences
            for (QueryResultContainer.SingleRankedResult rankedResult : queryResultContainer.rankedResults) {
                if (rankedResult.originalXmi != null) {
                    byte[] bytes = new BASE64Decoder()
                            .decodeBuffer(new ByteArrayInputStream(rankedResult.originalXmi.getBytes()));
                    JCas jCas = JCasFactory.createJCas();
                    XmiCasDeserializer.deserialize(new ByteArrayInputStream(bytes), jCas.getCas());

                    Collection<Sentence> sentences = JCasUtil.select(jCas, Sentence.class);

                    int groupId = sentences.size() / 40;
                    if (rankedResult.originalXmi == null) {
                        System.err.println("Empty document: " + rankedResult.clueWebID);
                    } else {
                        if (!groups.containsKey(groupId)) {
                            groups.put(groupId, new ArrayList<>());
                        }
                    }
                    //handle it
                    groups.get(groupId).add(rankedResult);
                    countClueWeb++;
                }
            }

            for (Map.Entry<Integer, List<QueryResultContainer.SingleRankedResult>> entry : groups.entrySet()) {
                Integer groupId = entry.getKey();
                List<QueryResultContainer.SingleRankedResult> rankedResults = entry.getValue();

                // make sure the results are sorted
                // DEBUG
                // for (QueryResultContainer.SingleRankedResult r : rankedResults) {
                //     System.out.print(r.rank + "\t");
                // }
                Collections.sort(rankedResults, (o1, o2) -> o1.rank.compareTo(o2.rank));

                // iterate over results for one query and group
                for (int i = 0; i < rankedResults.size() && i < TOP_RESULTS_PER_GROUP; i++) {
                    QueryResultContainer.SingleRankedResult rankedResult = rankedResults.get(i);

                    QueryResultContainer.SingleRankedResult r = rankedResults.get(i);
                    int rank = r.rank;
                    MustacheFactory mf = new DefaultMustacheFactory();
                    Mustache mustache = mf.compile("template/template.html");
                    String queryId = queryResultContainer.qID;
                    String query = queryResultContainer.query;
                    // make the first letter uppercase
                    query = query.substring(0, 1).toUpperCase() + query.substring(1);

                    List<String> relevantInformationExamples = queryResultContainer.relevantInformationExamples;
                    List<String> irrelevantInformationExamples = queryResultContainer.irrelevantInformationExamples;

                    byte[] bytes = new BASE64Decoder()
                            .decodeBuffer(new ByteArrayInputStream(rankedResult.originalXmi.getBytes()));
                    JCas jCas = JCasFactory.createJCas();
                    XmiCasDeserializer.deserialize(new ByteArrayInputStream(bytes), jCas.getCas());

                    List<generators.Sentence> sentences = new ArrayList<>();
                    List<Integer> paragraphs = new ArrayList<>();
                    paragraphs.add(0);

                    for (WebParagraph webParagraph : JCasUtil.select(jCas, WebParagraph.class)) {
                        for (Sentence s : JCasUtil.selectCovered(Sentence.class, webParagraph)) {
                            String sentenceBegin = String.valueOf(s.getBegin());
                            generators.Sentence sentence = new generators.Sentence(s.getCoveredText(),
                                    sentenceBegin);
                            sentences.add(sentence);
                            countSentence++;
                        }
                        int SentenceID = paragraphs.get(paragraphs.size() - 1);
                        if (sentences.size() > 120)
                            while (SentenceID < sentences.size()) {
                                if (!paragraphs.contains(SentenceID))
                                    paragraphs.add(SentenceID);
                                SentenceID = SentenceID + 120;
                            }
                        paragraphs.add(sentences.size());
                    }
                    System.err.println("Output dir: " + outputDir);
                    int startID = 0;
                    int endID;

                    for (int j = 0; j < paragraphs.size(); j++) {
                        endID = paragraphs.get(j);
                        int sentLength = endID - startID;
                        if (sentLength > 120 || j == paragraphs.size() - 1) {
                            if (sentLength > 120) {
                                endID = paragraphs.get(j - 1);
                                j--;
                            }
                            sentLength = endID - startID;
                            if (sentLength <= 40)
                                groupId = 40;
                            else if (sentLength <= 80 && sentLength > 40)
                                groupId = 80;
                            else if (sentLength > 80)
                                groupId = 120;

                            File folder = new File(outputDir + "/" + groupId);
                            if (!folder.exists()) {
                                System.err.println("creating directory: " + outputDir + "/" + groupId);
                                boolean result = false;
                                try {
                                    folder.mkdir();
                                    result = true;
                                } catch (SecurityException se) {
                                    //handle it
                                }
                                if (result) {
                                    System.out.println("DIR created");
                                }
                            }

                            String newHtmlFile = folder.getAbsolutePath() + "/" + f.getName() + "_"
                                    + rankedResult.clueWebID + "_" + sentLength + ".html";
                            System.err.println("Printing a file: " + newHtmlFile);
                            File newHTML = new File(newHtmlFile);
                            int t = 0;
                            while (newHTML.exists()) {
                                newHTML = new File(folder.getAbsolutePath() + "/" + f.getName() + "_"
                                        + rankedResult.clueWebID + "_" + sentLength + "." + t + ".html");
                                t++;
                            }

                            mustache.execute(new PrintWriter(new FileWriter(newHTML)),
                                    new generators(query, relevantInformationExamples,
                                            irrelevantInformationExamples, sentences.subList(startID, endID),
                                            queryId, rank))
                                    .flush();
                            startID = endID;
                        }
                    }
                }
            }
        }
    }
    System.out.println("Printed " + countClueWeb + " documents with " + countSentence + " sentences");
}
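The grouping step in the example above guards each groups.get(groupId).add(...) with a containsKey check; since Java 8, Map.computeIfAbsent expresses the same "create the bucket on first use" pattern in one call. A minimal sketch of that idiom, not part of the original source, with types and data simplified for illustration:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class GroupingSketch {
    public static void main(String[] args) {
        Map<Integer, List<String>> groups = new HashMap<>();
        String[] docs = { "short doc", "medium sized document here", "tiny" };
        for (String doc : docs) {
            int groupId = doc.split(" ").length / 2; // stand-in for sentences.size() / 40
            // computeIfAbsent creates the list the first time a groupId is seen
            groups.computeIfAbsent(groupId, k -> new ArrayList<>()).add(doc);
        }
        System.out.println(groups);
    }
}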