List of usage examples for java.lang.IllegalArgumentException
public IllegalArgumentException(Throwable cause)
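Before the indexed source files below, a minimal illustrative sketch (not taken from any listed project) of this constructor: it wraps a lower-level exception as the cause so the original stack trace is preserved. The parsePort helper and its argument are hypothetical.

public static int parsePort(String value) {
    try {
        return Integer.parseInt(value);
    } catch (NumberFormatException e) {
        // Keep the original failure attached as the cause of the IllegalArgumentException
        throw new IllegalArgumentException(e);
    }
}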
From source file:com.tamingtext.tagging.LuceneCategoryExtractor.java
public static void main(String[] args) throws IOException {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputOpt = obuilder.withLongName("dir").withRequired(true)
            .withArgument(abuilder.withName("dir").withMinimum(1).withMaximum(1).create())
            .withDescription("The Lucene directory").withShortName("d").create();

    Option outputOpt = obuilder.withLongName("output").withRequired(false)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("The output directory").withShortName("o").create();

    Option maxOpt = obuilder.withLongName("max").withRequired(false)
            .withArgument(abuilder.withName("max").withMinimum(1).withMaximum(1).create())
            .withDescription(
                    "The maximum number of documents to analyze. If not specified, then it will loop over all docs")
            .withShortName("m").create();

    Option fieldOpt = obuilder.withLongName("field").withRequired(true)
            .withArgument(abuilder.withName("field").withMinimum(1).withMaximum(1).create())
            .withDescription("The field in the index").withShortName("f").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(maxOpt)
            .withOption(fieldOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        File inputDir = new File(cmdLine.getValue(inputOpt).toString());

        if (!inputDir.isDirectory()) {
            throw new IllegalArgumentException(inputDir + " does not exist or is not a directory");
        }

        long maxDocs = Long.MAX_VALUE;
        if (cmdLine.hasOption(maxOpt)) {
            maxDocs = Long.parseLong(cmdLine.getValue(maxOpt).toString());
        }

        if (maxDocs < 0) {
            throw new IllegalArgumentException("maxDocs must be >= 0");
        }

        String field = cmdLine.getValue(fieldOpt).toString();

        PrintWriter out = null;
        if (cmdLine.hasOption(outputOpt)) {
            out = new PrintWriter(new FileWriter(cmdLine.getValue(outputOpt).toString()));
        } else {
            out = new PrintWriter(new OutputStreamWriter(System.out, "UTF-8"));
        }

        dumpDocumentFields(inputDir, field, maxDocs, out);

        IOUtils.close(Collections.singleton(out));
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }
}
From source file:Supplier.java
public static void main(String[] args) {
    String url = "tcp://localhost:61616";
    String user = null;
    String password = null;
    String item = "HardDrive";

    if (args.length >= 1) {
        item = args[0];
    }

    String queue;
    if ("HardDrive".equals(item)) {
        queue = "StorageOrderQueue";
    } else if ("Monitor".equals(item)) {
        queue = "MonitorOrderQueue";
    } else {
        throw new IllegalArgumentException("Item must be either HardDrive or Monitor");
    }

    if (args.length >= 2) {
        url = args[1];
    }
    if (args.length >= 3) {
        user = args[2];
    }
    if (args.length >= 4) {
        password = args[3];
    }

    Supplier s = new Supplier(item, queue, url, user, password);
    new Thread(s, "Supplier " + item).start();
}
From source file:de.uni_potsdam.hpi.bpt.promnicat.importer.ModelImporter.java
/**
 * Usage: <code>'Path to config' Collection PATH [PATH2 [PATH3] ...]]</code>
 * </br></br>
 * <code>Path to config</code> is a path in file system to the configuration file being used
 * for import. It can be given relative to the PromniCAT folder.</br>
 * If an empty string is provided, the default file 'PromniCAT/configuration.properties' is used.</br></br>
 * <code>Collection</code> is the process model collection and can be one of
 * 'BPMN', 'NPB', 'SAP_RM' or 'AOK' </br></br>
 * <code>PATH</code> is a path in file system to a directory containing the models that should be imported.
 */
public static void main(String[] args) {
    // wrong number of parameters?
    if (args.length < 3) {
        printHelpMessage();
        return;
    }
    try {
        // read configuration file
        IPersistenceApi persistenceApi = new ConfigurationParser(args[0])
                .getDbInstance(Constants.DATABASE_TYPES.ORIENT_DB);

        // import models
        // BPMAI model?
        if (args[1].toUpperCase().equals(Constants.ORIGIN_BPMAI)) {
            startImport(new BpmaiImporter(persistenceApi), args);
            return;
        }
        // NPB model?
        if (args[1].toUpperCase().equals(Constants.ORIGIN_NPB)) {
            startImport(new NPBImporter(persistenceApi), args);
            return;
        }
        // SAP_RM model?
        if (args[1].toUpperCase().equals(Constants.ORIGIN_SAP_RM)) {
            startImport(new SapReferenceModelImporter(persistenceApi), args);
            return;
        }
        // AOK model?
        if (args[1].toUpperCase().equals(Constants.ORIGIN_AOK)) {
            startImport(new AokModelImporter(persistenceApi), args);
            return;
        }

        // wrong argument value
        printHelpMessage();
        throw new IllegalArgumentException(WRONG_USAGE_MESSAGE);
    } catch (Exception e) {
        logger.severe(e.getMessage());
        String stackTraceString = "";
        for (StackTraceElement ste : e.getStackTrace()) {
            stackTraceString = stackTraceString.concat(ste.toString() + "\n");
        }
        logger.severe(stackTraceString);
    }
}
From source file:org.craftercms.search.util.SearchIndexUpdater.java
public static void main(String... args) throws IOException {
    OptionParser parser = new OptionParser();
    parser.accepts("h", "prints the help");
    parser.accepts("s", "the Crafter site name").withRequiredArg();
    parser.accepts("u", "the Crafter Search server URL").withRequiredArg();
    parser.accepts("p", "the Crafter site path").withRequiredArg();

    OptionSet options = parser.parse(args);
    if (options.has("h")) {
        parser.printHelpOn(System.out);
    } else {
        checkForRequiredOption(options, "s");
        checkForRequiredOption(options, "u");
        checkForRequiredOption(options, "p");

        RestClientSearchService searchService = new RestClientSearchService();
        searchService.setServerUrl(options.valueOf("u").toString());

        String sitePath = options.valueOf("p").toString();
        File siteRoot = new File(sitePath);
        if (!siteRoot.exists()) {
            throw new IllegalArgumentException("The specified document's path [" + sitePath + "] doesn't exist");
        }
        if (!siteRoot.isDirectory()) {
            throw new IllegalArgumentException("The specified document's path [" + sitePath + "] is not a directory");
        }

        SearchIndexUpdater searchIndexUpdater = new SearchIndexUpdater();
        searchIndexUpdater.setSiteName(options.valueOf("s").toString());
        searchIndexUpdater.setSiteRoot(siteRoot);
        searchIndexUpdater.setSearchService(searchService);

        searchIndexUpdater.updateIndex();
    }
}
From source file:com.ibm.watson.app.common.tools.services.classifier.TrainClassifier.java
public static void main(String[] args) throws Exception {
    Option urlOption = createOption(URL_OPTION, URL_OPTION_LONG, true,
            "The absolute URL of the NL classifier service to connect to. If omitted, the default will be used ("
                    + DEFAULT_URL + ")",
            false, "url");
    Option usernameOption = createOption(USERNAME_OPTION, USERNAME_OPTION_LONG, true,
            "The username to use during authentication to the NL classifier service", true, "username");
    Option passwordOption = createOption(PASSWORD_OPTION, PASSWORD_OPTION_LONG, true,
            "The password to use during authentication to the NL classifier service", true, "password");
    Option fileOption = createOption(FILE_OPTION, FILE_OPTION_LONG, true,
            "The filepath to be used as training data", false, "file");
    Option deleteOption = createOption(DELETE_OPTION, DELETE_OPTION_LONG, false,
            "If specified, the classifier instance will be deleted if training is not successful");

    final Options options = buildOptions(urlOption, usernameOption, passwordOption, fileOption, deleteOption);

    CommandLine cmd;
    try {
        CommandLineParser parser = new GnuParser();
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        System.err.println(MessageKey.AQWEGA14016E_could_not_parse_cmd_line_args_1.getMessage(e.getMessage())
                .getFormattedMessage());
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(120, "java " + TrainClassifier.class.getName(), null, options, null);
        return;
    }

    final String url = cmd.hasOption(URL_OPTION) ? cmd.getOptionValue(URL_OPTION) : DEFAULT_URL;
    final String username = cmd.getOptionValue(USERNAME_OPTION).trim();
    final String password = cmd.getOptionValue(PASSWORD_OPTION).trim();

    if (username.isEmpty() || password.isEmpty()) {
        throw new IllegalArgumentException(
                MessageKey.AQWEGA14014E_username_and_password_cannot_empty.getMessage().getFormattedMessage());
    }

    final NLClassifierRestClient client = new NLClassifierRestClient(url, username, password);

    listClassifiers(client);
    System.out.println(MessageKey.AQWEGA10012I_h_help.getMessage().getFormattedMessage());

    String userInput;
    boolean exit = false;
    NLClassifier classifier = null;
    boolean isTraining = false;

    if (cmd.hasOption(FILE_OPTION)) {
        // File option was specified, go directly to training
        classifier = train(client, cmd.getOptionValue(FILE_OPTION));
        isTraining = true;
    }

    try (final Scanner scanner = new Scanner(System.in)) {
        while (!exit) {
            System.out.print("> ");
            userInput = scanner.nextLine().trim();
            if (userInput.equals("q") || userInput.equals("quit") || userInput.equals("exit")) {
                exit = true;
            } else if (userInput.equals("h") || userInput.equals("help")) {
                printHelp();
            } else if (userInput.equals("l") || userInput.equals("list")) {
                listClassifiers(client);
            } else if (userInput.equals("t") || userInput.equals("train")) {
                if (!isTraining) {
                    System.out.print("Enter filename: ");
                    String filename = scanner.nextLine().trim();
                    classifier = train(client, filename);
                    isTraining = true;
                } else {
                    System.err.println(MessageKey.AQWEGA10013I_t_cannot_used_during_training.getMessage()
                            .getFormattedMessage());
                }
            } else if (userInput.equals("s") || userInput.equals("status")) {
                if (isTraining) {
                    exit = getStatus(client, classifier, cmd.hasOption(DELETE_OPTION));
                } else {
                    System.err.println(MessageKey.AQWEGA10014I_s_can_used_during_training.getMessage()
                            .getFormattedMessage());
                }
            } else if (userInput.equals("c") || userInput.equals("classify")) {
                if (classifier != null && classifier.getStatus().equals(Status.AVAILABLE)) {
                    isTraining = false;
                    System.out.print(MessageKey.AQWEGA10015I_text_classify.getMessage().getFormattedMessage());
                    String text = scanner.nextLine().trim();
                    classify(client, classifier, text);
                } else {
                    System.err.println(MessageKey.AQWEGA10016I_c_can_used_after_training_has_completed
                            .getMessage().getFormattedMessage());
                }
            } else {
                System.err.println(
                        MessageKey.AQWEGA14017E_unknown_command_1.getMessage(userInput).getFormattedMessage());
            }
            Thread.sleep(100); // Give the out / err consoles time to battle it out before printing the command prompt
        }
    }
}
From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step7CollectMTurkResults.java
public static void main(String[] args) throws Exception {
    // input dir - list of xml query containers
    // /home/user-ukp/research/data/dip/wp1-documents/step4-boiler-plate/
    File inputDir = new File(args[0] + "/");

    // MTurk result file
    // output dir
    File outputDir = new File(args[2]);
    if (!outputDir.exists()) {
        outputDir.mkdirs();
    }

    // Folder with success files
    File mturkSuccessDir = new File(args[1]);

    Collection<File> files = FileUtils.listFiles(mturkSuccessDir, new String[] { "result" }, false);
    if (files.isEmpty()) {
        throw new IllegalArgumentException("Input folder is empty. " + mturkSuccessDir);
    }

    HashMap<String, List<MTurkAnnotation>> mturkAnnotations = new HashMap<>();

    // parsing all CSV files
    for (File mturkCSVResultFile : files) {
        System.out.println("Parsing " + mturkCSVResultFile.getName());

        MTurkOutputReader outputReader = new MTurkOutputReader(
                new HashSet<>(Arrays.asList("annotation", "workerid")), mturkCSVResultFile);

        // for fixing broken data input: for each hit, collect all sentence IDs
        Map<String, SortedSet<String>> hitSentences = new HashMap<>();

        // first iteration: collect the sentences
        for (Map<String, String> record : outputReader) {
            String hitID = record.get("hitid");
            if (!hitSentences.containsKey(hitID)) {
                hitSentences.put(hitID, new TreeSet<>());
            }

            String relevantSentences = record.get("Answer.relevant_sentences");
            String irrelevantSentences = record.get("Answer.irrelevant_sentences");

            if (relevantSentences != null) {
                hitSentences.get(hitID).addAll(Arrays.asList(relevantSentences.split(",")));
            }

            if (irrelevantSentences != null) {
                hitSentences.get(hitID).addAll(Arrays.asList(irrelevantSentences.split(",")));
            }
        }

        // and now second iteration
        for (Map<String, String> record : outputReader) {
            String hitID = record.get("hitid");
            String annotatorID = record.get("workerid");
            String acceptTime = record.get("assignmentaccepttime");
            String submitTime = record.get("assignmentsubmittime");
            String relevantSentences = record.get("Answer.relevant_sentences");
            String irrelevantSentences = record.get("Answer.irrelevant_sentences");
            String reject = record.get("reject");
            String filename[];
            String comment;
            String clueWeb;
            String[] relevant = {};
            String[] irrelevant = {};

            filename = record.get("annotation").split("_");
            String fileXml = filename[0];
            clueWeb = filename[1].trim();
            comment = record.get("Answer.comment");

            if (relevantSentences != null) {
                relevant = relevantSentences.split(",");
            }

            if (irrelevantSentences != null) {
                irrelevant = irrelevantSentences.split(",");
            }

            // sanitizing data: if both relevant and irrelevant are empty, that's a bug
            // we're gonna look up all sentences from this HIT and treat this assignment
            // as if there were only irrelevant ones
            if (relevant.length == 0 && irrelevant.length == 0) {
                SortedSet<String> strings = hitSentences.get(hitID);
                irrelevant = new String[strings.size()];
                strings.toArray(irrelevant);
            }

            if (reject != null) {
                System.out.println(" HIT " + hitID + " annotated by " + annotatorID + " was rejected ");
            } else {
                /*
                // relevant sentences is a comma-delimited string,
                // this regular expression is rather strange
                // it must contain digits, it might be that there is only one space or a comma or some other char
                // digits are the sentence ids. if relevant sentences do not contain digits then it is wrong
                if (relevantSentences.matches("^\\D*$") && irrelevantSentences.matches("^\\D*$")) {
                    try {
                        throw new IllegalStateException("No annotations found for HIT " + hitID + " in "
                                + fileXml + " for document " + clueWeb);
                    } catch (IllegalStateException ex) {
                        ex.printStackTrace();
                    }
                }
                */

                MTurkAnnotation mturkAnnotation;
                try {
                    mturkAnnotation = new MTurkAnnotation(hitID, annotatorID, acceptTime, submitTime, comment,
                            clueWeb, relevant, irrelevant);
                } catch (IllegalArgumentException ex) {
                    throw new IllegalArgumentException("Record: " + record, ex);
                }

                List<MTurkAnnotation> listOfAnnotations = mturkAnnotations.get(fileXml);

                if (listOfAnnotations == null) {
                    listOfAnnotations = new ArrayList<>();
                }
                listOfAnnotations.add(mturkAnnotation);
                mturkAnnotations.put(fileXml, listOfAnnotations);
            }
        }
        // parser.close();
    }

    // Debugging: output number of HITs of a query
    System.out.println("Accepted HITs for a query:");
    for (Map.Entry e : mturkAnnotations.entrySet()) {
        ArrayList<MTurkAnnotation> a = (ArrayList<MTurkAnnotation>) e.getValue();
        System.out.println(e.getKey() + " " + a.size());
    }

    for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) {
        QueryResultContainer queryResultContainer = QueryResultContainer
                .fromXML(FileUtils.readFileToString(f, "utf-8"));

        String fileName = f.getName();
        List<MTurkAnnotation> listOfAnnotations = mturkAnnotations.get(fileName);

        if (listOfAnnotations == null || listOfAnnotations.isEmpty()) {
            throw new IllegalStateException("No annotations for " + f.getName());
        }

        for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) {
            for (MTurkAnnotation mtAnnotation : listOfAnnotations) {
                String clueWeb = mtAnnotation.clueWeb;
                if (rankedResults.clueWebID.equals(clueWeb)) {
                    List<QueryResultContainer.MTurkRelevanceVote> mTurkRelevanceVotes = rankedResults.mTurkRelevanceVotes;
                    QueryResultContainer.MTurkRelevanceVote relevanceVote = new QueryResultContainer.MTurkRelevanceVote();
                    String annotatorID = mtAnnotation.annotatorID;
                    String hitID = mtAnnotation.hitID;
                    String acceptTime = mtAnnotation.acceptTime;
                    String submitTime = mtAnnotation.submitTime;
                    String comment = mtAnnotation.comment;
                    String[] relevant = mtAnnotation.relevant;
                    String[] irrelevant = mtAnnotation.irrelevant;

                    relevanceVote.turkID = annotatorID.trim();
                    relevanceVote.hitID = hitID.trim();
                    relevanceVote.acceptTime = acceptTime.trim();
                    relevanceVote.submitTime = submitTime.trim();
                    relevanceVote.comment = comment != null ? comment.trim() : null;

                    if (relevant.length == 0 && irrelevant.length == 0) {
                        try {
                            throw new IllegalStateException("the length of the annotations is 0"
                                    + rankedResults.clueWebID + " for HIT " + relevanceVote.hitID);
                        } catch (IllegalStateException e) {
                            e.printStackTrace();
                        }
                    }

                    for (String r : relevant) {
                        String sentenceId = r.trim();
                        if (!sentenceId.isEmpty() && sentenceId.matches("\\d+")) {
                            QueryResultContainer.SingleSentenceRelevanceVote singleSentenceVote = new QueryResultContainer.SingleSentenceRelevanceVote();
                            singleSentenceVote.sentenceID = sentenceId;
                            singleSentenceVote.relevant = "true";
                            relevanceVote.singleSentenceRelevanceVotes.add(singleSentenceVote);
                        }
                    }

                    for (String r : irrelevant) {
                        String sentenceId = r.trim();
                        if (!sentenceId.isEmpty() && sentenceId.matches("\\d+")) {
                            QueryResultContainer.SingleSentenceRelevanceVote singleSentenceVote = new QueryResultContainer.SingleSentenceRelevanceVote();
                            singleSentenceVote.sentenceID = sentenceId;
                            singleSentenceVote.relevant = "false";
                            relevanceVote.singleSentenceRelevanceVotes.add(singleSentenceVote);
                        }
                    }

                    mTurkRelevanceVotes.add(relevanceVote);
                }
            }
        }

        File outputFile = new File(outputDir, f.getName());
        FileUtils.writeStringToFile(outputFile, queryResultContainer.toXML(), "utf-8");
        System.out.println("Finished " + outputFile);
    }
}
From source file:ObfuscatingStream.java
/**
 * Obfuscates or unobfuscates the second command-line argument, depending on
 * whether the first argument starts with "o" or "u"
 *
 * @param args
 *            Command-line arguments
 * @throws IOException
 *             If an error occurs obfuscating or unobfuscating
 */
public static void main(String[] args) throws IOException {
    InputStream input = new ByteArrayInputStream(args[1].getBytes());
    StringBuilder toPrint = new StringBuilder();
    if (args[0].startsWith("o")) {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        OutputStream out = obfuscate(bytes);
        int read = input.read();
        while (read >= 0) {
            out.write(read);
            read = input.read();
        }
        byte[] receiptBytes = bytes.toByteArray();
        for (int b = 0; b < receiptBytes.length; b++) {
            int chr = (receiptBytes[b] + 256) % 256;
            toPrint.append(HEX_CHARS[chr >>> 4]);
            toPrint.append(HEX_CHARS[chr & 0xf]);
        }
    } else if (args[0].startsWith("u")) {
        input = unobfuscate(input);
        InputStreamReader reader = new InputStreamReader(input);
        int read = reader.read();
        while (read >= 0) {
            toPrint.append((char) read);
            read = reader.read();
        }
    } else
        throw new IllegalArgumentException("First argument must start with o or u");
    System.out.println(toPrint.toString());
}
From source file:at.tuwien.ifs.feature.evaluation.SimilarityRetrievalWriter.java
public static void main(String[] args) throws SOMToolboxException, IOException {
    // register and parse all options
    JSAPResult config = OptionFactory.parseResults(args, OPTIONS);
    File inputVectorFile = config.getFile("inputVectorFile");
    String outputDirStr = AbstractOptionFactory.getFilePath(config, "outputDirectory");
    File outputDirBase = new File(outputDirStr);
    outputDirBase.mkdirs();
    String metricName = config.getString("metric");
    DistanceMetric metric = AbstractMetric.instantiateNice(metricName);
    int neighbours = config.getInt("numberNeighbours");
    int startIndex = config.getInt("startIndex");
    int numberItems = config.getInt("numberItems", -1);

    try {
        SOMLibSparseInputData data = new SOMLibSparseInputData(inputVectorFile.getAbsolutePath());
        int endIndex = data.numVectors();
        if (numberItems != -1) {
            if (startIndex + numberItems > endIndex) {
                System.out.println("Specified number of items (" + numberItems + ") exceeds maximum ("
                        + data.numVectors() + "), limiting to " + (endIndex - startIndex) + ".");
            } else {
                endIndex = startIndex + numberItems;
            }
        }
        StdErrProgressWriter progress = new StdErrProgressWriter(endIndex - startIndex, "processing vector ");
        // SortedSet<InputDistance> distances;
        for (int inputDatumIndex = startIndex; inputDatumIndex < endIndex; inputDatumIndex++) {
            InputDatum inputDatum = data.getInputDatum(inputDatumIndex);
            String inputLabel = inputDatum.getLabel();

            if (inputDatumIndex == -1) {
                throw new IllegalArgumentException(
                        "Input with label '" + inputLabel + "' not found in vector file '" + inputVectorFile
                                + "'; possible labels are: " + StringUtils.toString(data.getLabels(), 15));
            }

            File outputDir = new File(outputDirBase,
                    inputLabel.charAt(2) + "/" + inputLabel.charAt(3) + "/" + inputLabel.charAt(4));
            outputDir.mkdirs();
            File outputFile = new File(outputDir, inputLabel + ".txt");
            boolean fileExistsAndValid = false;

            if (outputFile.exists()) {
                // check if it the valid data
                String linesInvalid = "";
                int validLineCount = 0;
                ArrayList<String> lines = FileUtils.readLinesAsList(outputFile.getAbsolutePath());
                for (String string : lines) {
                    if (string.trim().length() == 0) {
                        continue;
                    }
                    String[] parts = string.split("\t");
                    if (parts.length != 2) {
                        linesInvalid += "Line '" + string + "' invalid - contains " + parts.length + " elements.\n";
                    } else if (!NumberUtils.isNumber(parts[1])) {
                        linesInvalid = "Line '" + string + "' invalid - 2nd part is not a number.\n";
                    } else {
                        validLineCount++;
                    }
                }
                if (validLineCount != neighbours) {
                    linesInvalid = "Not enough valid lines; expected " + neighbours + ", found " + validLineCount + ".\n";
                }
                fileExistsAndValid = true;
                if (org.apache.commons.lang.StringUtils.isNotBlank(linesInvalid)) {
                    System.out.println("File " + outputFile.getAbsolutePath() + " exists, but is not valid:\n"
                            + linesInvalid);
                }
            }

            if (fileExistsAndValid) {
                Logger.getLogger("at.tuwien.ifs.feature.evaluation").finer(
                        "File " + outputFile.getAbsolutePath() + " exists and is valid; not recomputing");
            } else {
                PrintWriter p = new PrintWriter(outputFile);
                SmallestElementSet<InputDistance> distances = data.getNearestDistances(inputDatumIndex, neighbours,
                        metric);
                for (InputDistance inputDistance : distances) {
                    p.println(inputDistance.getInput().getLabel() + "\t" + inputDistance.getDistance());
                }
                p.close();
            }
            progress.progress();
        }
    } catch (IllegalArgumentException e) {
        System.out.println(e.getMessage() + ". Aborting.");
        System.exit(-1);
    }
}
From source file:joshelser.as2015.query.Query.java
public static void main(String[] args) throws Exception {
    JCommander commander = new JCommander();
    final Opts options = new Opts();
    commander.addObject(options);

    commander.setProgramName("Query");
    try {
        commander.parse(args);
    } catch (ParameterException ex) {
        commander.usage();
        System.err.println(ex.getMessage());
        System.exit(1);
    }

    ClientConfiguration conf = ClientConfiguration.loadDefault();
    if (null != options.clientConfFile) {
        conf = new ClientConfiguration(new PropertiesConfiguration(options.clientConfFile));
    }
    conf.withInstance(options.instanceName).withZkHosts(options.zookeepers);

    ZooKeeperInstance inst = new ZooKeeperInstance(conf);
    Connector conn = inst.getConnector(options.user, new PasswordToken(options.password));

    BatchScanner bs = conn.createBatchScanner(options.table, Authorizations.EMPTY, 16);
    try {
        bs.setRanges(Collections.singleton(new Range()));
        final Text categoryText = new Text("category");
        bs.fetchColumn(categoryText, new Text("name"));
        bs.fetchColumn(new Text("review"), new Text("score"));
        bs.fetchColumn(new Text("review"), new Text("userId"));

        bs.addScanIterator(new IteratorSetting(50, "wri", WholeRowIterator.class));

        final Text colf = new Text();
        Map<String, List<Integer>> scoresByUser = new HashMap<>();

        for (Entry<Key, Value> entry : bs) {
            SortedMap<Key, Value> row = WholeRowIterator.decodeRow(entry.getKey(), entry.getValue());
            Iterator<Entry<Key, Value>> iter = row.entrySet().iterator();
            if (!iter.hasNext()) {
                // row was empty
                continue;
            }
            Entry<Key, Value> categoryEntry = iter.next();
            categoryEntry.getKey().getColumnFamily(colf);
            if (!colf.equals(categoryText)) {
                throw new IllegalArgumentException("Unknown!");
            }
            if (!categoryEntry.getValue().toString().equals("books")) {
                // not a book review
                continue;
            }

            if (!iter.hasNext()) {
                continue;
            }

            Entry<Key, Value> reviewScore = iter.next();
            if (!iter.hasNext()) {
                continue;
            }
            Entry<Key, Value> reviewUserId = iter.next();

            String userId = reviewUserId.getValue().toString();
            if (userId.equals("unknown")) {
                // filter unknow user id
                continue;
            }

            List<Integer> scores = scoresByUser.get(userId);
            if (null == scores) {
                scores = new ArrayList<>();
                scoresByUser.put(userId, scores);
            }
            scores.add(Float.valueOf(reviewScore.getValue().toString()).intValue());
        }

        for (Entry<String, List<Integer>> entry : scoresByUser.entrySet()) {
            int sum = 0;
            for (Integer val : entry.getValue()) {
                sum += val;
            }
            System.out.println(entry.getKey() + " => " + new Float(sum) / entry.getValue().size());
        }
    } finally {
        bs.close();
    }
}
From source file:edu.byu.nlp.data.app.AnnotationStream2Annotators.java
public static void main(String[] args) throws IOException {
    // parse CLI arguments
    new ArgumentParser(AnnotationStream2Annotators.class).parseArgs(args);
    Preconditions.checkNotNull(jsonStream, "You must provide a valid --json-stream!");
    Preconditions.checkArgument(smooth >= 0, "invalid smoothing value=" + smooth);
    Preconditions.checkArgument(k > 0, "invalid number of clusters=" + k);

    // compile annotation stream data into a dataset
    RandomGenerator rnd = new MersenneTwister(seed);
    Dataset data = readData(jsonStream);

    // create confusion matrices for each annotator wrt some truth
    int[][][] confusionMatrices; // confusionMatrices[annotator][true label][annotation] = count
    logger.info("dataset=" + data);
    switch (confusionMatrixTruth) {
    case GOLD:
        confusionMatrices = Datasets.confusionMatricesWrtGoldLabels(data);
        break;
    case MAJORITY:
        confusionMatrices = Datasets.confusionMatricesWrtMajorityVoteLabels(data, rnd);
        break;
    default:
        throw new IllegalArgumentException(
                "unknown truth standard for constructing confusion matrices: " + confusionMatrixTruth);
    }

    // aggregate annotators based on their confusion matrices
    double[][][] annotatorParameters = confusionMatrices2AnnotatorParameters(confusionMatrices);
    int[] clusterAssignments = clusterAnnotatorParameters(annotatorParameters, aggregate, k, maxIterations,
            smooth, rnd);
    double[][][] clusteredAnnotatorParameters = aggregateAnnotatorParameterClusters(annotatorParameters,
            clusterAssignments);

    // aggregate annotator rates
    double[] annotationRates = new double[clusteredAnnotatorParameters.length];
    for (int j = 0; j < confusionMatrices.length; j++) {
        long numAnnotationsPerJ = Matrices.sum(confusionMatrices[j]);
        // add this annotator's annotation count to the cluster total
        annotationRates[clusterAssignments[j]] += numAnnotationsPerJ;
    }
    DoubleArrays.normalizeToSelf(annotationRates);

    // output to console
    logger.info("aggregated annotators=\n" + Matrices.toString(clusteredAnnotatorParameters, 10, 10, 20, 3));
    for (int c = 0; c < clusteredAnnotatorParameters.length; c++) {
        logger.info("aggregated annotator #" + c + " accuracy=" + accuracyOf(clusteredAnnotatorParameters[c]));
        logger.info("aggregated annotator #" + c + " rate=" + annotationRates[c]);
    }

    // output to file
    if (output != null) {
        List<SimulatedAnnotator> annotators = SimulatedAnnotators.from(clusteredAnnotatorParameters,
                annotationRates);
        Files2.write(SimulatedAnnotators.serialize(annotators), output);
    }
}