Example usage for java.util.List.get

List of usage examples for java.util.List.get

Introduction

On this page you can find example usages of java.util.List.get.

Prototype

E get(int index);

Source Link

Document

Returns the element at the specified position in this list.

Usage

From source file:sdmx.net.service.insee.INSEERESTQueryable.java

/**
 * Prints the name of every dataflow returned by the registry, then looks up
 * the structure of the first dataflow and dumps it.
 *
 * @param args unused
 */
public static void main(String args[]) {
    INSEERESTQueryable registry = new INSEERESTQueryable("FR1", "http://www.bdm.insee.fr/series/sdmx");
    List<DataflowType> dataflows = registry.listDataflows();
    for (DataflowType dataflow : dataflows) {
        System.out.println(dataflow.getName());
    }
    // The first dataflow is used as the sample; an empty list fails here.
    DataflowType first = dataflows.get(0);
    registry.find(first.getStructure()).dump();
}

From source file:com.fun.rrs.common.excel.ExportExcel.java

/**
 * /*from   ww  w  . j  av a  2 s .co m*/
 */
public static void main(String[] args) throws Throwable {

    List<String> headerList = new ArrayList<String>();
    for (int i = 1; i <= 10; i++) {
        headerList.add("" + i);
    }

    List<String> dataRowList = new ArrayList<String>();
    for (int i = 1; i <= headerList.size(); i++) {
        dataRowList.add("?" + i);
    }

    List<List<String>> dataList = new ArrayList<List<String>>();
    for (int i = 1; i <= 100; i++) {
        dataList.add(dataRowList);
    }

    ExportExcel ee = new ExportExcel("", headerList);

    for (int i = 0; i < dataList.size(); i++) {
        Row row = ee.addRow();
        for (int j = 0; j < dataList.get(i).size(); j++) {
            ee.addCell(row, j, dataList.get(i).get(j));
        }
    }

    ee.writeFile("target/export.xlsx");

    ee.dispose();

    log.debug("Export success.");

}

From source file:com.meidusa.venus.benchmark.FileLineRandomData.java

/**
 * Reads {@code ./role.txt} through a {@code FileLineRandomData}, starts one
 * worker thread that prints the second field of 1000 records, waits for it to
 * finish and prints the elapsed time.
 *
 * @param args unused
 * @throws Exception if initialisation fails or the wait is interrupted
 */
public static void main(String[] args) throws Exception {
    final FileLineRandomData source = new FileLineRandomData();
    source.setFile(new File("./role.txt"));
    source.init();

    final int workerCount = 1;
    List<Thread> workers = new ArrayList<Thread>();
    long startedAt = TimeUtil.currentTimeMillis();
    for (int w = 0; w < workerCount; w++) {
        Thread worker = new Thread() {
            @Override
            public void run() {
                int remaining = 1000;
                while (remaining-- > 0) {
                    String[] fields = (String[]) source.nextData();
                    System.out.println(fields[1]);
                }
            }
        };
        workers.add(worker);
        worker.start();
    }

    // Wait for every worker before reporting the elapsed time.
    for (Thread worker : workers) {
        worker.join();
    }

    System.out.println("time=" + (TimeUtil.currentTimeMillis() - startedAt));
}

From source file:de.tudarmstadt.ukp.experiments.argumentation.convincingness.sampling.Step4MTurkOutputCollector.java

/**
 * Merges MTurk result files with previously prepared argument pairs.
 * <p>
 * The method reads the result files, prints approval/rejection statistics, attaches the
 * non-rejected assignments to the matching argument pairs and writes every input file that
 * received at least one annotation to the output directory under the same file name.
 * Finally it lists the assignments that had no reason filled in.
 *
 * @param args {@code args[0]} directory with the argument-pair XML files;
 *             {@code args[1]} either a path containing {@code *} or a comma-separated list
 *             of result files; {@code args[2]} output directory, which must be empty
 * @throws Exception on I/O problems, a non-empty output directory, or an assignment status
 *                   other than "Approved"/"Rejected"
 */
@SuppressWarnings("unchecked")
public static void main(String[] args) throws Exception {
    String inputDirWithArgumentPairs = args[0];

    File[] resultFiles;

    if (args[1].contains("*")) {
        File path = new File(args[1]);
        File directory = path.getParentFile();
        // NOTE(review): despite its name this is the file name with every '*' removed; it is
        // passed as the single entry of the String[] argument of FileUtils.listFiles below
        String regex = path.getName().replaceAll("\\*", "");

        List<File> files = new ArrayList<>(FileUtils.listFiles(directory, new String[] { regex }, false));
        resultFiles = new File[files.size()];
        for (int i = 0; i < files.size(); i++) {
            resultFiles[i] = files.get(i);
        }
    } else {
        // result file is a comma-separated list of CSV files from MTurk
        String[] split = args[1].split(",");
        resultFiles = new File[split.length];
        for (int i = 0; i < split.length; i++) {
            resultFiles[i] = new File(split[i]);
        }
    }

    File outputDir = new File(args[2]);

    if (!outputDir.exists()) {
        if (!outputDir.mkdirs()) {
            throw new IOException("Cannot create directory " + outputDir);
        }
    }

    // error if output folder not empty to prevent any confusion by mixing files
    if (!FileUtils.listFiles(outputDir, null, false).isEmpty()) {
        throw new IllegalArgumentException("Output dir " + outputDir + " is not empty");
    }

    // collected assignments with empty reason for rejections
    Set<String> assignmentsWithEmptyReason = new HashSet<>();

    // parse with first line as header
    MTurkOutputReader mTurkOutputReader = new MTurkOutputReader(resultFiles);

    Collection<File> files = FileUtils.listFiles(new File(inputDirWithArgumentPairs), new String[] { "xml" },
            false);

    if (files.isEmpty()) {
        throw new IOException("No xml files found in " + inputDirWithArgumentPairs);
    }

    // statistics: hit type ID -> (hit ID -> number of non-rejected assignments)
    // (only referenced by the commented-out block further below)
    Map<String, Map<String, Integer>> assignmentsPerHits = new HashMap<>();

    // first pass over the records: count non-rejected assignments per HIT
    for (Map<String, String> record : mTurkOutputReader) {
        boolean wasRejected = "Rejected".equals(record.get("assignmentstatus"));
        String hitID = record.get("hitid");
        String hitTypeId = record.get("hittypeid");

        if (!wasRejected) {
            // update statistics
            if (!assignmentsPerHits.containsKey(hitTypeId)) {
                assignmentsPerHits.put(hitTypeId, new HashMap<String, Integer>());
            }

            if (!assignmentsPerHits.get(hitTypeId).containsKey(hitID)) {
                assignmentsPerHits.get(hitTypeId).put(hitID, 0);
            }

            assignmentsPerHits.get(hitTypeId).put(hitID, assignmentsPerHits.get(hitTypeId).get(hitID) + 1);
        }
    }

    // statistics: hit ID -> number of approved / rejected assignments
    Map<String, Integer> approvedAssignmentsPerHit = new HashMap<>();
    Map<String, Integer> rejectedAssignmentsPerHit = new HashMap<>();

    // second pass: collect accept/reject statistics; any other status aborts the run
    for (Map<String, String> record : mTurkOutputReader) {
        boolean approved = "Approved".equals(record.get("assignmentstatus"));
        boolean rejected = "Rejected".equals(record.get("assignmentstatus"));
        String hitID = record.get("hitid");

        if (approved) {
            // update statistics
            if (!approvedAssignmentsPerHit.containsKey(hitID)) {
                approvedAssignmentsPerHit.put(hitID, 0);
            }

            approvedAssignmentsPerHit.put(hitID, approvedAssignmentsPerHit.get(hitID) + 1);
        } else if (rejected) {
            // update statistics
            if (!rejectedAssignmentsPerHit.containsKey(hitID)) {
                rejectedAssignmentsPerHit.put(hitID, 0);
            }

            rejectedAssignmentsPerHit.put(hitID, rejectedAssignmentsPerHit.get(hitID) + 1);
        } else {
            throw new IllegalStateException(
                    "Unknown state: " + record.get("assignmentstatus") + " HITID: " + hitID);
        }
    }

    //        System.out.println("Approved: " + approvedAssignmentsPerHit);
    //        System.out.println("Rejected: " + rejectedAssignmentsPerHit);

    // print only the distinct per-HIT counts
    System.out.println("Approved (values): " + new HashSet<>(approvedAssignmentsPerHit.values()));
    System.out.println("Rejected (values): " + new HashSet<>(rejectedAssignmentsPerHit.values()));
    // rejection statistics
    int totalRejected = 0;
    for (Map.Entry<String, Integer> rejectionEntry : rejectedAssignmentsPerHit.entrySet()) {
        totalRejected += rejectionEntry.getValue();
    }

    System.out.println("Total rejections: " + totalRejected);

    /*
    // generate .success files for adding more annotations
    for (File resultFile : resultFiles) {
    String hitTypeID = mTurkOutputReader.getHitTypeIdForFile().get(resultFile);

    // assignments for that hittypeid (= file)
    Map<String, Integer> assignments = assignmentsPerHits.get(hitTypeID);

    prepareUpdateHITsFiles(assignments, hitTypeID, resultFile);
    }
    */

    int totalSavedPairs = 0;

    // load all previously prepared argument pairs
    for (File file : files) {
        List<ArgumentPair> argumentPairs = (List<ArgumentPair>) XStreamTools.getXStream().fromXML(file);

        List<AnnotatedArgumentPair> annotatedArgumentPairs = new ArrayList<>();

        for (ArgumentPair argumentPair : argumentPairs) {
            AnnotatedArgumentPair annotatedArgumentPair = new AnnotatedArgumentPair(argumentPair);

            // the answer column for this pair is named "Answer.<pair id>"
            String key = "Answer." + argumentPair.getId();

            // iterate only if there is such column to save time
            if (mTurkOutputReader.getColumnNames().contains(key)) {
                // now find the results
                for (Map<String, String> record : mTurkOutputReader) {
                    if (record.containsKey(key)) {
                        // extract the values
                        AnnotatedArgumentPair.MTurkAssignment assignment = new AnnotatedArgumentPair.MTurkAssignment();

                        boolean wasRejected = "Rejected".equals(record.get("assignmentstatus"));

                        // only non-rejected (if required)
                        if (!wasRejected) {
                            String hitID = record.get("hitid");
                            String workerID = record.get("workerid");
                            String assignmentId = record.get("assignmentid");
                            try {
                                assignment.setAssignmentAcceptTime(
                                        DATE_FORMAT.parse(record.get("assignmentaccepttime")));
                                assignment.setAssignmentSubmitTime(
                                        DATE_FORMAT.parse(record.get("assignmentsubmittime")));
                                assignment.setHitComment(record.get("Answer.feedback"));
                                assignment.setHitID(hitID);
                                assignment.setTurkID(workerID);
                                assignment.setAssignmentId(assignmentId);

                                // and answer specific fields
                                String valueRaw = record.get(key);

                                // so far the label has had format aXXX_aYYY_a1, aXXX_aYYY_a2, or aXXX_aYYY_equal
                                // strip now only true label
                                String label = valueRaw.split("_")[2];

                                assignment.setValue(label);
                                String reason = record.get(key + "_reason");

                                // missing reason: remember the assignment and do not attach it to the pair
                                if (reason == null) {
                                    assignmentsWithEmptyReason.add(assignmentId);
                                } else {
                                    assignment.setReason(reason);

                                    // get worker's stance
                                    String stanceRaw = record.get(key + "_stance");
                                    if (stanceRaw != null) {
                                        // parse stance: the part after the "_stance_" marker
                                        String stance = stanceRaw.split("_stance_")[1];
                                        assignment.setWorkerStance(stance);
                                    }

                                    // we take maximal 5 assignments
                                    // NOTE(review): the existing list is sorted by accept time BEFORE the
                                    // new assignment is appended, and appending stops once the limit is
                                    // reached, so the kept assignments are the first ones met in reader
                                    // order, not necessarily the earliest accepted - confirm intended
                                    Collections.sort(annotatedArgumentPair.mTurkAssignments,
                                            new Comparator<AnnotatedArgumentPair.MTurkAssignment>() {
                                                @Override
                                                public int compare(AnnotatedArgumentPair.MTurkAssignment o1,
                                                        AnnotatedArgumentPair.MTurkAssignment o2) {
                                                    return o1.getAssignmentAcceptTime()
                                                            .compareTo(o2.getAssignmentAcceptTime());
                                                }
                                            });

                                    if (annotatedArgumentPair.mTurkAssignments
                                            .size() < MAXIMUM_ASSIGNMENTS_PER_HIT) {
                                        annotatedArgumentPair.mTurkAssignments.add(assignment);
                                    }
                                }
                            } catch (IllegalArgumentException | NullPointerException ex) {
                                // malformed records are reported on stderr and skipped
                                System.err.println("Malformed annotations for HIT " + hitID + ", worker "
                                        + workerID + ", assignment " + assignmentId + "; " + ex.getMessage()
                                        + ", full record: " + record);
                            }
                        }
                    }
                }
            }

            // and if there are some annotations, add it to the result set
            if (!annotatedArgumentPair.mTurkAssignments.isEmpty()) {
                annotatedArgumentPairs.add(annotatedArgumentPair);
            }
        }

        // write only input files that received at least one annotated pair
        if (!annotatedArgumentPairs.isEmpty()) {
            File outputFile = new File(outputDir, file.getName());
            XStreamTools.toXML(annotatedArgumentPairs, outputFile);

            System.out.println("Saved " + annotatedArgumentPairs.size() + " annotated pairs to " + outputFile);
            totalSavedPairs += annotatedArgumentPairs.size();
        }
    }

    System.out.println("Total saved " + totalSavedPairs + " pairs");

    // print assignments with empty reasons as a tab-separated table
    if (!assignmentsWithEmptyReason.isEmpty()) {
        System.out.println(
                "== Assignments with empty reason:\nassignmentIdToReject\tassignmentIdToRejectComment");
        for (String assignmentId : assignmentsWithEmptyReason) {
            System.out.println(
                    assignmentId + "\t\"Dear worker, you did not fill the required field with a reason.\"");
        }
    }

}

From source file:kindleclippings.word.QuizletSync.java

/**
 * Lets the user choose one or more notes files and uploads their clippings to Quizlet.
 * <p>
 * For each selected file the clippings are read with {@code readClippingsFile}; files that
 * yield fewer than two clippings are skipped. The book of the first clipping is matched
 * against the titles of the user's existing sets (lower-cased, non-word characters removed).
 * If no set matches, a new one is created via {@code addSet}; otherwise every clipping that
 * {@code checkExistingTerm} does not find in the set is added as a term. A summary dialog is
 * shown at the end and the JVM exits.
 *
 * @param args unused
 * @throws IOException if a Quizlet call fails for a reason other than a "401" response, or
 *                     the retry after a "401" fails as well
 */
public static void main(String[] args) throws IOException, JSONException, URISyntaxException,
        InterruptedException, BackingStoreException, BadLocationException {

    JFileChooser fc = new JFileChooser();
    fc.setFileFilter(new FileNameExtensionFilter("Word documents", "doc", "rtf", "txt"));
    fc.setMultiSelectionEnabled(true);
    int result = fc.showOpenDialog(null);
    if (result != JFileChooser.APPROVE_OPTION) {
        return;
    }
    File[] clf = fc.getSelectedFiles();
    if (clf == null || clf.length == 0) {
        return;
    }

    ProgressMonitor progress = new ProgressMonitor(null, "QuizletSync", "loading notes files", 0, 100);
    progress.setMillisToPopup(0);
    progress.setMillisToDecideToPopup(0);
    progress.setProgress(0);
    try {

        progress.setNote("checking Quizlet account");
        progress.setProgress(5);

        Preferences prefs = kindleclippings.quizlet.QuizletSync.getPrefs();

        QuizletAPI api = new QuizletAPI(prefs.get("access_token", null));

        Collection<TermSet> sets = null;
        try {
            progress.setNote("checking Quizlet library");
            progress.setProgress(10);
            sets = api.getSets(prefs.get("user_id", null));
        } catch (IOException e) {
            if (e.toString().contains("401")) {
                // Not Authorized => Token has been revoked: drop the stored preferences,
                // fetch them again and retry once with the new token
                kindleclippings.quizlet.QuizletSync.clearPrefs();
                prefs = kindleclippings.quizlet.QuizletSync.getPrefs();
                api = new QuizletAPI(prefs.get("access_token", null));
                sets = api.getSets(prefs.get("user_id", null));
            } else {
                throw e;
            }
        }

        // the first 15 progress units cover the account checks; each file adds 10 more
        progress.setProgress(15);
        progress.setMaximum(15 + clf.length * 10);
        progress.setNote("uploading new notes");

        int pro = 15;

        int addedSets = 0;
        int updatedTerms = 0;
        int updatedSets = 0;

        for (File f : clf) {
            progress.setProgress(pro);
            List<Clipping> clippings = readClippingsFile(f);

            // nothing usable in this file (unreadable, empty, or a single clipping)
            if (clippings == null || clippings.size() < 2) {
                pro += 10;
                continue;
            }

            String book = clippings.get(0).getBook();
            progress.setNote(book);

            // find an existing set whose normalised title equals the normalised book name
            TermSet termSet = null;
            String x = book.toLowerCase().replaceAll("\\W", "");

            for (TermSet t : sets) {
                if (t.getTitle().toLowerCase().replaceAll("\\W", "").equals(x)) {
                    termSet = t;
                    break;
                }
            }

            if (termSet == null) {
                addSet(api, book, clippings);
                addedSets++;
                pro += 10;
                continue;
            }

            // compare against existing terms
            boolean hasUpdated = false;
            for (Clipping cl : clippings) {
                if (!kindleclippings.quizlet.QuizletSync.checkExistingTerm(cl, termSet)) {
                    kindleclippings.quizlet.QuizletSync.addTerm(api, termSet, cl);
                    updatedTerms++;
                    hasUpdated = true;
                }
            }

            pro += 10;

            if (hasUpdated) {
                updatedSets++;
            }
        }

        // INFORMATION_MESSAGE is the proper message type here; the previously used
        // OK_OPTION is an option constant whose value equals ERROR_MESSAGE
        if (updatedTerms == 0 && addedSets == 0) {
            JOptionPane.showMessageDialog(null, "Done.\nNo new data was uploaded", "QuizletSync",
                    JOptionPane.INFORMATION_MESSAGE);
        } else if (addedSets > 0) {
            // argument order follows the placeholders: new sets, cards, existing sets
            JOptionPane.showMessageDialog(null,
                    String.format("Done.\nCreated %d new sets and added %d cards to %d existing sets",
                            addedSets, updatedTerms, updatedSets),
                    "QuizletSync", JOptionPane.INFORMATION_MESSAGE);
        } else {
            JOptionPane.showMessageDialog(null,
                    String.format("Done.\nAdded %d cards to %d existing sets", updatedTerms, updatedSets),
                    "QuizletSync", JOptionPane.INFORMATION_MESSAGE);
        }
    } finally {
        progress.close();
    }

    System.exit(0);
}

From source file:com.topsem.common.io.excel.ExportExcel.java

/**
 * Demo entry point: fills an {@code ExportExcel} with a ten-column header and one hundred
 * rows that all share the same value list, then calls
 * {@code writeFile("target/export.xlsx")} and {@code dispose()}.
 *
 * @param args unused
 * @throws Throwable whatever the exporter calls propagate
 */
public static void main(String[] args) throws Throwable {

    List<String> headers = Lists.newArrayList();
    for (int column = 1; column <= 10; column++) {
        headers.add(String.valueOf(column));
    }

    // one value per header column
    List<String> sampleRow = Lists.newArrayList();
    int columnCount = headers.size();
    for (int column = 1; column <= columnCount; column++) {
        sampleRow.add("?" + column);
    }

    // all rows refer to the same sampleRow instance
    List<List<String>> rows = Lists.newArrayList();
    for (int n = 0; n < 100; n++) {
        rows.add(sampleRow);
    }

    ExportExcel exporter = new ExportExcel("", headers);

    for (List<String> values : rows) {
        Row sheetRow = exporter.addRow();
        int cellIndex = 0;
        for (String value : values) {
            exporter.addCell(sheetRow, cellIndex, value);
            cellIndex++;
        }
    }

    exporter.writeFile("target/export.xlsx");

    exporter.dispose();

    log.debug("Export success.");

}

From source file:edu.osu.ling.pep.Pep.java

/**
 * Invokes Pep from the command line./*from  w ww  . ja v a 2  s .c o  m*/
 * <p>
 * The main work this method does, apart from tokenizing the arguments and
 * input tokens, is to load and parse the XML grammar file (as specified by
 * <code>-g</code> or <code>--grammar</code>). If any of the arguments
 * <code>-g</code>, <code>--grammar</code>, <code>-s</code>,
 * <code>--seed</code>, <code>-o</code>, <code>--option</code>, occur with
 * no argument following, this method prints an error notifying the user.
 * 
 * @param args
 *            The expected arguments are as follows, and can occur in any
 *            particular order:
 *            <ul>
 *            <li><code>-g|--grammar &lt;grammar file&gt;</code></li> <li>
 *            <code>-s|--seed &lt;seed category&gt;</code></li> <li><code>
 *            -v|--verbose {verbosity level}</code></li> <li><code>
 *            -o|--option &lt;OPTION_NAME=value&gt;</code></li> <li><code>
 *            -h|--help (prints usage information)</code></li> <li><code>
 *            &lt;token1 ... token<em>n</em>&gt;</code> (or <code>-</code>
 *            for standard input)</li>
 *            </ul>
 *            <code>OPTION_NAME</code> must be the name of one of the
 *            recognized {@link ParserOption options}. If <code>-h</code> or
 *            <code>--help</code> occur anywhere in the arguments, usage
 *            information is printed and no parsing takes place.
 */
@SuppressWarnings("static-access")
public static final void main(final String[] args) {
    try {
        final Options opts = new Options();

        opts.addOption(OptionBuilder.withLongOpt("grammar").withDescription("the grammar to use").hasArg()
                .isRequired().withArgName("grammar file").create('g'));

        opts.addOption(OptionBuilder.withLongOpt("seed").withDescription("the seed category to parse for")
                .hasArg().isRequired().withArgName("seed category").create('s'));

        opts.addOption(OptionBuilder.withLongOpt("verbose").withDescription("0-3").hasOptionalArg()
                .withArgName("verbosity level").create('v'));

        opts.addOption(OptionBuilder.withLongOpt("option").withDescription("sets parser options")
                .withArgName("OPTION=value").hasArgs(2).withValueSeparator()
                .withDescription("use value for given property").create("o"));

        opts.addOption(OptionBuilder.withLongOpt("help").withDescription("prints this message").create('h'));

        final CommandLineParser parser = new GnuParser();
        try {
            final CommandLine line = parser.parse(opts, args);
            if (line.hasOption('h')) {
                Pep.printHelp(opts);
            } else {
                final int v = Integer.parseInt(line.getOptionValue('v', Integer.toString(Pep.V_PARSE)));
                if (v < 0) {
                    throw new PepException("verbosity < 0: " + v);
                }

                Pep.verbosity = v;
                final Map<ParserOption, Boolean> options = new EnumMap<ParserOption, Boolean>(
                        ParserOption.class);

                final Properties props = line.getOptionProperties("o");
                for (final Object key : props.keySet()) {
                    try {
                        options.put(ParserOption.valueOf(key.toString()),
                                Boolean.valueOf(props.get(key).toString()));
                    } catch (final IllegalArgumentException iae) {
                        Pep.printError("no option named " + key.toString());
                        Pep.printHelp(opts);
                        return;
                    }
                }

                final Pep pep = new Pep(options);
                // final Grammar grammar =
                // new GrammarParser(Pep.findGrammar(line
                // .getOptionValue('g'))).t.parse();

                final List<?> ts = line.getArgList();
                List<String> tokens = null;
                if (ts.isEmpty() || ts.get(0).equals("-")) {
                    tokens = Pep.readTokens(new Scanner(System.in));
                } else {
                    tokens = new ArrayList<String>(ts.size());
                    for (final Object t : ts) {
                        tokens.add(t.toString());
                    }
                }

                pep.lastParseStart = System.currentTimeMillis();
                // try {
                // pep.parse(grammar, tokens,
                // new Category(line.getOptionValue('s')));
                // } catch (final PepException ignore) {
                // // ignore here, we're listening
                // }
            }
        } catch (final ParseException pe) {
            Pep.printError("command-line syntax problem: " + pe.getMessage());
            Pep.printHelp(opts);
        }
    } catch (final PepException pe) {
        final Throwable cause = pe.getCause();
        Pep.printError((cause == null) ? pe : cause);
    } catch (final RuntimeException re) {
        Pep.printError(re);
    }
}

From source file:eu.fbk.dh.tint.tokenizer.ItalianTokenizer.java

/**
 * Tokenizes a fixed sample sentence, prints the tokens of at most the first ten sentences
 * and then some summary figures (text length, elapsed milliseconds, sentence count and the
 * index of the last token).
 *
 * @param argv unused
 * @throws IOException declared by the original entry point
 */
public static void main(String argv[]) throws IOException {

    ItalianTokenizer tokenizer = new ItalianTokenizer();

    //        byte[] file = Files.readAllBytes((new File("/Users/alessio/Desktop/milano.txt")).toPath());
    //        String text = new String(file);
    String text = "Clinton in testa nei sondaggi dopo lassoluzione dellFbi sulluso di un server di posta privato quando era Segretario di stato.";
    //        text = "``Determinato, pronto a fare tutto il necessario per mantenere la stabilit dei prezzi.''"
    //                + " Ma anche allarmato per come le conseguenze del referendum britannico minacciano leconomia e i mercati europei."
    //                + " Sono nato nel 200 S.p.A."
    //                + " Il mio indirizzo e-mail  alessio@apnetwork.it."
    //                + " Il blog  http://www.ziorufus.it e mi piace molto.";
    //        text = "Questo  un test per una sigla qualsiasi tipo a.B.C. che non ha senso.";
    //        text = "Milano (/milano/ ascolta[?info], in milanese: Milan[4], /mil?/[5])  una citt italiana di 1 346 153 abitanti[2], capoluogo dell'omonima citt metropolitana e della regione Lombardia, secondo comune italiano per numero di abitanti, tredicesimo comune dell'Unione europea e diciannovesimo del continente e, con l'agglomerato urbano, quarta area metropolitana pi popolata d'Europa dopo Londra, Madrid e Parigi[6].\n"
    //                + "\n"
    //                + "Fondata dagli Insubri all'inizio del VI secolo a.C.[7], fu conquistata dai Romani nel 222 a.C.";

    //        System.out.println(text);

    // time only the parse call
    long startedAt = System.currentTimeMillis();
    List<List<CoreLabel>> sentences = tokenizer.parse(text);
    long elapsed = System.currentTimeMillis() - startedAt;

    // print at most the first ten sentences, one token per line
    int printed = 0;
    for (List<CoreLabel> sentence : sentences) {
        if (printed >= 10) {
            break;
        }
        for (CoreLabel token : sentence) {
            System.out.println(token.word() + " -- " + token.originalText() + " -- " + token.beginPosition());

        }
        System.out.println();
        printed++;
    }

    // the index of the very last token is reported as the token count
    int sentenceCount = sentences.size();
    List<CoreLabel> lastSentence = sentences.get(sentenceCount - 1);
    int lastTokenIndex = lastSentence.get(lastSentence.size() - 1).index();
    System.out.println("Length: " + text.length());
    System.out.println("Time: " + elapsed);
    System.out.println("Sentences: " + sentenceCount);
    System.out.println("Tokens: " + lastTokenIndex);
}

From source file:de.tudarmstadt.ukp.experiments.argumentation.convincingness.sampling.Step5bAgreementMeasures.java

/**
 * Loads all annotated argument pairs from the XML files in {@code args[0]} and, for each of
 * the {@code TOP_K_VOTES} best-ranked workers per pair, accumulates a confusion matrix
 * against the gold label and statistics of the worker's rank. The results are printed to
 * standard output.
 *
 * @param args {@code args[0]} is the input directory
 * @throws Exception if reading the input fails
 */
@SuppressWarnings("unchecked")
public static void main(String[] args) throws Exception {
    String inputDir = args[0];

    // gather the annotated pairs of every XML file
    List<AnnotatedArgumentPair> allPairs = new ArrayList<>();
    Collection<File> xmlFiles = IOHelper.listXmlFiles(new File(inputDir));
    for (File xmlFile : xmlFiles) {
        List<AnnotatedArgumentPair> loaded = (List<AnnotatedArgumentPair>) XStreamTools.getXStream()
                .fromXML(xmlFile);
        allPairs.addAll(loaded);
    }

    // position n -> rank statistics of the n-th best worker on a HIT
    SortedMap<Integer, DescriptiveStatistics> rankStatsByPosition = new TreeMap<>();
    // position n -> confusion matrix (gold label vs. vote) of the n-th best worker on a HIT
    SortedMap<Integer, ConfusionMatrix> confusionByPosition = new TreeMap<>();

    for (int position = 0; position < TOP_K_VOTES; position++) {
        rankStatsByPosition.put(position, new DescriptiveStatistics());
        confusionByPosition.put(position, new ConfusionMatrix());
    }

    for (AnnotatedArgumentPair pair : allPairs) {
        // votes keyed by worker rank, so iteration goes in ascending rank order
        SortedMap<Integer, String> voteByRank = new TreeMap<>();

        System.out.println(pair.mTurkAssignments.size());

        for (AnnotatedArgumentPair.MTurkAssignment assignment : pair.mTurkAssignments) {
            voteByRank.put(assignment.getTurkRank(), assignment.getValue());
        }

        String gold = pair.getGoldLabel();

        System.out.println(voteByRank);

        // votes and ranks of the first TOP_K_VOTES workers
        List<String> allVotes = new ArrayList<>(voteByRank.values());
        List<String> topVotes = allVotes.subList(0, TOP_K_VOTES);

        List<Integer> allRanks = new ArrayList<>(voteByRank.keySet());
        List<Integer> topRanks = allRanks.subList(0, TOP_K_VOTES);

        System.out.println("Top K votes: " + topVotes);
        System.out.println("Top K ranks: " + topRanks);

        // keep only the third "_"-separated part of each vote (a1, a2, or equal)
        List<String> topCategories = new ArrayList<>();
        for (String vote : topVotes) {
            String[] parts = vote.split("_");
            topCategories.add(parts[2]);
        }

        System.out.println("Top " + TOP_K_VOTES + " workers' decisions: " + topCategories);

        if (gold == null) {
            System.out.println("No gold label estimate for " + pair.getId());
            continue;
        }

        for (int position = 0; position < TOP_K_VOTES; position++) {
            confusionByPosition.get(position).increaseValue(gold, topCategories.get(position));

            // stored ranks are shifted by one so that reported ranks do not start at zero
            rankStatsByPosition.get(position).addValue(topRanks.get(position) + 1);
        }
    }

    for (int position = 0; position < TOP_K_VOTES; position++) {
        System.out.println("n-th worker : " + (position + 1) + " -----------");
        System.out.println(confusionByPosition.get(position).printNiceResults());
        System.out.println(confusionByPosition.get(position));
        System.out.println("Average rank: " + rankStatsByPosition.get(position).getMean() + ", stddev "
                + rankStatsByPosition.get(position).getStandardDeviation());
    }

}

From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step5LinguisticPreprocessing.java

/**
 * Runs linguistic preprocessing over every XML query container in the input directory.
 * <p>
 * For each ranked result with non-null plain text, every line must start with an opening
 * HTML tag (matched by {@code OPENING_TAG_PATTERN}); the tag is remembered, stripped from
 * the line, and the non-empty cleaned lines are joined into one document. The document is
 * annotated with {@code WebParagraphAnnotator}, the original tags are copied onto the
 * resulting paragraphs, {@code StanfordSegmenter} is run on them, and the CAS is serialized
 * to XMI and stored base64-encoded in {@code originalXmi}. Each container is then written
 * to the output directory as {@code <qID>.xml}.
 *
 * @param args {@code args[0]} input directory with XML query containers,
 *             {@code args[1]} output directory (created if missing)
 * @throws Exception if the output directory cannot be created, on I/O or pipeline errors,
 *                   when a line has no opening tag, or when the number of annotated
 *                   paragraphs differs from the number of kept lines
 */
public static void main(String[] args) throws Exception {
    // input dir - list of xml query containers
    // step4-boiler-plate/
    File inputDir = new File(args[0]);

    // output dir; fail fast instead of discovering an unusable directory on the first write
    File outputDir = new File(args[1]);
    if (!outputDir.exists() && !outputDir.mkdirs()) {
        throw new java.io.IOException("Cannot create directory " + outputDir);
    }

    // iterate over query containers
    for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) {
        QueryResultContainer queryResultContainer = QueryResultContainer
                .fromXML(FileUtils.readFileToString(f, "utf-8"));

        for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) {
            //                System.out.println(rankedResults.plainText);

            if (rankedResults.plainText != null) {
                String[] lines = StringUtils.split(rankedResults.plainText, "\n");

                // collecting all cleaned lines
                List<String> cleanLines = new ArrayList<>(lines.length);
                // collecting line tags; kept index-aligned with cleanLines
                List<String> lineTags = new ArrayList<>(lines.length);

                for (String line : lines) {
                    // get the tag
                    String tag = null;
                    Matcher m = OPENING_TAG_PATTERN.matcher(line);

                    if (m.find()) {
                        tag = m.group(1);
                    }

                    if (tag == null) {
                        throw new IllegalArgumentException("No html tag found for line:\n" + line);
                    }

                    // replace the tag at the beginning and the end
                    String noTagText = line.replaceAll("^<\\S+>", "").replaceAll("</\\S+>$", "");

                    // do some html cleaning
                    noTagText = noTagText.replaceAll("&nbsp;", " ");

                    noTagText = noTagText.trim();

                    // add to the output
                    if (!noTagText.isEmpty()) {
                        cleanLines.add(noTagText);
                        lineTags.add(tag);
                    }
                }

                if (cleanLines.isEmpty()) {
                    // the document is empty
                    System.err.println("Document " + rankedResults.clueWebID + " in query "
                            + queryResultContainer.qID + " is empty");
                } else {
                    // now join them back to paragraphs
                    String text = StringUtils.join(cleanLines, "\n");

                    // create JCas
                    JCas jCas = JCasFactory.createJCas();
                    jCas.setDocumentText(text);
                    jCas.setDocumentLanguage("en");

                    // annotate WebParagraph
                    SimplePipeline.runPipeline(jCas,
                            AnalysisEngineFactory.createEngineDescription(WebParagraphAnnotator.class));

                    // fill the original tag information
                    List<WebParagraph> webParagraphs = new ArrayList<>(
                            JCasUtil.select(jCas, WebParagraph.class));

                    // they must be the same size as original ones
                    if (webParagraphs.size() != lineTags.size()) {
                        throw new IllegalStateException(
                                "Different size of annotated paragraphs and original lines");
                    }

                    for (int i = 0; i < webParagraphs.size(); i++) {
                        WebParagraph p = webParagraphs.get(i);
                        // get tag
                        String tag = lineTags.get(i);

                        p.setOriginalHtmlTag(tag);
                    }

                    SimplePipeline.runPipeline(jCas,
                            AnalysisEngineFactory.createEngineDescription(StanfordSegmenter.class,
                                    // only on existing WebParagraph annotations
                                    StanfordSegmenter.PARAM_ZONE_TYPES, WebParagraph.class.getCanonicalName()));

                    // now convert to XMI
                    ByteArrayOutputStream byteOutputStream = new ByteArrayOutputStream();
                    XmiCasSerializer.serialize(jCas.getCas(), byteOutputStream);

                    // encode to base64
                    // NOTE(review): BASE64Encoder looks like the JDK-internal sun.misc class, which
                    // newer JDKs no longer ship - confirm and consider java.util.Base64
                    String encoded = new BASE64Encoder().encode(byteOutputStream.toByteArray());

                    rankedResults.originalXmi = encoded;
                }
            }
        }

        // and save the query to output dir
        File outputFile = new File(outputDir, queryResultContainer.qID + ".xml");
        FileUtils.writeStringToFile(outputFile, queryResultContainer.toXML(), "utf-8");
        System.out.println("Finished " + outputFile);
    }

}