Example usage for org.apache.commons.io FileUtils readFileToString

Introduction

On this page you can find example usages of org.apache.commons.io.FileUtils readFileToString.

Prototype

public static String readFileToString(File file, String encoding) throws IOException 

Document

Reads the contents of a file into a String using the specified character encoding.
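
A minimal, self-contained sketch of the call (the file name below is hypothetical; newer Commons IO releases also provide a Charset overload of readFileToString, shown for comparison):

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.commons.io.FileUtils;

public class ReadFileToStringExample {
    public static void main(String[] args) throws IOException {
        // Hypothetical input file; replace with a real path.
        File file = new File("example.txt");

        // Read the entire file into a String using the named encoding.
        String content = FileUtils.readFileToString(file, "utf-8");
        System.out.println(content);

        // Equivalent call using the Charset overload available in newer
        // Commons IO releases.
        String contentViaCharset = FileUtils.readFileToString(file, StandardCharsets.UTF_8);
        System.out.println(contentViaCharset);
    }
}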

Usage

From source file:de.tudarmstadt.ukp.experiments.argumentation.convincingness.sampling.Step5GoldLabelEstimator.java

@SuppressWarnings("unchecked")
public static void main(String[] args) throws Exception {
    String inputDir = args[0];
    File outputDir = new File(args[1]);

    if (!outputDir.exists()) {
        outputDir.mkdirs();
    }

    // we will process only a subset first
    List<AnnotatedArgumentPair> allArgumentPairs = new ArrayList<>();

    Collection<File> files = IOHelper.listXmlFiles(new File(inputDir));

    for (File file : files) {
        allArgumentPairs.addAll((List<AnnotatedArgumentPair>) XStreamTools.getXStream().fromXML(file));
    }

    // collect turkers and csv
    List<String> turkerIDs = extractAndSortTurkerIDs(allArgumentPairs);
    String preparedCSV = prepareCSV(allArgumentPairs, turkerIDs);

    // save CSV and run MACE
    Path tmpDir = Files.createTempDirectory("mace");
    File maceInputFile = new File(tmpDir.toFile(), "input.csv");
    FileUtils.writeStringToFile(maceInputFile, preparedCSV, "utf-8");

    File outputPredictions = new File(tmpDir.toFile(), "predictions.txt");
    File outputCompetence = new File(tmpDir.toFile(), "competence.txt");

    // run MACE
    MACE.main(new String[] { "--iterations", "500", "--threshold", String.valueOf(MACE_THRESHOLD), "--restarts",
            "50", "--outputPredictions", outputPredictions.getAbsolutePath(), "--outputCompetence",
            outputCompetence.getAbsolutePath(), maceInputFile.getAbsolutePath() });

    // read back the predictions and competence
    List<String> predictions = FileUtils.readLines(outputPredictions, "utf-8");

    // check the output
    if (predictions.size() != allArgumentPairs.size()) {
        throw new IllegalStateException("Wrong size of the predicted file; expected " + allArgumentPairs.size()
                + " lines but was " + predictions.size());
    }

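    // Featured call: read the MACE competence output (tab-separated) into a single String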
    String competenceRaw = FileUtils.readFileToString(outputCompetence, "utf-8");
    String[] competence = competenceRaw.split("\t");
    if (competence.length != turkerIDs.size()) {
        throw new IllegalStateException(
                "Expected " + turkerIDs.size() + " competence numbers, got " + competence.length);
    }

    // rank turkers by competence
    Map<String, Double> turkerIDCompetenceMap = new TreeMap<>();
    for (int i = 0; i < turkerIDs.size(); i++) {
        turkerIDCompetenceMap.put(turkerIDs.get(i), Double.valueOf(competence[i]));
    }

    // sort by value descending
    Map<String, Double> sortedCompetences = IOHelper.sortByValue(turkerIDCompetenceMap, false);
    System.out.println("Sorted turker competences: " + sortedCompetences);

    // assign the gold label and competence

    for (int i = 0; i < allArgumentPairs.size(); i++) {
        AnnotatedArgumentPair annotatedArgumentPair = allArgumentPairs.get(i);
        String goldLabel = predictions.get(i).trim();

        // might be empty
        if (!goldLabel.isEmpty()) {
            // the gold label has the format aXXX_aYYY_a1, aXXX_aYYY_a2, or aXXX_aYYY_equal;
            // keep the full gold label as-is
            annotatedArgumentPair.setGoldLabel(goldLabel);
        }

        // update turker competence
        for (AnnotatedArgumentPair.MTurkAssignment assignment : annotatedArgumentPair.mTurkAssignments) {
            String turkID = assignment.getTurkID();

            int turkRank = getTurkerRank(turkID, sortedCompetences);
            assignment.setTurkRank(turkRank);

            double turkCompetence = turkerIDCompetenceMap.get(turkID);
            assignment.setTurkCompetence(turkCompetence);
        }
    }

    // now sort the data back according to their original file name
    Map<String, List<AnnotatedArgumentPair>> fileNameAnnotatedPairsMap = new HashMap<>();
    for (AnnotatedArgumentPair argumentPair : allArgumentPairs) {
        String fileName = IOHelper.createFileName(argumentPair.getDebateMetaData(),
                argumentPair.getArg1().getStance());

        if (!fileNameAnnotatedPairsMap.containsKey(fileName)) {
            fileNameAnnotatedPairsMap.put(fileName, new ArrayList<AnnotatedArgumentPair>());
        }

        fileNameAnnotatedPairsMap.get(fileName).add(argumentPair);
    }

    // and save them to the output file
    for (Map.Entry<String, List<AnnotatedArgumentPair>> entry : fileNameAnnotatedPairsMap.entrySet()) {
        String fileName = entry.getKey();
        List<AnnotatedArgumentPair> argumentPairs = entry.getValue();

        File outputFile = new File(outputDir, fileName);

        // and save all sampled pairs into a XML file
        XStreamTools.toXML(argumentPairs, outputFile);

        System.out.println("Saved " + argumentPairs.size() + " pairs to " + outputFile);
    }

}

From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step6HITPreparator.java

public static void main(String[] args) throws Exception {
    // input dir - list of xml query containers
    // step5-linguistic-annotation/
    System.err.println("Starting step 6 HIT Preparation");

    File inputDir = new File(args[0]);

    // output dir
    File outputDir = new File(args[1]);
    if (outputDir.exists()) {
        // File.delete() fails silently on a non-empty directory; delete recursively instead
        FileUtils.deleteDirectory(outputDir);
    }
    outputDir.mkdir();

    List<String> queries = new ArrayList<>();

    // iterate over query containers
    int countClueWeb = 0;
    int countSentence = 0;
    for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) {
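        // Featured call: read each XML query container into a UTF-8 String before parsing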
        QueryResultContainer queryResultContainer = QueryResultContainer
                .fromXML(FileUtils.readFileToString(f, "utf-8"));
        if (queries.contains(f.getName()) || queries.size() == 0) {
            // groups contain only non-empty documents
            Map<Integer, List<QueryResultContainer.SingleRankedResult>> groups = new HashMap<>();

            // split to groups according to number of sentences
            for (QueryResultContainer.SingleRankedResult rankedResult : queryResultContainer.rankedResults) {
                if (rankedResult.originalXmi != null) {
                    byte[] bytes = new BASE64Decoder()
                            .decodeBuffer(new ByteArrayInputStream(rankedResult.originalXmi.getBytes()));
                    JCas jCas = JCasFactory.createJCas();
                    XmiCasDeserializer.deserialize(new ByteArrayInputStream(bytes), jCas.getCas());

                    Collection<Sentence> sentences = JCasUtil.select(jCas, Sentence.class);

                    int groupId = sentences.size() / 40;
                    // originalXmi is known to be non-null here (checked above)
                    if (!groups.containsKey(groupId)) {
                        groups.put(groupId, new ArrayList<>());
                    }
                    groups.get(groupId).add(rankedResult);
                    countClueWeb++;
                }
            }

            for (Map.Entry<Integer, List<QueryResultContainer.SingleRankedResult>> entry : groups.entrySet()) {
                Integer groupId = entry.getKey();
                List<QueryResultContainer.SingleRankedResult> rankedResults = entry.getValue();

                // make sure the results are sorted
                // DEBUG
                //                for (QueryResultContainer.SingleRankedResult r : rankedResults) {
                //                    System.out.print(r.rank + "\t");
                //                }

                Collections.sort(rankedResults, (o1, o2) -> o1.rank.compareTo(o2.rank));

                // iterate over results for one query and group
                for (int i = 0; i < rankedResults.size() && i < TOP_RESULTS_PER_GROUP; i++) {
                    QueryResultContainer.SingleRankedResult rankedResult = rankedResults.get(i);
                    int rank = rankedResult.rank;
                    MustacheFactory mf = new DefaultMustacheFactory();
                    Mustache mustache = mf.compile("template/template.html");
                    String queryId = queryResultContainer.qID;
                    String query = queryResultContainer.query;
                    // make the first letter uppercase
                    query = query.substring(0, 1).toUpperCase() + query.substring(1);

                    List<String> relevantInformationExamples = queryResultContainer.relevantInformationExamples;
                    List<String> irrelevantInformationExamples = queryResultContainer.irrelevantInformationExamples;
                    byte[] bytes = new BASE64Decoder()
                            .decodeBuffer(new ByteArrayInputStream(rankedResult.originalXmi.getBytes()));

                    JCas jCas = JCasFactory.createJCas();
                    XmiCasDeserializer.deserialize(new ByteArrayInputStream(bytes), jCas.getCas());

                    List<generators.Sentence> sentences = new ArrayList<>();
                    List<Integer> paragraphs = new ArrayList<>();
                    paragraphs.add(0);

                    for (WebParagraph webParagraph : JCasUtil.select(jCas, WebParagraph.class)) {
                        for (Sentence s : JCasUtil.selectCovered(Sentence.class, webParagraph)) {

                            String sentenceBegin = String.valueOf(s.getBegin());
                            generators.Sentence sentence = new generators.Sentence(s.getCoveredText(),
                                    sentenceBegin);
                            sentences.add(sentence);
                            countSentence++;
                        }
                        int sentenceID = paragraphs.get(paragraphs.size() - 1);
                        if (sentences.size() > 120) {
                            while (sentenceID < sentences.size()) {
                                if (!paragraphs.contains(sentenceID)) {
                                    paragraphs.add(sentenceID);
                                }
                                sentenceID += 120;
                            }
                        }
                        paragraphs.add(sentences.size());

                    }
                    System.err.println("Output dir: " + outputDir);
                    int startID = 0;
                    int endID;

                    for (int j = 0; j < paragraphs.size(); j++) {

                        endID = paragraphs.get(j);
                        int sentLength = endID - startID;
                        if (sentLength > 120 || j == paragraphs.size() - 1) {
                            if (sentLength > 120) {

                                endID = paragraphs.get(j - 1);
                                j--;
                            }
                            sentLength = endID - startID;
                            if (sentLength <= 40) {
                                groupId = 40;
                            } else if (sentLength <= 80) {
                                groupId = 80;
                            } else {
                                groupId = 120;
                            }

                            File folder = new File(outputDir, String.valueOf(groupId));
                            if (!folder.exists()) {
                                System.err.println("Creating directory: " + folder);
                                if (folder.mkdir()) {
                                    System.out.println("DIR created");
                                }
                            }

                            String newHtmlFile = folder.getAbsolutePath() + "/" + f.getName() + "_"
                                    + rankedResult.clueWebID + "_" + sentLength + ".html";
                            System.err.println("Printing a file: " + newHtmlFile);
                            File newHTML = new File(newHtmlFile);
                            int t = 0;
                            while (newHTML.exists()) {
                                newHTML = new File(folder.getAbsolutePath() + "/" + f.getName() + "_"
                                        + rankedResult.clueWebID + "_" + sentLength + "." + t + ".html");
                                t++;
                            }
                            mustache.execute(new PrintWriter(new FileWriter(newHTML)),
                                    new generators(query, relevantInformationExamples,
                                            irrelevantInformationExamples, sentences.subList(startID, endID),
                                            queryId, rank))
                                    .flush();
                            startID = endID;
                        }
                    }
                }
            }

        }
    }
    System.out.println("Printed " + countClueWeb + " documents with " + countSentence + " sentences");
}

From source file:com.ontotext.s4.SBTDemo.Main.java

public static void main(String[] args) {
    /*
     * Read the log4j properties file.
     */
    org.apache.log4j.PropertyConfigurator.configure(args.length >= 2 ? args[1] : DEFAULT_LOG4J_FILE);

    /*
     * Read the properties file and list all annotated files; absolute
     * paths to the files should be used.
     */
    init(args);

    ProcessingDocuments processingDocuments = new ProcessingDocuments(
            programProperties.getProperty(PropertiesNames.S4_API_KEY),
            programProperties.getProperty(PropertiesNames.S4_API_PASS),
            programProperties.getProperty(PropertiesNames.RAW_FOLDER),
            programProperties.getProperty(PropertiesNames.ANNOTATED_FOLDER),
            programProperties.getProperty(PropertiesNames.SERVICE),
            programProperties.getProperty(PropertiesNames.MIME_TYPE),
            programProperties.getProperty(PropertiesNames.RESPONSE_FORMAT),
            Integer.parseInt(programProperties.getProperty(PropertiesNames.NUMBER_OF_THREADS)));

    processingDocuments.ProcessData();

    File directory = new File(programProperties.getProperty(PropertiesNames.ANNOTATED_FOLDER));
    listOfAllAnnotatedFiles = FileUtils.listFiles(directory, new RegexFileFilter("^(.*?)"),
            DirectoryFileFilter.DIRECTORY);

    RepoManager repoManager = new RepoManager(programProperties.getProperty(PropertiesNames.REPOSITORY_URL));
    JsonToRDF jsonToRdfParser = new JsonToRDF(programProperties.getProperty(PropertiesNames.MIME_TYPE),
            programProperties.getProperty(PropertiesNames.RDFIZE_FOLDER));

    for (File file : listOfAllAnnotatedFiles) {
        String fileContent = null;
        try {
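            // Featured call: read each annotated file into a String as UTF-8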
            fileContent = FileUtils.readFileToString(file, "UTF-8");
        } catch (IOException e) {
            logger.error(e);
        }

        Model graph = jsonToRdfParser.wirteDataToRDF(fileContent, file.getName(),
                programProperties.getProperty(PropertiesNames.RDFIZE_FOLDER));
        try {
            repoManager.sendDataTOGraphDB(graph);
        } catch (RepositoryException e) {
            logger.error(e);
        }

    }

    repoManager.close();
}

From source file:com.da.daum.DaumCafeParser.java

public static void main(String[] args) throws IOException {
    DaumCafeParser parser = new DaumCafeParser();
    String listBody = "(?)^*~.txt";
    listBody = listBody.replaceAll("\\*", "").replaceAll("\\*", "");
    System.out.println(listBody);
    // FileUtils.writeStringToFile(new File(file), listBody, "utf-8");
    //File file = new File("C:\\TEMP\\daum\\user\\Lak_view_.txt");
    File file = new File("C:\\TEMP\\daum\\user\\Lak_list_1.txt");
    listBody = FileUtils.readFileToString(file, "utf-8");
    Map pageMap = new HashMap();
    parser.setDaumListVoList(listBody, pageMap);
    //parser.setDaumView(listBody);
}

From source file:de.tudarmstadt.ukp.argumentation.data.roomfordebate.DataFetcher.java

public static void main(String[] args) throws Exception {
    File crawledPagesFolder = new File(args[0]);
    if (!crawledPagesFolder.exists()) {
        crawledPagesFolder.mkdirs();
    }

    File outputFolder = new File(args[1]);
    if (!outputFolder.exists()) {
        outputFolder.mkdirs();
    }

    // read links from text file
    final String urlsResourceName = "roomfordebate-urls.txt";

    InputStream urlsStream = DataFetcher.class.getClassLoader().getResourceAsStream(urlsResourceName);

    if (urlsStream == null) {
        throw new IOException("Cannot find resource " + urlsResourceName + " on the classpath");
    }

    // read list of urls
    List<String> urls = new ArrayList<>();
    LineIterator iterator = IOUtils.lineIterator(urlsStream, "utf-8");
    while (iterator.hasNext()) {
        // ignore commented url (line starts with #)
        String line = iterator.nextLine();
        if (!line.startsWith("#") && !line.trim().isEmpty()) {
            urls.add(line.trim());
        }
    }

    // download all
    crawlPages(urls, crawledPagesFolder);

    List<File> files = new ArrayList<>(FileUtils.listFiles(crawledPagesFolder, null, false));
    Collections.sort(files, new Comparator<File>() {
        @Override
        public int compare(File o1, File o2) {
            return o1.getName().compareTo(o2.getName());
        }
    });

    int idCounter = 0;

    for (File file : files) {
        NYTimesCommentsScraper commentsScraper = new NYTimesCommentsScraper();
        NYTimesArticleExtractor extractor = new NYTimesArticleExtractor();

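        // Featured call: load the crawled HTML page into a String as UTF-8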
        String html = FileUtils.readFileToString(file, "utf-8");

        idCounter++;
        File outputFileArticle = new File(outputFolder, String.format("Cx%03d.txt", idCounter));
        File outputFileComments = new File(outputFolder, String.format("Dx%03d.txt", idCounter));

        try {
            List<Comment> comments = commentsScraper.extractComments(html);
            Article article = extractor.extractArticle(html);

            saveArticleToText(article, outputFileArticle);
            System.out.println("Saved to " + outputFileArticle);

            saveCommentsToText(comments, outputFileComments, article);
            System.out.println("Saved to " + outputFileComments);
        } catch (IOException ex) {
            System.err.println(file.getName() + "\n" + ex.getMessage());
        }
    }
}

From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step8GoldDataAggregator.java

public static void main(String[] args) throws Exception {
    String inputDir = args[0] + "/";
    // output dir
    File outputDir = new File(args[1]);
    File turkersConfidence = new File(args[2]);
    if (outputDir.exists()) {
        // File.delete() fails silently on a non-empty directory; delete recursively instead
        FileUtils.deleteDirectory(outputDir);
    }
    outputDir.mkdir();

    List<String> annotatorsIDs = new ArrayList<>();
    //        for (File f : FileUtils.listFiles(new File(inputDir), new String[] { "xml" }, false)) {
    //            QueryResultContainer queryResultContainer = QueryResultContainer
    //                    .fromXML(FileUtils.readFileToString(f, "utf-8"));
    //            for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) {
    //                for (QueryResultContainer.MTurkRelevanceVote relevanceVote : rankedResults.mTurkRelevanceVotes) {
    //                    if (!annotatorsIDs.contains(relevanceVote.turkID))
    //                        annotatorsIDs.add(relevanceVote.turkID);
    //                }
    //            }
    //        }
    HashMap<String, Integer> countVotesForATurker = new HashMap<>();
    // Creates a temporary file in the format MACE expects.
    // HashMap annotations: the key is the ID of a document and a sentence;
    // the value is an array votes[] of turker decisions: true or false (relevant or not).
    // The length of this array equals the number of annotators in List<String> annotatorsIDs.
    // If an annotator worked on the task, their decision is written into the array; otherwise the value is NULL.

    // key: queryID + clueWebID + sentenceID
    // value: true and false annotations
    TreeMap<String, Annotations> annotations = new TreeMap<>();

    for (File f : FileUtils.listFiles(new File(inputDir), new String[] { "xml" }, false)) {
        QueryResultContainer queryResultContainer = QueryResultContainer
                .fromXML(FileUtils.readFileToString(f, "utf-8"));
        System.out.println("Reading " + f.getName());
        for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) {
            String documentID = rankedResults.clueWebID;
            for (QueryResultContainer.MTurkRelevanceVote relevanceVote : rankedResults.mTurkRelevanceVotes) {
                Integer turkerID;
                if (!annotatorsIDs.contains(relevanceVote.turkID)) {
                    annotatorsIDs.add(relevanceVote.turkID);
                    turkerID = annotatorsIDs.size() - 1;
                } else {
                    turkerID = annotatorsIDs.indexOf(relevanceVote.turkID);
                }
                Integer count = countVotesForATurker.get(relevanceVote.turkID);
                if (count == null) {
                    count = 0;
                }
                count++;
                countVotesForATurker.put(relevanceVote.turkID, count);

                String id;
                List<Integer> trueVotes;
                List<Integer> falseVotes;
                for (QueryResultContainer.SingleSentenceRelevanceVote singleSentenceRelevanceVote : relevanceVote.singleSentenceRelevanceVotes)
                    if (!"".equals(singleSentenceRelevanceVote.sentenceID)) {

                        id = f.getName() + "_" + documentID + "_" + singleSentenceRelevanceVote.sentenceID;
                        Annotations turkerVotes = annotations.get(id);
                        if (turkerVotes == null) {
                            trueVotes = new ArrayList<>();
                            falseVotes = new ArrayList<>();
                            turkerVotes = new Annotations(trueVotes, falseVotes);
                        }
                        trueVotes = turkerVotes.trueAnnotations;
                        falseVotes = turkerVotes.falseAnnotations;
                        if ("true".equals(singleSentenceRelevanceVote.relevant)) {
                            // votes[turkerID] = true;
                            trueVotes.add(turkerID);
                        } else if ("false".equals(singleSentenceRelevanceVote.relevant)) {
                            //   votes[turkerID] = false;
                            falseVotes.add(turkerID);
                        } else {
                            throw new IllegalStateException("Annotation value of sentence "
                                    + singleSentenceRelevanceVote.sentenceID + " in " + rankedResults.clueWebID
                                    + " equals " + singleSentenceRelevanceVote.relevant);
                        }
                        try {
                            int allVotesCount = trueVotes.size() + falseVotes.size();
                            if (allVotesCount > 5) {
                                System.err.println(id + " doesn't have 5 annotators: true: " + trueVotes.size()
                                        + " false: " + falseVotes.size());

                                // nasty hack, we're gonna strip some data; true votes first
                                /* we can't do that, it breaks something down the line
                                int toRemove = allVotesCount - 5;
                                if (trueVotes.size() >= toRemove) {
                                trueVotes = trueVotes
                                        .subList(0, trueVotes.size() - toRemove);
                                }
                                else if (
                                    falseVotes.size() >= toRemove) {
                                falseVotes = falseVotes
                                        .subList(0, trueVotes.size() - toRemove);
                                }
                                */
                                System.err.println("Adjusted: " + id + " doesn't have 5 annotators: true: "
                                        + trueVotes.size() + " false: " + falseVotes.size());
                            }
                        } catch (IllegalStateException e) {
                            e.printStackTrace();
                        }
                        turkerVotes.trueAnnotations = trueVotes;
                        turkerVotes.falseAnnotations = falseVotes;
                        annotations.put(id, turkerVotes);
                    } else {
                        throw new IllegalStateException(
                                "Empty Sentence ID in " + f.getName() + " for turker " + turkerID);
                    }

            }
        }

    }
    File tmp = printHashMap(annotations, annotatorsIDs.size());

    String file = TEMP_DIR + "/" + tmp.getName();
    MACE.main(new String[] { "--prefix", file });

    //gets the keys of the documents and sentences
    ArrayList<String> lines = (ArrayList<String>) FileUtils.readLines(new File(file + ".prediction"), "utf-8");
    int i = 0;
    TreeMap<String, TreeMap<String, ArrayList<HashMap<String, String>>>> ids = new TreeMap<>();
    ArrayList<HashMap<String, String>> sentences;
    if (lines.size() != annotations.size()) {
        throw new IllegalStateException(
                "The size of the prediction file is " + lines.size() + " but expected " + annotations.size());
    }
    for (Map.Entry entry : annotations.entrySet()) { //1001.xml_clueweb12-1905wb-13-07360_8783
        String key = (String) entry.getKey();
        String[] IDs = key.split("_");
        if (IDs.length > 2) {
            String queryID = IDs[0];
            String clueWebID = IDs[1];
            String sentenceID = IDs[2];
            TreeMap<String, ArrayList<HashMap<String, String>>> clueWebIDs = ids.get(queryID);
            if (clueWebIDs == null) {
                clueWebIDs = new TreeMap<>();
            }
            sentences = clueWebIDs.get(clueWebID);
            if (sentences == null) {
                sentences = new ArrayList<>();
            }
            HashMap<String, String> sentence = new HashMap<>();
            sentence.put(sentenceID, lines.get(i));
            sentences.add(sentence);
            clueWebIDs.put(clueWebID, sentences);
            ids.put(queryID, clueWebIDs);
        } else {
            throw new IllegalStateException("Wrong ID " + key);
        }

        i++;
    }

    for (Map.Entry entry : ids.entrySet()) {
        // the cast matches the declared value type of the ids map
        TreeMap<String, ArrayList<HashMap<String, String>>> value = (TreeMap<String, ArrayList<HashMap<String, String>>>) entry
                .getValue();
        String queryID = (String) entry.getKey();
        QueryResultContainer queryResultContainer = QueryResultContainer
                .fromXML(FileUtils.readFileToString(new File(inputDir, queryID), "utf-8"));
        for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) {
            for (Map.Entry val : value.entrySet()) {
                String clueWebID = (String) val.getKey();
                if (clueWebID.equals(rankedResults.clueWebID)) {
                    List<QueryResultContainer.SingleSentenceRelevanceVote> goldEstimatedLabels = new ArrayList<>();
                    List<QueryResultContainer.SingleSentenceRelevanceVote> turkersVotes = new ArrayList<>();
                    int size = 0;
                    int hitSize = 0;
                    String hitID = "";
                    for (QueryResultContainer.MTurkRelevanceVote vote : rankedResults.mTurkRelevanceVotes) {
                        if (!hitID.equals(vote.hitID)) {
                            hitID = vote.hitID;
                            hitSize = vote.singleSentenceRelevanceVotes.size();
                            size = size + hitSize;
                            turkersVotes.addAll(vote.singleSentenceRelevanceVotes);
                        } else {
                            if (vote.singleSentenceRelevanceVotes.size() != hitSize) {
                                hitSize = vote.singleSentenceRelevanceVotes.size();
                                size = size + hitSize;
                                turkersVotes.addAll(vote.singleSentenceRelevanceVotes);
                            }
                        }
                    }
                    ArrayList<HashMap<String, String>> sentenceList = (ArrayList<HashMap<String, String>>) val
                            .getValue();
                    if (sentenceList.size() != turkersVotes.size()) {
                        try {
                            throw new IllegalStateException("Expected size of annotations is "
                                    + turkersVotes.size() + " but found " + sentenceList.size()
                                    + " for document " + rankedResults.clueWebID + " in " + queryID);
                        } catch (IllegalStateException ex) {
                            ex.printStackTrace();
                        }
                    }
                    for (QueryResultContainer.SingleSentenceRelevanceVote s : turkersVotes) {
                        String valSentence = null;
                        for (HashMap<String, String> anno : sentenceList) {
                            if (anno.keySet().contains(s.sentenceID)) {
                                valSentence = anno.get(s.sentenceID);
                            }
                        }
                        QueryResultContainer.SingleSentenceRelevanceVote singleSentenceVote = new QueryResultContainer.SingleSentenceRelevanceVote();
                        singleSentenceVote.sentenceID = s.sentenceID;
                        if (("false").equals(valSentence)) {
                            singleSentenceVote.relevant = "false";
                        } else if (("true").equals(valSentence)) {
                            singleSentenceVote.relevant = "true";
                        } else {
                            throw new IllegalStateException("Annotation value of sentence "
                                    + singleSentenceVote.sentenceID + " equals " + val.getValue());
                        }
                        goldEstimatedLabels.add(singleSentenceVote);
                    }
                    rankedResults.goldEstimatedLabels = goldEstimatedLabels;
                }
            }
        }
        File outputFile = new File(outputDir, queryID);
        FileUtils.writeStringToFile(outputFile, queryResultContainer.toXML(), "utf-8");
        System.out.println("Finished " + outputFile);
    }

    ArrayList<String> annotators = (ArrayList<String>) FileUtils.readLines(new File(file + ".competence"), "utf-8");
    FileWriter fileWriter;
    StringBuilder sb = new StringBuilder();
    for (int j = 0; j < annotatorsIDs.size(); j++) {
        String[] s = annotators.get(0).split("\t");
        Float score = Float.parseFloat(s[j]);
        String turkerID = annotatorsIDs.get(j);
        System.out.println(turkerID + " " + score + " " + countVotesForATurker.get(turkerID));
        sb.append(turkerID).append(" ").append(score).append(" ").append(countVotesForATurker.get(turkerID))
                .append("\n");
    }
    fileWriter = new FileWriter(turkersConfidence);
    fileWriter.append(sb.toString());
    fileWriter.close();

}

From source file:com.da.daum.DaumCafeBungImgParser.java

public static void main(String[] args) throws IOException {
    DaumCafeBungImgParser parser = new DaumCafeBungImgParser();
    String listBody = "(?)^*~.txt";
    listBody = listBody.replaceAll("\\*", "").replaceAll("\\*", "");
    System.out.println(listBody);
    // FileUtils.writeStringToFile(new File(file), listBody, "utf-8");
    //File file = new File("C:\\TEMP\\daum\\user\\Lak_view_.txt");
    File file = new File("C:\\TEMP\\daum\\user\\Lak_list_1.txt");
    listBody = FileUtils.readFileToString(file, "utf-8");
    Map pageMap = new HashMap();
    parser.setDaumListVoList(listBody, pageMap);
    //parser.setDaumView(listBody);

}

From source file:com.da.daum.DaumCafeJw0Parser.java

public static void main(String[] args) throws IOException {
    DaumCafeJw0Parser parser = new DaumCafeJw0Parser();
    String listBody = "(?)^*~.txt";
    listBody = listBody.replaceAll("\\*", "").replaceAll("\\*", "");
    System.out.println(listBody);
    // FileUtils.writeStringToFile(new File(file), listBody, "utf-8");
    //File file = new File("C:\\TEMP\\daum\\user\\Lak_view_.txt");
    File file = new File("C:\\TEMP\\daum\\user\\Lak_list_1.txt");
    listBody = FileUtils.readFileToString(file, "utf-8");
    Map pageMap = new HashMap();
    parser.setDaumListVoList(listBody, pageMap);
    //parser.setDaumView(listBody);

}

From source file:com.da.daum.DaumCafeBungParser.java

public static void main(String[] args) throws IOException {
    DaumCafeBungParser parser = new DaumCafeBungParser();
    String listBody = "(?)^*~.txt";
    listBody = listBody.replaceAll("\\*", "").replaceAll("\\*", "");
    System.out.println(listBody);
    // FileUtils.writeStringToFile(new File(file), listBody, "utf-8");
    //File file = new File("C:\\TEMP\\daum\\user\\Lak_view_.txt");
    File file = new File("C:\\TEMP\\daum\\user\\Lak_list_1.txt");
    listBody = FileUtils.readFileToString(file, "utf-8");
    Map pageMap = new HashMap();
    parser.setDaumListVoList(listBody, pageMap);
    //parser.setDaumView(listBody);

}

From source file:com.da.daum.DaumOk211Parser.java

public static void main(String[] args) throws IOException {
    DaumOk211Parser parser = new DaumOk211Parser();
    String listBody = "(?)^*~.txt";
    listBody = listBody.replaceAll("\\*", "").replaceAll("\\*", "");
    System.out.println(listBody);
    // FileUtils.writeStringToFile(new File(file), listBody, "utf-8");
    //File file = new File("C:\\TEMP\\daum\\user\\Lak_view_.txt");
    File file = new File("C:\\TEMP\\daum\\user\\Lak_list_1.txt");
    listBody = FileUtils.readFileToString(file, "utf-8");
    Map pageMap = new HashMap();
    parser.setDaumListVoList(listBody, pageMap);
    //parser.setDaumView(listBody);

}