Example usage for java.io PrintWriter printf

List of usage examples for java.io PrintWriter printf

Introduction

In this page you can find the example usage for java.io PrintWriter printf.

Prototype

public PrintWriter printf(String format, Object... args) 

Source Link

Document

A convenience method to write a formatted string to this writer using the specified format string and arguments.

Usage

From source file:de.tudarmstadt.ukp.dariah.IO.DARIAHWriter.java

private void convert(JCas aJCas, PrintWriter aOut) {
    int paragraphId = 0, sentenceId = 0, tokenId = 0;

    Map<Token, Collection<NamedEntity>> neCoveringMap = JCasUtil.indexCovering(aJCas, Token.class,
            NamedEntity.class);
    Map<Token, Collection<Chunk>> chunksCoveringMap = JCasUtil.indexCovering(aJCas, Token.class, Chunk.class);

    Map<Token, Collection<Section>> sectionCoveringMap = JCasUtil.indexCovering(aJCas, Token.class,
            Section.class);
    Map<Token, Collection<DirectSpeech>> directSpeechCoveringMap = JCasUtil.indexCovering(aJCas, Token.class,
            DirectSpeech.class);

    Map<Token, Collection<SemanticPredicate>> predIdx = JCasUtil.indexCovered(aJCas, Token.class,
            SemanticPredicate.class);

    Map<SemanticPredicate, Collection<Token>> pred2TokenIdx = JCasUtil.indexCovering(aJCas,
            SemanticPredicate.class, Token.class);

    Map<SemanticArgument, Collection<Token>> argIdx = JCasUtil.indexCovered(aJCas, SemanticArgument.class,
            Token.class);

    //Coreference
    Map<Token, Collection<CoreferenceLink>> corefLinksCoveringMap = JCasUtil.indexCovering(aJCas, Token.class,
            CoreferenceLink.class);
    HashMap<CoreferenceLink, CoreferenceChain> linkToChainMap = new HashMap<>();
    HashMap<CoreferenceChain, Integer> corefChainToIntMap = new HashMap<>();

    int corefChainId = 0;
    for (CoreferenceChain chain : JCasUtil.select(aJCas, CoreferenceChain.class)) {

        CoreferenceLink link = chain.getFirst();
        int count = 0;
        while (link != null) {
            linkToChainMap.put(link, chain);
            link = link.getNext();// ww  w  .  jav a2  s.  c o m
            count++;
        }
        if (count > 0) {
            corefChainToIntMap.put(chain, corefChainId);
            corefChainId++;
        }
    }

    HashMap<Token, Row> ctokens = new LinkedHashMap<Token, Row>();

    Collection<Paragraph> paragraphs = select(aJCas, Paragraph.class);
    Collection<Sentence> sentences = select(aJCas, Sentence.class);
    TreeSet<Integer> sentenceEnds = new TreeSet<>();

    for (Sentence sentence : sentences) {
        sentenceEnds.add(sentence.getEnd());
    }

    for (Paragraph paragraph : paragraphs) {
        sentenceEnds.add(paragraph.getEnd());
    }

    for (Paragraph para : select(aJCas, Paragraph.class)) {

        for (Sentence sentence : selectCovered(Sentence.class, para)) {

            // Tokens
            List<Token> tokens = selectCovered(Token.class, sentence);

            // Check if we should try to include the morphology in output
            List<Morpheme> morphologies = selectCovered(Morpheme.class, sentence);
            boolean useMorphology = tokens.size() == morphologies.size();

            // Check if we should try to include the morphology in output
            List<Hyphenation> hyphenations = selectCovered(Hyphenation.class, sentence);
            boolean useHyphenation = tokens.size() == hyphenations.size();

            //Parsing information
            String[] parseFragments = null;
            List<ROOT> root = selectCovered(ROOT.class, sentence);
            if (root.size() == 1) {
                PennTreeNode rootNode = PennTreeUtils.convertPennTree(root.get(0));
                if ("ROOT".equals(rootNode.getLabel())) {
                    rootNode.setLabel("TOP");
                }
                parseFragments = toPrettyPennTree(rootNode);
            }
            boolean useParseFragements = (parseFragments != null && parseFragments.length == tokens.size());

            List<SemanticPredicate> preds = selectCovered(SemanticPredicate.class, sentence);

            for (int i = 0; i < tokens.size(); i++) {
                Row row = new Row();

                row.paragraphId = paragraphId;
                row.sentenceId = sentenceId;
                row.tokenId = tokenId;
                row.token = tokens.get(i);
                row.args = new SemanticArgument[preds.size()];

                if (useParseFragements) {
                    row.parseFragment = parseFragments[i];
                }

                if (useMorphology) {
                    row.morphology = morphologies.get(i);
                }

                if (useHyphenation) {
                    row.hyphenation = hyphenations.get(i);
                }

                // Section ID
                Collection<Section> section = sectionCoveringMap.get(row.token);
                if (section.size() > 0)
                    row.sectionId = section.toArray(new Section[0])[0].getValue();

                // Named entities
                Collection<NamedEntity> ne = neCoveringMap.get(row.token);
                if (ne.size() > 0)
                    row.ne = ne.toArray(new NamedEntity[0])[0];

                // Chunk
                Collection<Chunk> chunks = chunksCoveringMap.get(row.token);
                if (chunks.size() > 0)
                    row.chunk = chunks.toArray(new Chunk[0])[0];

                //Quote annotation
                Collection<DirectSpeech> ds = directSpeechCoveringMap.get(row.token);
                if (ds.size() > 0)
                    row.directSpeech = ds.toArray(new DirectSpeech[0])[0];

                //Coref
                Collection<CoreferenceLink> corefLinks = corefLinksCoveringMap.get(row.token);
                row.corefChains = UNUSED;
                if (corefLinks.size() > 0) {

                    String[] chainIds = new String[corefLinks.size()];
                    //                  StringBuilder chainIdsStr = new StringBuilder();

                    int k = 0;
                    for (CoreferenceLink link : corefLinks) {
                        CoreferenceChain chain = linkToChainMap.get(link);
                        int chainId = corefChainToIntMap.get(chain);

                        //chainIds[k++] = chainId;

                        String BIOMarker = "I";
                        if (link.getCoveredText().substring(0, row.token.getCoveredText().length())
                                .equals(row.token.getCoveredText())) {
                            BIOMarker = "B";
                        }
                        chainIds[k++] = BIOMarker + "-" + chainId;
                    }

                    //Sort without the BIO marker
                    Arrays.sort(chainIds, new Comparator<String>() {
                        public int compare(String idx1, String idx2) {
                            Integer id1 = new Integer(idx1.substring(2));
                            Integer id2 = new Integer(idx2.substring(2));

                            return Integer.compare(id1, id2);
                        }
                    });

                    StringBuilder chainIdsStr = new StringBuilder();
                    for (String chainId : chainIds) {
                        chainIdsStr.append(chainId + ",");
                    }

                    row.corefChains = chainIdsStr.substring(0, chainIdsStr.length() - 1);
                }

                //Predicate
                Collection<SemanticPredicate> predsForToken = predIdx.get(row.token);
                if (predsForToken != null && !predsForToken.isEmpty()) {
                    row.pred = predsForToken.iterator().next();
                }

                ctokens.put(row.token, row);
                tokenId++;
            }

            // Dependencies
            for (Dependency rel : selectCovered(Dependency.class, sentence)) {
                ctokens.get(rel.getDependent()).deprel = rel;
            }

            // Semantic arguments
            for (int p = 0; p < preds.size(); p++) {
                FSArray args = preds.get(p).getArguments();

                //Set the column position info
                Collection<Token> tokensOfPredicate = pred2TokenIdx.get(preds.get(p));
                for (Token t : tokensOfPredicate) {
                    Row row = ctokens.get(t);
                    row.semanticArgIndex = p;
                }

                //Set the arguments information
                for (SemanticArgument arg : select(args, SemanticArgument.class)) {
                    for (Token t : argIdx.get(arg)) {
                        Row row = ctokens.get(t);
                        row.args[p] = arg;
                    }
                }
            }

            sentenceId++;
        }
        paragraphId++;
    }

    // Write to output file
    int maxPredArguments = 0;
    for (Row row : ctokens.values()) {
        maxPredArguments = Math.max(maxPredArguments, row.args.length);
    }

    aOut.printf("%s\n", StringUtils.join(getHeader(maxPredArguments), "\t").trim());

    for (Row row : ctokens.values()) {
        String[] output = getData(ctokens, maxPredArguments, row);
        aOut.printf("%s\n", StringUtils.join(output, "\t").trim());
    }

}