Example usage for java.io PrintWriter printf

Introduction

This page shows an example usage of the java.io.PrintWriter.printf method.

Prototype

public PrintWriter printf(String format, Object... args)

Source Link

Document

A convenience method to write a formatted string to this writer using the specified format string and arguments.

Usage

From source file: de.tudarmstadt.ukp.dariah.IO.DARIAHWriter.java

/**
 * Writes the annotations of a CAS as tab-separated lines, one line per token.
 *
 * <p>Only tokens that lie inside a {@code Sentence} which itself lies inside a
 * {@code Paragraph} are visited. For every such token a {@code Row} is filled with
 * running paragraph/sentence/token ids and the first section, named entity, chunk,
 * direct-speech and predicate annotation found for the token, plus its coreference
 * chain labels, dependency relation and semantic arguments. The first output line is
 * the header returned by {@code getHeader}; every following line is the result of
 * {@code getData} for one row, in token order.
 *
 * @param aJCas the CAS whose annotations are exported
 * @param aOut  the writer receiving the header line and the token lines
 */
private void convert(JCas aJCas, PrintWriter aOut) {
    int paragraphId = 0, sentenceId = 0, tokenId = 0;

    // Token -> annotations covering that token.
    Map<Token, Collection<NamedEntity>> neCoveringMap = JCasUtil.indexCovering(aJCas, Token.class,
            NamedEntity.class);
    Map<Token, Collection<Chunk>> chunksCoveringMap = JCasUtil.indexCovering(aJCas, Token.class, Chunk.class);
    Map<Token, Collection<Section>> sectionCoveringMap = JCasUtil.indexCovering(aJCas, Token.class,
            Section.class);
    Map<Token, Collection<DirectSpeech>> directSpeechCoveringMap = JCasUtil.indexCovering(aJCas, Token.class,
            DirectSpeech.class);

    // Token -> predicates covered by that token.
    Map<Token, Collection<SemanticPredicate>> predIdx = JCasUtil.indexCovered(aJCas, Token.class,
            SemanticPredicate.class);

    // Predicate -> tokens covering it; argument -> tokens covered by it.
    Map<SemanticPredicate, Collection<Token>> pred2TokenIdx = JCasUtil.indexCovering(aJCas,
            SemanticPredicate.class, Token.class);
    Map<SemanticArgument, Collection<Token>> argIdx = JCasUtil.indexCovered(aJCas, SemanticArgument.class,
            Token.class);

    // Coreference: number every non-empty chain and remember the chain of each link.
    Map<Token, Collection<CoreferenceLink>> corefLinksCoveringMap = JCasUtil.indexCovering(aJCas, Token.class,
            CoreferenceLink.class);
    Map<CoreferenceLink, CoreferenceChain> linkToChainMap = new HashMap<>();
    Map<CoreferenceChain, Integer> corefChainToIntMap = new HashMap<>();

    int corefChainId = 0;
    for (CoreferenceChain chain : JCasUtil.select(aJCas, CoreferenceChain.class)) {
        CoreferenceLink link = chain.getFirst();
        if (link == null) {
            // Chains without any link do not consume an id.
            continue;
        }
        corefChainToIntMap.put(chain, corefChainId);
        corefChainId++;
        for (; link != null; link = link.getNext()) {
            linkToChainMap.put(link, chain);
        }
    }

    // Insertion-ordered so that rows are written in the order the tokens were visited.
    HashMap<Token, Row> ctokens = new LinkedHashMap<Token, Row>();

    for (Paragraph para : select(aJCas, Paragraph.class)) {

        for (Sentence sentence : selectCovered(Sentence.class, para)) {

            // Tokens
            List<Token> tokens = selectCovered(Token.class, sentence);

            // Morphology is only emitted when the sentence has exactly as many morphemes as
            // tokens, because the two lists are matched by position.
            List<Morpheme> morphologies = selectCovered(Morpheme.class, sentence);
            boolean useMorphology = tokens.size() == morphologies.size();

            // Same positional matching rule for hyphenation.
            List<Hyphenation> hyphenations = selectCovered(Hyphenation.class, sentence);
            boolean useHyphenation = tokens.size() == hyphenations.size();

            // Parse fragments are only available when the sentence has exactly one ROOT and
            // the pretty-printed tree yields one fragment per token.
            String[] parseFragments = null;
            List<ROOT> root = selectCovered(ROOT.class, sentence);
            if (root.size() == 1) {
                PennTreeNode rootNode = PennTreeUtils.convertPennTree(root.get(0));
                if ("ROOT".equals(rootNode.getLabel())) {
                    rootNode.setLabel("TOP");
                }
                parseFragments = toPrettyPennTree(rootNode);
            }
            boolean useParseFragments = (parseFragments != null && parseFragments.length == tokens.size());

            List<SemanticPredicate> preds = selectCovered(SemanticPredicate.class, sentence);

            for (int i = 0; i < tokens.size(); i++) {
                Row row = new Row();

                row.paragraphId = paragraphId;
                row.sentenceId = sentenceId;
                row.tokenId = tokenId;
                row.token = tokens.get(i);
                // One argument slot per predicate of this sentence.
                row.args = new SemanticArgument[preds.size()];

                if (useParseFragments) {
                    row.parseFragment = parseFragments[i];
                }

                if (useMorphology) {
                    row.morphology = morphologies.get(i);
                }

                if (useHyphenation) {
                    row.hyphenation = hyphenations.get(i);
                }

                // Section ID
                Section section = firstCovering(sectionCoveringMap.get(row.token));
                if (section != null) {
                    row.sectionId = section.getValue();
                }

                // Named entities
                NamedEntity ne = firstCovering(neCoveringMap.get(row.token));
                if (ne != null) {
                    row.ne = ne;
                }

                // Chunk
                Chunk chunk = firstCovering(chunksCoveringMap.get(row.token));
                if (chunk != null) {
                    row.chunk = chunk;
                }

                // Quote annotation
                DirectSpeech ds = firstCovering(directSpeechCoveringMap.get(row.token));
                if (ds != null) {
                    row.directSpeech = ds;
                }

                // Coref: stays UNUSED unless at least one covering link belongs to a numbered chain.
                row.corefChains = UNUSED;
                String corefLabels = corefChainLabels(row.token, corefLinksCoveringMap.get(row.token),
                        linkToChainMap, corefChainToIntMap);
                if (corefLabels != null) {
                    row.corefChains = corefLabels;
                }

                // Predicate
                SemanticPredicate pred = firstCovering(predIdx.get(row.token));
                if (pred != null) {
                    row.pred = pred;
                }

                ctokens.put(row.token, row);
                tokenId++;
            }

            // Dependencies: attach each relation to the row of its dependent token.
            for (Dependency rel : selectCovered(Dependency.class, sentence)) {
                ctokens.get(rel.getDependent()).deprel = rel;
            }

            // Semantic arguments
            for (int p = 0; p < preds.size(); p++) {
                FSArray args = preds.get(p).getArguments();

                // Set the column position info
                Collection<Token> tokensOfPredicate = pred2TokenIdx.get(preds.get(p));
                for (Token t : tokensOfPredicate) {
                    Row row = ctokens.get(t);
                    row.semanticArgIndex = p;
                }

                // Set the arguments information
                for (SemanticArgument arg : select(args, SemanticArgument.class)) {
                    for (Token t : argIdx.get(arg)) {
                        Row row = ctokens.get(t);
                        row.args[p] = arg;
                    }
                }
            }

            sentenceId++;
        }
        paragraphId++;
    }

    // Write to output file. The header is sized for the largest per-row argument array.
    int maxPredArguments = 0;
    for (Row row : ctokens.values()) {
        maxPredArguments = Math.max(maxPredArguments, row.args.length);
    }

    aOut.printf("%s\n", StringUtils.join(getHeader(maxPredArguments), "\t").trim());

    for (Row row : ctokens.values()) {
        String[] output = getData(ctokens, maxPredArguments, row);
        aOut.printf("%s\n", StringUtils.join(output, "\t").trim());
    }

}

/**
 * Returns the first element of a collection in iteration order.
 *
 * @param annotations the collection to read; may be {@code null} or empty
 * @return the first element, or {@code null} if there is none
 */
private <T> T firstCovering(Collection<T> annotations) {
    if (annotations == null || annotations.isEmpty()) {
        return null;
    }
    return annotations.iterator().next();
}

/**
 * Builds the comma-separated coreference labels of a token.
 *
 * <p>Each label has the form {@code B-<chainId>} when the token starts at the same offset
 * as the link, and {@code I-<chainId>} otherwise. Labels are ordered by ascending chain id.
 * Links that do not belong to a numbered chain are skipped.
 *
 * @param token       the token being labelled
 * @param links       the coreference links covering the token; may be {@code null} or empty
 * @param linkToChain lookup from a link to the chain containing it
 * @param chainIds    lookup from a chain to its numeric id
 * @return the joined labels, or {@code null} if no label could be produced
 */
private String corefChainLabels(Token token, Collection<CoreferenceLink> links,
        Map<CoreferenceLink, CoreferenceChain> linkToChain, Map<CoreferenceChain, Integer> chainIds) {
    if (links == null || links.isEmpty()) {
        return null;
    }

    String[] labels = new String[links.size()];
    int used = 0;
    for (CoreferenceLink link : links) {
        Integer chainId = chainIds.get(linkToChain.get(link));
        if (chainId == null) {
            continue;
        }
        // Compare offsets, not covered text: a later token whose text merely repeats the
        // beginning of the mention must still be marked "I".
        String marker = link.getBegin() == token.getBegin() ? "B" : "I";
        labels[used++] = marker + "-" + chainId;
    }
    if (used == 0) {
        return null;
    }
    labels = Arrays.copyOf(labels, used);

    // Sort by chain id, ignoring the two-character BIO prefix ("B-" / "I-").
    Arrays.sort(labels, new Comparator<String>() {
        @Override
        public int compare(String left, String right) {
            return Integer.compare(Integer.parseInt(left.substring(2)), Integer.parseInt(right.substring(2)));
        }
    });

    return StringUtils.join(labels, ",");
}