Example usage for org.apache.commons.csv CSVFormat TDF

Introduction

This page collects example usages of org.apache.commons.csv CSVFormat.TDF, the predefined tab-delimited format.

Prototype

public static final CSVFormat TDF

Document

Tab-delimited format.
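
CSVFormat.TDF is a predefined tab-delimited variant of the default format, so it can be passed anywhere a CSVFormat is expected. Below is a minimal, self-contained sketch of parsing tab-separated text with it; the sample data and column names are illustrative, not taken from the projects quoted on this page.

import java.io.IOException;
import java.io.StringReader;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;

public class TdfExample {
    public static void main(String[] args) throws IOException {
        // A header row followed by two tab-separated data rows
        String tsv = "id\tname\n1\tAlice\n2\tBob\n";
        // withHeader() with no arguments treats the first record as the header
        for (CSVRecord row : CSVFormat.TDF.withHeader().parse(new StringReader(tsv))) {
            System.out.println(row.get("id") + " -> " + row.get("name"));
        }
    }
}

Running this prints "1 -> Alice" and "2 -> Bob".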

Usage

From source file: org.phenotips.vocabulary.internal.hpoannotations.NegativePhenotypeForDiseaseAnnotationsExtensionTest.java

@Test
public void tdfParserIsUsed() throws Exception {
    final CSVFormat result = this.extension.setupCSVParser(this.vocabulary);
    Assert.assertEquals(CSVFormat.TDF, result);
}

From source file: org.phenotips.vocabulary.internal.solr.MendelianInheritanceInMan.java

private void parseOmimData(URL sourceUrl) {
    try {
        Reader in = new InputStreamReader(sourceUrl.openConnection().getInputStream(),
                Charset.forName(ENCODING));
        for (CSVRecord row : CSVFormat.TDF.withCommentMarker('#').parse(in)) {
            // Ignore moved or removed entries
            if ("Caret".equals(row.get(0))) {
                continue;
            }

            SolrInputDocument crtTerm = new SolrInputDocument();
            // set id
            addFieldValue(ID_FIELD, row.get(1), crtTerm);

            // set symbol
            addFieldValue(SYMBOL_FIELD, SYMBOLS.get(row.get(0)), crtTerm);
            // set type (multivalued)
            for (String type : TYPES.get(row.get(0))) {
                addFieldValue(TYPE_FIELD, type, crtTerm);
            }
            // set name
            String name = StringUtils.substringBefore(row.get(2), TITLE_SEPARATOR).trim();
            addFieldValue(NAME_FIELD, name, crtTerm);
            // set short name
            String shortNameString = StringUtils.substringAfter(row.get(2), TITLE_SEPARATOR).trim();
            String[] shortNames = StringUtils.split(shortNameString, TITLE_SEPARATOR);
            for (String shortName : shortNames) {
                addFieldValue(SHORT_NAME_FIELD, shortName.trim(), crtTerm);
            }

            // set synonyms
            setListFieldValue(SYNONYM_FIELD, row.get(3), crtTerm);
            // set included name
            setListFieldValue(INCLUDED_NAME_FIELD, row.get(4), crtTerm);

            this.data.put(String.valueOf(crtTerm.get(ID_FIELD).getFirstValue()), crtTerm);
        }
    } catch (IOException ex) {
        this.logger.warn("Failed to read/parse the OMIM source: {}", ex.getMessage());
    }
}

From source file: org.phenotips.vocabulary.internal.solr.MendelianInheritanceInMan.java

private void loadGenes() {
    try (BufferedReader in = new BufferedReader(
            new InputStreamReader(new URL(GENE_ANNOTATIONS_URL).openConnection().getInputStream(), ENCODING))) {
        for (CSVRecord row : CSVFormat.TDF.withCommentMarker('#').parse(in)) {
            SolrInputDocument term = this.data.get(row.get(0).trim());
            if (term != null) {
                String gs = row.get(3).trim();
                if (StringUtils.isNotBlank(gs)) {
                    term.addField(GENE_FIELD, gs);
                }
                String eidLine = row.get(4).trim();
                if (StringUtils.isNotBlank(eidLine)) {
                    String[] eids = StringUtils.split(eidLine, ",");
                    for (String eid : eids) {
                        term.addField(GENE_FIELD, eid.trim());
                    }
                }
            }
        }
    } catch (IOException ex) {
        this.logger.error("Failed to load OMIM-Gene links: {}", ex.getMessage(), ex);
    }
}

From source file: org.phenotips.vocabulary.internal.solr.MendelianInheritanceInMan.java

private void loadGeneReviews() {
    try (BufferedReader in = new BufferedReader(new InputStreamReader(
            new URL(GENEREVIEWS_MAPPING_URL).openConnection().getInputStream(), ENCODING))) {
        for (CSVRecord row : CSVFormat.TDF.withHeader().parse(in)) {
            SolrInputDocument term = this.data.get(row.get(2));
            if (term != null) {
                term.setField("gene_reviews_link", "https://www.ncbi.nlm.nih.gov/books/" + row.get(0));
            }
        }
    } catch (IOException ex) {
        this.logger.error("Failed to load OMIM-GeneReviews links: {}", ex.getMessage(), ex);
    }
}

From source file: org.phenotips.vocabulary.internal.solr.OmimSourceParser.java

private void loadSymptoms(boolean positive) {
    String omimId = "";
    String previousOmimId = null;
    Set<String> ancestors = new HashSet<>();
    try (BufferedReader in = new BufferedReader(
            new InputStreamReader(new URL(positive ? POSITIVE_ANNOTATIONS_URL : NEGATIVE_ANNOTATIONS_URL)
                    .openConnection().getInputStream(), ENCODING))) {
        for (CSVRecord row : CSVFormat.TDF.parse(in)) {
            if ("OMIM".equals(row.get(0))) {
                omimId = row.get(1);
                addAncestors(previousOmimId, omimId, ancestors, positive);
                previousOmimId = omimId;
                SolrInputDocument term = this.data.get(omimId);
                if (term != null) {
                    term.addField(positive ? "actual_symptom" : "actual_not_symptom", row.get(4));
                }
                VocabularyTerm vterm = this.hpo.getTerm(row.get(4));
                if (vterm != null) {
                    for (VocabularyTerm ancestor : vterm.getAncestorsAndSelf()) {
                        ancestors.add(ancestor.getId());
                    }
                }
            }
        }
        addAncestors(omimId, null, ancestors, positive);
    } catch (IOException ex) {
        this.logger.error("Failed to load OMIM-HPO links: {}", ex.getMessage(), ex);
    }
}

From source file: org.wheatinitiative.vitro.webapp.ontology.update.OntologyChangeParser.java

/**
 * @param diffPath classpath location of the PromptDiff output file
 * @throws IOException if the diff file cannot be read
 */

@SuppressWarnings({ "unchecked", "null", "static-access" })
public ArrayList<AtomicOntologyChange> parseFile(String diffPath) throws IOException {
    AtomicOntologyChange changeObj;
    ArrayList<AtomicOntologyChange> changeObjects = new ArrayList<AtomicOntologyChange>();
    InputStreamReader in = new InputStreamReader(this.getClass().getResourceAsStream(diffPath));

    Iterable<CSVRecord> records = CSVFormat.TDF.parse(in);
    Iterator<CSVRecord> rows = records.iterator();

    int rowNum = 0;
    while (rows.hasNext()) {
        rowNum++;
        CSVRecord row = rows.next();
        if (row.size() != 5) {
            log.error("Invalid PromptDiff data at row " + (rowNum + 1) + ". Expected 5 columns; found "
                    + row.size());
        } else {
            changeObj = new AtomicOntologyChange();
            if (row.get(0) != null && row.get(0).length() > 0) {
                changeObj.setSourceURI(row.get(0));
            }
            if (row.get(1) != null && row.get(1).length() > 0) {
                changeObj.setDestinationURI(row.get(1));
            }
            if (row.get(4) != null && row.get(4).length() > 0) {
                changeObj.setNotes(row.get(4));
            }
            if ("Yes".equals(row.get(2))) {
                changeObj.setAtomicChangeType(AtomicChangeType.RENAME);
            } else if ("Delete".equals(row.get(3))) {
                changeObj.setAtomicChangeType(AtomicChangeType.DELETE);
            } else if ("Add".equals(row.get(3))) {
                changeObj.setAtomicChangeType(AtomicChangeType.ADD);
            } else {
                log.error("Invalid rename or change type data: '" + row.get(2) + " " + row.get(3) + "'");
            }
            log.debug(changeObj);
            changeObjects.add(changeObj);
        }

    }
    if (changeObjects.size() == 0) {
        log.debug("No ABox updates are required.");
    }
    return changeObjects;
}

From source file: org.wso2.carbon.event.simulator.core.internal.generator.csv.util.CSVReader.java

/**
 * Parses the CSV file using the delimiter specified in the CSV simulation configuration.
 *
 * @param delimiter delimiter to be used when parsing the CSV file
 * @throws IOException if an error occurs while creating the CSVParser
 */
private CSVParser parseFile(String delimiter) throws IOException {
    switch (delimiter) {
    case ",":
        return new CSVParser(fileReader, CSVFormat.DEFAULT);
    case ";":
        return new CSVParser(fileReader, CSVFormat.EXCEL);
    case "\\t":
        return new CSVParser(fileReader, CSVFormat.TDF);
    default:
        return new CSVParser(fileReader, CSVFormat.newFormat(delimiter.charAt(0)));
    }
}
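
Note that the "\\t" case above matches the two-character sequence backslash-t exactly as it is written in the simulation configuration; it is that escaped text, not a literal tab character, that selects CSVFormat.TDF. A delimiter value containing an actual tab would fall through to the default branch and produce a plain single-character format via CSVFormat.newFormat instead.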

From source file: org.wso2.carbon.event.simulator.csvFeedSimulation.core.CSVFeedEventSimulator.java

/**
 * This method must be called within a synchronized block to prevent multiple file simulators from running
 * simultaneously. It reads the values from the uploaded CSV file, converts them into events, and sends
 * those events to the input handler.
 * <p>
 * The CSV file is read using the Apache Commons CSV parser library.
 * {@link <a href="https://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVParser.html">CSVParser</a>}
 * </p>
 * <p>
 * The CSV file may be separated by one of the following characters: ',', ';', or '\t'; a user-defined
 * delimiter can also be used. Any field may be quoted (with double quotes), and fields with embedded
 * commas or delimiter characters must be double quoted.
 * </p>
 *
 * @param executionPlanDto ExecutionPlanDto
 * @param csvFileConfig    CSVFileSimulationDto
 */
private void sendEvent(ExecutionPlanDto executionPlanDto, CSVFileSimulationDto csvFileConfig) {

    /*
    Number of events read from the CSV file during each iteration
     */
    long noOfEvents = 0;
    int delay = csvFileConfig.getDelay();
    /*
    Reader for reading character streams from file
     */
    Reader in = null;
    /*
    CSVParser to read CSV Values
     */
    CSVParser csvParser = null;
    if (delay <= 0) {
        log.warn("Events will be sent continuously since the delay between events are set to " + delay
                + "milliseconds");
        delay = 0;
    }

    try {
        /*
        Initialize Reader
         */
        in = new FileReader(String.valueOf(Paths.get(System.getProperty("java.io.tmpdir"),
                csvFileConfig.getFileDto().getFileInfo().getFileName())));

        /*
        Initialize CSVParser with appropriate CSVFormat according to delimiter
         */

        switch (csvFileConfig.getDelimiter()) {
        case ",":
            csvParser = CSVParser.parse(in, CSVFormat.DEFAULT);
            break;
        case ";":
            csvParser = CSVParser.parse(in, CSVFormat.EXCEL);
            break;
        case "\\t":
            csvParser = CSVParser.parse(in, CSVFormat.TDF);
            break;
        default:
            csvParser = CSVParser.parse(in, CSVFormat.newFormat(csvFileConfig.getDelimiter().charAt(0)));
        }

        int attributeSize = executionPlanDto.getInputStreamDtoMap().get(csvFileConfig.getStreamName())
                .getStreamAttributeDtos().size();

        /*
        Iterate through the CSV file line by line
         */

        for (CSVRecord record : csvParser) {
            try {
                synchronized (this) {
                    if (isStopped) {
                        isStopped = false;
                        break;
                    }
                    if (isPaused) {
                        this.wait();
                    }
                }

                if (record.size() != attributeSize) {
                    log.warn("No of attribute is not equal to attribute size: " + attributeSize + " is needed"
                            + "in Row no:" + noOfEvents + 1);
                }
                String[] attributes = new String[attributeSize];
                noOfEvents = csvParser.getCurrentLineNumber();

                for (int i = 0; i < record.size(); i++) {
                    attributes[i] = record.get(i);
                }

                //convert Attribute values into event
                Event event = EventConverter.eventConverter(csvFileConfig.getStreamName(), attributes,
                        executionPlanDto);
                // TODO: 13/12/16 delete sout
                System.out.println("Input Event " + Arrays.deepToString(event.getEventData()));
                //

                //send the event to input handler
                send(csvFileConfig.getStreamName(), event);

                //delay between two events
                if (delay > 0) {
                    Thread.sleep(delay);
                }
            } catch (EventSimulationException e) {
                log.error("Event dropped due to Error occurred during generating an event" + e.getMessage());
            } catch (InterruptedException e) {
                log.error("Error occurred during send event" + e.getMessage());
            }
        }

    } catch (IllegalArgumentException e) {
        // TODO: 02/12/16 proper error message
        throw new EventSimulationException("File Parameters are null" + e.getMessage());
    } catch (FileNotFoundException e) {
        throw new EventSimulationException(
                "File not found :" + csvFileConfig.getFileDto().getFileInfo().getFileName());
    } catch (IOException e) {
        throw new EventSimulationException("Error occurred while reading the file");
    } finally {
        try {
            if (in != null && csvParser != null)
                in.close();
            csvParser.close();
        } catch (IOException e) {
            throw new EventSimulationException("Error occurred during closing the file");
        }
    }
}

From source file: trainer.userinput.TrainingFileDB.java

public static CSVFormat getCSVFormat() {
    // Create the CSVFormat object with "\n" as the record separator
    CSVFormat csvFileFormat = CSVFormat.TDF.withRecordSeparator(NEW_LINE_SEPARATOR);
    csvFileFormat = csvFileFormat.withEscape('^');
    csvFileFormat = csvFileFormat.withQuoteMode(QuoteMode.NONE);
    return csvFileFormat;
}
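
A minimal sketch of writing a record with the format built above; it assumes NEW_LINE_SEPARATOR is "\n", and the file name and field values are illustrative.

import java.io.FileWriter;
import java.io.IOException;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.QuoteMode;

public class TrainingFileWriter {
    public static void main(String[] args) throws IOException {
        CSVFormat format = CSVFormat.TDF.withRecordSeparator("\n").withEscape('^')
                .withQuoteMode(QuoteMode.NONE);
        try (CSVPrinter printer = new CSVPrinter(new FileWriter("training-db.tsv"), format)) {
            // With QuoteMode.NONE, special characters inside a field are escaped with '^' rather than quoted
            printer.printRecord("user-input", "expected-response", "score");
        }
    }
}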

From source file: us.parr.animl.data.DataTable.java

public static DataTable loadCSV(String fileName, String formatType, VariableType[] colTypesOverride,
        String[] colNamesOverride, boolean hasHeaderRow) {
    try {
        // use apache commons io + csv to load but convert to list of String[]
        // byte-order markers are handled if present at start of file.
        FileInputStream fis = new FileInputStream(fileName);
        final Reader reader = new InputStreamReader(new BOMInputStream(fis), "UTF-8");
        CSVFormat format;
        if (formatType == null) {
            format = hasHeaderRow ? CSVFormat.RFC4180.withHeader() : CSVFormat.RFC4180;
        } else {
            switch (formatType.toLowerCase()) {
            case "tsv":
                format = hasHeaderRow ? CSVFormat.TDF.withHeader() : CSVFormat.TDF;
                break;
            case "mysql":
                format = hasHeaderRow ? CSVFormat.MYSQL.withHeader() : CSVFormat.MYSQL;
                break;
            case "excel":
                format = hasHeaderRow ? CSVFormat.EXCEL.withHeader() : CSVFormat.EXCEL;
                break;
            case "rfc4180":
            default:
                format = hasHeaderRow ? CSVFormat.RFC4180.withHeader() : CSVFormat.RFC4180;
                break;
            }
        }
        final CSVParser parser = new CSVParser(reader, format);
        List<String[]> rows = new ArrayList<>();
        // getHeaderMap() is null when the format was built without a header row
        int numHeaderNames = parser.getHeaderMap() == null ? 0 : parser.getHeaderMap().size();
        try {
            for (final CSVRecord record : parser) {
                String[] row = new String[record.size()];
                for (int j = 0; j < record.size(); j++) {
                    row[j] = record.get(j);
                }
                rows.add(row);
            }
        } finally {
            parser.close();
            reader.close();
        }

        VariableType[] actualTypes = computeColTypes(rows, numHeaderNames);

        Set<String> colNameSet = parser.getHeaderMap().keySet();
        String[] colNames = colNameSet.toArray(new String[colNameSet.size()]);
        if (colNamesOverride != null) {
            colNames = colNamesOverride;
        }
        if (colTypesOverride != null) {
            actualTypes = colTypesOverride;
        }
        return fromStrings(rows, actualTypes, colNames, false);
    } catch (Exception e) {
        throw new IllegalArgumentException("Can't open and/or read " + fileName, e);
    }
}