Example usage for org.apache.commons.csv CSVParser parse

List of usage examples for org.apache.commons.csv CSVParser parse

Introduction

In this page you can find the example usage for org.apache.commons.csv CSVParser parse.

Prototype

public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException 

Source Link

Document

Creates a parser for the given URL.

Usage

From source file:com.ibm.g11n.pipeline.example.MultiBundleCSVFilter.java

@Override
public void merge(InputStream baseStream, OutputStream outStream, Map<String, LanguageBundle> languageBundles,
        FilterOptions options) throws IOException, ResourceFilterException {
    // create key-value map for each bundle
    Map<String, Map<String, String>> kvMaps = new HashMap<String, Map<String, String>>();
    for (Entry<String, LanguageBundle> bundleEntry : languageBundles.entrySet()) {
        LanguageBundle languageBundle = bundleEntry.getValue();
        Map<String, String> kvMap = new HashMap<String, String>();
        for (ResourceString resString : languageBundle.getResourceStrings()) {
            kvMap.put(resString.getKey(), resString.getValue());
        }/*w w w  . j  a va  2  s  . c o  m*/
        kvMaps.put(bundleEntry.getKey(), kvMap);
    }

    CSVParser parser = CSVParser.parse(baseStream, StandardCharsets.UTF_8,
            CSVFormat.RFC4180.withHeader("module", "key", "value").withSkipHeaderRecord(true));
    BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(outStream, StandardCharsets.UTF_8));
    CSVPrinter printer = CSVFormat.RFC4180.withHeader("module", "key", "value").print(writer);
    for (CSVRecord record : parser) {
        String module = record.get(0);
        String key = record.get(1);
        String value = record.get(2);
        Map<String, String> moduleKVMap = kvMaps.get(module);
        if (moduleKVMap != null) {
            String trValue = moduleKVMap.get(key);
            if (trValue != null) {
                value = trValue;
            }
        }
        printer.printRecord(module, key, value);
    }
    printer.flush();
}

From source file:MasterRoomControllerFx.rooms.charts.RoomChartController.java

public void getHumData() {
    try {/*from w w  w. j av a 2 s  .  c o  m*/
        File csvData = new File("humHistory" + roomRow + roomColumn + ".csv");
        if (csvData.exists()) {
            CSVParser parser = CSVParser.parse(csvData, StandardCharsets.UTF_8,
                    CSVFormat.EXCEL.withDelimiter(';'));
            for (CSVRecord csvRecord : parser) {
                for (int i = 0; i < csvRecord.size() - 1; i++) {
                    hum.add(Float.parseFloat(csvRecord.get(i)));
                }
            }
        }
    } catch (IOException ex) {
        Logger.getLogger(RoomChartController.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:canreg.client.dataentry.Import.java

/**
 *
 * @param task/*from   w ww  .  j  av a 2 s.c  o m*/
 * @param doc
 * @param map
 * @param file
 * @param server
 * @param io
 * @return
 * @throws java.sql.SQLException
 * @throws java.rmi.RemoteException
 * @throws canreg.server.database.RecordLockedException
 */
public static boolean importFile(Task<Object, String> task, Document doc,
        List<canreg.client.dataentry.Relation> map, File file, CanRegServerInterface server, ImportOptions io)
        throws SQLException, RemoteException, SecurityException, RecordLockedException {
    //public static boolean importFile(canreg.client.gui.management.CanReg4MigrationInternalFrame.MigrationTask task, Document doc, List<canreg.client.dataentry.Relation> map, File file, CanRegServerInterface server, ImportOptions io) throws SQLException, RemoteException, SecurityException, RecordLockedException {
    boolean success = false;

    Set<String> noNeedToLookAtPatientVariables = new TreeSet<String>();

    noNeedToLookAtPatientVariables.add(io.getPatientIDVariableName());
    noNeedToLookAtPatientVariables.add(io.getPatientRecordIDVariableName());

    String firstNameVariableName = io.getFirstNameVariableName();
    String sexVariableName = io.getSexVariableName();

    CSVParser parser = null;
    CSVFormat format = CSVFormat.DEFAULT.withFirstRecordAsHeader().withDelimiter(io.getSeparator());

    int linesToRead = io.getMaxLines();

    HashMap mpCodes = new HashMap();

    int numberOfLinesRead = 0;

    Map<String, Integer> nameSexTable = server.getNameSexTables();

    try {
        //            FileInputStream fis = new FileInputStream(file);
        //           BufferedReader bsr = new BufferedReader(new InputStreamReader(fis, io.getFileCharset()));

        // Logger.getLogger(Import.class.getName()).log(Level.CONFIG, "Name of the character encoding {0}");
        int numberOfRecordsInFile = canreg.common.Tools.numberOfLinesInFile(file.getAbsolutePath());

        if (linesToRead > 0) {
            linesToRead = Math.min(numberOfRecordsInFile, linesToRead);
        } else {
            linesToRead = numberOfRecordsInFile;
        }

        parser = CSVParser.parse(file, io.getFileCharset(), format);

        for (CSVRecord csvRecord : parser) {
            numberOfLinesRead++;
            // We allow for null tasks...
            boolean needToSavePatientAgain = true;
            int patientDatabaseRecordID = -1;

            if (task != null) {
                task.firePropertyChange("progress", (numberOfLinesRead - 1) * 100 / linesToRead,
                        (numberOfLinesRead) * 100 / linesToRead);
            }

            // Build patient part
            Patient patient = new Patient();
            for (int i = 0; i < map.size(); i++) {
                Relation rel = map.get(i);
                if (rel.getDatabaseTableVariableID() >= 0
                        && rel.getDatabaseTableName().equalsIgnoreCase("patient")) {
                    if (rel.getFileColumnNumber() < csvRecord.size()) {
                        if (rel.getVariableType().equalsIgnoreCase("Number")) {
                            if (csvRecord.get(rel.getFileColumnNumber()).length() > 0) {
                                try {
                                    patient.setVariable(rel.getDatabaseVariableName(),
                                            Integer.parseInt(csvRecord.get(rel.getFileColumnNumber())));
                                } catch (NumberFormatException ex) {
                                    Logger.getLogger(Import.class.getName()).log(Level.SEVERE,
                                            "Number format error in line: " + (numberOfLinesRead + 1 + 1)
                                                    + ". ",
                                            ex);
                                    success = false;
                                }
                            }
                        } else {
                            patient.setVariable(rel.getDatabaseVariableName(),
                                    StringEscapeUtils.unescapeCsv(csvRecord.get(rel.getFileColumnNumber())));
                        }
                    } else {
                        Logger.getLogger(Import.class.getName()).log(Level.INFO,
                                "Something wrong with patient part of line " + numberOfLinesRead + ".",
                                new Exception("Error in line: " + numberOfLinesRead + ". Can't find field: "
                                        + rel.getDatabaseVariableName()));
                    }
                }
            }
            // debugOut(patient.toString());

            // Build tumour part
            Tumour tumour = new Tumour();
            for (canreg.client.dataentry.Relation rel : map) {
                if (rel.getDatabaseTableVariableID() >= 0
                        && rel.getDatabaseTableName().equalsIgnoreCase("tumour")) {
                    if (rel.getFileColumnNumber() < csvRecord.size()) {
                        if (rel.getVariableType().equalsIgnoreCase("Number")) {
                            if (csvRecord.get(rel.getFileColumnNumber()).length() > 0) {
                                try {
                                    tumour.setVariable(rel.getDatabaseVariableName(),
                                            Integer.parseInt(csvRecord.get(rel.getFileColumnNumber())));
                                } catch (NumberFormatException ex) {
                                    Logger.getLogger(Import.class.getName()).log(Level.SEVERE,
                                            "Number format error in line: " + (numberOfLinesRead + 1 + 1)
                                                    + ". ",
                                            ex);
                                    success = false;
                                }
                            }
                        } else {
                            tumour.setVariable(rel.getDatabaseVariableName(),
                                    StringEscapeUtils.unescapeCsv(csvRecord.get(rel.getFileColumnNumber())));
                        }
                    } else {
                        Logger.getLogger(Import.class.getName()).log(Level.INFO,
                                "Something wrong with tumour part of line " + numberOfLinesRead + ".",
                                new Exception("Error in line: " + numberOfLinesRead + ". Can't find field: "
                                        + rel.getDatabaseVariableName()));
                    }
                }
            }

            // Build source part
            Set<Source> sources = Collections.synchronizedSet(new LinkedHashSet<Source>());
            Source source = new Source();
            for (canreg.client.dataentry.Relation rel : map) {
                if (rel.getDatabaseTableVariableID() >= 0
                        && rel.getDatabaseTableName().equalsIgnoreCase(Globals.SOURCE_TABLE_NAME)) {
                    if (rel.getFileColumnNumber() < csvRecord.size()) {
                        if (rel.getVariableType().equalsIgnoreCase("Number")) {
                            if (csvRecord.get(rel.getFileColumnNumber()).length() > 0) {
                                try {
                                    source.setVariable(rel.getDatabaseVariableName(),
                                            Integer.parseInt(csvRecord.get(rel.getFileColumnNumber())));
                                } catch (NumberFormatException ex) {
                                    Logger.getLogger(Import.class.getName()).log(Level.SEVERE,
                                            "Number format error in line: " + (numberOfLinesRead + 1 + 1)
                                                    + ". ",
                                            ex);
                                    success = false;
                                }
                            }
                        } else {
                            source.setVariable(rel.getDatabaseVariableName(),
                                    StringEscapeUtils.unescapeCsv(csvRecord.get(rel.getFileColumnNumber())));
                        }
                    } else {
                        Logger.getLogger(Import.class.getName()).log(Level.INFO,
                                "Something wrong with source part of line " + numberOfLinesRead + ".",
                                new Exception("Error in line: " + numberOfLinesRead + ". Can't find field: "
                                        + rel.getDatabaseVariableName()));
                    }

                }
            }
            sources.add(source);
            tumour.setSources(sources);

            // debugOut(tumour.toString());
            // add patient to the database
            Object patientID = patient.getVariable(io.getPatientIDVariableName());
            Object patientRecordID = patient.getVariable(io.getPatientRecordIDVariableName());

            if (patientID == null) {
                // save the record to get the new patientID;
                patientDatabaseRecordID = server.savePatient(patient);
                patient = (Patient) server.getRecord(patientDatabaseRecordID, Globals.PATIENT_TABLE_NAME,
                        false);
                patientID = patient.getVariable(io.getPatientIDVariableName());
                patientRecordID = patient.getVariable(io.getPatientRecordIDVariableName());
            }

            if (io.isDataFromPreviousCanReg()) {
                // set update date for the patient the same as for the tumour
                Object updateDate = tumour.getVariable(io.getTumourUpdateDateVariableName());
                patient.setVariable(io.getPatientUpdateDateVariableName(), updateDate);

                // Set the patientID the same as the tumourID initially
                // Object tumourSequence = tumour.getVariable(io.getTumourSequenceVariableName());
                Object tumourSequence = "1";

                String tumourSequenceString = tumourSequence + "";
                while (tumourSequenceString.length() < Globals.ADDITIONAL_DIGITS_FOR_PATIENT_RECORD) {
                    tumourSequenceString = "0" + tumourSequenceString;
                }
                patientRecordID = patientID + "" + tumourSequenceString;

                // If this is a multiple primary tumour...
                String mpCodeString = (String) tumour.getVariable(io.getMultiplePrimaryVariableName());
                if (mpCodeString != null && mpCodeString.length() > 0) {
                    patientID = lookUpPatientID(mpCodeString, patientID, mpCodes);

                    // rebuild sequenceNumber
                    Tumour[] tumours = new Tumour[0];
                    try {
                        tumours = CanRegClientApp.getApplication()
                                .getTumourRecordsBasedOnPatientID(patientID + "", false);
                    } catch (DistributedTableDescriptionException ex) {
                        Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex);
                    } catch (UnknownTableException ex) {
                        Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex);
                    }

                    tumourSequenceString = (tumours.length + 1) + "";
                    while (tumourSequenceString.length() < Globals.ADDITIONAL_DIGITS_FOR_PATIENT_RECORD) {
                        tumourSequenceString = "0" + tumourSequenceString;
                    }

                    patientRecordID = patientID + "" + tumourSequenceString;
                    Patient[] oldPatients = null;
                    try {
                        oldPatients = CanRegClientApp.getApplication().getPatientRecordsByID((String) patientID,
                                false);
                    } catch (RemoteException ex) {
                        Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex);
                    } catch (SecurityException ex) {
                        Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex);
                    } catch (DistributedTableDescriptionException ex) {
                        Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex);
                    } catch (RecordLockedException ex) {
                        Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex);
                    } catch (SQLException ex) {
                        Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex);
                    } catch (UnknownTableException ex) {
                        Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex);
                    }
                    for (Patient oldPatient : oldPatients) {
                        if (!Tools.newRecordContainsNewInfo(patient, oldPatient,
                                noNeedToLookAtPatientVariables)) {
                            needToSavePatientAgain = false;
                            patient = oldPatient;
                            patientRecordID = oldPatient.getVariable(io.getPatientRecordIDVariableName());
                        }
                    }
                }

                Object tumourID = patientRecordID + "" + tumourSequenceString;
                //
                patient.setVariable(io.getPatientIDVariableName(), patientID);
                tumour.setVariable(io.getTumourIDVariablename(), tumourID);
                // And store the record ID

                patient.setVariable(io.getPatientRecordIDVariableName(), patientRecordID);

                // Set the patient ID number on the tumour
                tumour.setVariable(io.getPatientIDTumourTableVariableName(), patientID);
                tumour.setVariable(io.getPatientRecordIDTumourTableVariableName(), patientRecordID);

                // Set the deprecated flag to 0 - no obsolete records from CR4
                tumour.setVariable(io.getObsoleteTumourFlagVariableName(), "0");
                patient.setVariable(io.getObsoletePatientFlagVariableName(), "0");

            }

            // Set the name in the firstName database
            String sex = (String) patient.getVariable(sexVariableName);
            if (sex != null && sex.length() > 0) {
                Integer sexCode = Integer.parseInt(sex);
                String firstNames = (String) patient.getVariable(firstNameVariableName);
                if (firstNames != null) {
                    String[] firstNamesArray = firstNames.split(" ");
                    for (String firstName : firstNamesArray) {
                        if (firstName != null && firstName.trim().length() > 0) {
                            // here we use the locale specific toUpperCase
                            Integer registeredSexCode = nameSexTable.get(firstName);
                            if (registeredSexCode == null) {
                                NameSexRecord nsr = new NameSexRecord();
                                nsr.setName(firstName);
                                nsr.setSex(sexCode);

                                server.saveNameSexRecord(nsr, false);

                                nameSexTable.put(firstName, sexCode);
                            } else if (registeredSexCode != sexCode) {
                                if (registeredSexCode != 9) {
                                    sexCode = 9;
                                    NameSexRecord nsr = new NameSexRecord();
                                    nsr.setName(firstName);
                                    nsr.setSex(sexCode);
                                    server.saveNameSexRecord(nsr, true);
                                    nameSexTable.remove(firstName);
                                    nameSexTable.put(firstName, sexCode);
                                }
                            }
                        }
                    }
                }
            }

            if (needToSavePatientAgain) {
                if (patientDatabaseRecordID > 0) {
                    server.editPatient(patient);
                } else {
                    patientDatabaseRecordID = server.savePatient(patient);
                }
            }
            if (patient != null && tumour != null) {
                String icd10 = (String) tumour.getVariable(io.getICD10VariableName());
                if (icd10 == null || icd10.trim().length() == 0) {
                    ConversionResult[] conversionResult = canreg.client.CanRegClientApp.getApplication()
                            .performConversions(Converter.ConversionName.ICDO3toICD10, patient, tumour);
                    tumour.setVariable(io.getICD10VariableName(), conversionResult[0].getValue());
                }
                String iccc = (String) tumour.getVariable(io.getICCCVariableName());
                if (iccc == null || iccc.trim().length() == 0) {
                    ConversionResult[] conversionResult = canreg.client.CanRegClientApp.getApplication()
                            .performConversions(Converter.ConversionName.ICDO3toICCC3, patient, tumour);
                    tumour.setVariable(io.getICCCVariableName(), conversionResult[0].getValue());
                }
            }
            if (tumour.getVariable(io.getPatientIDTumourTableVariableName()) == null) {
                tumour.setVariable(io.getPatientIDTumourTableVariableName(), patientID);
            }

            if (tumour.getVariable(io.getPatientRecordIDTumourTableVariableName()) == null) {
                tumour.setVariable(io.getPatientRecordIDTumourTableVariableName(), patientRecordID);
            }

            int tumourDatabaseIDNumber = server.saveTumour(tumour);

            if (Thread.interrupted()) {
                //We've been interrupted: no more importing.
                throw new InterruptedException();
            }
        }
        task.firePropertyChange("finished", null, null);
        success = true;
    } catch (IOException ex) {
        Logger.getLogger(Import.class.getName()).log(Level.SEVERE,
                "Error in line: " + (numberOfLinesRead + 1 + 1) + ". ", ex);
        success = false;
    } catch (NumberFormatException ex) {
        Logger.getLogger(Import.class.getName()).log(Level.SEVERE,
                "Error in line: " + (numberOfLinesRead + 1 + 1) + ". ", ex);
        success = false;
    } catch (InterruptedException ex) {
        Logger.getLogger(Import.class.getName()).log(Level.INFO,
                "Interupted on line: " + (numberOfLinesRead + 1) + ". ", ex);
        success = true;
    } catch (IndexOutOfBoundsException ex) {
        Logger.getLogger(Import.class.getName()).log(Level.SEVERE,
                "Error in line: " + (numberOfLinesRead + 1 + 1) + ". ", ex);
        success = false;
    } catch (SQLException ex) {
        Logger.getLogger(Import.class.getName()).log(Level.SEVERE,
                "Error in line: " + (numberOfLinesRead + 1 + 1) + ". ", ex);
        success = false;
    } finally {
        if (parser != null) {
            try {
                parser.close();
            } catch (IOException ex) {
                Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }

    return success;
}

From source file:io.mindmaps.migration.csv.CSVDataMigratorTest.java

private CSVParser parser(String fileName) {
    File file = new File(CSVSchemaMigratorTest.class.getClassLoader().getResource(fileName).getPath());

    CSVParser csvParser = null;/*from   w w  w . jav a 2s. c o  m*/
    try {
        csvParser = CSVParser.parse(file.toURI().toURL(), StandardCharsets.UTF_8,
                CSVFormat.DEFAULT.withHeader());
    } catch (IOException e) {
        e.printStackTrace();
    }

    return csvParser;
}

From source file:edu.isi.misd.scanner.network.modules.worker.processors.ptr.PrepToResearchProcessor.java

private PrepToResearchResponse analyzeFile(PrepToResearchRequest request, File analysisFile) throws Exception {
    PrepToResearchResponse response = new PrepToResearchResponse();
    Integer requestedOmopConceptID = request.getOmopConceptID();
    CSVFormat csvFormat = CSVFormat.newFormat(',').withHeader().withCommentMarker('#').withQuote('"');
    CSVParser parser = CSVParser.parse(analysisFile, Charset.defaultCharset(), csvFormat);
    for (CSVRecord csvRecord : parser) {
        try {/*from w  ww . j a v  a 2 s  . c om*/
            this.validateCSVRecord(csvRecord);

            // check the ID first, if no match continue
            Integer omopConceptID = Integer
                    .parseInt(csvRecord.get(ExpectedColumnName.OMOP_CONCEPT_ID.toString()));
            if (!requestedOmopConceptID.equals(omopConceptID)) {
                continue;
            }

            // match found, create response output record
            if (log.isDebugEnabled()) {
                log.debug(String.format("Found a match for requested ID %s, record: %s", requestedOmopConceptID,
                        csvRecord.toString()));
            }
            PrepToResearchRecord ptrRecord = new PrepToResearchRecord();
            ptrRecord.setOmopConceptID(omopConceptID);
            ptrRecord.setOmopConceptName(csvRecord.get(ExpectedColumnName.OMOP_CONCEPT_NAME));
            ptrRecord.setCategory(csvRecord.get(ExpectedColumnName.CATEGORY));
            ptrRecord.setCategoryValue(csvRecord.get(ExpectedColumnName.CATEGORY_VALUE));
            ptrRecord.setCountFemales(Integer.parseInt(csvRecord.get(ExpectedColumnName.COUNT_FEMALES)));
            ptrRecord.setCountMales(Integer.parseInt(csvRecord.get(ExpectedColumnName.COUNT_MALES)));
            ptrRecord.setCountTotal(Integer.parseInt(csvRecord.get(ExpectedColumnName.COUNT_TOTAL)));

            response.getPrepToResearchRecord().add(ptrRecord);
        } catch (Exception e) {
            String error = String.format(
                    "An exception occured while processing row number %s with the following values %s: %s",
                    csvRecord.getRecordNumber(), csvRecord.toString(), e.toString());
            parser.close();
            throw new RuntimeException(error);
        }
    }
    parser.close();
    return response;
}

From source file:com.me.jvmi.Main.java

public static Collection<InputRecord> parseInput(Path csv) throws IOException {
    Map<String, InputRecord> records = new HashMap<>();
    CSVParser parser = CSVParser.parse(csv.toFile(), Charset.forName("UTF-8"), CSVFormat.DEFAULT.withHeader());

    for (CSVRecord record : parser) {
        InputRecord input = new InputRecord(record);
        records.put(input.getId(), input);
    }/*from  ww w  . j a  va 2  s .c o m*/

    for (InputRecord record : records.values()) {
        if (record.isPackage()) {
            for (String id : record.packageIds) {
                if (!records.containsKey(id)) {
                    throw new IllegalStateException("Could not find product for package id: " + id);
                }
                record.addItem(records.get(id));
            }
        }
    }

    return records.values();
}

From source file:edu.ucla.cs.scai.swim.qa.ontology.dbpedia.tipicality.Test.java

private static ArrayList<HashSet<String>> extractEntities(File csvData, int nOfAttributes) throws IOException {
    CSVParser parser = CSVParser.parse(csvData, Charset.defaultCharset(), CSVFormat.RFC4180);
    int r = 0;//ww w  .j a  v  a  2s.  co m
    ArrayList<Integer> attributePositions = new ArrayList<>();
    ArrayList<String> attributeNames = new ArrayList<>();
    ArrayList<HashSet<String>> res = new ArrayList<>();
    for (CSVRecord csvRecord : parser) {
        if (r == 0) {
            Iterator<String> it = csvRecord.iterator();
            it.next(); //skip URI
            if (!it.hasNext()) { //it is an empty file
                return res;
            }
            it.next(); //skip rdf-schema#label
            it.next(); //skip rdf-schema#comment
            int c = 2;
            for (; it.hasNext();) {
                c++;
                String attr = it.next();
                if (!attr.endsWith("_label")) {
                    attributePositions.add(c);
                }
            }
        } else if (r == 1) {
            Iterator<String> it = csvRecord.iterator();
            it.next(); //skip uri
            it.next(); //skip rdf-schema#label
            it.next(); //skip rdf-schema#comment
            int c = 2;
            int i = 0;
            while (i < attributePositions.size()) {
                c++;
                String attr = it.next();
                if (attributePositions.get(i) == c) {
                    if (!stopAttributes.contains(attr)) {
                        attributes.add(attr);
                    }
                    attributeNames.add(attr);
                    i++;
                }
            }
        } else if (r > 3) {
            ArrayList<String> attributesOfThisEntity = new ArrayList<>();
            Iterator<String> it = csvRecord.iterator();
            String uri = it.next();
            it.next(); //skip rdf-schema#label
            it.next(); //skip rdf-schema#comment
            int c = 2;
            int i = 0;
            while (i < attributePositions.size()) {
                c++;
                String val = it.next();
                if (attributePositions.get(i) == c) {
                    if (!val.equalsIgnoreCase("null")) {
                        String attribute = attributeNames.get(i);
                        if (!stopAttributes.contains(attribute)) {
                            attributesOfThisEntity.add(attribute);
                        }
                    }
                    i++;
                }
            }
            Collections.shuffle(attributesOfThisEntity);
            HashSet<String> s = new HashSet<>();
            for (int k = 0; k < Math.min(nOfAttributes, attributesOfThisEntity.size()); k++) {
                s.add(attributesOfThisEntity.get(k));
            }
            res.add(s);
        }
        r++;
    }
    return res;
}

From source file:com.hurence.logisland.service.cache.CSVKeyValueCacheService.java

@Override
// @OnEnabled// w w w . jav  a  2  s  . co m
public void init(ControllerServiceInitializationContext context) throws InitializationException {
    super.init(context);
    try {

        if (context.getPropertyValue(DATABASE_FILE_URI).isSet()) {
            dbUri = context.getPropertyValue(DATABASE_FILE_URI).asString();
        }

        if (context.getPropertyValue(DATABASE_FILE_PATH).isSet()) {
            dbPath = context.getPropertyValue(DATABASE_FILE_PATH).asString();
        }

        if ((dbUri == null) && (dbPath == null)) {
            throw new Exception(
                    "You must declare " + DATABASE_FILE_URI.getName() + " or " + DATABASE_FILE_PATH.getName());
        }

        InputStream is = null;
        if (dbUri != null) {
            logger.info("opening csv database from hdfs : " + dbUri);
            is = initFromUri(dbUri);
        }

        if (dbPath != null) {
            logger.info("opening csv database from local fs : " + dbPath);
            is = initFromPath(context, dbPath);
        }

        if (is == null) {
            throw new InitializationException("Something went wrong while initializing csv db from "
                    + DATABASE_FILE_URI.getName() + " or " + DATABASE_FILE_PATH.getName());
        }

        // final Reader reader = new InputStreamReader(is);
        CSVFormat format = CSVFormat.DEFAULT;
        if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_EXCEL.getValue())) {
            format = CSVFormat.EXCEL;
        } else if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_EXCEL_FR.getValue())) {
            format = CSVFormat.EXCEL.withDelimiter(';');
        } else if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_MYSQL.getValue())) {
            format = CSVFormat.MYSQL;
        } else if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_RFC4180.getValue())) {
            format = CSVFormat.RFC4180;
        } else if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_TDF.getValue())) {
            format = CSVFormat.TDF;
        }

        if (context.getPropertyValue(CSV_HEADER).isSet()) {
            String[] columnNames = context.getPropertyValue(CSV_HEADER).asString().split(",");
            for (String name : columnNames) {
                headers.get().put(name, "string");
            }
            format = format.withHeader(columnNames);
        } else if (context.getPropertyValue(FIRST_LINE_HEADER).isSet()) {
            format = format.withFirstRecordAsHeader();
        } else {
            throw new InitializationException("unable to get headers from somewhere");
        }

        Charset charset = Charset.forName("UTF-8");
        if (context.getPropertyValue(ENCODING_CHARSET).isSet()) {
            String encoding = context.getPropertyValue(ENCODING_CHARSET).asString();
            charset = Charset.forName(encoding);
        }

        rowKey = context.getPropertyValue(ROW_KEY).asString();
        CSVParser parser = CSVParser.parse(is, charset, format); //new CSVParser(reader, format);

        /*
        *    CSVParser parser = null;
                
        if (context.getPropertyValue(ENCODING_CHARSET).isSet()) {
        String encoding = context.getPropertyValue(ENCODING_CHARSET).asString();
        parser = CSVParser.parse(reader, Charset.forName(encoding), format);
        } else {
        parser = CSVParser.parse(reader, format);
        }
        */
        long count = 0;
        try {
            final Set<String> columnNames = parser.getHeaderMap().keySet();
            for (final CSVRecord record : parser) {

                Record logislandRecord = new StandardRecord();
                for (final String column : columnNames) {
                    logislandRecord.setStringField(column, record.get(column));
                }

                set(logislandRecord.getField(rowKey).asString(), logislandRecord);
                count++;
            }
        } finally {
            logger.info("successfully loaded " + count + " records from CSV file");

            parser.close();
            is.close();
        }

    } catch (Exception e) {
        getLogger().error("Could not load database file: {}", new Object[] { e.getMessage() });
        throw new InitializationException(e);
    }
}

From source file:canreg.client.dataentry.Convert.java

public static boolean importFile(canreg.client.gui.management.CanReg4MigrationInternalFrame.MigrationTask task,
        Document doc, List<canreg.client.dataentry.Relation> map, File file, CanRegServerInterface server,
        ImportOptions io) throws SQLException, RemoteException, SecurityException, RecordLockedException {
    boolean success = false;

    Set<String> noNeedToLookAtPatientVariables = new TreeSet<String>();

    noNeedToLookAtPatientVariables/*from   ww w.  j  a v  a  2s .  co m*/
            .add(canreg.common.Tools.toLowerCaseStandardized(io.getPatientIDVariableName()));
    noNeedToLookAtPatientVariables
            .add(canreg.common.Tools.toLowerCaseStandardized(io.getPatientRecordIDVariableName()));

    String firstNameVariableName = io.getFirstNameVariableName();
    String sexVariableName = io.getSexVariableName();

    CSVParser parser = null;

    HashMap mpCodes = new HashMap();

    int numberOfLinesRead = 0;

    Map<String, Integer> nameSexTable = server.getNameSexTables();

    try {
        // Logger.getLogger(Import.class.getName()).log(Level.CONFIG, "Name of the character encoding {0}");

        int numberOfRecordsInFile = canreg.common.Tools.numberOfLinesInFile(file.getAbsolutePath());

        debugOut("Importing data from " + file);

        CSVFormat format = CSVFormat.DEFAULT.withFirstRecordAsHeader().withDelimiter(io.getSeparator());

        parser = CSVParser.parse(file, io.getFileCharset(), format);

        int linesToRead = io.getMaxLines();
        if (linesToRead == -1 || linesToRead > numberOfRecordsInFile) {
            linesToRead = numberOfRecordsInFile;
        }

        for (CSVRecord csvRecord : parser) {
            numberOfLinesRead++;
            // We allow for null tasks...
            boolean needToSavePatientAgain = true;
            int patientDatabaseRecordID = -1;

            if (task != null) {
                if (canreg.client.gui.management.CanReg4MigrationInternalFrame.isPaused) {
                    task.firePropertyChange("paused", false, true);
                }
                if (!canreg.client.gui.management.CanReg4MigrationInternalFrame.isPaused) {
                    task.firePropertyChange("paused", true, false);
                    task.firePropertyChange("progress", (numberOfLinesRead - 1) * 100 / linesToRead,
                            (numberOfLinesRead) * 100 / linesToRead);
                }
            }

            // Build patient part
            Patient patient = new Patient();
            for (int i = 0; i < map.size(); i++) {
                Relation rel = map.get(i);
                if (rel.getDatabaseTableVariableID() >= 0
                        && rel.getDatabaseTableName().equalsIgnoreCase("patient")) {
                    if (rel.getFileColumnNumber() < csvRecord.size()) {
                        if (rel.getVariableType().equalsIgnoreCase("Number")) {
                            if (csvRecord.get(rel.getFileColumnNumber()).length() > 0) {
                                try {
                                    patient.setVariable(rel.getDatabaseVariableName(),
                                            Integer.parseInt(csvRecord.get(rel.getFileColumnNumber())));
                                } catch (NumberFormatException ex) {
                                    Logger.getLogger(Import.class.getName()).log(Level.SEVERE,
                                            "Number format error in line: " + (numberOfLinesRead + 1 + 1)
                                                    + ". ",
                                            ex);
                                    success = false;
                                }
                            }
                        } else {
                            patient.setVariable(rel.getDatabaseVariableName(),
                                    StringEscapeUtils.unescapeCsv(csvRecord.get(rel.getFileColumnNumber())));
                        }
                    } else {
                        Logger.getLogger(Import.class.getName()).log(Level.INFO,
                                "Something wrong with patient part of line " + numberOfLinesRead + ".",
                                new Exception("Error in line: " + numberOfLinesRead + ". Can't find field: "
                                        + rel.getDatabaseVariableName()));
                    }
                }
            }
            // debugOut(patient.toString());

            // Build tumour part
            Tumour tumour = new Tumour();
            for (int i = 0; i < map.size(); i++) {
                Relation rel = map.get(i);
                if (rel.getDatabaseTableVariableID() >= 0
                        && rel.getDatabaseTableName().equalsIgnoreCase("tumour")
                        && rel.getFileColumnNumber() < csvRecord.size()) {
                    if (rel.getFileColumnNumber() < csvRecord.size()) {
                        if (rel.getVariableType().equalsIgnoreCase("Number")) {
                            if (csvRecord.get(rel.getFileColumnNumber()).length() > 0) {
                                try {
                                    tumour.setVariable(rel.getDatabaseVariableName(),
                                            Integer.parseInt(csvRecord.get(rel.getFileColumnNumber())));
                                } catch (NumberFormatException ex) {
                                    Logger.getLogger(Import.class.getName()).log(Level.SEVERE,
                                            "Number format error in line: " + (numberOfLinesRead + 1 + 1)
                                                    + ". ",
                                            ex);
                                    success = false;
                                }
                            }
                        } else {
                            tumour.setVariable(rel.getDatabaseVariableName(),
                                    StringEscapeUtils.unescapeCsv(csvRecord.get(rel.getFileColumnNumber())));
                        }
                    } else {
                        Logger.getLogger(Import.class.getName()).log(Level.INFO,
                                "Something wrong with tumour part of line " + numberOfLinesRead + ".",
                                new Exception("Error in line: " + numberOfLinesRead + ". Can't find field: "
                                        + rel.getDatabaseVariableName()));
                    }
                }
            }

            // Build source part
            Set<Source> sources = Collections.synchronizedSet(new LinkedHashSet<Source>());
            Source source = new Source();
            for (int i = 0; i < map.size(); i++) {
                Relation rel = map.get(i);
                if (rel.getDatabaseTableVariableID() >= 0
                        && rel.getDatabaseTableName().equalsIgnoreCase(Globals.SOURCE_TABLE_NAME)
                        && rel.getFileColumnNumber() < csvRecord.size()) {
                    if (rel.getFileColumnNumber() < csvRecord.size()) {
                        if (rel.getVariableType().equalsIgnoreCase("Number")) {
                            if (csvRecord.get(rel.getFileColumnNumber()).length() > 0) {
                                try {
                                    source.setVariable(rel.getDatabaseVariableName(),
                                            Integer.parseInt(csvRecord.get(rel.getFileColumnNumber())));
                                } catch (NumberFormatException ex) {
                                    Logger.getLogger(Import.class.getName()).log(Level.SEVERE,
                                            "Number format error in line: " + (numberOfLinesRead + 1 + 1)
                                                    + ". ",
                                            ex);
                                    success = false;
                                }
                            }
                        } else {
                            source.setVariable(rel.getDatabaseVariableName(),
                                    StringEscapeUtils.unescapeCsv(csvRecord.get(rel.getFileColumnNumber())));
                        }
                    } else {
                        Logger.getLogger(Import.class.getName()).log(Level.INFO,
                                "Something wrong with source part of line " + numberOfLinesRead + ".",
                                new Exception("Error in line: " + numberOfLinesRead + ". Can't find field: "
                                        + rel.getDatabaseVariableName()));
                    }

                }

            }
            sources.add(source);
            tumour.setSources(sources);

            // debugOut(tumour.toString());
            // add patient to the database
            Object patientID = patient.getVariable(io.getPatientIDVariableName());
            Object patientRecordID = patient.getVariable(io.getPatientRecordIDVariableName());

            if (patientID == null) {
                // save the record to get the new patientID;
                patientDatabaseRecordID = server.savePatient(patient);
                patient = (Patient) server.getRecord(patientDatabaseRecordID, Globals.PATIENT_TABLE_NAME,
                        false);
                patientID = patient.getVariable(io.getPatientIDVariableName());
                patientRecordID = patient.getVariable(io.getPatientRecordIDVariableName());
            }

            if (io.isDataFromPreviousCanReg()) {
                // set update date for the patient the same as for the tumour
                Object updateDate = tumour.getVariable(io.getTumourUpdateDateVariableName());
                patient.setVariable(io.getPatientUpdateDateVariableName(), updateDate);

                // Set the patientID the same as the tumourID initially

                // Object tumourSequence = tumour.getVariable(io.getTumourSequenceVariableName());
                Object tumourSequence = "1";

                String tumourSequenceString = tumourSequence + "";
                while (tumourSequenceString.length() < Globals.ADDITIONAL_DIGITS_FOR_PATIENT_RECORD) {
                    tumourSequenceString = "0" + tumourSequenceString;
                }
                patientRecordID = patientID + "" + tumourSequenceString;

                // If this is a multiple primary tumour...
                String mpCodeString = (String) tumour.getVariable(io.getMultiplePrimaryVariableName());
                if (mpCodeString != null && mpCodeString.length() > 0) {
                    patientID = lookUpPatientID(mpCodeString, patientID, mpCodes);

                    // rebuild sequenceNumber
                    Tumour[] tumours = new Tumour[0];
                    try {
                        tumours = CanRegClientApp.getApplication()
                                .getTumourRecordsBasedOnPatientID(patientID + "", false);
                    } catch (DistributedTableDescriptionException ex) {
                        Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex);
                    } catch (UnknownTableException ex) {
                        Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex);
                    }

                    tumourSequenceString = (tumours.length + 1) + "";
                    while (tumourSequenceString.length() < Globals.ADDITIONAL_DIGITS_FOR_PATIENT_RECORD) {
                        tumourSequenceString = "0" + tumourSequenceString;
                    }

                    patientRecordID = patientID + "" + tumourSequenceString;
                    Patient[] oldPatients = null;
                    try {
                        oldPatients = CanRegClientApp.getApplication().getPatientRecordsByID((String) patientID,
                                false);
                    } catch (RemoteException ex) {
                        Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex);
                    } catch (SecurityException ex) {
                        Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex);
                    } catch (DistributedTableDescriptionException ex) {
                        Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex);
                    } catch (RecordLockedException ex) {
                        Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex);
                    } catch (SQLException ex) {
                        Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex);
                    } catch (UnknownTableException ex) {
                        Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex);
                    }
                    for (Patient oldPatient : oldPatients) {
                        if (!Tools.newRecordContainsNewInfo(patient, oldPatient,
                                noNeedToLookAtPatientVariables)) {
                            needToSavePatientAgain = false;
                            patient = oldPatient;
                            patientRecordID = oldPatient.getVariable(io.getPatientRecordIDVariableName());
                        }
                    }
                }

                Object tumourID = patientRecordID + "" + tumourSequenceString;
                //
                patient.setVariable(io.getPatientIDVariableName(), patientID);
                tumour.setVariable(io.getTumourIDVariablename(), tumourID);
                // And store the record ID

                patient.setVariable(io.getPatientRecordIDVariableName(), patientRecordID);

                // Set the patient ID number on the tumour
                tumour.setVariable(io.getPatientIDTumourTableVariableName(), patientID);
                tumour.setVariable(io.getPatientRecordIDTumourTableVariableName(), patientRecordID);

                // Set the deprecated flag to 0 - no obsolete records from CR4
                tumour.setVariable(io.getObsoleteTumourFlagVariableName(), "0");
                patient.setVariable(io.getObsoletePatientFlagVariableName(), "0");

            }

            // Set the name in the firstName database
            String sex = (String) patient.getVariable(sexVariableName);
            if (sex != null && sex.length() > 0) {
                Integer sexCode = Integer.parseInt(sex);
                String firstNames = (String) patient.getVariable(firstNameVariableName);
                if (firstNames != null) {
                    String[] firstNamesArray = firstNames.split(" ");
                    for (String firstName : firstNamesArray) {
                        if (firstName != null && firstName.trim().length() > 0) {
                            // here we use the locale specific toUpperCase
                            Integer registeredSexCode = nameSexTable.get(firstName);
                            if (registeredSexCode == null) {
                                NameSexRecord nsr = new NameSexRecord();
                                nsr.setName(firstName);
                                nsr.setSex(sexCode);

                                server.saveNameSexRecord(nsr, false);

                                nameSexTable.put(firstName, sexCode);
                            } else if (registeredSexCode != sexCode) {
                                if (registeredSexCode != 9) {
                                    sexCode = 9;
                                    NameSexRecord nsr = new NameSexRecord();
                                    nsr.setName(firstName);
                                    nsr.setSex(sexCode);
                                    server.saveNameSexRecord(nsr, true);
                                    nameSexTable.remove(firstName);
                                    nameSexTable.put(firstName, sexCode);
                                }
                            }
                        }
                    }
                }
            }

            if (needToSavePatientAgain) {
                if (patientDatabaseRecordID > 0) {
                    server.editPatient(patient);
                } else {
                    patientDatabaseRecordID = server.savePatient(patient);
                }
            }
            if (patient != null && tumour != null) {
                String icd10 = (String) tumour.getVariable(io.getICD10VariableName());
                if (icd10 == null || icd10.trim().length() == 0) {
                    ConversionResult[] conversionResult = canreg.client.CanRegClientApp.getApplication()
                            .performConversions(Converter.ConversionName.ICDO3toICD10, patient, tumour);
                    tumour.setVariable(io.getICD10VariableName(), conversionResult[0].getValue());
                }
            }
            if (tumour.getVariable(io.getPatientIDTumourTableVariableName()) == null) {
                tumour.setVariable(io.getPatientIDTumourTableVariableName(), patientID);
            }

            if (tumour.getVariable(io.getPatientRecordIDTumourTableVariableName()) == null) {
                tumour.setVariable(io.getPatientRecordIDTumourTableVariableName(), patientRecordID);
            }

            int tumourDatabaseIDNumber = server.saveTumour(tumour);

            if (Thread.interrupted()) {
                //We've been interrupted: no more importing.
                throw new InterruptedException();
            }
        }
        task.firePropertyChange("finished", null, null);
        success = true;
    } catch (IOException ex) {
        Logger.getLogger(Import.class.getName()).log(Level.SEVERE,
                "Error in line: " + (numberOfLinesRead + 1 + 1) + ". ", ex);
        success = false;
    } catch (NumberFormatException ex) {
        Logger.getLogger(Import.class.getName()).log(Level.SEVERE,
                "Error in line: " + (numberOfLinesRead + 1 + 1) + ". ", ex);
        success = false;
    } catch (InterruptedException ex) {
        Logger.getLogger(Import.class.getName()).log(Level.INFO,
                "Interupted on line: " + (numberOfLinesRead + 1) + ". ", ex);
        success = true;
    } catch (IndexOutOfBoundsException ex) {
        Logger.getLogger(Import.class.getName()).log(Level.SEVERE,
                "Error in line: " + (numberOfLinesRead + 1 + 1) + ". ", ex);
        success = false;
    } catch (SQLException ex) {
        Logger.getLogger(Import.class.getName()).log(Level.SEVERE,
                "Error in line: " + (numberOfLinesRead + 1 + 1) + ". ", ex);
        success = false;
    } finally {
        if (parser != null) {
            try {
                parser.close();
            } catch (IOException ex) {
                Logger.getLogger(Import.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }
    return success;
}

From source file:com.ibm.watson.developer_cloud.professor_languo.model.stack_exchange.CorpusBuilderTest.java

private void deserialiezd_duplicate_threads_should_match_original_duplicate_threads()
        throws IngestionException {

    String csvFilePath = dupCorpusBuilder.getDupThreadDirPath()
            + StackExchangeConstants.DUP_THREAD_TSV_FILE_NAME
            + StackExchangeConstants.DUP_THREAD_TSV_FILE_EXTENSION;
    File csvData = new File(csvFilePath);

    CSVParser parser;/*  ww  w  .j a va  2 s .  co  m*/
    List<CSVRecord> records;
    try {
        parser = CSVParser.parse(csvData, Charset.defaultCharset(), CSVFormat.TDF.withHeader());
        records = parser.getRecords();
    } catch (IOException e) {
        throw new IngestionException(e);
    }

    Set<StackExchangeThread> dupThreadSet = dupCorpusBuilder.getDupThreadSetFromBinFiles();
    for (StackExchangeThread thread : dupThreadSet) {
        String binfileName = dupCorpusBuilder.getDupThreadDirPath() + thread.getId()
                + StackExchangeConstants.BIN_FILE_SUFFIX;
        CSVRecord matchRecord = null;
        for (CSVRecord record : records)
            if (Integer.parseInt(record.get(0)) == thread.getId()) {
                matchRecord = record;
                break;
            }
        assertTrue(matchRecord != null);
        // TODO haven't check the originId yet since it requires the new
        // method to get origin id from
        String deserTitle = matchRecord.get(1), deserBody = matchRecord.get(2),
                deserFileName = matchRecord.get(4), deserTags = matchRecord.get(5);
        assertEquals(deserTitle, thread.getQuestion().getTitle());
        assertEquals(deserBody, thread.getQuestion().getBody());
        assertEquals(deserFileName, binfileName);
        assertEquals(deserTags, thread.getConcatenatedTagsText());
    }
}