List of usage examples for org.apache.commons.csv.CSVParser.parse
public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException
From source file:org.apache.phoenix.util.CSVCommonsLoader.java
/**
 * Upserts data from a CSV file.
 *
 * Data is batched up based on connection batch size.
 * Column PDataType is read from metadata and is used to convert
 * column value to correct type before upsert.
 *
 * The constructor determines the format for the CSV files.
 *
 * The file is decoded as UTF-8. The parser opened here is closed before this
 * method returns, including when the upsert fails.
 *
 * @param fileName path of the CSV file to load
 * @throws Exception if the file cannot be opened or the upsert of its records fails
 */
public void upsert(String fileName) throws Exception {
    // try-with-resources releases the underlying file handle even when the
    // delegated upsert throws; a repeated close() has no effect per the
    // java.io.Closeable contract, so it is safe if the delegate also closes it.
    try (CSVParser parser = CSVParser.parse(new File(fileName), Charsets.UTF_8, format)) {
        upsert(parser);
    }
}
From source file:org.asoem.greyfish.utils.space.cluster.DBSCANTest.java
/**
 * Opens a CSV parser over the {@code iris.csv} resource located relative to
 * {@link DBSCANTest} and stores it in {@code csvRecords} before each test.
 *
 * The format is comma-delimited, takes its header from the first record, uses
 * a double quote as the quote character and the NON_NUMERIC quote mode. The
 * resource is decoded with the platform default charset.
 *
 * @throws Exception if the resource cannot be located or opened
 */
@Before
public void setUp() throws Exception {
    final CSVFormat irisFormat = CSVFormat.newFormat(',')
            .withHeader()
            .withQuote('"')
            .withQuoteMode(QuoteMode.NON_NUMERIC);
    final Charset platformCharset = Charset.defaultCharset();

    csvRecords = CSVParser.parse(
            Resources.getResource(DBSCANTest.class, "iris.csv"),
            platformCharset,
            irisFormat);
}
From source file:org.commonvox.hbase_column_manager.TestRepositoryAdmin.java
public void testOutputReportOnInvalidColumns(boolean useMapReduce) throws Exception { System.out.println("#testOutputReportOnInvalidColumnQualifiers " + (useMapReduce ? "USING MAPREDUCE " : "") + "has been invoked."); // environment cleanup before testing initializeTestNamespaceAndTableObjects(); clearTestingEnvironment();/*from w ww.ja v a 2 s . co m*/ // add schema and data to HBase // NOTE that test/resources/hbase-column-manager.xml contains wildcarded excludedTables entries Configuration configuration = MConfiguration.create(); createSchemaStructuresInHBase(configuration, false, false); createColumnDefinitions(configuration); // For NAMESPACE01_TABLE01 colFamily CF01, only valid colQualifiers are: // COLQUALIFIER01 & COLQUALIFIER02. // For NAMESPACE01_TABLE01 colFamily CF02, only valid colQualifier is: // COLQUALIFIER03. final byte[] BAD_QUALIFIER01 = Bytes.toBytes("bad_qualifier"); final byte[] BAD_QUALIFIER02 = Bytes.toBytes("very_bad_qualifier"); final byte[] GOOD_URL01 = Bytes.toBytes("https://google.com"); final byte[] BAD_URL01 = Bytes.toBytes("ftp://google.com"); final byte[] BAD_URL02 = Bytes.toBytes("another_invalid_value"); final byte[] BAD_URL03 = Bytes.toBytes("not_even_close"); try (Connection connection = MConnectionFactory.createConnection(configuration)) { try (Table namespace01Table01 = connection.getTable(NAMESPACE01_TABLE01)) { // put two rows with valid column qualifiers namespace01Table01.put(new Put(ROW_ID_04).addColumn(CF02, COLQUALIFIER03, BAD_URL01)); // invalid value namespace01Table01.put(new Put(ROW_ID_01).addColumn(CF01, COLQUALIFIER01, VALUE_2_BYTES_LONG) .addColumn(CF01, COLQUALIFIER02, VALUE_5_BYTES_LONG) .addColumn(CF02, COLQUALIFIER03, GOOD_URL01)); namespace01Table01.put(new Put(ROW_ID_02).addColumn(CF01, COLQUALIFIER01, VALUE_9_BYTES_LONG) .addColumn(CF01, COLQUALIFIER02, VALUE_82_BYTES_LONG). 
// invalid length addColumn(CF02, COLQUALIFIER03, BAD_URL01)); // invalid value namespace01Table01.put(new Put(ROW_ID_04).addColumn(CF02, COLQUALIFIER03, BAD_URL02)); // invalid value // put two rows with invalid column qualifiers (3 bad qualifiers in all) namespace01Table01.put(new Put(ROW_ID_03).addColumn(CF01, BAD_QUALIFIER01, VALUE_5_BYTES_LONG). // invalid qualifier addColumn(CF01, BAD_QUALIFIER02, VALUE_2_BYTES_LONG)); // invalid qualifier namespace01Table01.put(new Put(ROW_ID_04).addColumn(CF01, BAD_QUALIFIER02, VALUE_82_BYTES_LONG). // invalid qualifier addColumn(CF02, COLQUALIFIER03, BAD_URL03)); // invalid value // put one with one good column qualifier and two bad ones namespace01Table01.put(new Put(ROW_ID_05).addColumn(CF02, COLQUALIFIER02, VALUE_9_BYTES_LONG). // invalid qualifier (for CF02) addColumn(CF02, COLQUALIFIER03, VALUE_5_BYTES_LONG). // invalid VALUE!! addColumn(CF01, BAD_QUALIFIER02, VALUE_5_BYTES_LONG). // invalid qualifier addColumn(CF01, COLQUALIFIER02, VALUE_82_BYTES_LONG)); // invalid length } } // file setup final String TARGET_DIRECTORY = "target/"; // for standalone (non-JUnit) execution final String TEMP_PREFIX = "temp."; final String INVALID_COLUMNS_SUMMARY_FILE = TEMP_PREFIX + "invalidColumns.summary.csv"; final String INVALID_COLUMNS_VERBOSE_FILE = TEMP_PREFIX + "invalidColumns.verbose.csv"; final String INVALID_COLUMNS_CF01_SUMMARY_FILE = TEMP_PREFIX + "invalidColumns.table01cf01.summary.csv"; final String INVALID_COLUMNS_CF01_VERBOSE_FILE = TEMP_PREFIX + "invalidColumns.table01cf01verbose.csv"; final String INVALID_COLUMNS_CF02_SUMMARY_FILE = TEMP_PREFIX + "invalidColumns.table01cf02.summary.csv"; final String INVALID_COLUMNS_CF02_VERBOSE_FILE = TEMP_PREFIX + "invalidColumns.table01cf02verbose.csv"; final String INVALID_COLUMNS_EMPTY_TABLE = TEMP_PREFIX + "invalidColumnsEmptyTable.summary.csv"; File fileForSummaryTable01; File fileForVerboseTable01; File fileForSummaryTable01Cf01; File fileForVerboseTable01Cf01; File 
fileForSummaryTable01Cf02; File fileForVerboseTable01Cf02; File fileForSummaryOfEmptyTable; try { fileForSummaryTable01 = tempTestFolder.newFile(INVALID_COLUMNS_SUMMARY_FILE); fileForVerboseTable01 = tempTestFolder.newFile(INVALID_COLUMNS_VERBOSE_FILE); fileForSummaryTable01Cf01 = tempTestFolder.newFile(INVALID_COLUMNS_CF01_SUMMARY_FILE); fileForVerboseTable01Cf01 = tempTestFolder.newFile(INVALID_COLUMNS_CF01_VERBOSE_FILE); fileForSummaryTable01Cf02 = tempTestFolder.newFile(INVALID_COLUMNS_CF02_SUMMARY_FILE); fileForVerboseTable01Cf02 = tempTestFolder.newFile(INVALID_COLUMNS_CF02_VERBOSE_FILE); fileForSummaryOfEmptyTable = tempTestFolder.newFile(INVALID_COLUMNS_EMPTY_TABLE); } catch (IllegalStateException e) { // standalone (non-JUnit) execution fileForSummaryTable01 = new File(TARGET_DIRECTORY + INVALID_COLUMNS_SUMMARY_FILE); fileForVerboseTable01 = new File(TARGET_DIRECTORY + INVALID_COLUMNS_VERBOSE_FILE); fileForSummaryTable01Cf01 = new File(TARGET_DIRECTORY + INVALID_COLUMNS_CF01_SUMMARY_FILE); fileForVerboseTable01Cf01 = new File(TARGET_DIRECTORY + INVALID_COLUMNS_CF01_VERBOSE_FILE); fileForSummaryTable01Cf02 = new File(TARGET_DIRECTORY + INVALID_COLUMNS_CF02_SUMMARY_FILE); fileForVerboseTable01Cf02 = new File(TARGET_DIRECTORY + INVALID_COLUMNS_CF02_VERBOSE_FILE); fileForSummaryOfEmptyTable = new File(TARGET_DIRECTORY + INVALID_COLUMNS_EMPTY_TABLE); } String reportGenerationFailure = COLUMN_INVALIDITY_REPORT_FAILURE + "RepositoryAdmin#outputReportOnInvalidColumnQualifiers method " + "returned unexpected boolean value"; // generate InvalidColumnQualifier reports try (Connection connection = ConnectionFactory.createConnection(configuration)) { RepositoryAdmin repositoryAdmin = new RepositoryAdmin(connection); assertTrue(reportGenerationFailure, repositoryAdmin.outputReportOnInvalidColumnQualifiers( fileForSummaryTable01, NAMESPACE01_TABLE01, false, useMapReduce)); assertTrue(reportGenerationFailure, repositoryAdmin.outputReportOnInvalidColumnQualifiers( 
fileForVerboseTable01, NAMESPACE01_TABLE01, true, useMapReduce)); assertTrue(reportGenerationFailure, repositoryAdmin.outputReportOnInvalidColumnQualifiers( fileForSummaryTable01Cf01, NAMESPACE01_TABLE01, CF01, false, useMapReduce)); assertTrue(reportGenerationFailure, repositoryAdmin.outputReportOnInvalidColumnQualifiers( fileForVerboseTable01Cf01, NAMESPACE01_TABLE01, CF01, true, useMapReduce)); assertTrue(reportGenerationFailure, repositoryAdmin.outputReportOnInvalidColumnQualifiers( fileForSummaryTable01Cf02, NAMESPACE01_TABLE01, CF02, false, useMapReduce)); assertTrue(reportGenerationFailure, repositoryAdmin.outputReportOnInvalidColumnQualifiers( fileForVerboseTable01Cf02, NAMESPACE01_TABLE01, CF02, true, useMapReduce)); try { repositoryAdmin.outputReportOnInvalidColumnQualifiers(fileForSummaryOfEmptyTable, NAMESPACE02_TABLE03, false, useMapReduce); fail(reportGenerationFailure + TABLE_NOT_INCLUDED_EXCEPTION_FAILURE); } catch (TableNotIncludedForProcessingException e) { } } // read in reports and validate contents try (CSVParser parser = CSVParser.parse(fileForSummaryTable01, StandardCharsets.UTF_8, ColumnInvalidityReport.SUMMARY_CSV_FORMAT.withSkipHeaderRecord())) { int recordCount = 0; for (CSVRecord record : parser) { recordCount++; assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Namespace value not as expected", NAMESPACE01_TABLE01.getNamespaceAsString(), record.get(ColumnInvalidityReport.SummaryReportHeader.NAMESPACE)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Table value not as expected", NAMESPACE01_TABLE01.getQualifierAsString(), record.get(ColumnInvalidityReport.SummaryReportHeader.TABLE)); switch (recordCount) { case 1: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF01), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " 
ColQualifier value not as expected", Bytes.toString(BAD_QUALIFIER01), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_QUALIFIER)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Occurrences-count value not as expected", String.valueOf(1), record.get(ColumnInvalidityReport.SummaryReportHeader.INVALID_OCCURRENCE_COUNT)); break; case 2: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF01), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(BAD_QUALIFIER02), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_QUALIFIER)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Occurrences-count value not as expected", String.valueOf(3), record.get(ColumnInvalidityReport.SummaryReportHeader.INVALID_OCCURRENCE_COUNT)); break; case 3: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF02), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(COLQUALIFIER02), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_QUALIFIER)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Occurrences-count value not as expected", String.valueOf(1), record.get(ColumnInvalidityReport.SummaryReportHeader.INVALID_OCCURRENCE_COUNT)); break; } } assertEquals(COLUMN_INVALIDITY_REPORT_FAILURE + "Record count in CSV file not as expected", 3, recordCount); } try (CSVParser parser = CSVParser.parse(fileForVerboseTable01, StandardCharsets.UTF_8, ColumnInvalidityReport.VERBOSE_CSV_FORMAT.withSkipHeaderRecord())) { int recordCount = 0; for (CSVRecord record : parser) { 
recordCount++; assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Namespace value not as expected", NAMESPACE01_TABLE01.getNamespaceAsString(), record.get(ColumnInvalidityReport.VerboseReportHeader.NAMESPACE)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Table value not as expected", NAMESPACE01_TABLE01.getQualifierAsString(), record.get(ColumnInvalidityReport.VerboseReportHeader.TABLE)); switch (recordCount) { case 1: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF01), record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(BAD_QUALIFIER01), record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_QUALIFIER)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_03), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column value not as expected", Bytes.toString(VALUE_5_BYTES_LONG), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; case 2: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF01), record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(BAD_QUALIFIER02), record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_QUALIFIER)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_03), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + 
" Column value not as expected", Bytes.toString(VALUE_2_BYTES_LONG), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; case 3: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF01), record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(BAD_QUALIFIER02), record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_QUALIFIER)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_04), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column value not as expected", Bytes.toString(VALUE_82_BYTES_LONG), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; case 4: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF01), record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(BAD_QUALIFIER02), record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_QUALIFIER)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_05), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column value not as expected", Bytes.toString(VALUE_5_BYTES_LONG), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; case 5: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF02), 
record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(COLQUALIFIER02), record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_QUALIFIER)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_05), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column value not as expected", Bytes.toString(VALUE_9_BYTES_LONG), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; } } assertEquals(COLUMN_INVALIDITY_REPORT_FAILURE + "Record count in CSV file not as expected", 5, recordCount); } try (CSVParser parser = CSVParser.parse(fileForSummaryTable01Cf01, StandardCharsets.UTF_8, ColumnInvalidityReport.SUMMARY_CSV_FORMAT.withSkipHeaderRecord())) { int recordCount = 0; for (CSVRecord record : parser) { recordCount++; assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Namespace value not as expected", NAMESPACE01_TABLE01.getNamespaceAsString(), record.get(ColumnInvalidityReport.SummaryReportHeader.NAMESPACE)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Table value not as expected", NAMESPACE01_TABLE01.getQualifierAsString(), record.get(ColumnInvalidityReport.SummaryReportHeader.TABLE)); switch (recordCount) { case 1: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF01), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(BAD_QUALIFIER01), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_QUALIFIER)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " 
Occurrences-count value not as expected", String.valueOf(1), record.get(ColumnInvalidityReport.SummaryReportHeader.INVALID_OCCURRENCE_COUNT)); break; case 2: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF01), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(BAD_QUALIFIER02), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_QUALIFIER)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Occurrences-count value not as expected", String.valueOf(3), record.get(ColumnInvalidityReport.SummaryReportHeader.INVALID_OCCURRENCE_COUNT)); break; } } assertEquals(COLUMN_INVALIDITY_REPORT_FAILURE + "Record count in CSV file not as expected", 2, recordCount); } try (CSVParser parser = CSVParser.parse(fileForVerboseTable01Cf01, StandardCharsets.UTF_8, ColumnInvalidityReport.VERBOSE_CSV_FORMAT.withSkipHeaderRecord())) { int recordCount = 0; for (CSVRecord record : parser) { recordCount++; assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Namespace value not as expected", NAMESPACE01_TABLE01.getNamespaceAsString(), record.get(ColumnInvalidityReport.VerboseReportHeader.NAMESPACE)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Table value not as expected", NAMESPACE01_TABLE01.getQualifierAsString(), record.get(ColumnInvalidityReport.VerboseReportHeader.TABLE)); switch (recordCount) { case 1: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF01), record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(BAD_QUALIFIER01), 
record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_QUALIFIER)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_03), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column value not as expected", Bytes.toString(VALUE_5_BYTES_LONG), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; case 2: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF01), record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(BAD_QUALIFIER02), record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_QUALIFIER)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_03), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column value not as expected", Bytes.toString(VALUE_2_BYTES_LONG), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; case 3: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF01), record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(BAD_QUALIFIER02), record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_QUALIFIER)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_04), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec 
" + recordCount + " Column value not as expected", Bytes.toString(VALUE_82_BYTES_LONG), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; case 4: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF01), record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(BAD_QUALIFIER02), record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_QUALIFIER)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_05), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column value not as expected", Bytes.toString(VALUE_5_BYTES_LONG), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; } } assertEquals(COLUMN_INVALIDITY_REPORT_FAILURE + "Record count in CSV file not as expected", 4, recordCount); } try (CSVParser parser = CSVParser.parse(fileForSummaryTable01Cf02, StandardCharsets.UTF_8, ColumnInvalidityReport.SUMMARY_CSV_FORMAT.withSkipHeaderRecord())) { int recordCount = 0; for (CSVRecord record : parser) { recordCount++; assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Namespace value not as expected", NAMESPACE01_TABLE01.getNamespaceAsString(), record.get(ColumnInvalidityReport.SummaryReportHeader.NAMESPACE)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Table value not as expected", NAMESPACE01_TABLE01.getQualifierAsString(), record.get(ColumnInvalidityReport.SummaryReportHeader.TABLE)); switch (recordCount) { case 1: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF02), 
record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(COLQUALIFIER02), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_QUALIFIER)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Occurrences-count value not as expected", String.valueOf(1), record.get(ColumnInvalidityReport.SummaryReportHeader.INVALID_OCCURRENCE_COUNT)); break; } } assertEquals(COLUMN_INVALIDITY_REPORT_FAILURE + "Record count in CSV file not as expected", 1, recordCount); } try (CSVParser parser = CSVParser.parse(fileForVerboseTable01Cf02, StandardCharsets.UTF_8, ColumnInvalidityReport.VERBOSE_CSV_FORMAT.withSkipHeaderRecord())) { int recordCount = 0; for (CSVRecord record : parser) { recordCount++; assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Namespace value not as expected", NAMESPACE01_TABLE01.getNamespaceAsString(), record.get(ColumnInvalidityReport.VerboseReportHeader.NAMESPACE)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Table value not as expected", NAMESPACE01_TABLE01.getQualifierAsString(), record.get(ColumnInvalidityReport.VerboseReportHeader.TABLE)); switch (recordCount) { case 1: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF02), record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(COLQUALIFIER02), record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_QUALIFIER)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_05), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + 
recordCount + " Column value not as expected", Bytes.toString(VALUE_9_BYTES_LONG), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; } } assertEquals(COLUMN_INVALIDITY_REPORT_FAILURE + "Record count in CSV file not as expected", 1, recordCount); } // generate InvalidColumnLengths reports try (Connection connection = ConnectionFactory.createConnection(configuration)) { RepositoryAdmin repositoryAdmin = new RepositoryAdmin(connection); assertTrue(reportGenerationFailure, repositoryAdmin.outputReportOnInvalidColumnLengths( fileForSummaryTable01, NAMESPACE01_TABLE01, false, false, useMapReduce)); assertTrue(reportGenerationFailure, repositoryAdmin.outputReportOnInvalidColumnLengths( fileForVerboseTable01, NAMESPACE01_TABLE01, true, false, useMapReduce)); assertTrue(reportGenerationFailure, repositoryAdmin.outputReportOnInvalidColumnLengths( fileForSummaryTable01Cf01, NAMESPACE01_TABLE01, CF01, false, false, useMapReduce)); assertTrue(reportGenerationFailure, repositoryAdmin.outputReportOnInvalidColumnLengths( fileForVerboseTable01Cf01, NAMESPACE01_TABLE01, CF01, true, false, useMapReduce)); assertTrue(reportGenerationFailure, !repositoryAdmin.outputReportOnInvalidColumnLengths(fileForSummaryTable01Cf02, NAMESPACE01_TABLE01, CF02, false, false, useMapReduce)); assertTrue(reportGenerationFailure, !repositoryAdmin.outputReportOnInvalidColumnLengths(fileForVerboseTable01Cf02, NAMESPACE01_TABLE01, CF02, true, false, useMapReduce)); try { repositoryAdmin.outputReportOnInvalidColumnLengths(fileForSummaryOfEmptyTable, NAMESPACE02_TABLE03, false, false, useMapReduce); fail(reportGenerationFailure + TABLE_NOT_INCLUDED_EXCEPTION_FAILURE); } catch (TableNotIncludedForProcessingException e) { } } // read in reports and validate contents try (CSVParser parser = CSVParser.parse(fileForSummaryTable01, StandardCharsets.UTF_8, ColumnInvalidityReport.SUMMARY_CSV_FORMAT.withSkipHeaderRecord())) { int recordCount = 0; for (CSVRecord record : parser) { 
recordCount++; assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Namespace value not as expected", NAMESPACE01_TABLE01.getNamespaceAsString(), record.get(ColumnInvalidityReport.SummaryReportHeader.NAMESPACE)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Table value not as expected", NAMESPACE01_TABLE01.getQualifierAsString(), record.get(ColumnInvalidityReport.SummaryReportHeader.TABLE)); switch (recordCount) { case 1: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF01), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(COLQUALIFIER02), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_QUALIFIER)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Occurrences-count value not as expected", String.valueOf(2), record.get(ColumnInvalidityReport.SummaryReportHeader.INVALID_OCCURRENCE_COUNT)); break; } } assertEquals(COLUMN_INVALIDITY_REPORT_FAILURE + "Record count in CSV file not as expected", 1, recordCount); } try (CSVParser parser = CSVParser.parse(fileForVerboseTable01, StandardCharsets.UTF_8, ColumnInvalidityReport.VERBOSE_CSV_FORMAT.withSkipHeaderRecord())) { int recordCount = 0; for (CSVRecord record : parser) { recordCount++; assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Namespace value not as expected", NAMESPACE01_TABLE01.getNamespaceAsString(), record.get(ColumnInvalidityReport.VerboseReportHeader.NAMESPACE)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Table value not as expected", NAMESPACE01_TABLE01.getQualifierAsString(), record.get(ColumnInvalidityReport.VerboseReportHeader.TABLE)); switch (recordCount) { case 1: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " 
ColFamily value not as expected", Bytes.toString(CF01), record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(COLQUALIFIER02), record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_QUALIFIER)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_02), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column value not as expected", Bytes.toString(VALUE_82_BYTES_LONG), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; case 2: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF01), record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(COLQUALIFIER02), record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_QUALIFIER)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_05), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column value not as expected", Bytes.toString(VALUE_82_BYTES_LONG), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; } } assertEquals(COLUMN_INVALIDITY_REPORT_FAILURE + "Record count in CSV file not as expected", 2, recordCount); } try (CSVParser parser = CSVParser.parse(fileForSummaryTable01Cf01, StandardCharsets.UTF_8, ColumnInvalidityReport.SUMMARY_CSV_FORMAT.withSkipHeaderRecord())) { int recordCount = 0; for (CSVRecord record : parser) { recordCount++; assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + 
recordCount + " Namespace value not as expected", NAMESPACE01_TABLE01.getNamespaceAsString(), record.get(ColumnInvalidityReport.SummaryReportHeader.NAMESPACE)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Table value not as expected", NAMESPACE01_TABLE01.getQualifierAsString(), record.get(ColumnInvalidityReport.SummaryReportHeader.TABLE)); switch (recordCount) { case 1: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF01), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(COLQUALIFIER02), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_QUALIFIER)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Occurrences-count value not as expected", String.valueOf(2), record.get(ColumnInvalidityReport.SummaryReportHeader.INVALID_OCCURRENCE_COUNT)); break; } } assertEquals(COLUMN_INVALIDITY_REPORT_FAILURE + "Record count in CSV file not as expected", 1, recordCount); } try (CSVParser parser = CSVParser.parse(fileForVerboseTable01Cf01, StandardCharsets.UTF_8, ColumnInvalidityReport.VERBOSE_CSV_FORMAT.withSkipHeaderRecord())) { int recordCount = 0; for (CSVRecord record : parser) { recordCount++; assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Namespace value not as expected", NAMESPACE01_TABLE01.getNamespaceAsString(), record.get(ColumnInvalidityReport.VerboseReportHeader.NAMESPACE)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Table value not as expected", NAMESPACE01_TABLE01.getQualifierAsString(), record.get(ColumnInvalidityReport.VerboseReportHeader.TABLE)); switch (recordCount) { case 1: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF01), 
record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(COLQUALIFIER02), record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_QUALIFIER)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_02), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column value not as expected", Bytes.toString(VALUE_82_BYTES_LONG), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; case 2: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF01), record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(COLQUALIFIER02), record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_QUALIFIER)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_05), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column value not as expected", Bytes.toString(VALUE_82_BYTES_LONG), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; } } assertEquals(COLUMN_INVALIDITY_REPORT_FAILURE + "Record count in CSV file not as expected", 2, recordCount); } try (CSVParser parser = CSVParser.parse(fileForSummaryTable01Cf02, StandardCharsets.UTF_8, ColumnInvalidityReport.SUMMARY_CSV_FORMAT.withSkipHeaderRecord())) { int recordCount = 0; for (CSVRecord record : parser) { recordCount++; } assertEquals(COLUMN_INVALIDITY_REPORT_FAILURE + "Record count in CSV file not as expected", 0, recordCount); } try 
(CSVParser parser = CSVParser.parse(fileForVerboseTable01Cf02, StandardCharsets.UTF_8, ColumnInvalidityReport.VERBOSE_CSV_FORMAT.withSkipHeaderRecord())) { int recordCount = 0; for (CSVRecord record : parser) { recordCount++; } assertEquals(COLUMN_INVALIDITY_REPORT_FAILURE + "Record count in CSV file not as expected", 0, recordCount); } // generate InvalidColumnValue reports try (Connection connection = ConnectionFactory.createConnection(configuration)) { RepositoryAdmin repositoryAdmin = new RepositoryAdmin(connection); assertTrue(reportGenerationFailure, repositoryAdmin.outputReportOnInvalidColumnValues( fileForSummaryTable01, NAMESPACE01_TABLE01, false, false, useMapReduce)); assertTrue(reportGenerationFailure, repositoryAdmin.outputReportOnInvalidColumnValues( fileForVerboseTable01, NAMESPACE01_TABLE01, true, false, useMapReduce)); assertTrue(reportGenerationFailure, !repositoryAdmin.outputReportOnInvalidColumnValues(fileForSummaryTable01Cf01, NAMESPACE01_TABLE01, CF01, false, false, useMapReduce)); assertTrue(reportGenerationFailure, !repositoryAdmin.outputReportOnInvalidColumnValues(fileForVerboseTable01Cf01, NAMESPACE01_TABLE01, CF01, true, false, useMapReduce)); assertTrue(reportGenerationFailure, repositoryAdmin.outputReportOnInvalidColumnValues( fileForSummaryTable01Cf02, NAMESPACE01_TABLE01, CF02, false, false, useMapReduce)); assertTrue(reportGenerationFailure, repositoryAdmin.outputReportOnInvalidColumnValues( fileForVerboseTable01Cf02, NAMESPACE01_TABLE01, CF02, true, false, useMapReduce)); try { repositoryAdmin.outputReportOnInvalidColumnValues(fileForSummaryOfEmptyTable, NAMESPACE02_TABLE03, false, false, useMapReduce); fail(reportGenerationFailure + TABLE_NOT_INCLUDED_EXCEPTION_FAILURE); } catch (TableNotIncludedForProcessingException e) { } } // read in reports and validate contents try (CSVParser parser = CSVParser.parse(fileForSummaryTable01, StandardCharsets.UTF_8, ColumnInvalidityReport.SUMMARY_CSV_FORMAT.withSkipHeaderRecord())) { int 
recordCount = 0; for (CSVRecord record : parser) { recordCount++; assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Namespace value not as expected", NAMESPACE01_TABLE01.getNamespaceAsString(), record.get(ColumnInvalidityReport.SummaryReportHeader.NAMESPACE)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Table value not as expected", NAMESPACE01_TABLE01.getQualifierAsString(), record.get(ColumnInvalidityReport.SummaryReportHeader.TABLE)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF02), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(COLQUALIFIER03), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_QUALIFIER)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Occurrences-count value not as expected", String.valueOf(3), record.get(ColumnInvalidityReport.SummaryReportHeader.INVALID_OCCURRENCE_COUNT)); } assertEquals(COLUMN_INVALIDITY_REPORT_FAILURE + "Record count in CSV file not as expected", 1, recordCount); } try (CSVParser parser = CSVParser.parse(fileForVerboseTable01, StandardCharsets.UTF_8, ColumnInvalidityReport.VERBOSE_CSV_FORMAT.withSkipHeaderRecord())) { int recordCount = 0; for (CSVRecord record : parser) { recordCount++; assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Namespace value not as expected", NAMESPACE01_TABLE01.getNamespaceAsString(), record.get(ColumnInvalidityReport.SummaryReportHeader.NAMESPACE)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Table value not as expected", NAMESPACE01_TABLE01.getQualifierAsString(), record.get(ColumnInvalidityReport.SummaryReportHeader.TABLE)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as 
expected", Bytes.toString(CF02), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(COLQUALIFIER03), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_QUALIFIER)); switch (recordCount) { case 1: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_02), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column value not as expected", Bytes.toString(BAD_URL01), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; case 2: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_04), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column value not as expected", Bytes.toString(BAD_URL03), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; case 3: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_05), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column value not as expected", Bytes.toString(VALUE_5_BYTES_LONG), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; } } assertEquals(COLUMN_INVALIDITY_REPORT_FAILURE + "Record count in CSV file not as expected", 3, recordCount); } try (CSVParser parser = CSVParser.parse(fileForSummaryTable01Cf01, StandardCharsets.UTF_8, ColumnInvalidityReport.SUMMARY_CSV_FORMAT.withSkipHeaderRecord())) { int recordCount = 0; for (CSVRecord record : parser) { recordCount++; } assertEquals(COLUMN_INVALIDITY_REPORT_FAILURE + "Record count in CSV 
file not as expected", 0, recordCount); } try (CSVParser parser = CSVParser.parse(fileForVerboseTable01Cf01, StandardCharsets.UTF_8, ColumnInvalidityReport.VERBOSE_CSV_FORMAT.withSkipHeaderRecord())) { int recordCount = 0; for (CSVRecord record : parser) { recordCount++; } assertEquals(COLUMN_INVALIDITY_REPORT_FAILURE + "Record count in CSV file not as expected", 0, recordCount); } try (CSVParser parser = CSVParser.parse(fileForSummaryTable01Cf02, StandardCharsets.UTF_8, ColumnInvalidityReport.SUMMARY_CSV_FORMAT.withSkipHeaderRecord())) { int recordCount = 0; for (CSVRecord record : parser) { recordCount++; assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Namespace value not as expected", NAMESPACE01_TABLE01.getNamespaceAsString(), record.get(ColumnInvalidityReport.SummaryReportHeader.NAMESPACE)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Table value not as expected", NAMESPACE01_TABLE01.getQualifierAsString(), record.get(ColumnInvalidityReport.SummaryReportHeader.TABLE)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF02), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(COLQUALIFIER03), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_QUALIFIER)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Occurrences-count value not as expected", String.valueOf(3), record.get(ColumnInvalidityReport.SummaryReportHeader.INVALID_OCCURRENCE_COUNT)); } assertEquals(COLUMN_INVALIDITY_REPORT_FAILURE + "Record count in CSV file not as expected", 1, recordCount); } try (CSVParser parser = CSVParser.parse(fileForVerboseTable01Cf02, StandardCharsets.UTF_8, ColumnInvalidityReport.VERBOSE_CSV_FORMAT.withSkipHeaderRecord())) { int recordCount = 0; for (CSVRecord record : 
parser) { recordCount++; assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Namespace value not as expected", NAMESPACE01_TABLE01.getNamespaceAsString(), record.get(ColumnInvalidityReport.SummaryReportHeader.NAMESPACE)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Table value not as expected", NAMESPACE01_TABLE01.getQualifierAsString(), record.get(ColumnInvalidityReport.SummaryReportHeader.TABLE)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF02), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(COLQUALIFIER03), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_QUALIFIER)); switch (recordCount) { case 1: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_02), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column value not as expected", Bytes.toString(BAD_URL01), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; case 2: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_04), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column value not as expected", Bytes.toString(BAD_URL03), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; case 3: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_05), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column 
value not as expected", Bytes.toString(VALUE_5_BYTES_LONG), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; } } assertEquals(COLUMN_INVALIDITY_REPORT_FAILURE + "Record count in CSV file not as expected", 3, recordCount); } // generate InvalidColumnValue reports WITH includeAllCells == TRUE!! try (Connection connection = ConnectionFactory.createConnection(configuration)) { RepositoryAdmin repositoryAdmin = new RepositoryAdmin(connection); assertTrue(reportGenerationFailure, repositoryAdmin.outputReportOnInvalidColumnValues( fileForSummaryTable01, NAMESPACE01_TABLE01, false, true, useMapReduce)); assertTrue(reportGenerationFailure, repositoryAdmin.outputReportOnInvalidColumnValues( fileForVerboseTable01, NAMESPACE01_TABLE01, true, true, useMapReduce)); assertTrue(reportGenerationFailure, !repositoryAdmin.outputReportOnInvalidColumnValues(fileForSummaryTable01Cf01, NAMESPACE01_TABLE01, CF01, false, true, useMapReduce)); assertTrue(reportGenerationFailure, !repositoryAdmin.outputReportOnInvalidColumnValues(fileForVerboseTable01Cf01, NAMESPACE01_TABLE01, CF01, true, true, useMapReduce)); assertTrue(reportGenerationFailure, repositoryAdmin.outputReportOnInvalidColumnValues( fileForSummaryTable01Cf02, NAMESPACE01_TABLE01, CF02, false, true, useMapReduce)); assertTrue(reportGenerationFailure, repositoryAdmin.outputReportOnInvalidColumnValues( fileForVerboseTable01Cf02, NAMESPACE01_TABLE01, CF02, true, true, useMapReduce)); try { repositoryAdmin.outputReportOnInvalidColumnValues(fileForSummaryOfEmptyTable, NAMESPACE02_TABLE03, false, true, useMapReduce); fail(reportGenerationFailure + TABLE_NOT_INCLUDED_EXCEPTION_FAILURE); } catch (TableNotIncludedForProcessingException e) { } } // read in reports and validate contents try (CSVParser parser = CSVParser.parse(fileForSummaryTable01, StandardCharsets.UTF_8, ColumnInvalidityReport.SUMMARY_CSV_FORMAT.withSkipHeaderRecord())) { int recordCount = 0; for (CSVRecord record : parser) { recordCount++; 
assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Namespace value not as expected", NAMESPACE01_TABLE01.getNamespaceAsString(), record.get(ColumnInvalidityReport.SummaryReportHeader.NAMESPACE)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Table value not as expected", NAMESPACE01_TABLE01.getQualifierAsString(), record.get(ColumnInvalidityReport.SummaryReportHeader.TABLE)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF02), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(COLQUALIFIER03), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_QUALIFIER)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Occurrences-count value not as expected", String.valueOf(3), record.get(ColumnInvalidityReport.SummaryReportHeader.INVALID_OCCURRENCE_COUNT)); } assertEquals(COLUMN_INVALIDITY_REPORT_FAILURE + "Record count in CSV file not as expected", 1, recordCount); } try (CSVParser parser = CSVParser.parse(fileForVerboseTable01, StandardCharsets.UTF_8, ColumnInvalidityReport.VERBOSE_CSV_FORMAT.withSkipHeaderRecord())) { int recordCount = 0; for (CSVRecord record : parser) { recordCount++; assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Namespace value not as expected", NAMESPACE01_TABLE01.getNamespaceAsString(), record.get(ColumnInvalidityReport.SummaryReportHeader.NAMESPACE)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Table value not as expected", NAMESPACE01_TABLE01.getQualifierAsString(), record.get(ColumnInvalidityReport.SummaryReportHeader.TABLE)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF02), 
record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(COLQUALIFIER03), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_QUALIFIER)); switch (recordCount) { case 1: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_02), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column value not as expected", Bytes.toString(BAD_URL01), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; case 2: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_04), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column value not as expected", Bytes.toString(BAD_URL03), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; case 3: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_04), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column value not as expected", Bytes.toString(BAD_URL02), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; case 4: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_04), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column value not as expected", Bytes.toString(BAD_URL01), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; case 5: assertEquals( 
COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_05), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column value not as expected", Bytes.toString(VALUE_5_BYTES_LONG), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; } } assertEquals(COLUMN_INVALIDITY_REPORT_FAILURE + "Record count in CSV file not as expected", 5, recordCount); } try (CSVParser parser = CSVParser.parse(fileForSummaryTable01Cf01, StandardCharsets.UTF_8, ColumnInvalidityReport.SUMMARY_CSV_FORMAT.withSkipHeaderRecord())) { int recordCount = 0; for (CSVRecord record : parser) { recordCount++; } assertEquals(COLUMN_INVALIDITY_REPORT_FAILURE + "Record count in CSV file not as expected", 0, recordCount); } try (CSVParser parser = CSVParser.parse(fileForVerboseTable01Cf01, StandardCharsets.UTF_8, ColumnInvalidityReport.VERBOSE_CSV_FORMAT.withSkipHeaderRecord())) { int recordCount = 0; for (CSVRecord record : parser) { recordCount++; } assertEquals(COLUMN_INVALIDITY_REPORT_FAILURE + "Record count in CSV file not as expected", 0, recordCount); } try (CSVParser parser = CSVParser.parse(fileForSummaryTable01Cf02, StandardCharsets.UTF_8, ColumnInvalidityReport.SUMMARY_CSV_FORMAT.withSkipHeaderRecord())) { int recordCount = 0; for (CSVRecord record : parser) { // System.out.println(record.get(ColumnInvalidityReport.SummaryReportHeader.NAMESPACE) + ":" // + record.get(ColumnInvalidityReport.SummaryReportHeader.TABLE) + ":" // + record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_FAMILY) + ":" // + record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_QUALIFIER) + " " // + ColumnInvalidityReport.SummaryReportHeader.OCCURRENCES.toString() + "=" // + record.get(ColumnInvalidityReport.SummaryReportHeader.OCCURRENCES) // ); recordCount++; assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Namespace 
value not as expected", NAMESPACE01_TABLE01.getNamespaceAsString(), record.get(ColumnInvalidityReport.SummaryReportHeader.NAMESPACE)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Table value not as expected", NAMESPACE01_TABLE01.getQualifierAsString(), record.get(ColumnInvalidityReport.SummaryReportHeader.TABLE)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF02), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(COLQUALIFIER03), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_QUALIFIER)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Occurrences-count value not as expected", String.valueOf(3), record.get(ColumnInvalidityReport.SummaryReportHeader.INVALID_OCCURRENCE_COUNT)); } assertEquals(COLUMN_INVALIDITY_REPORT_FAILURE + "Record count in CSV file not as expected", 1, recordCount); } try (CSVParser parser = CSVParser.parse(fileForVerboseTable01Cf02, StandardCharsets.UTF_8, ColumnInvalidityReport.VERBOSE_CSV_FORMAT.withSkipHeaderRecord())) { int recordCount = 0; for (CSVRecord record : parser) { // System.out.println(record.get(ColumnInvalidityReport.VerboseReportHeader.NAMESPACE) + ":" // + record.get(ColumnInvalidityReport.VerboseReportHeader.TABLE) + ":" // + record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_FAMILY) + ":" // + record.get(ColumnInvalidityReport.VerboseReportHeader.COLUMN_QUALIFIER) + " " // + ColumnInvalidityReport.VerboseReportHeader.ROW_ID.toString() + "=" // + record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID) + " " // + ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE.toString() + "=" // + record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE) // ); recordCount++; assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + 
"Rec " + recordCount + " Namespace value not as expected", NAMESPACE01_TABLE01.getNamespaceAsString(), record.get(ColumnInvalidityReport.SummaryReportHeader.NAMESPACE)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Table value not as expected", NAMESPACE01_TABLE01.getQualifierAsString(), record.get(ColumnInvalidityReport.SummaryReportHeader.TABLE)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColFamily value not as expected", Bytes.toString(CF02), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_FAMILY)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " ColQualifier value not as expected", Bytes.toString(COLQUALIFIER03), record.get(ColumnInvalidityReport.SummaryReportHeader.COLUMN_QUALIFIER)); switch (recordCount) { case 1: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_02), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column value not as expected", Bytes.toString(BAD_URL01), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; case 2: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_04), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column value not as expected", Bytes.toString(BAD_URL03), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; case 3: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_04), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column value not as expected", Bytes.toString(BAD_URL02), 
record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; case 4: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_04), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column value not as expected", Bytes.toString(BAD_URL01), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; case 5: assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " RowId value not as expected", Bytes.toString(ROW_ID_05), record.get(ColumnInvalidityReport.VerboseReportHeader.ROW_ID)); assertEquals( COLUMN_INVALIDITY_REPORT_FAILURE + "Rec " + recordCount + " Column value not as expected", Bytes.toString(VALUE_5_BYTES_LONG), record.get(ColumnInvalidityReport.VerboseReportHeader.CELL_VALUE)); break; } } assertEquals(COLUMN_INVALIDITY_REPORT_FAILURE + "Record count in CSV file not as expected", 5, recordCount); } clearTestingEnvironment(); System.out.println("#testOutputReportOnInvalidColumnQualifiers " + (useMapReduce ? "USING MAPREDUCE " : "") + "has run to completion."); }
From source file:org.ecloudmanager.service.deployment.geolite.GeolocationService.java
/**
 * Loads the bundled GeoLite2 city and country location CSV files and indexes
 * their rows by {@code geoname_id} into {@code cities} and {@code countries}.
 *
 * <p>An {@link IOException} raised while reading either file is logged; in that
 * case neither map is assigned by this method.
 */
@PostConstruct
private void init() {
    try {
        // NOTE(review): ClassLoader.getResource names normally must not start with '/';
        // confirm these paths resolve under the deployment class loader.
        URL cityUrl = getClass().getClassLoader().getResource("/GeoLite2-City-Locations-en.csv");
        List<CSVRecord> cityRecords;
        // getRecords() reads every row into memory, so the parser (and the URL stream
        // behind it) can be closed as soon as the list has been obtained.
        try (CSVParser cityParser = CSVParser.parse(cityUrl, Charset.defaultCharset(),
                CSVFormat.DEFAULT.withHeader())) {
            cityRecords = cityParser.getRecords();
        }

        URL countryUrl = getClass().getClassLoader().getResource("/GeoLite2-Country-Locations-en.csv");
        List<CSVRecord> countryRecords;
        try (CSVParser countryParser = CSVParser.parse(countryUrl, Charset.defaultCharset(),
                CSVFormat.DEFAULT.withHeader())) {
            countryRecords = countryParser.getRecords();
        }

        cities = cityRecords.stream().map(record -> {
            String country = (record.get("country_iso_code") + " " + record.get("country_name")).trim();
            String subdivision = (record.get("subdivision_1_name") + " " + record.get("subdivision_2_name"))
                    .trim();
            String city = record.get("city_name").trim();

            List<String> items = new ArrayList<>();
            items.add(country);
            items.add(subdivision);
            items.add(city);

            // Label is the non-empty parts joined with ", " in country/subdivision/city order.
            StringBuilder labelBuilder = new StringBuilder();
            items.forEach(item -> {
                if (!StringUtils.isEmpty(item.trim())) {
                    if (labelBuilder.length() > 0) {
                        labelBuilder.append(", ");
                    }
                    labelBuilder.append(item);
                }
            });
            return new GeolocationRecord(record.get("geoname_id"), country, subdivision, city,
                    labelBuilder.toString());
        }).collect(Collectors.toMap(GeolocationRecord::getGeoid, record -> record));

        countries = countryRecords.stream().map(record -> {
            String label = record.get("country_iso_code") + " " + record.get("country_name");
            return new GeolocationRecord(record.get("geoname_id"), label, "", "", label);
        }).collect(Collectors.toMap(GeolocationRecord::getGeoid, record -> record));
    } catch (IOException e) {
        log.error("Cannot initialize geolocation service", e);
    }
}
From source file:org.languagetool.dev.RuleDetails.java
public static void main(String[] args) throws ParseException, IOException { Options options = new Options(); options.addRequiredOption("l", "language", true, "Language for rules"); options.addRequiredOption("f", "file", true, "Input file"); options.addRequiredOption("o", "output", true, "Output file"); options.addRequiredOption("c", "column", true, "Column in input file"); options.addOption("n", "ngramPath", true, "Ngram path to activate ngram rules"); CommandLine cmd = new DefaultParser().parse(options, args); String langCode = cmd.getOptionValue('l'); String inputFile = cmd.getOptionValue('f'); String outputFile = cmd.getOptionValue('o'); String column = cmd.getOptionValue('c'); String ngramPath = cmd.hasOption('n') ? cmd.getOptionValue('n') : null; RuleDetails details = new RuleDetails(Languages.getLanguageForShortCode(langCode), ngramPath); CSVFormat format = CSVFormat.RFC4180.withFirstRecordAsHeader(); try (CSVParser parser = CSVParser.parse(new File(inputFile), Charset.defaultCharset(), format)) { try (CSVPrinter printer = new CSVPrinter(new BufferedWriter(new FileWriter(outputFile)), format)) { Map<String, Integer> oldHeader = parser.getHeaderMap(); List<String> newHeader = new ArrayList<>(Collections.nCopies(oldHeader.size(), null)); for (Map.Entry<String, Integer> entry : oldHeader.entrySet()) { newHeader.set(entry.getValue(), entry.getKey()); }//from ww w . j a va2 s . 
co m newHeader.add("description"); newHeader.add("category"); printer.printRecord(newHeader); if (!oldHeader.containsKey(column)) { throw new RuntimeException("Input file does not contain specified column " + column); } List<CSVRecord> records = parser.getRecords(); records.stream().sequential().map(record -> { String ruleId = record.get(column); Rule rule = details.getRuleById(ruleId); List<String> transformedValues = new ArrayList<>(); record.iterator().forEachRemaining(transformedValues::add); if (rule == null) { transformedValues.add(""); transformedValues.add(""); } else { transformedValues.add(rule.getDescription()); transformedValues.add(rule.getCategory().getId().toString()); } return transformedValues; }).forEachOrdered(values -> { try { printer.printRecord(values); } catch (IOException e) { throw new RuntimeException(e); } }); } } }
From source file:org.neo4art.colour.manager.VanGoghArtworksColourAnalysisDefaultManager.java
/** * @see org.neo4art.colour.manager.VanGoghArtworksColourAnalysisManager#loadArtworksFromFile(java.lang.String) *///ww w. j a v a2s . co m @Override public List<Artwork> loadArtworksFromFile(String fileName) throws IOException { List<Artwork> artworks = null; URL url = getClass().getClassLoader().getResource(fileName); CSVParser csvParser = CSVParser.parse(url, Charset.defaultCharset(), CSVFormat.EXCEL.withIgnoreSurroundingSpaces(true)); List<CSVRecord> cvsRecords = csvParser.getRecords(); if (CollectionUtils.isNotEmpty(cvsRecords) && CollectionUtils.size(cvsRecords) > 1) { artworks = new ArrayList<Artwork>(); for (int i = 1; i < cvsRecords.size(); i++) { CSVRecord csvRecord = cvsRecords.get(i); Artwork artwork = new Artwork(); Calendar completionDate = Calendar.getInstance(); completionDate.set(Calendar.YEAR, Integer.parseInt(csvRecord.get(2))); completionDate.set(Calendar.MONTH, Integer.parseInt(csvRecord.get(3))); artwork.setTitle(csvRecord.get(0)); artwork.setType(csvRecord.get(1)); artwork.setYear(new Date(Integer.parseInt(csvRecord.get(2)), Calendar.JANUARY, 1)); artwork.setCompletionDate(completionDate.getTime()); artwork.setImageFile(csvRecord.get(4)); artwork.setCatalogue("F: " + csvRecord.get(5) + ", JH: " + csvRecord.get(6)); artworks.add(artwork); } } return artworks; }
From source file:org.neo4art.core.service.ArtistsArtworkCatalogTest.java
@Test public void shouldSaveColours() { try {/*from w w w .j ava 2s. c om*/ URL url = getClass().getClassLoader().getResource("artists-artworks-catalog.csv"); CSVParser csvParser = CSVParser.parse(url, Charset.forName("ISO-8859-1"), CSVFormat.EXCEL); List<CSVRecord> records = csvParser.getRecords(); if (CollectionUtils.isNotEmpty(records)) { // AUTHOR;BORN-DIED;TITLE;DATE;TECHNIQUE;LOCATION;URL;FORM;TYPE;SCHOOL;TIMEFRAME for (int i = 1; i < records.size(); i++) { CSVRecord csvRecord = records.get(i); String record0 = csvRecord.get(0); if (record0.contains(",")) { String[] author = StringUtils.split(record0, ','); System.out.println(WordUtils.capitalizeFully(StringUtils.trim(author[1]))); System.out.println(WordUtils.capitalizeFully(StringUtils.trim(author[0]))); System.out.println(); } else { System.out.println(csvRecord.get(0)); System.out.println(); } /* String record1 = csvRecord.get(1); System.out.println("--|" + record1 + "|--"); if (record1.startsWith("(b. ") && record1.contains("d.") && record1.contains(")")) { record1 = StringUtils.remove(record1, '('); record1 = StringUtils.remove(record1, ')'); System.out.println(record1); String[] bornDied = StringUtils.split(record1, ','); System.out.println(bornDied[0].trim().substring(2).trim()); System.out.println(bornDied[1].trim()); System.out.println(bornDied[2].trim().substring(2).trim()); System.out.println(bornDied[3].trim()); } else { System.out.println(csvRecord.get(1)); } System.out.println(csvRecord.get(2)); System.out.println(csvRecord.get(3)); System.out.println(csvRecord.get(4)); System.out.println(csvRecord.get(5)); System.out.println(csvRecord.get(6)); System.out.println(csvRecord.get(7)); System.out.println(csvRecord.get(8)); System.out.println(csvRecord.get(9)); System.out.println(csvRecord.get(10)); System.out.println(); */ } } } catch (Exception e) { e.printStackTrace(); Assert.fail(e.getMessage()); } }
From source file:org.neo4art.core.service.ColourDefaultService.java
/**
 * Loads the colours listed in the {@code colours.csv} classpath resource.
 *
 * <p>Every record is used (no record is skipped). Column 0 is passed to the {@code Colour}
 * constructor as-is; column 1 is read as a string whose characters 1-2, 3-4 and 5-6 are the
 * hexadecimal red, green and blue components (the character at index 0 is ignored).
 *
 * @return the parsed colours, or {@code null} when the file contains no records (kept for
 *         backward compatibility with existing callers)
 * @throws IOException if the resource cannot be read or parsed
 * @see org.neo4art.core.service.ColourService#getColours()
 */
@Override
public List<Colour> getColours() throws IOException {
    URL url = getClass().getClassLoader().getResource("colours.csv");
    CSVFormat format = CSVFormat.EXCEL.withDelimiter(',')
            .withQuote('\'')
            .withEscape('\\')
            .withIgnoreSurroundingSpaces(true);

    // CSVParser is Closeable; getRecords() materialises every record, so the parser can be
    // released before the records are processed.
    List<CSVRecord> records;
    try (CSVParser csvParser = CSVParser.parse(url, Charset.defaultCharset(), format)) {
        records = csvParser.getRecords();
    }

    if (CollectionUtils.isEmpty(records)) {
        return null;
    }

    List<Colour> result = new ArrayList<Colour>();
    for (CSVRecord csvRecord : records) {
        String hex = csvRecord.get(1);
        int r = Integer.parseInt(hex.substring(1, 3), 16);
        int g = Integer.parseInt(hex.substring(3, 5), 16);
        int b = Integer.parseInt(hex.substring(5, 7), 16);
        result.add(new Colour(csvRecord.get(0), r, g, b));
    }
    return result;
}
From source file:org.neo4art.literature.manager.VanGoghLettersManager.java
/**
 * Loads letter sentiment results from a CSV resource located on the classpath.
 *
 * <p>The first record is skipped (the loop starts at index 1; presumably it is a header
 * row). Column 0 becomes the letter title, column 2 the letter date, and column 1 is a
 * polarity code mapped as follows: {@code "0"} to "neutral", {@code "1"}/{@code "2"} to
 * "negative", {@code "3"}/{@code "4"} to "positive"; any other value yields an empty
 * polarity.
 *
 * @param fileName name of the CSV resource, resolved through this class's class loader
 * @return the parsed sentiment analyses; empty when the file has no record after the first
 * @throws IOException if the resource cannot be read or parsed
 */
public List<SentimentAnalysis> loadSentimentsFromFile(String fileName) throws IOException {
    List<SentimentAnalysis> sentimentAnalysisList = new ArrayList<SentimentAnalysis>();

    URL url = getClass().getClassLoader().getResource(fileName);

    // CSVParser is Closeable; getRecords() materialises every record, so the parser can be
    // released before the records are processed.
    List<CSVRecord> cvsRecords;
    try (CSVParser csvParser = CSVParser.parse(url, Charset.defaultCharset(),
            CSVFormat.EXCEL.withIgnoreSurroundingSpaces(true))) {
        cvsRecords = csvParser.getRecords();
    }

    for (int i = 1; i < cvsRecords.size(); i++) {
        CSVRecord csvRecord = cvsRecords.get(i);

        Letter letter = new Letter();
        letter.setTitle(csvRecord.get(0));

        // Codes are plain digits, so a case-insensitive comparison adds nothing.
        String polarityType = csvRecord.get(1);
        String polarity = "";
        if ("0".equals(polarityType)) {
            polarity = "neutral";
        } else if ("1".equals(polarityType) || "2".equals(polarityType)) {
            polarity = "negative";
        } else if ("3".equals(polarityType) || "4".equals(polarityType)) {
            polarity = "positive";
        }

        letter.setDate(csvRecord.get(2));

        SentimentAnalysis sentimentAnalysis = new SentimentAnalysis();
        sentimentAnalysis.setPolarity(polarity);
        sentimentAnalysis.setSource(letter);

        sentimentAnalysisList.add(sentimentAnalysis);
    }

    return sentimentAnalysisList;
}
From source file:parser_owl.ParserOwl.java
public static void startParisWithData(String inputData) { try {// w w w . j a v a 2 s.c om File csvData = new File("/Users/gautier/Documents/web_sem/sources/paris_tournages2.csv"); CSVParser parser = CSVParser.parse(csvData, Charset.defaultCharset(), CSVFormat.EXCEL.withHeader()); System.out.println("Starting Paris (input file: " + csvData.getAbsolutePath() + ")..."); for (CSVRecord csvRecord : parser) { Film film = new Film(csvRecord.get("titre"), csvRecord.get("realisateur"), csvRecord.get("date_debut_evenement"), csvRecord.get("date_fin_evenement"), csvRecord.get("cadre"), csvRecord.get("lieu"), csvRecord.get("adresse"), csvRecord.get("arrondissement"), csvRecord.get("adresse_complete"), csvRecord.get("geo_coordinates"), OMDB_PROXY.getMovieInfos(csvRecord.get("titre")).get("imdbRating"), OMDB_PROXY.getMovieInfos(csvRecord.get("titre")).get("Runtime"), OMDB_PROXY.getMovieInfos(csvRecord.get("titre")).get("Country"), OMDB_PROXY.getMovieInfos(csvRecord.get("titre")).get("Actors"), OMDB_PROXY.getMovieInfos(csvRecord.get("titre")).get("Year")); MANAGER.store(film); } System.out.println("Ending Paris."); } catch (IOException e) { System.out.println("error: " + e); } }