Example usage for org.apache.hadoop.fs FileSystem FS_DEFAULT_NAME_KEY

Introduction

This page collects example usages of FS_DEFAULT_NAME_KEY, the org.apache.hadoop.fs.FileSystem constant that holds the configuration key under which the default filesystem URI is registered.

Prototype

String FS_DEFAULT_NAME_KEY

Usage
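
Before the project-specific examples below, here is a minimal, self-contained sketch of the usual pattern: set FS_DEFAULT_NAME_KEY on a Configuration and obtain the matching FileSystem. The class name is illustrative, and "file:///" (the local filesystem) stands in for whatever default filesystem URI a deployment actually uses.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class FsDefaultNameKeyExample { // illustrative class name
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Register the default filesystem URI; "file:///" selects the local filesystem.
        conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "file:///");
        FileSystem fs = FileSystem.get(conf);
        // Both calls resolve against the URI registered above.
        System.out.println("Default filesystem URI: " + FileSystem.getDefaultUri(conf));
        System.out.println("Working directory: " + fs.getWorkingDirectory());
    }
}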

From source file:org.apache.drill.exec.store.hive.HiveTestDataGenerator.java

License:Apache License

private void generateTestData() throws Exception {
    HiveConf conf = new HiveConf(SessionState.class);

    conf.set("javax.jdo.option.ConnectionURL", String.format("jdbc:derby:;databaseName=%s;create=true", dbDir));
    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "file:///");
    conf.set("hive.metastore.warehouse.dir", whDir);
    conf.set("mapred.job.tracker", "local");
    conf.set(ConfVars.SCRATCHDIR.varname, getTempDir("scratch_dir"));
    conf.set(ConfVars.LOCALSCRATCHDIR.varname, getTempDir("local_scratch_dir"));
    conf.set(ConfVars.DYNAMICPARTITIONINGMODE.varname, "nonstrict");

    SessionState ss = new SessionState(conf);
    SessionState.start(ss);
    Driver hiveDriver = new Driver(conf);

    // generate (key, value) test data
    String testDataFile = generateTestDataFile();

    // Create a (key, value) schema table with Text SerDe which is available in hive-serdes.jar
    executeQuery(hiveDriver, "CREATE TABLE IF NOT EXISTS default.kv(key INT, value STRING) "
            + "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE");
    executeQuery(hiveDriver, "LOAD DATA LOCAL INPATH '" + testDataFile + "' OVERWRITE INTO TABLE default.kv");

    // Create a (key, value) schema table in non-default database with RegexSerDe which is available in hive-contrib.jar
    // Table with RegExSerde is expected to have columns of STRING type only.
    executeQuery(hiveDriver, "CREATE DATABASE IF NOT EXISTS db1");
    executeQuery(hiveDriver,
            "CREATE TABLE db1.kv_db1(key STRING, value STRING) "
                    + "ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe' "
                    + "WITH SERDEPROPERTIES (" + "  \"input.regex\" = \"([0-9]*), (.*_[0-9]*)\", "
                    + "  \"output.format.string\" = \"%1$s, %2$s\"" + ") ");
    executeQuery(hiveDriver, "INSERT INTO TABLE db1.kv_db1 SELECT * FROM default.kv");

    // Create an Avro format based table backed by schema in a separate file
    final String avroCreateQuery = String.format(
            "CREATE TABLE db1.avro " + "ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' "
                    + "STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' "
                    + "OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' "
                    + "TBLPROPERTIES ('avro.schema.url'='file:///%s')",
            BaseTestQuery.getPhysicalFileFromResource("avro_test_schema.json").replace('\\', '/'));

    executeQuery(hiveDriver, avroCreateQuery);
    executeQuery(hiveDriver, "INSERT INTO TABLE db1.avro SELECT * FROM default.kv");

    executeQuery(hiveDriver, "USE default");

    // create a table with no data
    executeQuery(hiveDriver, "CREATE TABLE IF NOT EXISTS empty_table(a INT, b STRING)");
    // delete the table location of empty table
    File emptyTableLocation = new File(whDir, "empty_table");
    if (emptyTableLocation.exists()) {
        FileUtils.forceDelete(emptyTableLocation);
    }

    // create a Hive table that has columns with data types which are supported for reading in Drill.
    testDataFile = generateAllTypesDataFile();
    executeQuery(hiveDriver,
            "CREATE TABLE IF NOT EXISTS readtest (" + "  binary_field BINARY," + "  boolean_field BOOLEAN,"
                    + "  tinyint_field TINYINT," + "  decimal0_field DECIMAL,"
                    + "  decimal9_field DECIMAL(6, 2)," + "  decimal18_field DECIMAL(15, 5),"
                    + "  decimal28_field DECIMAL(23, 1)," + "  decimal38_field DECIMAL(30, 3),"
                    + "  double_field DOUBLE," + "  float_field FLOAT," + "  int_field INT,"
                    + "  bigint_field BIGINT," + "  smallint_field SMALLINT," + "  string_field STRING,"
                    + "  varchar_field VARCHAR(50)," + "  timestamp_field TIMESTAMP," + "  date_field DATE,"
                    + "  char_field CHAR(10)" + ") PARTITIONED BY (" +
                    // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now.
                    // "  binary_part BINARY," +
                    "  boolean_part BOOLEAN," + "  tinyint_part TINYINT," + "  decimal0_part DECIMAL,"
                    + "  decimal9_part DECIMAL(6, 2)," + "  decimal18_part DECIMAL(15, 5),"
                    + "  decimal28_part DECIMAL(23, 1)," + "  decimal38_part DECIMAL(30, 3),"
                    + "  double_part DOUBLE," + "  float_part FLOAT," + "  int_part INT,"
                    + "  bigint_part BIGINT," + "  smallint_part SMALLINT," + "  string_part STRING,"
                    + "  varchar_part VARCHAR(50)," + "  timestamp_part TIMESTAMP," + "  date_part DATE,"
                    + "  char_part CHAR(10)" + ") ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' "
                    + "TBLPROPERTIES ('serialization.null.format'='') ");

    // Add a partition to table 'readtest'
    executeQuery(hiveDriver, "ALTER TABLE readtest ADD IF NOT EXISTS PARTITION ( " +
    // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now.
    // "  binary_part='binary', " +
            "  boolean_part='true', " + "  tinyint_part='64', " + "  decimal0_part='36.9', "
            + "  decimal9_part='36.9', " + "  decimal18_part='3289379872.945645', "
            + "  decimal28_part='39579334534534.35345', " + "  decimal38_part='363945093845093890.9', "
            + "  double_part='8.345', " + "  float_part='4.67', " + "  int_part='123456', "
            + "  bigint_part='234235', " + "  smallint_part='3455', " + "  string_part='string', "
            + "  varchar_part='varchar', " + "  timestamp_part='2013-07-05 17:01:00', "
            + "  date_part='2013-07-05', " + "  char_part='char')");

    // Add a second partition to table 'readtest' which contains the same values as the first partition except
    // for tinyint_part partition column
    executeQuery(hiveDriver, "ALTER TABLE readtest ADD IF NOT EXISTS PARTITION ( " +
    // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now.
    // "  binary_part='binary', " +
            "  boolean_part='true', " + "  tinyint_part='65', " + "  decimal0_part='36.9', "
            + "  decimal9_part='36.9', " + "  decimal18_part='3289379872.945645', "
            + "  decimal28_part='39579334534534.35345', " + "  decimal38_part='363945093845093890.9', "
            + "  double_part='8.345', " + "  float_part='4.67', " + "  int_part='123456', "
            + "  bigint_part='234235', " + "  smallint_part='3455', " + "  string_part='string', "
            + "  varchar_part='varchar', " + "  timestamp_part='2013-07-05 17:01:00', "
            + "  date_part='2013-07-05', " + "  char_part='char')");

    // Load data into table 'readtest'
    executeQuery(hiveDriver,
            String.format("LOAD DATA LOCAL INPATH '%s' INTO TABLE default.readtest PARTITION (" +
            // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now.
            // "  binary_part='binary', " +
                    "  boolean_part='true', " + "  tinyint_part='64', " + "  decimal0_part='36.9', "
                    + "  decimal9_part='36.9', " + "  decimal18_part='3289379872.945645', "
                    + "  decimal28_part='39579334534534.35345', " + "  decimal38_part='363945093845093890.9', "
                    + "  double_part='8.345', " + "  float_part='4.67', " + "  int_part='123456', "
                    + "  bigint_part='234235', " + "  smallint_part='3455', " + "  string_part='string', "
                    + "  varchar_part='varchar', " + "  timestamp_part='2013-07-05 17:01:00', "
                    + "  date_part='2013-07-05'," + "  char_part='char'" + ")", testDataFile));

    // create a table that has all Hive types. This is to test how hive tables metadata is populated in
    // Drill's INFORMATION_SCHEMA.
    executeQuery(hiveDriver,
            "CREATE TABLE IF NOT EXISTS infoschematest(" + "booleanType BOOLEAN, " + "tinyintType TINYINT, "
                    + "smallintType SMALLINT, " + "intType INT, " + "bigintType BIGINT, " + "floatType FLOAT, "
                    + "doubleType DOUBLE, " + "dateType DATE, " + "timestampType TIMESTAMP, "
                    + "binaryType BINARY, " + "decimalType DECIMAL(38, 2), " + "stringType STRING, "
                    + "varCharType VARCHAR(20), " + "listType ARRAY<STRING>, " + "mapType MAP<STRING,INT>, "
                    + "structType STRUCT<sint:INT,sboolean:BOOLEAN,sstring:STRING>, "
                    + "uniontypeType UNIONTYPE<int, double, array<string>>, " + "charType CHAR(10))");

    /**
     * Create a PARQUET table with all supported types.
     */
    executeQuery(hiveDriver, "CREATE TABLE readtest_parquet (" + "  binary_field BINARY, "
            + "  boolean_field BOOLEAN, " + "  tinyint_field TINYINT," + "  decimal0_field DECIMAL,"
            + "  decimal9_field DECIMAL(6, 2)," + "  decimal18_field DECIMAL(15, 5),"
            + "  decimal28_field DECIMAL(23, 1)," + "  decimal38_field DECIMAL(30, 3),"
            + "  double_field DOUBLE," + "  float_field FLOAT," + "  int_field INT," + "  bigint_field BIGINT,"
            + "  smallint_field SMALLINT," + "  string_field STRING," + "  varchar_field VARCHAR(50),"
            + "  timestamp_field TIMESTAMP," + "  char_field CHAR(10)" + ") PARTITIONED BY (" +
            // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now.
            // "  binary_part BINARY," +
            "  boolean_part BOOLEAN," + "  tinyint_part TINYINT," + "  decimal0_part DECIMAL,"
            + "  decimal9_part DECIMAL(6, 2)," + "  decimal18_part DECIMAL(15, 5),"
            + "  decimal28_part DECIMAL(23, 1)," + "  decimal38_part DECIMAL(30, 3)," + "  double_part DOUBLE,"
            + "  float_part FLOAT," + "  int_part INT," + "  bigint_part BIGINT," + "  smallint_part SMALLINT,"
            + "  string_part STRING," + "  varchar_part VARCHAR(50)," + "  timestamp_part TIMESTAMP,"
            + "  date_part DATE," + "  char_part CHAR(10)" + ") STORED AS parquet ");

    executeQuery(hiveDriver, "INSERT OVERWRITE TABLE readtest_parquet " + "PARTITION (" +
    // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now.
    // "  binary_part='binary', " +
            "  boolean_part='true', " + "  tinyint_part='64', " + "  decimal0_part='36.9', "
            + "  decimal9_part='36.9', " + "  decimal18_part='3289379872.945645', "
            + "  decimal28_part='39579334534534.35345', " + "  decimal38_part='363945093845093890.9', "
            + "  double_part='8.345', " + "  float_part='4.67', " + "  int_part='123456', "
            + "  bigint_part='234235', " + "  smallint_part='3455', " + "  string_part='string', "
            + "  varchar_part='varchar', " + "  timestamp_part='2013-07-05 17:01:00', "
            + "  date_part='2013-07-05', " + "  char_part='char'" + ") " + " SELECT " + "  binary_field,"
            + "  boolean_field," + "  tinyint_field," + "  decimal0_field," + "  decimal9_field,"
            + "  decimal18_field," + "  decimal28_field," + "  decimal38_field," + "  double_field,"
            + "  float_field," + "  int_field," + "  bigint_field," + "  smallint_field," + "  string_field,"
            + "  varchar_field," + "  timestamp_field," + "  char_field"
            + " FROM readtest WHERE tinyint_part = 64");

    // Add a second partition to table 'readtest_parquet' which contains the same values as the first partition except
    // for tinyint_part partition column
    executeQuery(hiveDriver, "ALTER TABLE readtest_parquet ADD PARTITION ( " +
    // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now.
    // "  binary_part='binary', " +
            "  boolean_part='true', " + "  tinyint_part='65', " + "  decimal0_part='36.9', "
            + "  decimal9_part='36.9', " + "  decimal18_part='3289379872.945645', "
            + "  decimal28_part='39579334534534.35345', " + "  decimal38_part='363945093845093890.9', "
            + "  double_part='8.345', " + "  float_part='4.67', " + "  int_part='123456', "
            + "  bigint_part='234235', " + "  smallint_part='3455', " + "  string_part='string', "
            + "  varchar_part='varchar', " + "  timestamp_part='2013-07-05 17:01:00', "
            + "  date_part='2013-07-05', " + "  char_part='char')");

    // create a Hive view to test how its metadata is populated in Drill's INFORMATION_SCHEMA
    executeQuery(hiveDriver, "CREATE VIEW IF NOT EXISTS hiveview AS SELECT * FROM kv");

    executeQuery(hiveDriver,
            "CREATE TABLE IF NOT EXISTS "
                    + "partition_pruning_test_loadtable(a DATE, b TIMESTAMP, c INT, d INT, e INT) "
                    + "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE");
    executeQuery(hiveDriver,
            String.format("LOAD DATA LOCAL INPATH '%s' INTO TABLE partition_pruning_test_loadtable",
                    generateTestDataFileForPartitionInput()));

    // create partitioned hive table to test partition pruning
    executeQuery(hiveDriver, "CREATE TABLE IF NOT EXISTS partition_pruning_test(a DATE, b TIMESTAMP) "
            + "partitioned by (c INT, d INT, e INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE");
    executeQuery(hiveDriver, "INSERT OVERWRITE TABLE partition_pruning_test PARTITION(c, d, e) "
            + "SELECT a, b, c, d, e FROM partition_pruning_test_loadtable");

    // Add a partition with custom location
    executeQuery(hiveDriver,
            String.format("ALTER TABLE partition_pruning_test ADD PARTITION (c=99, d=98, e=97) LOCATION '%s'",
                    getTempDir("part1")));
    executeQuery(hiveDriver, String.format(
            "INSERT INTO TABLE partition_pruning_test PARTITION(c=99, d=98, e=97) "
                    + "SELECT '%s', '%s' FROM kv LIMIT 1",
            new Date(System.currentTimeMillis()).toString(),
            new Timestamp(System.currentTimeMillis()).toString()));

    executeQuery(hiveDriver, "DROP TABLE partition_pruning_test_loadtable");

    // Create a partitioned parquet table (DRILL-3938)
    executeQuery(hiveDriver,
            "CREATE TABLE kv_parquet(key INT, value STRING) PARTITIONED BY (part1 int) STORED AS PARQUET");
    executeQuery(hiveDriver,
            "INSERT INTO TABLE kv_parquet PARTITION(part1) SELECT key, value, key FROM default.kv");
    executeQuery(hiveDriver, "ALTER TABLE kv_parquet ADD COLUMNS (newcol string)");

    executeQuery(hiveDriver, "CREATE TABLE countStar_Parquet (int_field INT) STORED AS parquet");

    final int numOfRows = 200;
    final StringBuffer sb = new StringBuffer();
    sb.append("VALUES ");
    for (int i = 0; i < numOfRows; ++i) {
        if (i != 0) {
            sb.append(",");
        }
        sb.append("(").append(i).append(")");
    }

    executeQuery(hiveDriver, "INSERT INTO TABLE countStar_Parquet \n" + sb.toString());

    // Create a StorageHandler based table (DRILL-3739)
    executeQuery(hiveDriver, "CREATE TABLE kv_sh(key INT, value STRING) STORED BY "
            + "'org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler'");
    // Insert fails if the table directory already exists for tables with DefaultStorageHandlers. It's a known
    // issue in Hive, so delete the table directory created as part of the CREATE TABLE.
    FileUtils.deleteQuietly(new File(whDir, "kv_sh"));
    //executeQuery(hiveDriver, "INSERT OVERWRITE TABLE kv_sh SELECT * FROM kv");

    // Create text tables with skip header and footer table property
    executeQuery(hiveDriver, "create database if not exists skipper");
    executeQuery(hiveDriver,
            createTableWithHeaderFooterProperties("skipper.kv_text_small", "textfile", "1", "1"));
    executeQuery(hiveDriver, generateTestDataWithHeadersAndFooters("skipper.kv_text_small", 5, 1, 1));

    executeQuery(hiveDriver,
            createTableWithHeaderFooterProperties("skipper.kv_text_large", "textfile", "2", "2"));
    executeQuery(hiveDriver, generateTestDataWithHeadersAndFooters("skipper.kv_text_large", 5000, 2, 2));

    executeQuery(hiveDriver,
            createTableWithHeaderFooterProperties("skipper.kv_incorrect_skip_header", "textfile", "A", "1"));
    executeQuery(hiveDriver,
            generateTestDataWithHeadersAndFooters("skipper.kv_incorrect_skip_header", 5, 1, 1));

    executeQuery(hiveDriver,
            createTableWithHeaderFooterProperties("skipper.kv_incorrect_skip_footer", "textfile", "1", "A"));
    executeQuery(hiveDriver,
            generateTestDataWithHeadersAndFooters("skipper.kv_incorrect_skip_footer", 5, 1, 1));

    // Create rcfile table with skip header and footer table property
    executeQuery(hiveDriver,
            createTableWithHeaderFooterProperties("skipper.kv_rcfile_large", "rcfile", "1", "1"));
    executeQuery(hiveDriver, "insert into table skipper.kv_rcfile_large select * from skipper.kv_text_large");

    // Create parquet table with skip header and footer table property
    executeQuery(hiveDriver,
            createTableWithHeaderFooterProperties("skipper.kv_parquet_large", "parquet", "1", "1"));
    executeQuery(hiveDriver, "insert into table skipper.kv_parquet_large select * from skipper.kv_text_large");

    // Create sequencefile table with skip header and footer table property
    executeQuery(hiveDriver,
            createTableWithHeaderFooterProperties("skipper.kv_sequencefile_large", "sequencefile", "1", "1"));
    executeQuery(hiveDriver,
            "insert into table skipper.kv_sequencefile_large select * from skipper.kv_text_large");

    // Create a table based on json file
    executeQuery(hiveDriver, "create table default.simple_json(json string)");
    final String loadData = String.format("load data local inpath '" + Resources.getResource("simple.json")
            + "' into table default.simple_json");
    executeQuery(hiveDriver, loadData);
    ss.close();
}

From source file:org.apache.drill.exec.store.model.DrillModelWriter.java

License:Apache License

@Override
public void init(Map<String, String> writerOptions) throws IOException {
    this.location = writerOptions.get("location");
    this.prefix = writerOptions.get("prefix");
    //    this.fieldDelimiter = writerOptions.get("separator");
    this.extension = writerOptions.get("extension");

    Configuration conf = new Configuration();
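    // The default filesystem URI is passed in by the caller through the writer options map.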
    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, writerOptions.get(FileSystem.FS_DEFAULT_NAME_KEY));
    this.fs = FileSystem.get(conf);

    //    this.currentRecord = new Byte;//new StringBuilder();
    this.index = 0;
}

From source file:org.apache.drill.exec.store.parquet.ParquetFormatPlugin.java

License:Apache License

public RecordWriter getRecordWriter(FragmentContext context, ParquetWriter writer)
        throws IOException, OutOfMemoryException {
    Map<String, String> options = Maps.newHashMap();

    options.put("location", writer.getLocation());

    FragmentHandle handle = context.getHandle();
    String fragmentId = String.format("%d_%d", handle.getMajorFragmentId(), handle.getMinorFragmentId());
    options.put("prefix", fragmentId);

    options.put(FileSystem.FS_DEFAULT_NAME_KEY, ((FileSystemConfig) writer.getStorageConfig()).connection);

    options.put(ExecConstants.PARQUET_BLOCK_SIZE,
            context.getOptions().getOption(ExecConstants.PARQUET_BLOCK_SIZE).num_val.toString());
    options.put(ExecConstants.PARQUET_PAGE_SIZE,
            context.getOptions().getOption(ExecConstants.PARQUET_PAGE_SIZE).num_val.toString());
    options.put(ExecConstants.PARQUET_DICT_PAGE_SIZE,
            context.getOptions().getOption(ExecConstants.PARQUET_DICT_PAGE_SIZE).num_val.toString());

    options.put(ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE,
            context.getOptions().getOption(ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE).string_val);

    options.put(ExecConstants.PARQUET_WRITER_ENABLE_DICTIONARY_ENCODING,
            context.getOptions().getOption(ExecConstants.PARQUET_WRITER_ENABLE_DICTIONARY_ENCODING).bool_val
                    .toString());

    RecordWriter recordWriter = new ParquetRecordWriter(context, writer);
    recordWriter.init(options);

    return recordWriter;
}

From source file:org.apache.drill.exec.store.parquet.ParquetRecordWriter.java

License:Apache License

@Override
public void init(Map<String, String> writerOptions) throws IOException {
    this.location = writerOptions.get("location");
    this.prefix = writerOptions.get("prefix");

    conf = new Configuration();
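    // The default filesystem URI is supplied through the writer options (see ParquetFormatPlugin above).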
    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, writerOptions.get(FileSystem.FS_DEFAULT_NAME_KEY));
    blockSize = Integer.parseInt(writerOptions.get(ExecConstants.PARQUET_BLOCK_SIZE));
    pageSize = Integer.parseInt(writerOptions.get(ExecConstants.PARQUET_PAGE_SIZE));
    dictionaryPageSize = Integer.parseInt(writerOptions.get(ExecConstants.PARQUET_DICT_PAGE_SIZE));
    String codecName = writerOptions.get(ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE).toLowerCase();
    switch (codecName) {
    case "snappy":
        codec = CompressionCodecName.SNAPPY;
        break;
    case "lzo":
        codec = CompressionCodecName.LZO;
        break;
    case "gzip":
        codec = CompressionCodecName.GZIP;
        break;
    case "none":
    case "uncompressed":
        codec = CompressionCodecName.UNCOMPRESSED;
        break;
    default:
        throw new UnsupportedOperationException(String.format("Unknown compression type: %s", codecName));
    }

    enableDictionary = Boolean
            .parseBoolean(writerOptions.get(ExecConstants.PARQUET_WRITER_ENABLE_DICTIONARY_ENCODING));
}

From source file:org.apache.drill.exec.store.parquet.TestFileGenerator.java

License:Apache License

public static void generateParquetFile(String filename, ParquetTestProperties props) throws Exception {

    int currentBooleanByte = 0;
    WrapAroundCounter booleanBitCounter = new WrapAroundCounter(7);

    Configuration configuration = new Configuration();
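    // Generate the test parquet file on the local filesystem.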
    configuration.set(FileSystem.FS_DEFAULT_NAME_KEY, "file:///");
    //"message m { required int32 integer; required int64 integer64; required boolean b; required float f; required double d;}"

    FileSystem fs = FileSystem.get(configuration);
    Path path = new Path(filename);
    if (fs.exists(path)) {
        fs.delete(path, false);
    }

    String messageSchema = "message m {";
    for (FieldInfo fieldInfo : props.fields.values()) {
        messageSchema += " required " + fieldInfo.parquetType + " " + fieldInfo.name + ";";
    }
    // remove the last semicolon, java really needs a join method for strings...
    // TODO - nvm apparently it requires a semicolon after every field decl, might want to file a bug
    //messageSchema = messageSchema.substring(schemaType, messageSchema.length() - 1);
    messageSchema += "}";

    MessageType schema = MessageTypeParser.parseMessageType(messageSchema);

    CompressionCodecName codec = CompressionCodecName.UNCOMPRESSED;
    ParquetFileWriter w = new ParquetFileWriter(configuration, schema, path);
    w.start();
    HashMap<String, Integer> columnValuesWritten = new HashMap<>();
    int valsWritten;
    for (int k = 0; k < props.numberRowGroups; k++) {
        w.startBlock(props.recordsPerRowGroup);
        currentBooleanByte = 0;
        booleanBitCounter.reset();

        for (FieldInfo fieldInfo : props.fields.values()) {

            if (!columnValuesWritten.containsKey(fieldInfo.name)) {
                columnValuesWritten.put(fieldInfo.name, 0);
                valsWritten = 0;
            } else {
                valsWritten = columnValuesWritten.get(fieldInfo.name);
            }

            String[] path1 = { fieldInfo.name };
            ColumnDescriptor c1 = schema.getColumnDescription(path1);

            w.startColumn(c1, props.recordsPerRowGroup, codec);
            final int valsPerPage = (int) Math.ceil(props.recordsPerRowGroup / (float) fieldInfo.numberOfPages);
            final int PAGE_SIZE = 1024 * 1024; // 1 MB
            byte[] bytes;
            RunLengthBitPackingHybridValuesWriter defLevels = new RunLengthBitPackingHybridValuesWriter(
                    MAX_EXPECTED_BIT_WIDTH_FOR_DEFINITION_LEVELS, valsPerPage, PAGE_SIZE,
                    new DirectByteBufferAllocator());
            RunLengthBitPackingHybridValuesWriter repLevels = new RunLengthBitPackingHybridValuesWriter(
                    MAX_EXPECTED_BIT_WIDTH_FOR_DEFINITION_LEVELS, valsPerPage, PAGE_SIZE,
                    new DirectByteBufferAllocator());
            // for variable length binary fields
            int bytesNeededToEncodeLength = 4;
            if (fieldInfo.bitLength > 0) {
                bytes = new byte[(int) Math.ceil(valsPerPage * fieldInfo.bitLength / 8.0)];
            } else {
                // the twelve at the end is to account for storing a 4 byte length with each value
                int totalValLength = ((byte[]) fieldInfo.values[0]).length
                        + ((byte[]) fieldInfo.values[1]).length + ((byte[]) fieldInfo.values[2]).length
                        + 3 * bytesNeededToEncodeLength;
                // used for the case where there is a number of values in this row group that is not divisible by 3
                int leftOverBytes = 0;
                if (valsPerPage % 3 > 0) {
                    leftOverBytes += ((byte[]) fieldInfo.values[1]).length + bytesNeededToEncodeLength;
                }
                if (valsPerPage % 3 > 1) {
                    leftOverBytes += ((byte[]) fieldInfo.values[2]).length + bytesNeededToEncodeLength;
                }
                bytes = new byte[valsPerPage / 3 * totalValLength + leftOverBytes];
            }
            int bytesPerPage = (int) (valsPerPage * (fieldInfo.bitLength / 8.0));
            int bytesWritten = 0;
            for (int z = 0; z < fieldInfo.numberOfPages; z++, bytesWritten = 0) {
                for (int i = 0; i < valsPerPage; i++) {
                    repLevels.writeInteger(0);
                    defLevels.writeInteger(1);
                    //System.out.print(i + ", " + (i % 25 == 0 ? "\n gen " + fieldInfo.name + ": " : ""));
                    if (fieldInfo.values[0] instanceof Boolean) {

                        bytes[currentBooleanByte] |= bitFields[booleanBitCounter.val]
                                & ((boolean) fieldInfo.values[valsWritten % 3] ? allBitsTrue : allBitsFalse);
                        booleanBitCounter.increment();
                        if (booleanBitCounter.val == 0) {
                            currentBooleanByte++;
                        }
                        valsWritten++;
                        if (currentBooleanByte > bytesPerPage) {
                            break;
                        }
                    } else {
                        if (fieldInfo.values[valsWritten % 3] instanceof byte[]) {
                            System.arraycopy(
                                    ByteArrayUtil.toByta(((byte[]) fieldInfo.values[valsWritten % 3]).length),
                                    0, bytes, bytesWritten, bytesNeededToEncodeLength);
                            System.arraycopy(fieldInfo.values[valsWritten % 3], 0, bytes,
                                    bytesWritten + bytesNeededToEncodeLength,
                                    ((byte[]) fieldInfo.values[valsWritten % 3]).length);
                            bytesWritten += ((byte[]) fieldInfo.values[valsWritten % 3]).length
                                    + bytesNeededToEncodeLength;
                        } else {
                            System.arraycopy(ByteArrayUtil.toByta(fieldInfo.values[valsWritten % 3]), 0, bytes,
                                    i * (fieldInfo.bitLength / 8), fieldInfo.bitLength / 8);
                        }
                        valsWritten++;
                    }

                }
                byte[] fullPage = new byte[2 * 4 * valsPerPage + bytes.length];
                byte[] repLevelBytes = repLevels.getBytes().toByteArray();
                byte[] defLevelBytes = defLevels.getBytes().toByteArray();
                System.arraycopy(bytes, 0, fullPage, 0, bytes.length);
                System.arraycopy(repLevelBytes, 0, fullPage, bytes.length, repLevelBytes.length);
                System.arraycopy(defLevelBytes, 0, fullPage, bytes.length + repLevelBytes.length,
                        defLevelBytes.length);
                w.writeDataPage((props.recordsPerRowGroup / fieldInfo.numberOfPages), fullPage.length,
                        BytesInput.from(fullPage), RLE, RLE, PLAIN);
                currentBooleanByte = 0;
            }
            w.endColumn();
            columnValuesWritten.remove(fieldInfo.name);
            columnValuesWritten.put(fieldInfo.name, valsWritten);
        }

        w.endBlock();
    }
    w.end(new HashMap<String, String>());
    logger.debug("Finished generating parquet file.");
}

From source file:org.apache.drill.exec.store.parquet.TestParquetFilterPushDown.java

License:Apache License

@BeforeClass
public static void initFSAndCreateFragContext() throws Exception {
    fragContext = new FragmentContext(bits[0].getContext(), BitControl.PlanFragment.getDefaultInstance(), null,
            bits[0].getContext().getFunctionImplementationRegistry());

    Configuration conf = new Configuration();
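    // "local" is legacy Hadoop shorthand for the local filesystem (newer code uses "file:///").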
    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "local");

    fs = FileSystem.get(conf);
}

From source file:org.apache.drill.exec.store.sys.local.FilePStore.java

License:Apache License

public static DrillFileSystem getFileSystem(DrillConfig config, Path root) throws IOException {
    Path blobRoot = root == null ? getLogDir() : root;
    Configuration fsConf = new Configuration();
    if (blobRoot.toUri().getScheme() != null) {
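        // Only override the default filesystem when the blob root URI carries an explicit scheme.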
        fsConf.set(FileSystem.FS_DEFAULT_NAME_KEY, blobRoot.toUri().toString());
    }

    DrillFileSystem fs = new DrillFileSystem(fsConf);
    fs.mkdirs(blobRoot);
    return fs;
}

From source file:org.apache.drill.exec.store.text.DrillTextRecordWriter.java

License:Apache License

@Override
public void init(Map<String, String> writerOptions) throws IOException {
    this.location = writerOptions.get("location");
    this.prefix = writerOptions.get("prefix");
    this.fieldDelimiter = writerOptions.get("separator");
    this.extension = writerOptions.get("extension");

    Configuration conf = new Configuration();
    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, writerOptions.get(FileSystem.FS_DEFAULT_NAME_KEY));
    this.fs = FileSystem.get(conf);

    this.currentRecord = new StringBuilder();
    this.index = 0;
}

From source file:org.apache.drill.exec.work.batch.FileTest.java

License:Apache License

public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
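    // "sync" is a custom scheme, presumably backed by Drill's LocalSyncableFileSystem (note the commented-out alternative below).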
    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "sync:///");
    System.out.println(FileSystem.getDefaultUri(conf));
    FileSystem fs = FileSystem.get(conf);
    //    FileSystem fs = new LocalSyncableFileSystem(conf);
    Path path = new Path("/tmp/testFile");
    FSDataOutputStream out = fs.create(path);
    byte[] s = "hello world".getBytes();
    out.write(s);
    out.sync();
    //    out.close();
    FSDataInputStream in = fs.open(path);
    byte[] bytes = new byte[s.length];
    in.read(bytes);
    System.out.println(new String(bytes));
    File file = new File("/tmp/testFile");
    FileOutputStream fos = new FileOutputStream(file);
    FileInputStream fis = new FileInputStream(file);
    fos.write(s);
    fos.getFD().sync();
    fis.read(bytes);
    System.out.println(new String(bytes));
    out = fs.create(new Path("/tmp/file"));
    for (int i = 0; i < 100; i++) {
        bytes = new byte[256 * 1024];
        Stopwatch watch = Stopwatch.createStarted();
        out.write(bytes);
        out.sync();
        long t = watch.elapsed(TimeUnit.MILLISECONDS);
        System.out.printf("Elapsed: %d. Rate %d.\n", t, (long) ((long) bytes.length * 1000L / t));
    }
}

From source file:org.apache.drill.exec.work.batch.SpoolingRawBatchBuffer.java

License:Apache License

private synchronized void initSpooler() throws IOException {
    if (spooler != null) {
        return;
    }

    Configuration conf = new Configuration();
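    // The spooling filesystem URI is taken from Drill's TEMP_FILESYSTEM configuration option.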
    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, context.getConfig().getString(ExecConstants.TEMP_FILESYSTEM));
    conf.set(DRILL_LOCAL_IMPL_STRING, LocalSyncableFileSystem.class.getName());
    fs = FileSystem.get(conf);
    path = getPath();
    outputStream = fs.create(path);
    final String spoolingThreadName = QueryIdHelper.getExecutorThreadName(context.getHandle())
            .concat(":Spooler-" + oppositeId + "-" + bufferIndex);
    spooler = new Spooler(spoolingThreadName);
    spooler.start();
}