List of usage examples for org.apache.hadoop.fs.FileSystem.FS_DEFAULT_NAME_KEY
Field: public static final String FS_DEFAULT_NAME_KEY (in Hadoop 2.x this resolves to the "fs.defaultFS" configuration property).
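Before the project examples, here is a minimal, self-contained sketch (not taken from any of the sources below) of the pattern they all share: store a file system URI under FileSystem.FS_DEFAULT_NAME_KEY on a Configuration, then obtain a FileSystem from it. The "file:///" value is only an illustration; a cluster deployment would typically point at an hdfs:// URI.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FsDefaultNameKeyExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Point the default file system at the local file system ("file:///" is an example value;
    // an HDFS deployment would use something like "hdfs://namenode:8020").
    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "file:///");

    // FileSystem.get(conf) resolves whatever URI is stored under FS_DEFAULT_NAME_KEY.
    FileSystem fs = FileSystem.get(conf);
    System.out.println("Default FS URI: " + FileSystem.getDefaultUri(conf));
    System.out.println("/tmp exists: " + fs.exists(new Path("/tmp")));
  }
}

Every example below is a variation on these few lines: pick a URI (the local file system, a custom scheme such as sync:///, or a value passed in through writer options), set it under FS_DEFAULT_NAME_KEY, and call FileSystem.get().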
From source file: org.apache.drill.exec.store.hive.HiveTestDataGenerator.java
License: Apache License
private void generateTestData() throws Exception {
  HiveConf conf = new HiveConf(SessionState.class);

  conf.set("javax.jdo.option.ConnectionURL",
      String.format("jdbc:derby:;databaseName=%s;create=true", dbDir));
  conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "file:///");
  conf.set("hive.metastore.warehouse.dir", whDir);
  conf.set("mapred.job.tracker", "local");
  conf.set(ConfVars.SCRATCHDIR.varname, getTempDir("scratch_dir"));
  conf.set(ConfVars.LOCALSCRATCHDIR.varname, getTempDir("local_scratch_dir"));
  conf.set(ConfVars.DYNAMICPARTITIONINGMODE.varname, "nonstrict");

  SessionState ss = new SessionState(conf);
  SessionState.start(ss);
  Driver hiveDriver = new Driver(conf);

  // generate (key, value) test data
  String testDataFile = generateTestDataFile();

  // Create a (key, value) schema table with Text SerDe which is available in hive-serdes.jar
  executeQuery(hiveDriver, "CREATE TABLE IF NOT EXISTS default.kv(key INT, value STRING) " +
      "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE");
  executeQuery(hiveDriver, "LOAD DATA LOCAL INPATH '" + testDataFile + "' OVERWRITE INTO TABLE default.kv");

  // Create a (key, value) schema table in non-default database with RegexSerDe which is available in hive-contrib.jar
  // Table with RegExSerde is expected to have columns of STRING type only.
  executeQuery(hiveDriver, "CREATE DATABASE IF NOT EXISTS db1");
  executeQuery(hiveDriver, "CREATE TABLE db1.kv_db1(key STRING, value STRING) " +
      "ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe' " +
      "WITH SERDEPROPERTIES (" +
      " \"input.regex\" = \"([0-9]*), (.*_[0-9]*)\", " +
      " \"output.format.string\" = \"%1$s, %2$s\"" +
      ") ");
  executeQuery(hiveDriver, "INSERT INTO TABLE db1.kv_db1 SELECT * FROM default.kv");

  // Create an Avro format based table backed by schema in a separate file
  final String avroCreateQuery = String.format(
      "CREATE TABLE db1.avro " +
      "ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' " +
      "STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' " +
      "OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' " +
      "TBLPROPERTIES ('avro.schema.url'='file:///%s')",
      BaseTestQuery.getPhysicalFileFromResource("avro_test_schema.json").replace('\\', '/'));

  executeQuery(hiveDriver, avroCreateQuery);
  executeQuery(hiveDriver, "INSERT INTO TABLE db1.avro SELECT * FROM default.kv");

  executeQuery(hiveDriver, "USE default");

  // create a table with no data
  executeQuery(hiveDriver, "CREATE TABLE IF NOT EXISTS empty_table(a INT, b STRING)");
  // delete the table location of empty table
  File emptyTableLocation = new File(whDir, "empty_table");
  if (emptyTableLocation.exists()) {
    FileUtils.forceDelete(emptyTableLocation);
  }

  // create a Hive table that has columns with data types which are supported for reading in Drill.
  testDataFile = generateAllTypesDataFile();
  executeQuery(hiveDriver, "CREATE TABLE IF NOT EXISTS readtest (" +
      " binary_field BINARY," + " boolean_field BOOLEAN," + " tinyint_field TINYINT," +
      " decimal0_field DECIMAL," + " decimal9_field DECIMAL(6, 2)," + " decimal18_field DECIMAL(15, 5)," +
      " decimal28_field DECIMAL(23, 1)," + " decimal38_field DECIMAL(30, 3)," +
      " double_field DOUBLE," + " float_field FLOAT," + " int_field INT," + " bigint_field BIGINT," +
      " smallint_field SMALLINT," + " string_field STRING," + " varchar_field VARCHAR(50)," +
      " timestamp_field TIMESTAMP," + " date_field DATE," + " char_field CHAR(10)" +
      ") PARTITIONED BY (" +
      // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now.
      // " binary_part BINARY," +
      " boolean_part BOOLEAN," + " tinyint_part TINYINT," + " decimal0_part DECIMAL," +
      " decimal9_part DECIMAL(6, 2)," + " decimal18_part DECIMAL(15, 5)," +
      " decimal28_part DECIMAL(23, 1)," + " decimal38_part DECIMAL(30, 3)," +
      " double_part DOUBLE," + " float_part FLOAT," + " int_part INT," + " bigint_part BIGINT," +
      " smallint_part SMALLINT," + " string_part STRING," + " varchar_part VARCHAR(50)," +
      " timestamp_part TIMESTAMP," + " date_part DATE," + " char_part CHAR(10)" +
      ") ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' " +
      "TBLPROPERTIES ('serialization.null.format'='') ");

  // Add a partition to table 'readtest'
  executeQuery(hiveDriver, "ALTER TABLE readtest ADD IF NOT EXISTS PARTITION ( " +
      // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now.
      // " binary_part='binary', " +
      " boolean_part='true', " + " tinyint_part='64', " + " decimal0_part='36.9', " +
      " decimal9_part='36.9', " + " decimal18_part='3289379872.945645', " +
      " decimal28_part='39579334534534.35345', " + " decimal38_part='363945093845093890.9', " +
      " double_part='8.345', " + " float_part='4.67', " + " int_part='123456', " +
      " bigint_part='234235', " + " smallint_part='3455', " + " string_part='string', " +
      " varchar_part='varchar', " + " timestamp_part='2013-07-05 17:01:00', " +
      " date_part='2013-07-05', " + " char_part='char')");

  // Add a second partition to table 'readtest' which contains the same values as the first partition except
  // for tinyint_part partition column
  executeQuery(hiveDriver, "ALTER TABLE readtest ADD IF NOT EXISTS PARTITION ( " +
      // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now.
      // " binary_part='binary', " +
      " boolean_part='true', " + " tinyint_part='65', " + " decimal0_part='36.9', " +
      " decimal9_part='36.9', " + " decimal18_part='3289379872.945645', " +
      " decimal28_part='39579334534534.35345', " + " decimal38_part='363945093845093890.9', " +
      " double_part='8.345', " + " float_part='4.67', " + " int_part='123456', " +
      " bigint_part='234235', " + " smallint_part='3455', " + " string_part='string', " +
      " varchar_part='varchar', " + " timestamp_part='2013-07-05 17:01:00', " +
      " date_part='2013-07-05', " + " char_part='char')");

  // Load data into table 'readtest'
  executeQuery(hiveDriver,
      String.format("LOAD DATA LOCAL INPATH '%s' INTO TABLE default.readtest PARTITION (" +
          // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now.
          // " binary_part='binary', " +
          " boolean_part='true', " + " tinyint_part='64', " + " decimal0_part='36.9', " +
          " decimal9_part='36.9', " + " decimal18_part='3289379872.945645', " +
          " decimal28_part='39579334534534.35345', " + " decimal38_part='363945093845093890.9', " +
          " double_part='8.345', " + " float_part='4.67', " + " int_part='123456', " +
          " bigint_part='234235', " + " smallint_part='3455', " + " string_part='string', " +
          " varchar_part='varchar', " + " timestamp_part='2013-07-05 17:01:00', " +
          " date_part='2013-07-05'," + " char_part='char'" + ")", testDataFile));

  // create a table that has all Hive types. This is to test how hive tables metadata is populated in
  // Drill's INFORMATION_SCHEMA.
  executeQuery(hiveDriver, "CREATE TABLE IF NOT EXISTS infoschematest(" +
      "booleanType BOOLEAN, " + "tinyintType TINYINT, " + "smallintType SMALLINT, " +
      "intType INT, " + "bigintType BIGINT, " + "floatType FLOAT, " + "doubleType DOUBLE, " +
      "dateType DATE, " + "timestampType TIMESTAMP, " + "binaryType BINARY, " +
      "decimalType DECIMAL(38, 2), " + "stringType STRING, " + "varCharType VARCHAR(20), " +
      "listType ARRAY<STRING>, " + "mapType MAP<STRING,INT>, " +
      "structType STRUCT<sint:INT,sboolean:BOOLEAN,sstring:STRING>, " +
      "uniontypeType UNIONTYPE<int, double, array<string>>, " + "charType CHAR(10))");

  /**
   * Create a PARQUET table with all supported types.
   */
  executeQuery(hiveDriver, "CREATE TABLE readtest_parquet (" +
      " binary_field BINARY, " + " boolean_field BOOLEAN, " + " tinyint_field TINYINT," +
      " decimal0_field DECIMAL," + " decimal9_field DECIMAL(6, 2)," + " decimal18_field DECIMAL(15, 5)," +
      " decimal28_field DECIMAL(23, 1)," + " decimal38_field DECIMAL(30, 3)," +
      " double_field DOUBLE," + " float_field FLOAT," + " int_field INT," + " bigint_field BIGINT," +
      " smallint_field SMALLINT," + " string_field STRING," + " varchar_field VARCHAR(50)," +
      " timestamp_field TIMESTAMP," + " char_field CHAR(10)" +
      ") PARTITIONED BY (" +
      // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now.
      // " binary_part BINARY," +
      " boolean_part BOOLEAN," + " tinyint_part TINYINT," + " decimal0_part DECIMAL," +
      " decimal9_part DECIMAL(6, 2)," + " decimal18_part DECIMAL(15, 5)," +
      " decimal28_part DECIMAL(23, 1)," + " decimal38_part DECIMAL(30, 3)," +
      " double_part DOUBLE," + " float_part FLOAT," + " int_part INT," + " bigint_part BIGINT," +
      " smallint_part SMALLINT," + " string_part STRING," + " varchar_part VARCHAR(50)," +
      " timestamp_part TIMESTAMP," + " date_part DATE," + " char_part CHAR(10)" +
      ") STORED AS parquet ");

  executeQuery(hiveDriver, "INSERT OVERWRITE TABLE readtest_parquet " +
      "PARTITION (" +
      // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now.
      // " binary_part='binary', " +
      " boolean_part='true', " + " tinyint_part='64', " + " decimal0_part='36.9', " +
      " decimal9_part='36.9', " + " decimal18_part='3289379872.945645', " +
      " decimal28_part='39579334534534.35345', " + " decimal38_part='363945093845093890.9', " +
      " double_part='8.345', " + " float_part='4.67', " + " int_part='123456', " +
      " bigint_part='234235', " + " smallint_part='3455', " + " string_part='string', " +
      " varchar_part='varchar', " + " timestamp_part='2013-07-05 17:01:00', " +
      " date_part='2013-07-05', " + " char_part='char'" + ") " +
      " SELECT " +
      " binary_field," + " boolean_field," + " tinyint_field," + " decimal0_field," +
      " decimal9_field," + " decimal18_field," + " decimal28_field," + " decimal38_field," +
      " double_field," + " float_field," + " int_field," + " bigint_field," +
      " smallint_field," + " string_field," + " varchar_field," + " timestamp_field," +
      " char_field" +
      " FROM readtest WHERE tinyint_part = 64");

  // Add a second partition to table 'readtest_parquet' which contains the same values as the first partition except
  // for tinyint_part partition column
  executeQuery(hiveDriver, "ALTER TABLE readtest_parquet ADD PARTITION ( " +
      // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now.
      // " binary_part='binary', " +
      " boolean_part='true', " + " tinyint_part='65', " + " decimal0_part='36.9', " +
      " decimal9_part='36.9', " + " decimal18_part='3289379872.945645', " +
      " decimal28_part='39579334534534.35345', " + " decimal38_part='363945093845093890.9', " +
      " double_part='8.345', " + " float_part='4.67', " + " int_part='123456', " +
      " bigint_part='234235', " + " smallint_part='3455', " + " string_part='string', " +
      " varchar_part='varchar', " + " timestamp_part='2013-07-05 17:01:00', " +
      " date_part='2013-07-05', " + " char_part='char')");

  // create a Hive view to test how its metadata is populated in Drill's INFORMATION_SCHEMA
  executeQuery(hiveDriver, "CREATE VIEW IF NOT EXISTS hiveview AS SELECT * FROM kv");

  executeQuery(hiveDriver, "CREATE TABLE IF NOT EXISTS " +
      "partition_pruning_test_loadtable(a DATE, b TIMESTAMP, c INT, d INT, e INT) " +
      "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE");
  executeQuery(hiveDriver,
      String.format("LOAD DATA LOCAL INPATH '%s' INTO TABLE partition_pruning_test_loadtable",
          generateTestDataFileForPartitionInput()));

  // create partitioned hive table to test partition pruning
  executeQuery(hiveDriver, "CREATE TABLE IF NOT EXISTS partition_pruning_test(a DATE, b TIMESTAMP) " +
      "partitioned by (c INT, d INT, e INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE");
  executeQuery(hiveDriver, "INSERT OVERWRITE TABLE partition_pruning_test PARTITION(c, d, e) " +
      "SELECT a, b, c, d, e FROM partition_pruning_test_loadtable");

  // Add a partition with custom location
  executeQuery(hiveDriver,
      String.format("ALTER TABLE partition_pruning_test ADD PARTITION (c=99, d=98, e=97) LOCATION '%s'",
          getTempDir("part1")));
  executeQuery(hiveDriver,
      String.format("INSERT INTO TABLE partition_pruning_test PARTITION(c=99, d=98, e=97) " +
          "SELECT '%s', '%s' FROM kv LIMIT 1",
          new Date(System.currentTimeMillis()).toString(),
          new Timestamp(System.currentTimeMillis()).toString()));

  executeQuery(hiveDriver, "DROP TABLE partition_pruning_test_loadtable");

  // Create a partitioned parquet table (DRILL-3938)
  executeQuery(hiveDriver,
      "CREATE TABLE kv_parquet(key INT, value STRING) PARTITIONED BY (part1 int) STORED AS PARQUET");
  executeQuery(hiveDriver, "INSERT INTO TABLE kv_parquet PARTITION(part1) SELECT key, value, key FROM default.kv");
  executeQuery(hiveDriver, "ALTER TABLE kv_parquet ADD COLUMNS (newcol string)");

  executeQuery(hiveDriver, "CREATE TABLE countStar_Parquet (int_field INT) STORED AS parquet");
  final int numOfRows = 200;
  final StringBuffer sb = new StringBuffer();
  sb.append("VALUES ");
  for (int i = 0; i < numOfRows; ++i) {
    if (i != 0) {
      sb.append(",");
    }
    sb.append("(").append(i).append(")");
  }
  executeQuery(hiveDriver, "INSERT INTO TABLE countStar_Parquet \n" + sb.toString());

  // Create a StorageHandler based table (DRILL-3739)
  executeQuery(hiveDriver, "CREATE TABLE kv_sh(key INT, value STRING) STORED BY " +
      "'org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler'");
  // Insert fails if the table directory already exists for tables with DefaultStorageHandlers. Its a known
  // issue in Hive. So delete the table directory created as part of the CREATE TABLE
  FileUtils.deleteQuietly(new File(whDir, "kv_sh"));
  //executeQuery(hiveDriver, "INSERT OVERWRITE TABLE kv_sh SELECT * FROM kv");

  // Create text tables with skip header and footer table property
  executeQuery(hiveDriver, "create database if not exists skipper");
  executeQuery(hiveDriver, createTableWithHeaderFooterProperties("skipper.kv_text_small", "textfile", "1", "1"));
  executeQuery(hiveDriver, generateTestDataWithHeadersAndFooters("skipper.kv_text_small", 5, 1, 1));

  executeQuery(hiveDriver, createTableWithHeaderFooterProperties("skipper.kv_text_large", "textfile", "2", "2"));
  executeQuery(hiveDriver, generateTestDataWithHeadersAndFooters("skipper.kv_text_large", 5000, 2, 2));

  executeQuery(hiveDriver,
      createTableWithHeaderFooterProperties("skipper.kv_incorrect_skip_header", "textfile", "A", "1"));
  executeQuery(hiveDriver, generateTestDataWithHeadersAndFooters("skipper.kv_incorrect_skip_header", 5, 1, 1));

  executeQuery(hiveDriver,
      createTableWithHeaderFooterProperties("skipper.kv_incorrect_skip_footer", "textfile", "1", "A"));
  executeQuery(hiveDriver, generateTestDataWithHeadersAndFooters("skipper.kv_incorrect_skip_footer", 5, 1, 1));

  // Create rcfile table with skip header and footer table property
  executeQuery(hiveDriver, createTableWithHeaderFooterProperties("skipper.kv_rcfile_large", "rcfile", "1", "1"));
  executeQuery(hiveDriver, "insert into table skipper.kv_rcfile_large select * from skipper.kv_text_large");

  // Create parquet table with skip header and footer table property
  executeQuery(hiveDriver, createTableWithHeaderFooterProperties("skipper.kv_parquet_large", "parquet", "1", "1"));
  executeQuery(hiveDriver, "insert into table skipper.kv_parquet_large select * from skipper.kv_text_large");

  // Create sequencefile table with skip header and footer table property
  executeQuery(hiveDriver,
      createTableWithHeaderFooterProperties("skipper.kv_sequencefile_large", "sequencefile", "1", "1"));
  executeQuery(hiveDriver, "insert into table skipper.kv_sequencefile_large select * from skipper.kv_text_large");

  // Create a table based on json file
  executeQuery(hiveDriver, "create table default.simple_json(json string)");
  final String loadData = String.format(
      "load data local inpath '" + Resources.getResource("simple.json") + "' into table default.simple_json");
  executeQuery(hiveDriver, loadData);

  ss.close();
}
From source file: org.apache.drill.exec.store.model.DrillModelWriter.java
License: Apache License
@Override
public void init(Map<String, String> writerOptions) throws IOException {
  this.location = writerOptions.get("location");
  this.prefix = writerOptions.get("prefix");
  // this.fieldDelimiter = writerOptions.get("separator");
  this.extension = writerOptions.get("extension");

  Configuration conf = new Configuration();
  conf.set(FileSystem.FS_DEFAULT_NAME_KEY, writerOptions.get(FileSystem.FS_DEFAULT_NAME_KEY));
  this.fs = FileSystem.get(conf);

  // this.currentRecord = new Byte;//new StringBuilder();
  this.index = 0;
}
From source file: org.apache.drill.exec.store.parquet.ParquetFormatPlugin.java
License: Apache License
public RecordWriter getRecordWriter(FragmentContext context, ParquetWriter writer)
    throws IOException, OutOfMemoryException {
  Map<String, String> options = Maps.newHashMap();

  options.put("location", writer.getLocation());

  FragmentHandle handle = context.getHandle();
  String fragmentId = String.format("%d_%d", handle.getMajorFragmentId(), handle.getMinorFragmentId());
  options.put("prefix", fragmentId);

  options.put(FileSystem.FS_DEFAULT_NAME_KEY, ((FileSystemConfig) writer.getStorageConfig()).connection);

  options.put(ExecConstants.PARQUET_BLOCK_SIZE,
      context.getOptions().getOption(ExecConstants.PARQUET_BLOCK_SIZE).num_val.toString());
  options.put(ExecConstants.PARQUET_PAGE_SIZE,
      context.getOptions().getOption(ExecConstants.PARQUET_PAGE_SIZE).num_val.toString());
  options.put(ExecConstants.PARQUET_DICT_PAGE_SIZE,
      context.getOptions().getOption(ExecConstants.PARQUET_DICT_PAGE_SIZE).num_val.toString());
  options.put(ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE,
      context.getOptions().getOption(ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE).string_val);
  options.put(ExecConstants.PARQUET_WRITER_ENABLE_DICTIONARY_ENCODING,
      context.getOptions().getOption(ExecConstants.PARQUET_WRITER_ENABLE_DICTIONARY_ENCODING).bool_val.toString());

  RecordWriter recordWriter = new ParquetRecordWriter(context, writer);
  recordWriter.init(options);

  return recordWriter;
}
From source file: org.apache.drill.exec.store.parquet.ParquetRecordWriter.java
License: Apache License
@Override
public void init(Map<String, String> writerOptions) throws IOException {
  this.location = writerOptions.get("location");
  this.prefix = writerOptions.get("prefix");

  conf = new Configuration();
  conf.set(FileSystem.FS_DEFAULT_NAME_KEY, writerOptions.get(FileSystem.FS_DEFAULT_NAME_KEY));

  blockSize = Integer.parseInt(writerOptions.get(ExecConstants.PARQUET_BLOCK_SIZE));
  pageSize = Integer.parseInt(writerOptions.get(ExecConstants.PARQUET_PAGE_SIZE));
  dictionaryPageSize = Integer.parseInt(writerOptions.get(ExecConstants.PARQUET_DICT_PAGE_SIZE));

  String codecName = writerOptions.get(ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE).toLowerCase();
  switch (codecName) {
  case "snappy":
    codec = CompressionCodecName.SNAPPY;
    break;
  case "lzo":
    codec = CompressionCodecName.LZO;
    break;
  case "gzip":
    codec = CompressionCodecName.GZIP;
    break;
  case "none":
  case "uncompressed":
    codec = CompressionCodecName.UNCOMPRESSED;
    break;
  default:
    throw new UnsupportedOperationException(String.format("Unknown compression type: %s", codecName));
  }

  enableDictionary = Boolean
      .parseBoolean(writerOptions.get(ExecConstants.PARQUET_WRITER_ENABLE_DICTIONARY_ENCODING));
}
From source file: org.apache.drill.exec.store.parquet.TestFileGenerator.java
License: Apache License
public static void generateParquetFile(String filename, ParquetTestProperties props) throws Exception {
  int currentBooleanByte = 0;
  WrapAroundCounter booleanBitCounter = new WrapAroundCounter(7);

  Configuration configuration = new Configuration();
  configuration.set(FileSystem.FS_DEFAULT_NAME_KEY, "file:///");
  //"message m { required int32 integer; required int64 integer64; required boolean b; required float f; required double d;}"

  FileSystem fs = FileSystem.get(configuration);
  Path path = new Path(filename);
  if (fs.exists(path)) {
    fs.delete(path, false);
  }

  String messageSchema = "message m {";
  for (FieldInfo fieldInfo : props.fields.values()) {
    messageSchema += " required " + fieldInfo.parquetType + " " + fieldInfo.name + ";";
  }
  // remove the last semicolon, java really needs a join method for strings...
  // TODO - nvm apparently it requires a semicolon after every field decl, might want to file a bug
  //messageSchema = messageSchema.substring(schemaType, messageSchema.length() - 1);
  messageSchema += "}";

  MessageType schema = MessageTypeParser.parseMessageType(messageSchema);

  CompressionCodecName codec = CompressionCodecName.UNCOMPRESSED;
  ParquetFileWriter w = new ParquetFileWriter(configuration, schema, path);
  w.start();
  HashMap<String, Integer> columnValuesWritten = new HashMap<>();
  int valsWritten;
  for (int k = 0; k < props.numberRowGroups; k++) {
    w.startBlock(props.recordsPerRowGroup);
    currentBooleanByte = 0;
    booleanBitCounter.reset();

    for (FieldInfo fieldInfo : props.fields.values()) {

      if (!columnValuesWritten.containsKey(fieldInfo.name)) {
        columnValuesWritten.put(fieldInfo.name, 0);
        valsWritten = 0;
      } else {
        valsWritten = columnValuesWritten.get(fieldInfo.name);
      }

      String[] path1 = { fieldInfo.name };
      ColumnDescriptor c1 = schema.getColumnDescription(path1);

      w.startColumn(c1, props.recordsPerRowGroup, codec);
      final int valsPerPage = (int) Math.ceil(props.recordsPerRowGroup / (float) fieldInfo.numberOfPages);
      final int PAGE_SIZE = 1024 * 1024; // 1 MB
      byte[] bytes;
      RunLengthBitPackingHybridValuesWriter defLevels = new RunLengthBitPackingHybridValuesWriter(
          MAX_EXPECTED_BIT_WIDTH_FOR_DEFINITION_LEVELS, valsPerPage, PAGE_SIZE,
          new DirectByteBufferAllocator());
      RunLengthBitPackingHybridValuesWriter repLevels = new RunLengthBitPackingHybridValuesWriter(
          MAX_EXPECTED_BIT_WIDTH_FOR_DEFINITION_LEVELS, valsPerPage, PAGE_SIZE,
          new DirectByteBufferAllocator());

      // for variable length binary fields
      int bytesNeededToEncodeLength = 4;
      if (fieldInfo.bitLength > 0) {
        bytes = new byte[(int) Math.ceil(valsPerPage * fieldInfo.bitLength / 8.0)];
      } else {
        // the twelve at the end is to account for storing a 4 byte length with each value
        int totalValLength = ((byte[]) fieldInfo.values[0]).length + ((byte[]) fieldInfo.values[1]).length
            + ((byte[]) fieldInfo.values[2]).length + 3 * bytesNeededToEncodeLength;
        // used for the case where there is a number of values in this row group that is not divisible by 3
        int leftOverBytes = 0;
        if (valsPerPage % 3 > 0) {
          leftOverBytes += ((byte[]) fieldInfo.values[1]).length + bytesNeededToEncodeLength;
        }
        if (valsPerPage % 3 > 1) {
          leftOverBytes += ((byte[]) fieldInfo.values[2]).length + bytesNeededToEncodeLength;
        }
        bytes = new byte[valsPerPage / 3 * totalValLength + leftOverBytes];
      }
      int bytesPerPage = (int) (valsPerPage * (fieldInfo.bitLength / 8.0));
      int bytesWritten = 0;
      for (int z = 0; z < fieldInfo.numberOfPages; z++, bytesWritten = 0) {
        for (int i = 0; i < valsPerPage; i++) {
          repLevels.writeInteger(0);
          defLevels.writeInteger(1);
          //System.out.print(i + ", " + (i % 25 == 0 ? "\n gen " + fieldInfo.name + ": " : ""));
          if (fieldInfo.values[0] instanceof Boolean) {
            bytes[currentBooleanByte] |= bitFields[booleanBitCounter.val]
                & ((boolean) fieldInfo.values[valsWritten % 3] ? allBitsTrue : allBitsFalse);
            booleanBitCounter.increment();
            if (booleanBitCounter.val == 0) {
              currentBooleanByte++;
            }
            valsWritten++;
            if (currentBooleanByte > bytesPerPage) {
              break;
            }
          } else {
            if (fieldInfo.values[valsWritten % 3] instanceof byte[]) {
              System.arraycopy(ByteArrayUtil.toByta(((byte[]) fieldInfo.values[valsWritten % 3]).length),
                  0, bytes, bytesWritten, bytesNeededToEncodeLength);
              System.arraycopy(fieldInfo.values[valsWritten % 3], 0, bytes,
                  bytesWritten + bytesNeededToEncodeLength, ((byte[]) fieldInfo.values[valsWritten % 3]).length);
              bytesWritten += ((byte[]) fieldInfo.values[valsWritten % 3]).length + bytesNeededToEncodeLength;
            } else {
              System.arraycopy(ByteArrayUtil.toByta(fieldInfo.values[valsWritten % 3]), 0, bytes,
                  i * (fieldInfo.bitLength / 8), fieldInfo.bitLength / 8);
            }
            valsWritten++;
          }
        }
        byte[] fullPage = new byte[2 * 4 * valsPerPage + bytes.length];
        byte[] repLevelBytes = repLevels.getBytes().toByteArray();
        byte[] defLevelBytes = defLevels.getBytes().toByteArray();
        System.arraycopy(bytes, 0, fullPage, 0, bytes.length);
        System.arraycopy(repLevelBytes, 0, fullPage, bytes.length, repLevelBytes.length);
        System.arraycopy(defLevelBytes, 0, fullPage, bytes.length + repLevelBytes.length, defLevelBytes.length);
        w.writeDataPage((props.recordsPerRowGroup / fieldInfo.numberOfPages), fullPage.length,
            BytesInput.from(fullPage), RLE, RLE, PLAIN);
        currentBooleanByte = 0;
      }
      w.endColumn();
      columnValuesWritten.remove(fieldInfo.name);
      columnValuesWritten.put(fieldInfo.name, valsWritten);
    }

    w.endBlock();
  }
  w.end(new HashMap<String, String>());
  logger.debug("Finished generating parquet file.");
}
From source file: org.apache.drill.exec.store.parquet.TestParquetFilterPushDown.java
License: Apache License
@BeforeClass
public static void initFSAndCreateFragContext() throws Exception {
  fragContext = new FragmentContext(bits[0].getContext(), BitControl.PlanFragment.getDefaultInstance(), null,
      bits[0].getContext().getFunctionImplementationRegistry());

  Configuration conf = new Configuration();
  conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "local");
  fs = FileSystem.get(conf);
}
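The test above sets the key to the bare value "local" rather than a URI. As a side note, here is a small hedged sketch (not part of the Drill test) illustrating that Hadoop treats "local" as a legacy spelling of the local file system and normalizes it to "file:///" when the default URI is resolved:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class LocalDefaultFsCheck {
  public static void main(String[] args) throws Exception {
    // Older Hadoop configurations used the bare value "local" for the local file system.
    Configuration legacy = new Configuration();
    legacy.set(FileSystem.FS_DEFAULT_NAME_KEY, "local");

    // The URI form is the current spelling of the same thing.
    Configuration uriForm = new Configuration();
    uriForm.set(FileSystem.FS_DEFAULT_NAME_KEY, "file:///");

    // Both are expected to resolve to the local file system; getDefaultUri()
    // rewrites "local" to "file:///" (logging a deprecation warning on the way).
    System.out.println(FileSystem.getDefaultUri(legacy));   // file:///
    System.out.println(FileSystem.getDefaultUri(uriForm));  // file:///
  }
}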
From source file: org.apache.drill.exec.store.sys.local.FilePStore.java
License: Apache License
public static DrillFileSystem getFileSystem(DrillConfig config, Path root) throws IOException {
  Path blobRoot = root == null ? getLogDir() : root;

  Configuration fsConf = new Configuration();
  if (blobRoot.toUri().getScheme() != null) {
    fsConf.set(FileSystem.FS_DEFAULT_NAME_KEY, blobRoot.toUri().toString());
  }

  DrillFileSystem fs = new DrillFileSystem(fsConf);
  fs.mkdirs(blobRoot);
  return fs;
}
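A short usage sketch under stated assumptions (the DrillConfig construction, import paths, and directory names are illustrative, not taken from the Drill source): the helper only overrides FS_DEFAULT_NAME_KEY when the supplied root path carries a scheme; a scheme-less root leaves the Configuration's existing default file system untouched.

import org.apache.drill.common.config.DrillConfig;
import org.apache.drill.exec.store.dfs.DrillFileSystem;
import org.apache.drill.exec.store.sys.local.FilePStore;
import org.apache.hadoop.fs.Path;

public class FilePStoreFsSketch {
  public static void main(String[] args) throws Exception {
    DrillConfig config = DrillConfig.create();

    // Root with a scheme: the helper copies "file:///tmp/drill/pstore" into fs.defaultFS.
    DrillFileSystem explicitFs = FilePStore.getFileSystem(config, new Path("file:///tmp/drill/pstore"));

    // Root without a scheme: the Configuration's existing default file system is kept.
    DrillFileSystem defaultFs = FilePStore.getFileSystem(config, new Path("/tmp/drill/pstore"));

    explicitFs.close();
    defaultFs.close();
  }
}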
From source file: org.apache.drill.exec.store.text.DrillTextRecordWriter.java
License: Apache License
@Override
public void init(Map<String, String> writerOptions) throws IOException {
  this.location = writerOptions.get("location");
  this.prefix = writerOptions.get("prefix");
  this.fieldDelimiter = writerOptions.get("separator");
  this.extension = writerOptions.get("extension");

  Configuration conf = new Configuration();
  conf.set(FileSystem.FS_DEFAULT_NAME_KEY, writerOptions.get(FileSystem.FS_DEFAULT_NAME_KEY));
  this.fs = FileSystem.get(conf);

  this.currentRecord = new StringBuilder();
  this.index = 0;
}
From source file: org.apache.drill.exec.work.batch.FileTest.java
License: Apache License
public static void main(String[] args) throws IOException {
  Configuration conf = new Configuration();
  conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "sync:///");
  System.out.println(FileSystem.getDefaultUri(conf));
  FileSystem fs = FileSystem.get(conf);
  //  FileSystem fs = new LocalSyncableFileSystem(conf);
  Path path = new Path("/tmp/testFile");
  FSDataOutputStream out = fs.create(path);
  byte[] s = "hello world".getBytes();
  out.write(s);
  out.sync();
  //  out.close();
  FSDataInputStream in = fs.open(path);
  byte[] bytes = new byte[s.length];
  in.read(bytes);
  System.out.println(new String(bytes));

  File file = new File("/tmp/testFile");
  FileOutputStream fos = new FileOutputStream(file);
  FileInputStream fis = new FileInputStream(file);
  fos.write(s);
  fos.getFD().sync();
  fis.read(bytes);
  System.out.println(new String(bytes));

  out = fs.create(new Path("/tmp/file"));
  for (int i = 0; i < 100; i++) {
    bytes = new byte[256 * 1024];
    Stopwatch watch = Stopwatch.createStarted();
    out.write(bytes);
    out.sync();
    long t = watch.elapsed(TimeUnit.MILLISECONDS);
    System.out.printf("Elapsed: %d. Rate %d.\n", t, (long) ((long) bytes.length * 1000L / t));
  }
}
From source file: org.apache.drill.exec.work.batch.SpoolingRawBatchBuffer.java
License: Apache License
private synchronized void initSpooler() throws IOException {
  if (spooler != null) {
    return;
  }

  Configuration conf = new Configuration();
  conf.set(FileSystem.FS_DEFAULT_NAME_KEY, context.getConfig().getString(ExecConstants.TEMP_FILESYSTEM));
  conf.set(DRILL_LOCAL_IMPL_STRING, LocalSyncableFileSystem.class.getName());
  fs = FileSystem.get(conf);
  path = getPath();
  outputStream = fs.create(path);

  final String spoolingThreadName = QueryIdHelper.getExecutorThreadName(context.getHandle())
      .concat(":Spooler-" + oppositeId + "-" + bufferIndex);
  spooler = new Spooler(spoolingThreadName);
  spooler.start();
}