List of usage examples for org.apache.hadoop.conf Configuration set
public void set(String name, String value)
Sets the value of the name property.
From source file:com.awcoleman.StandaloneJava.AvroCounterByBlock.java
License:Apache License
public AvroCounterByBlock(String inDirStr) throws IOException { long numAvroRecords = 0; //Get list of input files ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>(); Configuration conf = new Configuration(); conf.addResource(new Path("/etc/hadoop/conf/core-site.xml")); conf.set("dfs.replication", "1"); //see http://stackoverflow.com/questions/24548699/how-to-append-to-an-hdfs-file-on-an-extremely-small-cluster-3-nodes-or-less FileSystem hdfs = null;// ww w .ja va2 s .co m try { hdfs = FileSystem.get(conf); } catch (java.io.IOException ioe) { System.out.println("Error opening HDFS filesystem. Exiting. Error message: " + ioe.getMessage()); System.exit(1); } if (hdfs.getStatus() == null) { System.out.println("Unable to contact HDFS filesystem. Exiting."); System.exit(1); } //Check if input dirs/file exists and get file list (even if list of single file) Path inPath = new Path(inDirStr); if (hdfs.exists(inPath) && hdfs.isFile(inPath)) { //single file inputFileList.add(hdfs.getFileStatus(inPath)); } else if (hdfs.exists(inPath) && hdfs.isDirectory(inPath)) { //dir //Get list of input files RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inPath, true); while (fileStatusListIterator.hasNext()) { LocatedFileStatus fileStatus = fileStatusListIterator.next(); if (fileStatus.isFile() && !fileStatus.getPath().getName().equals("_SUCCESS")) { inputFileList.add((FileStatus) fileStatus); } } } else { System.out.println("Input directory ( " + inDirStr + " ) not found or is not directory. 
Exiting."); System.exit(1); } for (FileStatus thisFileStatus : inputFileList) { //_SUCCESS files are 0 bytes if (thisFileStatus.getLen() == 0) { continue; } DataFileStream<Object> dfs = null; FSDataInputStream inStream = hdfs.open(thisFileStatus.getPath()); GenericDatumReader<Object> reader = new GenericDatumReader<Object>(); dfs = new DataFileStream<Object>(inStream, reader); long thisFileRecords = 0; while (dfs.hasNext()) { numAvroRecords = numAvroRecords + dfs.getBlockCount(); thisFileRecords = thisFileRecords + dfs.getBlockCount(); //System.out.println("Input file "+thisFileStatus.getPath()+" getBlockCount() is "+dfs.getBlockCount()+"." ); dfs.nextBlock(); } System.out.println("Input file " + thisFileStatus.getPath() + " has " + thisFileRecords + " records."); dfs.close(); inStream.close(); //TODO test on dir with non-avro file and see what the exception is, catch that and log to output but don't die. } System.out.println("Input dir/file ( " + inDirStr + " ) has " + inputFileList.size() + " files and " + numAvroRecords + " total records."); }
From source file:com.awcoleman.StandaloneJava.AvroCounterByRecord.java
License:Apache License
public AvroCounterByRecord(String inDirStr) throws IOException { long numAvroRecords = 0; //Get list of input files ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>(); Configuration conf = new Configuration(); conf.addResource(new Path("/etc/hadoop/conf/core-site.xml")); conf.set("dfs.replication", "1"); //see http://stackoverflow.com/questions/24548699/how-to-append-to-an-hdfs-file-on-an-extremely-small-cluster-3-nodes-or-less FileSystem hdfs = null;// www . j a v a2s . c o m try { hdfs = FileSystem.get(conf); } catch (java.io.IOException ioe) { System.out.println("Error opening HDFS filesystem. Exiting. Error message: " + ioe.getMessage()); System.exit(1); } if (hdfs.getStatus() == null) { System.out.println("Unable to contact HDFS filesystem. Exiting."); System.exit(1); } //Check if input dirs/file exists and get file list (even if list of single file) Path inPath = new Path(inDirStr); if (hdfs.exists(inPath) && hdfs.isFile(inPath)) { //single file inputFileList.add(hdfs.getFileStatus(inPath)); } else if (hdfs.exists(inPath) && hdfs.isDirectory(inPath)) { //dir //Get list of input files RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inPath, true); while (fileStatusListIterator.hasNext()) { LocatedFileStatus fileStatus = fileStatusListIterator.next(); if (fileStatus.isFile() && !fileStatus.getPath().getName().equals("_SUCCESS")) { inputFileList.add((FileStatus) fileStatus); } } } else { System.out.println("Input directory ( " + inDirStr + " ) not found or is not directory. 
Exiting."); System.exit(1); } for (FileStatus thisFileStatus : inputFileList) { //_SUCCESS files are 0 bytes if (thisFileStatus.getLen() == 0) { continue; } DataFileStream<Object> avroStream = null; FSDataInputStream inStream = hdfs.open(thisFileStatus.getPath()); GenericDatumReader<Object> reader = new GenericDatumReader<Object>(); avroStream = new DataFileStream<Object>(inStream, reader); long thisFileRecords = 0; while (avroStream.hasNext()) { numAvroRecords++; thisFileRecords++; avroStream.next(); } avroStream.close(); inStream.close(); System.out.println("Input file " + thisFileStatus.getPath() + " has " + thisFileRecords + " records."); //TODO test on dir with non-avro file and see what the exception is, catch that and log to output but don't die. } System.out.println("Input dir/file ( " + inDirStr + " ) has " + inputFileList.size() + " files and " + numAvroRecords + " total records."); }
From source file:com.bah.culvert.adapter.TableAdapter.java
License:Apache License
public static void setTableName(Configuration conf, String tableName) { conf.set(TableAdapter.TABLE_NAME_SETTING_KEY, tableName); }
From source file:com.bah.culvert.Client.java
License:Apache License
/** * Set the database the client is currently talking to * @param db DatabaseAdapter to connect to the database * @param conf Top level configuration to pack the database's configuration in *///from www .j a va 2s . c o m public static void setDatabase(DatabaseAdapter db, Configuration conf) { conf.set(DATABASE_ADAPTER_CONF_KEY, db.getClass().getName()); ConfUtils.packConfigurationInPrefix(DATABASE_ADAPTER_CONF_PREFIX, db.getConf(), conf); }
From source file:com.bah.culvert.configuration.CConfiguration.java
License:Apache License
/** * Creates a clone of passed configuration. * //w w w. java 2 s. c o m * @param that Configuration to clone. * @return a Configuration created with the culvert-*.xml files plus the given * configuration. */ public static Configuration create(final Configuration that) { Configuration conf = create(); for (Entry<String, String> e : that) { conf.set(e.getKey(), e.getValue()); } return conf; }
From source file:com.bah.culvert.configuration.ConfigurationTest.java
License:Apache License
@Test public void incorporatesNewConfItems() { Configuration conf = new Configuration(); String v1 = conf.iterator().next().getKey(); conf.set(v1, "a special value"); Configuration c2 = CConfiguration.create(conf); Assert.assertEquals("a special value", c2.get(v1)); }
From source file:com.bah.culvert.data.index.Index.java
License:Apache License
/** * Set the index name./*from w w w. jav a 2 s . co m*/ * @param name The name of the index. * @param conf The configuration to set. */ public static void setIndexName(String name, Configuration conf) { conf.set(NAME_CONF_KEY, name); }
From source file:com.bah.culvert.data.index.Index.java
License:Apache License
/** * Set the name of the data table containing the indexed row tuple. * @param table The name of the data table. * @param conf The configuration to set. *//* w ww . j a va 2 s .c o m*/ public static void setPrimaryTable(String table, Configuration conf) { conf.set(PRIMARY_TABLE_CONF_KEY, table); }
From source file:com.bah.culvert.data.index.Index.java
License:Apache License
/** * Set the name of the index table. This should refer to the actual name of * the index table. This is can be different than the name of the index. * @param table The name of the index table. * @param conf The configuration to set. *///from ww w. jav a2s. co m public static void setIndexTable(String table, Configuration conf) { conf.set(INDEX_TABLE_CONF_KEY, table); }
From source file:com.bah.culvert.data.index.Index.java
License:Apache License
/** * Set the column family of the column tuple that is being indexed. * @param colFam The column family./* w w w . j a v a 2 s .c om*/ * @param conf The configuration to set. */ public static void setColumnFamily(String colFam, Configuration conf) { conf.set(COL_FAM_CONF_KEY, colFam); }