Example usage for org.apache.hadoop.conf Configuration set

List of usage examples for org.apache.hadoop.conf Configuration set

Introduction

On this page you can find example usages of org.apache.hadoop.conf Configuration set.

Prototype

public void set(String name, String value) 

Document

Set the value of the name property.
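Before the full examples below, a minimal self-contained sketch pairing set with get; the class name and the chosen property are illustrative only:

import org.apache.hadoop.conf.Configuration;

public class ConfSetExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // set(name, value) overrides any value loaded from the default *-site.xml resources.
        conf.set("dfs.replication", "1");
        // get(name) returns the stored value, or null if the property was never set.
        System.out.println("dfs.replication = " + conf.get("dfs.replication"));
    }
}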

Usage

From source file:com.awcoleman.StandaloneJava.AvroCounterByBlock.java

License:Apache License

public AvroCounterByBlock(String inDirStr) throws IOException {

    long numAvroRecords = 0;

    //Get list of input files
    ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>();

    Configuration conf = new Configuration();
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
    conf.set("dfs.replication", "1"); //see http://stackoverflow.com/questions/24548699/how-to-append-to-an-hdfs-file-on-an-extremely-small-cluster-3-nodes-or-less

    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(conf);
    } catch (java.io.IOException ioe) {
        System.out.println("Error opening HDFS filesystem. Exiting. Error message: " + ioe.getMessage());
        System.exit(1);
    }
    if (hdfs.getStatus() == null) {
        System.out.println("Unable to contact HDFS filesystem. Exiting.");
        System.exit(1);
    }

    //Check if input dirs/file exists and get file list (even if list of single file)
    Path inPath = new Path(inDirStr);
    if (hdfs.exists(inPath) && hdfs.isFile(inPath)) { //single file
        inputFileList.add(hdfs.getFileStatus(inPath));
    } else if (hdfs.exists(inPath) && hdfs.isDirectory(inPath)) { //dir
        //Get list of input files
        RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inPath, true);
        while (fileStatusListIterator.hasNext()) {
            LocatedFileStatus fileStatus = fileStatusListIterator.next();

            if (fileStatus.isFile() && !fileStatus.getPath().getName().equals("_SUCCESS")) {
                inputFileList.add(fileStatus);
            }
        }
    } else {
        System.out.println("Input directory ( " + inDirStr + " ) not found or is not directory. Exiting.");
        System.exit(1);
    }

    for (FileStatus thisFileStatus : inputFileList) {

        //_SUCCESS files are 0 bytes
        if (thisFileStatus.getLen() == 0) {
            continue;
        }

        DataFileStream<Object> dfs = null;
        FSDataInputStream inStream = hdfs.open(thisFileStatus.getPath());
        GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
        dfs = new DataFileStream<Object>(inStream, reader);

        long thisFileRecords = 0;
        while (dfs.hasNext()) {

            numAvroRecords = numAvroRecords + dfs.getBlockCount();
            thisFileRecords = thisFileRecords + dfs.getBlockCount();

            //System.out.println("Input file "+thisFileStatus.getPath()+" getBlockCount() is "+dfs.getBlockCount()+"." );

            dfs.nextBlock();
        }

        System.out.println("Input file " + thisFileStatus.getPath() + " has " + thisFileRecords + " records.");

        dfs.close();
        inStream.close();

        //TODO test on dir with non-avro file and see what the exception is, catch that and log to output but don't die.
    }

    System.out.println("Input dir/file ( " + inDirStr + " ) has " + inputFileList.size() + " files and "
            + numAvroRecords + " total records.");

}
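The TODO above asks what happens when the directory contains a non-Avro file. The DataFileStream constructor rejects input without the Avro magic header by throwing an IOException (the exact exception subclass varies by Avro version), so a hedged way to log and skip rather than exit could replace the stream setup inside the loop:

        DataFileStream<Object> dfs = null;
        FSDataInputStream inStream = hdfs.open(thisFileStatus.getPath());
        try {
            dfs = new DataFileStream<Object>(inStream, new GenericDatumReader<Object>());
        } catch (IOException ioe) {
            // Not an Avro container file (or unreadable): log and skip instead of dying.
            System.out.println("Skipping non-Avro input " + thisFileStatus.getPath() + ": " + ioe.getMessage());
            inStream.close();
            continue; // move on to the next FileStatus in the loop
        }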

From source file:com.awcoleman.StandaloneJava.AvroCounterByRecord.java

License:Apache License

public AvroCounterByRecord(String inDirStr) throws IOException {

    long numAvroRecords = 0;

    //Get list of input files
    ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>();

    Configuration conf = new Configuration();
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
    conf.set("dfs.replication", "1"); //see http://stackoverflow.com/questions/24548699/how-to-append-to-an-hdfs-file-on-an-extremely-small-cluster-3-nodes-or-less

    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(conf);
    } catch (java.io.IOException ioe) {
        System.out.println("Error opening HDFS filesystem. Exiting. Error message: " + ioe.getMessage());
        System.exit(1);
    }
    if (hdfs.getStatus() == null) {
        System.out.println("Unable to contact HDFS filesystem. Exiting.");
        System.exit(1);
    }

    //Check if input dirs/file exists and get file list (even if list of single file)
    Path inPath = new Path(inDirStr);
    if (hdfs.exists(inPath) && hdfs.isFile(inPath)) { //single file
        inputFileList.add(hdfs.getFileStatus(inPath));
    } else if (hdfs.exists(inPath) && hdfs.isDirectory(inPath)) { //dir
        //Get list of input files
        RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inPath, true);
        while (fileStatusListIterator.hasNext()) {
            LocatedFileStatus fileStatus = fileStatusListIterator.next();

            if (fileStatus.isFile() && !fileStatus.getPath().getName().equals("_SUCCESS")) {
                inputFileList.add(fileStatus);
            }
        }
    } else {
        System.out.println("Input directory ( " + inDirStr + " ) not found or is not directory. Exiting.");
        System.exit(1);
    }

    for (FileStatus thisFileStatus : inputFileList) {

        //_SUCCESS files are 0 bytes
        if (thisFileStatus.getLen() == 0) {
            continue;
        }

        DataFileStream<Object> avroStream = null;
        FSDataInputStream inStream = hdfs.open(thisFileStatus.getPath());
        GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
        avroStream = new DataFileStream<Object>(inStream, reader);

        long thisFileRecords = 0;

        while (avroStream.hasNext()) {
            numAvroRecords++;
            thisFileRecords++;
            avroStream.next();
        }
        avroStream.close();
        inStream.close();

        System.out.println("Input file " + thisFileStatus.getPath() + " has " + thisFileRecords + " records.");

        //TODO test on dir with non-avro file and see what the exception is, catch that and log to output but don't die.
    }

    System.out.println("Input dir/file ( " + inDirStr + " ) has " + inputFileList.size() + " files and "
            + numAvroRecords + " total records.");

}
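Compared to AvroCounterByBlock above, this variant calls avroStream.next() once per record, deserializing each datum. The by-block version instead adds getBlockCount() for each block and skips ahead with nextBlock(), which avoids per-record deserialization and is the faster choice when only a count is needed.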

From source file:com.bah.culvert.adapter.TableAdapter.java

License:Apache License

public static void setTableName(Configuration conf, String tableName) {
    conf.set(TableAdapter.TABLE_NAME_SETTING_KEY, tableName);
}

From source file:com.bah.culvert.Client.java

License:Apache License

/**
 * Set the database the client is currently talking to
 * @param db DatabaseAdapter to connect to the database
 * @param conf Top level configuration to pack the database's configuration in
 */
public static void setDatabase(DatabaseAdapter db, Configuration conf) {
    conf.set(DATABASE_ADAPTER_CONF_KEY, db.getClass().getName());
    ConfUtils.packConfigurationInPrefix(DATABASE_ADAPTER_CONF_PREFIX, db.getConf(), conf);
}
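The matching read side is not shown in this source file; a sketch of what it could look like, assuming Hadoop's standard reflective instantiation (Culvert's actual accessor may differ):

public static DatabaseAdapter getDatabase(Configuration conf) {
    // Resolve the class name stored by setDatabase; null if the key is absent.
    Class<?> clazz = conf.getClass(DATABASE_ADAPTER_CONF_KEY, null);
    // ReflectionUtils.newInstance also injects conf into Configurable instances.
    return (DatabaseAdapter) org.apache.hadoop.util.ReflectionUtils.newInstance(clazz, conf);
}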

From source file:com.bah.culvert.configuration.CConfiguration.java

License:Apache License

/**
 * Creates a clone of the passed configuration.
 * @param that Configuration to clone.
 * @return a Configuration created with the culvert-*.xml files plus the given
 *         configuration.
 */
public static Configuration create(final Configuration that) {
    Configuration conf = create();
    for (Entry<String, String> e : that) {
        conf.set(e.getKey(), e.getValue());
    }
    return conf;
}

From source file:com.bah.culvert.configuration.ConfigurationTest.java

License:Apache License

@Test
public void incorporatesNewConfItems() {
    Configuration conf = new Configuration();
    String v1 = conf.iterator().next().getKey();
    conf.set(v1, "a special value");

    Configuration c2 = CConfiguration.create(conf);
    Assert.assertEquals("a special value", c2.get(v1));
}

From source file:com.bah.culvert.data.index.Index.java

License:Apache License

/**
 * Set the index name.
 * @param name The name of the index.
 * @param conf The configuration to set.
 */
public static void setIndexName(String name, Configuration conf) {
    conf.set(NAME_CONF_KEY, name);
}

From source file:com.bah.culvert.data.index.Index.java

License:Apache License

/**
 * Set the name of the data table containing the indexed row tuple.
 * @param table The name of the data table.
 * @param conf The configuration to set.
 */
public static void setPrimaryTable(String table, Configuration conf) {
    conf.set(PRIMARY_TABLE_CONF_KEY, table);
}

From source file:com.bah.culvert.data.index.Index.java

License:Apache License

/**
 * Set the name of the index table. This should refer to the actual name of
 * the index table. This can be different from the name of the index.
 * @param table The name of the index table.
 * @param conf The configuration to set.
 */
public static void setIndexTable(String table, Configuration conf) {
    conf.set(INDEX_TABLE_CONF_KEY, table);
}

From source file:com.bah.culvert.data.index.Index.java

License:Apache License

/**
 * Set the column family of the column tuple that is being indexed.
 * @param colFam The column family.
 * @param conf The configuration to set.
 */
public static void setColumnFamily(String colFam, Configuration conf) {
    conf.set(COL_FAM_CONF_KEY, colFam);
}
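Taken together, the four Index setters stage a complete index definition in one Configuration. A usage sketch based only on the signatures above; all table, index, and family names are hypothetical:

Configuration conf = new Configuration();
Index.setIndexName("userByEmail", conf);         // logical name of the index
Index.setPrimaryTable("users", conf);            // data table holding the indexed row tuples
Index.setIndexTable("users_by_email_idx", conf); // physical index table, may differ from the index name
Index.setColumnFamily("info", conf);             // column family of the indexed column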