Example usage for org.apache.hadoop.conf Configuration set

Introduction

This page lists example usages of the org.apache.hadoop.conf.Configuration.set method.

Prototype

public void set(String name, String value) 

Document

Set the value of the name property.
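
A minimal, self-contained sketch of a set/get round trip, for orientation before the full examples below; the property name "my.example.key" is invented for illustration:

import org.apache.hadoop.conf.Configuration;

public class ConfigurationSetExample {
    public static void main(String[] args) {
        // Start from an empty configuration (no default resources loaded).
        Configuration conf = new Configuration(false);

        // Set the value of the "my.example.key" property.
        conf.set("my.example.key", "my-value");

        // Read it back; the second argument of get() is a fallback default.
        System.out.println(conf.get("my.example.key", "fallback"));
    }
}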

Usage

From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.MatrixMultiplicationHybridBSP.java

License:Apache License

public static void main(String[] args) throws Exception {

    // Defaults
    int numRowsA = 4; // 1024;
    int numColsA = 4; // 1024;
    int numRowsB = 4; // 1024;
    int numColsB = 4; // 1024;
    boolean isDebugging = true;

    Configuration conf = new HamaConfiguration();
    BSPJobClient jobClient = new BSPJobClient(conf);
    ClusterStatus cluster = jobClient.getClusterStatus(true);

    if (args.length > 0) {
        if (args.length == 6) {
            conf.setInt("bsp.peers.num", Integer.parseInt(args[0]));
            numRowsA = Integer.parseInt(args[1]);
            numColsA = Integer.parseInt(args[2]);
            numRowsB = Integer.parseInt(args[3]);
            numColsB = Integer.parseInt(args[4]);
            isDebugging = Boolean.parseBoolean(args[5]);

        } else {
            System.out.println("Wrong argument size!");
            System.out.println("    Argument1=numBspTask");
            System.out.println("    Argument2=numRowsA | Number of rows of the first input matrix");
            System.out.println("    Argument3=numColsA | Number of columns of the first input matrix");
            System.out.println("    Argument4=numRowsB | Number of rows of the second input matrix");
            System.out.println("    Argument5=numColsB | Number of columns of the second input matrix");
            System.out.println("    Argument6=debug | Enable debugging (true|false)");
            return;
        }
    } else {
        conf.setInt("bsp.peers.num", 1); // cluster.getMaxTasks());
        // Enable one GPU task
        conf.setInt("bsp.peers.gpu.num", 1);
    }

    conf.setBoolean("hama.pipes.logging", isDebugging);
    conf.setBoolean(CONF_DEBUG, isDebugging);
    conf.set(CONF_BLOCKSIZE, "" + BLOCK_SIZE);
    conf.set(CONF_GRIDSIZE, "" + GRID_SIZE);

    LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
    LOG.info("NumGpuBspTask: " + conf.getInt("bsp.peers.gpu.num", 0));
    LOG.info("numRowsA: " + numRowsA);
    LOG.info("numColsA: " + numColsA);
    LOG.info("numRowsB: " + numRowsB);
    LOG.info("numColsB: " + numColsB);
    LOG.info("isDebugging: " + isDebugging);
    LOG.info("outputPath: " + OUTPUT_DIR);

    if (numColsA != numRowsB) {
        throw new Exception("Cols of MatrixA != rows of MatrixB! (" + numColsA + "!=" + numRowsB + ")");
    }

    // Create random DistributedRowMatrix
    // use constant seeds to get reproducible results

    // Matrix A
    DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsA, numColsA, new Random(42L),
            MATRIX_A_PATH, false);
    // Matrix B is stored transposed
    DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsB, numColsB, new Random(1337L),
            MATRIX_B_PATH, true);

    // Load DistributedRowMatrix a and b
    DistributedRowMatrix a = new DistributedRowMatrix(MATRIX_A_PATH, OUTPUT_DIR, numRowsA, numColsA);
    a.setConf(conf);

    DistributedRowMatrix b = new DistributedRowMatrix(MATRIX_B_PATH, OUTPUT_DIR, numRowsB, numColsB);
    b.setConf(conf);

    // MatrixMultiplication
    long startTime = System.currentTimeMillis();
    DistributedRowMatrix c = a.multiplyBSP(b, MATRIX_C_PATH);

    LOG.info("MatrixMultiplicationHybrid using Hama finished in "
            + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    // Verification

    // Overwrite matrix B, NOT transposed for verification
    DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsB, numColsB, new Random(1337L),
            MATRIX_B_PATH, false);
    b = new DistributedRowMatrix(MATRIX_B_PATH, OUTPUT_DIR, numRowsB, numColsB);
    b.setConf(conf);

    DistributedRowMatrix d = a.multiplyJava(b, MATRIX_D_PATH);

    if (c.verify(d)) {
        System.out.println("Verify PASSED!");
    } else {
        System.out.println("Verify FAILED!");
    }

    if (isDebugging) {
        System.out.println("Matrix A:");
        a.printDistributedRowMatrix();
        System.out.println("Matrix B:");
        b.printDistributedRowMatrix();
        System.out.println("Matrix C:");
        c.printDistributedRowMatrix();
        System.out.println("Matrix D:");
        d.printDistributedRowMatrix();
        printOutput(conf);
    }
}

From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFTrainHybridBSP.java

License:Apache License

public static void main(String[] args) throws Exception {
    // Defaults
    int numBspTask = 1; // CPU + GPU tasks
    int numGpuBspTask = 1; // GPU tasks
    int blockSize = BLOCK_SIZE;
    int gridSize = GRID_SIZE;

    int maxIteration = 3; // 150;
    int matrixRank = 3;
    int skipCount = 1;

    double alpha = ALPHA;
    int userCount = 0;
    int itemCount = 0;
    int percentNonZeroValues = 0;

    int GPUPercentage = 20;

    boolean useTestExampleInput = true;
    boolean isDebugging = true;
    String inputFile = "";
    String separator = "\\t";

    Configuration conf = new HamaConfiguration();
    FileSystem fs = FileSystem.get(conf);

    // Set numBspTask to maxTasks
    // BSPJobClient jobClient = new BSPJobClient(conf);
    // ClusterStatus cluster = jobClient.getClusterStatus(true);
    // numBspTask = cluster.getMaxTasks();

    if (args.length > 0) {
        if (args.length >= 14) {
            numBspTask = Integer.parseInt(args[0]);
            numGpuBspTask = Integer.parseInt(args[1]);
            blockSize = Integer.parseInt(args[2]);
            gridSize = Integer.parseInt(args[3]);

            maxIteration = Integer.parseInt(args[4]);
            matrixRank = Integer.parseInt(args[5]);
            skipCount = Integer.parseInt(args[6]);

            alpha = Double.parseDouble(args[7]);
            userCount = Integer.parseInt(args[8]);
            itemCount = Integer.parseInt(args[9]);
            percentNonZeroValues = Integer.parseInt(args[10]);

            GPUPercentage = Integer.parseInt(args[11]);

            useTestExampleInput = Boolean.parseBoolean(args[12]);
            isDebugging = Boolean.parseBoolean(args[13]);

            // optional parameters
            if (args.length > 14) {
                inputFile = args[14];
            }
            if (args.length > 15) {
                separator = args[15];
            }

        } else {
            System.out.println("Wrong argument size!");
            System.out.println("    Argument1=numBspTask");
            System.out.println("    Argument2=numGpuBspTask");
            System.out.println("    Argument3=blockSize");
            System.out.println("    Argument4=gridSize");
            System.out.println(
                    "    Argument5=maxIterations | Number of maximal iterations (" + maxIteration + ")");
            System.out.println("    Argument6=matrixRank | matrixRank (" + matrixRank + ")");
            System.out.println("    Argument7=skipCount | skipCount (" + skipCount + ")");
            System.out.println("    Argument8=alpha | alpha (" + alpha + ")");
            System.out.println("    Argument9=userCount | userCount (" + userCount + ")");
            System.out.println("    Argument10=itemCount | itemCount (" + itemCount + ")");
            System.out.println("    Argument11=percentNonZeroValues | percentNonZeroValues ("
                    + percentNonZeroValues + ")");
            System.out.println("    Argument12=GPUPercentage (percentage of input)");
            System.out.println("    Argument13=testExample | Use testExample input (true|false=default)");
            System.out.println("    Argument14=debug | Enable debugging (true|false=default)");
            System.out.println("    Argument15=inputFile (optional) | MovieLens inputFile");
            System.out.println("    Argument16=separator (optional) | default '" + separator + "' ");
            return;
        }
    }

    // Check if inputFile exists
    if ((!inputFile.isEmpty()) && (!new File(inputFile).exists())) {
        System.out.println("Error: inputFile: " + inputFile + " does not exist!");
        return;
    }

    // Check parameters
    if ((inputFile.isEmpty()) && (!useTestExampleInput)
            && ((userCount <= 0) || (itemCount <= 0) || (percentNonZeroValues <= 0))) {
        System.out.println("Invalid parameter: userCount: " + userCount + " itemCount: " + itemCount
                + " percentNonZeroValues: " + percentNonZeroValues);
        return;
    }

    // Check if blockSize < matrixRank when using GPU
    if ((numGpuBspTask > 0) && (blockSize < matrixRank)) {
        System.out.println("Error: BlockSize < matrixRank");
        return;
    }

    // Check GPUPercentage
    if ((GPUPercentage < 0) || (GPUPercentage > 100)) {
        System.out.println("Error: GPUPercentage must be between 0 and 100 percent");
        return;
    }

    // Set config variables
    conf.setBoolean(CONF_DEBUG, isDebugging);
    conf.setBoolean("hama.pipes.logging", isDebugging);
    // Set CPU tasks
    conf.setInt("bsp.peers.num", numBspTask);
    // Set GPU tasks
    conf.setInt("bsp.peers.gpu.num", numGpuBspTask);
    // Set GPU blockSize and gridSize
    conf.set(CONF_BLOCKSIZE, "" + blockSize);
    conf.set(CONF_GRIDSIZE, "" + gridSize);

    conf.setInt(OnlineCF.CONF_ITERATION_COUNT, maxIteration);
    conf.setInt(OnlineCF.CONF_MATRIX_RANK, matrixRank);
    conf.setInt(OnlineCF.CONF_SKIP_COUNT, skipCount);

    // Debug output
    LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
    LOG.info("NumGpuBspTask: " + conf.getInt("bsp.peers.gpu.num", 0));
    LOG.info("bsp.tasks.maximum: " + conf.get("bsp.tasks.maximum"));
    LOG.info("BlockSize: " + conf.get(CONF_BLOCKSIZE));
    LOG.info("GridSize: " + conf.get(CONF_GRIDSIZE));
    LOG.info("GPUPercentage: " + GPUPercentage);

    LOG.info("isDebugging: " + isDebugging);
    LOG.info("useTestExampleInput: " + useTestExampleInput);
    LOG.info("inputPath: " + CONF_INPUT_DIR);
    LOG.info("outputPath: " + CONF_OUTPUT_DIR);

    LOG.info("maxIteration: " + maxIteration);
    LOG.info("matrixRank: " + matrixRank);
    LOG.info("skipCount: " + skipCount);

    LOG.info("alpha: " + alpha);
    LOG.info("userCount: " + userCount);
    LOG.info("itemCount: " + itemCount);
    LOG.info("percentNonZeroValues: " + percentNonZeroValues);

    if (!inputFile.isEmpty()) {
        LOG.info("inputFile: " + inputFile);
        LOG.info("separator: " + separator);
    }

    // prepare Input
    int maxTestPrefs = 10;
    Path preferencesIn = new Path(CONF_INPUT_DIR, "preferences_in.seq");
    List<Preference<Long, Long>> testPrefs = null;
    if (useTestExampleInput) {

        testPrefs = prepareTestInputData(conf, fs, CONF_INPUT_DIR, preferencesIn);

    } else if (inputFile.isEmpty()) {

        testPrefs = generateRandomInputData(conf, fs, CONF_INPUT_DIR, numBspTask, numGpuBspTask, userCount,
                itemCount, percentNonZeroValues, GPUPercentage, maxTestPrefs);

    } else if (!inputFile.isEmpty()) {
        // parse inputFile and return first entries for testing
        testPrefs = convertInputData(conf, fs, CONF_INPUT_DIR, preferencesIn, inputFile, separator,
                maxTestPrefs);
    }

    // Generate Job config
    BSPJob job = createOnlineCFTrainHybridBSPConf(conf, CONF_INPUT_DIR, CONF_OUTPUT_DIR);

    // Execute Job
    long startTime = System.currentTimeMillis();
    if (job.waitForCompletion(true)) {

        LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

        // Load Job results for testing
        OnlineCF recommender = new OnlineCF();
        recommender.load(CONF_OUTPUT_DIR.toString(), false);

        // Test results
        int error = 0;
        double totalError = 0;
        for (Preference<Long, Long> test : testPrefs) {
            double expected = test.getValue().get();
            double estimated = recommender.estimatePreference(test.getUserId(), test.getItemId());

            if (testPrefs.size() <= 20) {
                LOG.info("(" + test.getUserId() + ", " + test.getItemId() + ", " + expected + "): " + estimated
                        + " error: " + Math.abs(expected - estimated));
            }
            totalError += Math.abs(expected - estimated);
            error += (Math.abs(expected - estimated) < 0.5) ? 1 : 0;
        }

        LOG.info("totalError: " + totalError);
        LOG.info("assertEquals(expected: " + (testPrefs.size() * 0.75) + " == " + error
                + " actual) with delta: 1");

        if (isDebugging) {
            printOutput(conf, fs, ".log", new IntWritable(), new PipesVectorWritable());
        }
    }

}

From source file:at.illecker.hama.hybrid.examples.testglobalgpusync.TestGlobalGpuSyncHybridBSP.java

License:Apache License

public static void main(String[] args) throws InterruptedException, IOException, ClassNotFoundException {

    // Defaults
    int numBspTask = 1;
    int numGpuBspTask = 1;
    int blockSize = BLOCK_SIZE;
    int gridSize = GRID_SIZE;
    boolean isDebugging = false;

    Configuration conf = new HamaConfiguration();

    if (args.length > 0) {
        if (args.length == 5) {
            numBspTask = Integer.parseInt(args[0]);
            numGpuBspTask = Integer.parseInt(args[1]);
            blockSize = Integer.parseInt(args[2]);
            gridSize = Integer.parseInt(args[3]);
            isDebugging = Boolean.parseBoolean(args[4]);
        } else {
            System.out.println("Wrong argument size!");
            System.out.println("    Argument1=numBspTask");
            System.out.println("    Argument2=numGpuBspTask");
            System.out.println("    Argument3=blockSize");
            System.out.println("    Argument4=gridSize");
            System.out.println("    Argument5=debug | Enable debugging (true|false=default)");
            return;
        }
    }

    // Set config variables
    conf.setBoolean("hama.pipes.logging", isDebugging);
    // Set CPU tasks
    conf.setInt("bsp.peers.num", numBspTask);
    // Set GPU tasks
    conf.setInt("bsp.peers.gpu.num", numGpuBspTask);
    // Set GPU blockSize and gridSize
    conf.set(CONF_BLOCK_SIZE, "" + blockSize);
    conf.set(CONF_GRID_SIZE, "" + gridSize);
    conf.set(CONF_TMP_DIR, TMP_DIR.toString());

    LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
    LOG.info("NumGpuBspTask: " + conf.getInt("bsp.peers.gpu.num", 0));
    LOG.info("bsp.tasks.maximum: " + conf.get("bsp.tasks.maximum"));
    LOG.info("BlockSize: " + conf.get(CONF_BLOCK_SIZE));
    LOG.info("GridSize: " + conf.get(CONF_GRID_SIZE));
    LOG.info("TempDir: " + conf.get(CONF_TMP_DIR));
    LOG.info("isDebugging: " + conf.getBoolean("hama.pipes.logging", false));

    BSPJob job = createTestGlobalGpuSyncHybridBSPConf(conf);

    long startTime = System.currentTimeMillis();
    if (job.waitForCompletion(true)) {
        LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
        printOutput(job, FileSystem.get(conf), new Path(conf.get(CONF_TMP_DIR)));
    }
}

From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.gpu.MatrixMultiplicationBSPGpu.java

License:Apache License

public static void main(String[] args) throws Exception {

    // Defaults
    int numRowsA = 1024;
    int numColsA = 1024;
    int numRowsB = 1024;
    int numColsB = 1024;
    boolean isDebugging = false;

    Configuration conf = new HamaConfiguration();

    if (args.length > 0) {
        if (args.length == 6) {
            conf.setInt("bsp.peers.num", Integer.parseInt(args[0]));
            numRowsA = Integer.parseInt(args[1]);
            numColsA = Integer.parseInt(args[2]);
            numRowsB = Integer.parseInt(args[3]);
            numColsB = Integer.parseInt(args[4]);
            isDebugging = Boolean.parseBoolean(args[5]);

        } else {
            System.out.println("Wrong argument size!");
            System.out.println("    Argument1=numBspTask");
            System.out.println("    Argument2=numRowsA | Number of rows of the first input matrix");
            System.out.println("    Argument3=numColsA | Number of columns of the first input matrix");
            System.out.println("    Argument4=numRowsB | Number of rows of the second input matrix");
            System.out.println("    Argument5=numColsB | Number of columns of the second input matrix");
            System.out.println("    Argument6=debug | Enable debugging (true|false)");
            return;
        }
    } else {
        conf.setInt("bsp.peers.num", 1); // 1 because only one GPU available
    }

    conf.setBoolean(CONF_DEBUG, isDebugging);
    conf.set(CONF_BLOCKSIZE, "" + BLOCK_SIZE);
    conf.set(CONF_GRIDSIZE, "" + GRID_SIZE);

    LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
    LOG.info("numRowsA: " + numRowsA);
    LOG.info("numColsA: " + numColsA);
    LOG.info("numRowsB: " + numRowsB);
    LOG.info("numColsB: " + numColsB);
    LOG.info("isDebugging: " + isDebugging);
    LOG.info("outputPath: " + OUTPUT_DIR);

    if (numColsA != numRowsB) {
        throw new Exception("Cols of MatrixA != rows of MatrixB! (" + numColsA + "!=" + numRowsB + ")");
    }

    // Create random DistributedRowMatrix
    // use constant seeds to get reproducible results

    // Matrix A
    DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsA, numColsA, new Random(42L),
            MATRIX_A_PATH, false);
    // Matrix B
    DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsB, numColsB, new Random(1337L),
            MATRIX_B_PATH, false);

    // Load DistributedRowMatrix a and b
    DistributedRowMatrix a = new DistributedRowMatrix(MATRIX_A_PATH, OUTPUT_DIR, numRowsA, numColsA);
    a.setConf(conf);

    DistributedRowMatrix b = new DistributedRowMatrix(MATRIX_B_PATH, OUTPUT_DIR, numRowsB, numColsB);
    b.setConf(conf);

    // MatrixMultiply all within a new BSP job
    long startTime = System.currentTimeMillis();
    DistributedRowMatrix c = a.multiplyBSP(b, MATRIX_C_PATH, true);

    System.out.println("MatrixMultiplicationGpu using Hama finished in "
            + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    // Verification
    DistributedRowMatrix d = a.multiplyJava(b, MATRIX_D_PATH);
    if (c.verify(d)) {
        System.out.println("Verify PASSED!");
    } else {
        System.out.println("Verify FAILED!");
    }

    if (isDebugging) {
        System.out.println("Matrix A:");
        a.printDistributedRowMatrix();
        System.out.println("Matrix B:");
        b.printDistributedRowMatrix();
        System.out.println("Matrix C:");
        c.printDistributedRowMatrix();
        System.out.println("Matrix D:");
        d.printDistributedRowMatrix();

        printOutput(conf);
    }
}

From source file:azkaban.AzkabanCommonModule.java

License:Apache License

@Inject
@Provides
@Singleton
public Configuration createHadoopConfiguration() {
    final String hadoopConfDirPath = requireNonNull(this.props.get(HADOOP_CONF_DIR_PATH));

    final File hadoopConfDir = new File(requireNonNull(hadoopConfDirPath));
    checkArgument(hadoopConfDir.exists() && hadoopConfDir.isDirectory());

    final Configuration hadoopConf = new Configuration(false);
    hadoopConf.addResource(new org.apache.hadoop.fs.Path(hadoopConfDirPath, "core-site.xml"));
    hadoopConf.addResource(new org.apache.hadoop.fs.Path(hadoopConfDirPath, "hdfs-site.xml"));
    hadoopConf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    return hadoopConf;
}

From source file:azkaban.jobtype.HadoopConfigurationInjector.java

License:Apache License

/**
 * Writes out the XML configuration file that will be injected by the client
 * as a configuration resource.
 * <p>
 * This file will include a series of links injected by Azkaban as well as
 * any job properties that begin with the designated injection prefix.
 *
 * @param props The Azkaban properties
 * @param workingDir The Azkaban job working directory
 */
public static void prepareResourcesToInject(Props props, String workingDir) {
    try {
        Configuration conf = new Configuration(false);

        // First, inject a series of Azkaban links. These are equivalent to
        // CommonJobProperties.[EXECUTION,WORKFLOW,JOB,JOBEXEC,ATTEMPT]_LINK
        addHadoopProperties(props);

        // Next, automatically inject any properties that begin with the
        // designated injection prefix.
        Map<String, String> confProperties = props.getMapByPrefix(INJECT_PREFIX);

        for (Map.Entry<String, String> entry : confProperties.entrySet()) {
            String confKey = entry.getKey().replace(INJECT_PREFIX, "");
            String confVal = entry.getValue();
            conf.set(confKey, confVal);
        }

        // Now write out the configuration file to inject.
        File file = getConfFile(props, workingDir, INJECT_FILE);
        OutputStream xmlOut = new FileOutputStream(file);
        conf.writeXml(xmlOut);
        xmlOut.close();
    } catch (Throwable e) {
        _logger.error("Encountered error while preparing the Hadoop configuration resource file", e);
    }
}

From source file:azkaban.jobtype.HadoopConfigurationInjector.java

License:Apache License

/**
 * Loads an Azkaban property into the Hadoop configuration.
 *
 * @param props The Azkaban properties
 * @param conf The Hadoop configuration
 * @param name The property name to load from the Azkaban properties into the Hadoop configuration
 */
public static void loadProp(Props props, Configuration conf, String name) {
    String prop = props.get(name);
    if (prop != null) {
        conf.set(name, prop);
    }
}

From source file:azkaban.jobtype.HadoopJavaJobRunnerMain.java

License:Apache License

private void runMethodAsUser(Properties props, final Object obj, final String runMethod,
        final UserGroupInformation ugi) throws IOException, InterruptedException {
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
        @Override
        public Void run() throws Exception {

            Configuration conf = new Configuration();
            if (System.getenv(HADOOP_TOKEN_FILE_LOCATION) != null) {
                conf.set(MAPREDUCE_JOB_CREDENTIALS_BINARY, System.getenv(HADOOP_TOKEN_FILE_LOCATION));
            }

            runMethod(obj, runMethod);
            return null;
        }
    });
}

From source file:azkaban.jobtype.javautils.HadoopUtils.java

License:Apache License

public static void setPropsInJob(Configuration conf, Props props) {
    ByteArrayOutputStream output = new ByteArrayOutputStream();
    try {
        props.storeFlattened(output);
        conf.set("azkaban.props", new String(output.toByteArray(), "UTF-8"));
    } catch (IOException e) {
        throw new RuntimeException("This is not possible!", e);
    }
}

From source file:azkaban.jobtype.ReportalAbstractRunner.java

License:Apache License

public void run() throws Exception {
    System.out.println("Reportal: Setting up environment");

    // Check the properties file
    if (props == null) {
        throw new ReportalRunnerException("Properties file not loaded correctly.");
    }

    // Get the hadoop token
    Configuration conf = new Configuration();
    if (System.getenv(HADOOP_TOKEN_FILE_LOCATION) != null) {
        conf.set(MAPREDUCE_JOB_CREDENTIALS_BINARY, System.getenv(HADOOP_TOKEN_FILE_LOCATION));
    }

    // Get properties
    String execId = props.getString(CommonJobProperties.EXEC_ID);
    outputCapacity = props.getInt("reportal.output.capacity", 10 * 1024 * 1024);
    proxyUser = props.getString("reportal.proxy.user");
    jobQuery = props.getString("reportal.job.query");
    jobTitle = props.getString("reportal.job.title");
    reportalTitle = props.getString("reportal.title");
    reportalStorageUser = props.getString("reportal.storage.user", "reportal");
    Map<String, String> reportalVariables = props.getMapByPrefix(REPORTAL_VARIABLE_PREFIX);

    // Parse variables
    for (Entry<String, String> entry : reportalVariables.entrySet()) {
        if (entry.getKey().endsWith("from")) {
            String fromValue = entry.getValue();
            String toKey = entry.getKey().substring(0, entry.getKey().length() - 4) + "to";
            String toValue = reportalVariables.get(toKey);
            if (toValue != null) {
                variables.put(fromValue, toValue);
            }
        }
    }

    // Built-in variables
    variables.put("run_id", execId);
    variables.put("sys_date", Long.toString(System.currentTimeMillis() / 1000));

    Calendar cal = Calendar.getInstance();
    Date date = new Date();
    cal.setTime(date);

    String timeZone = props.getString("reportal.default.timezone", "UTC");
    TimeZone.setDefault(TimeZone.getTimeZone(timeZone));

    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
    SimpleDateFormat hourFormat = new SimpleDateFormat("yyyy-MM-dd-HH");

    variables.put("hive_current_hour", hourFormat.format(cal.getTime()));
    variables.put("hive_current_day", dateFormat.format(cal.getTime()));
    cal.add(Calendar.HOUR, -1);
    variables.put("hive_last_hour", hourFormat.format(cal.getTime()));
    cal.add(Calendar.HOUR, 1);
    cal.add(Calendar.DATE, -1);
    variables.put("hive_yesterday", dateFormat.format(cal.getTime()));
    cal.add(Calendar.DATE, -6);
    variables.put("hive_last_seven_days", dateFormat.format(cal.getTime()));
    cal.add(Calendar.DATE, -1);
    variables.put("hive_last_eight_days", dateFormat.format(cal.getTime()));
    variables.put("owner", proxyUser);
    variables.put("title", reportalTitle);

    // Props debug
    System.out.println("Reportal Variables:");
    for (Entry<String, String> data : variables.entrySet()) {
        System.out.println(data.getKey() + " -> " + data.getValue());
    }

    if (requiresOutput()) {
        // Get output stream to data
        String locationTemp = ("./reportal/" + jobTitle + ".csv").replace("//", "/");
        File tempOutput = new File(locationTemp);
        tempOutput.getParentFile().mkdirs();
        tempOutput.createNewFile();
        outputStream = new BoundedOutputStream(new BufferedOutputStream(new FileOutputStream(tempOutput)),
                outputCapacity);

        // Run the reportal
        runReportal();

        // Cleanup the reportal
        try {
            outputStream.close();
        } catch (IOException e) {
            // We can safely ignore this exception since we're just making sure the
            // stream is closed.
        }
    } else {
        runReportal();
    }
}