Example usage for org.apache.hadoop.conf Configuration set

List of usage examples for org.apache.hadoop.conf Configuration set

Introduction

This page collects example usages of the org.apache.hadoop.conf.Configuration.set method.

Prototype

public void set(String name, String value) 

Source Link

Document

Sets the value of the property identified by name.

Usage

From source file:com.bizosys.hsearch.kv.indexing.KVReplicatorHFile.java

License:Apache License

/**
 * Configures and runs the "KVReplicatorHBase - creating HFile" MapReduce job.
 *
 * <p>Positional arguments, in order: input file, HFile output path, table
 * name, column family (defaults to {@code "1"}), replace-from, replace-to,
 * start index and end index. Every argument that is absent falls back to its
 * default (the empty string unless noted); only the input file is validated.
 *
 * @param args positional command-line arguments as described above
 * @return {@code 0} when the job completes successfully, {@code 1} otherwise
 * @throws Exception if no input file is given, or if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {

    int seq = 0;
    String inputFile = (args.length > seq) ? args[seq++] : "";
    String hfileOutputFile = (args.length > seq) ? args[seq++] : "";
    String tableName = (args.length > seq) ? args[seq++] : "";
    String familyName = (args.length > seq) ? args[seq++] : "1";
    String replaceFrom = (args.length > seq) ? args[seq++] : "";
    String replaceTo = (args.length > seq) ? args[seq++] : "";
    String startIndex = (args.length > seq) ? args[seq++] : "";
    String endIndex = (args.length > seq) ? args[seq] : "";

    if (null == inputFile || inputFile.trim().isEmpty()) {
        String err = KVReplicatorHFile.class + " > Please enter input file path.";
        System.err.println(err);
        throw new IOException(err);
    }

    // Hand the replication settings to the tasks through the job configuration.
    Configuration conf = HBaseConfiguration.create();
    conf.set(TABLE_NAME, tableName);
    conf.set(FAMILY_NAME, familyName);
    conf.set(REPLACE_FROM, replaceFrom);
    conf.set(REPLACE_TO, replaceTo);
    conf.set(START_INDEX, startIndex);
    conf.set(END_INDEX, endIndex);

    try {
        List<HColumnDescriptor> colFamilies = new ArrayList<HColumnDescriptor>();
        // NOTE(review): getBytes() uses the platform default charset - confirm
        // this is acceptable for family names.
        HColumnDescriptor cols = new HColumnDescriptor(familyName.getBytes());
        colFamilies.add(cols);
        HDML.create(tableName, colFamilies);
    } catch (HBaseException e) {
        // Best effort: a failure here is reported but does not abort the job.
        // NOTE(review): presumably tolerates an already-existing table - confirm.
        e.printStackTrace();
    }

    Job job = Job.getInstance(conf, "KVReplicatorHBase - creating HFile");

    job.setJarByClass(KVReplicatorHFile.class);
    job.setMapperClass(KVHFileWriterMapper.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputFile.trim()));
    FileOutputFormat.setOutputPath(job, new Path(hfileOutputFile.trim()));

    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);

    // The table handle holds cluster resources; always release it, even when
    // job configuration or execution throws.
    HTable hTable = new HTable(conf, tableName);
    try {
        HFileOutputFormat.configureIncrementalLoad(job, hTable);
        boolean result = job.waitForCompletion(true);
        return (result ? 0 : 1);
    } finally {
        hTable.close();
    }
}

From source file:com.bizosys.hsearch.kv.indexing.KVReplicatorMapFile.java

License:Apache License

/**
 * Configures and runs the "KVReplicatorMapFile - Replicating Map File"
 * MapReduce job.
 *
 * <p>Positional arguments, in order: input file, output directory (default
 * {@code /tmp/hsearch-index}), output file name (default {@code file1}),
 * field-mapping XML path, replace-from, replace-to, start index, end index
 * and number of reducers (default {@code 1}). The table name taken from the
 * field mapping is appended to the output directory.
 *
 * @param args positional command-line arguments as described above
 * @return {@code 0} when the job completes successfully, {@code 1} otherwise
 * @throws Exception if no input file is given, the reducer count is not a
 *         number, or job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {

    int seq = 0;
    String inputFile = (args.length > seq) ? args[seq] : "";
    seq++;

    String outputFile = (args.length > seq) ? args[seq++] : "/tmp/hsearch-index";

    String outputFileName = (args.length > seq) ? args[seq++] : "file1";

    String xmlFilePath = (args.length > seq) ? args[seq++] : "";

    String replaceFrom = (args.length > seq) ? args[seq++] : "";

    String replaceTo = (args.length > seq) ? args[seq++] : "";

    String startIndex = (args.length > seq) ? args[seq++] : "";

    String endIndex = (args.length > seq) ? args[seq++] : "";

    String numberOfReducerStr = (args.length > seq) ? args[seq] : "1";
    int numberOfReducer = Integer.parseInt(numberOfReducerStr);

    if (null == inputFile || inputFile.trim().isEmpty()) {
        // Report this class, not the sibling HFile replicator.
        String err = KVReplicatorMapFile.class + " > Please enter input file path.";
        System.err.println(err);
        throw new IOException(err);
    }

    Configuration conf = HBaseConfiguration.create();

    FieldMapping fm = KVIndexer.createFieldMapping(conf, xmlFilePath, new StringBuilder());

    // An explicitly empty output argument previously crashed on charAt(-1);
    // treat it like a missing argument and use the default directory.
    if (outputFile.isEmpty()) {
        outputFile = "/tmp/hsearch-index";
    }
    if (!outputFile.endsWith("/")) {
        outputFile = outputFile + "/";
    }
    outputFile = outputFile + fm.tableName;

    conf.set(OUTPUT_FILE_PATH, outputFile);
    conf.set(OUTPUT_FILE_NAME, outputFileName);

    conf.set(REPLACE_FROM, replaceFrom);
    conf.set(REPLACE_TO, replaceTo);
    conf.set(START_INDEX, startIndex);
    conf.set(END_INDEX, endIndex);

    Job job = Job.getInstance(conf, "KVReplicatorMapFile - Replicating Map File");

    job.setJarByClass(KVReplicatorMapFile.class);
    job.setMapperClass(KVReplicatorMapper.class);
    job.setReducerClass(KVReplicatorReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(BytesWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);

    job.setNumReduceTasks(numberOfReducer);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputFile.trim()));

    // The job's declared output directory is a throw-away location that is
    // cleared before each run.
    // NOTE(review): presumably the real output is written by the reducer to
    // OUTPUT_FILE_PATH - confirm against KVReplicatorReducer.
    FileSystem fs = FileSystem.get(conf);
    Path dummyPath = new Path("/tmp", "dummy");
    if (fs.exists(dummyPath)) {
        fs.delete(dummyPath, true);
    }

    FileOutputFormat.setOutputPath(job, dummyPath);

    boolean result = job.waitForCompletion(true);
    return (result ? 0 : 1);
}

From source file:com.blackberry.logdriver.Lock.java

License:Apache License

/**
 * Command-line entry point for inspecting and resetting locks.
 *
 * <p>Leading arguments that start with {@code -} are options:
 * {@code -confFiles=a,b,...} adds configuration resources and
 * {@code -zkConnectString=...} sets {@code zk.connect.string}. They are
 * followed by one of:
 * <ul>
 *   <li>{@code SCAN [root]}</li>
 *   <li>{@code PURGE root}</li>
 *   <li>{@code READ|WRITE RESET|STATUS dcNumber service date hour component}</li>
 * </ul>
 * Keywords are matched case-insensitively. Usage errors and unexpected
 * failures terminate the JVM with exit status 1.
 *
 * @param args command-line arguments as described above
 */
public static void main(String[] args) {
    if (args.length < 1) {
        printUsage();
        System.exit(1);
    }

    // We'll be using a Configuration object
    Configuration conf = new Configuration();

    // Check args for conf files or specific configs. The bounds check keeps
    // an options-only command line from running off the end of the array.
    int i = 0;
    while (i < args.length && args[i].startsWith("-")) {
        String arg = args[i];
        if (arg.startsWith("-confFiles=")) {
            String fileNames = arg.substring("-confFiles=".length());
            String[] files = fileNames.split(",");
            for (String file : files) {
                conf.addResource(new Path(file));
            }
        } else if (arg.startsWith("-zkConnectString=")) {
            String connectString = arg.substring("-zkConnectString=".length());
            conf.set("zk.connect.string", connectString);
        }

        i++;
    }

    LockUtil lockUtil = null;
    try {
        lockUtil = new LockUtil(conf);
    } catch (Exception e) {
        LOG.error("Error getting ZooKeeper client.", e);
        System.exit(1);
    }

    // Look for the SCAN command. It's a little different: the root is optional.
    if (args.length > i && "SCAN".equalsIgnoreCase(args[i])) {
        String root = "";
        // Compare against the current position, not a fixed index, so the
        // root is picked up regardless of how many options preceded SCAN.
        if (args.length > i + 1) {
            root = args[i + 1];
        }
        try {
            scan(lockUtil, root);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return;
    }

    // Look for the PURGE command. It requires a root argument.
    if (args.length > i + 1 && "PURGE".equalsIgnoreCase(args[i])) {
        String root = args[i + 1];

        try {
            purge(lockUtil, root);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return;
    }

    // Now, just grab the required args
    if (args.length < i + 6) {
        printUsage();
        System.exit(1);
    }

    // equalsIgnoreCase avoids default-locale surprises from toUpperCase()
    // (e.g. "write" does not upper-case to "WRITE" in a Turkish locale).
    String readWrite = args[i];
    boolean isRead = "READ".equalsIgnoreCase(readWrite);
    boolean isWrite = "WRITE".equalsIgnoreCase(readWrite);
    if (!isRead && !isWrite) {
        printUsage();
        System.exit(1);
    }
    i++;

    String lockUnlock = args[i];
    boolean isReset = "RESET".equalsIgnoreCase(lockUnlock);
    boolean isStatus = "STATUS".equalsIgnoreCase(lockUnlock);
    if (!isReset && !isStatus) {
        printUsage();
        System.exit(1);
    }
    i++;

    String dcNumber = args[i];
    i++;

    String service = args[i];
    i++;

    String date = args[i];
    i++;

    String hour = args[i];
    i++;

    String component = args[i];

    PathInfo pathInfo = new PathInfo();
    try {
        pathInfo.setDcNumber(dcNumber);
        pathInfo.setService(service);
        pathInfo.setDate(date);
        pathInfo.setHour(hour);
        pathInfo.setComponent(component);
    } catch (Exception e) {
        LOG.error("Exception configuring path info.", e);
        System.exit(1);
    }

    String lockPath = null;
    try {
        lockPath = lockUtil.getLockPath(pathInfo);
    } catch (Exception e) {
        LOG.error("Error getting lock path", e);
        System.exit(1);
    }

    // Each operation is retried for as long as the only failure is a lost
    // ZooKeeper connection; any other failure is fatal.
    if (isRead && isReset) {
        while (true) {
            try {
                lockUtil.resetReadLock(lockPath);
            } catch (KeeperException.ConnectionLossException e) {
                LOG.warn("Lost connection to ZooKeeper.  Retrying.", e);
                continue;
            } catch (Exception e) {
                LOG.error("Unexpected error", e);
                System.exit(1);
            }
            break;
        }
    } else if (isRead && isStatus) {
        long numLocks = 0;
        while (true) {
            try {
                numLocks = lockUtil.getReadLockCount(lockPath);
            } catch (KeeperException.ConnectionLossException e) {
                LOG.warn("Lost connection to ZooKeeper.  Retrying.", e);
                continue;
            } catch (Exception e) {
                LOG.error("Unexpected error", e);
                System.exit(1);
            }

            System.out.println("Read lock count is " + numLocks);
            break;
        }
    } else if (isWrite && isReset) {
        while (true) {
            try {
                lockUtil.resetWriteLock(lockPath);
            } catch (KeeperException.ConnectionLossException e) {
                LOG.warn("Lost connection to ZooKeeper.  Retrying.", e);
                continue;
            } catch (Exception e) {
                LOG.error("Unexpected error", e);
                System.exit(1);
            }
            break;
        }
    } else if (isWrite && isStatus) {
        long numLocks = 0;
        while (true) {
            try {
                numLocks = lockUtil.getWriteLockCount(lockPath);
            } catch (KeeperException.ConnectionLossException e) {
                LOG.warn("Lost connection to ZooKeeper.  Retrying.", e);
                continue;
            } catch (Exception e) {
                LOG.error("Unexpected error", e);
                System.exit(1);
            }

            System.out.println("Write lock count is " + numLocks);
            break;
        }
    }
}

From source file:com.blackberry.logdriver.LockedFs.java

License:Apache License

/**
 * Runs file-system commands while holding a write lock.
 *
 * <p>Arguments: zkConnectString, dcNumber, service, date, hour, component,
 * followed by one or more commands. Each command is a whitespace-separated
 * string whose first word is {@code move} (sources... destination),
 * {@code delete} (paths...) or {@code touch} (paths...), matched
 * case-insensitively. Unrecognised commands are logged and skipped.
 *
 * @param args command-line arguments as described above
 * @return {@code 0} on success; {@code 1} if a {@code move} command has too
 *         few arguments or if any step fails with an exception
 * @throws Exception if releasing the lock or closing the lock utility fails
 *         with anything other than a lost ZooKeeper connection
 */
public int run(String[] args) throws Exception {
    // The required args are zkConnectString, dcNumber, service, date, hour,
    // component, and at least one command.
    if (args.length < 7) {
        printUsage();
        System.exit(1);
    }

    String zkConnectString = args[0];
    String dcNumber = args[1];
    String service = args[2];
    String date = args[3];
    String hour = args[4];
    String component = args[5];

    // Everything after the six fixed arguments is a command.
    String[] commands = new String[args.length - 6];
    System.arraycopy(args, 6, commands, 0, commands.length);

    String logDir = getConf().get("logdriver.logdir.name", "logs");

    // Set the configuration correctly, so we can reach zookeeper
    Configuration conf = getConf();
    conf.set("zk.connect.string", zkConnectString);

    int exitCode = 0;
    LockUtil lockUtil = null;
    String lockPath = null;
    boolean lockAcquired = false;
    try {
        lockUtil = new LockUtil(conf);

        PathInfo pathInfo = new PathInfo();
        pathInfo.setDcNumber(dcNumber);
        pathInfo.setService(service);
        pathInfo.setLogdir(logDir);
        pathInfo.setDate(date);
        pathInfo.setHour(hour);
        pathInfo.setComponent(component);

        lockPath = lockUtil.getLockPath(pathInfo);

        // Get the write lock
        while (true) {
            try {
                lockUtil.acquireWriteLock(lockPath);
                lockAcquired = true;
                break;
            } catch (KeeperException.ConnectionLossException e) {
                LOG.warn("Lost connection to ZooKeeper.  Retrying.", e);
            }
        }

        for (String command : commands) {
            LOG.info("Running {}", command);

            String[] parts = command.split("\\s+");
            String verb = parts[0];
            if ("move".equalsIgnoreCase(verb)) {
                if (parts.length < 3) {
                    LOG.error("Move required at least 2 arguements");
                    return 1;
                }

                // All words between the verb and the last word are sources;
                // the last word is the destination.
                String[] from = new String[parts.length - 2];
                System.arraycopy(parts, 1, from, 0, from.length);
                String to = parts[parts.length - 1];

                move(conf, from, to);

            } else if ("delete".equalsIgnoreCase(verb)) {
                for (int i = 1; i < parts.length; i++) {
                    delete(conf, parts[i]);
                }

            } else if ("touch".equalsIgnoreCase(verb)) {
                for (int i = 1; i < parts.length; i++) {
                    touch(conf, parts[i]);
                }

            } else {
                LOG.warn("Ignoring unknown command {}", command);
            }
        }
    } catch (Exception e) {
        LOG.error("Caught exception.", e);
        // Report the failure to the caller instead of claiming success.
        exitCode = 1;
    } finally {
        // If the LockUtil was never created there is nothing to release or
        // close; dereferencing it here would only mask the original error.
        if (lockUtil != null) {
            while (true) {
                try {
                    // Only release a lock that was actually acquired, and
                    // only once even if close() forces a retry.
                    if (lockAcquired) {
                        lockUtil.releaseWriteLock(lockPath);
                        lockAcquired = false;
                    }
                    lockUtil.close();
                    break;
                } catch (KeeperException.ConnectionLossException e) {
                    LOG.warn("Lost connection to ZooKeeper.  Retrying.", e);
                }
            }
        }
    }

    return exitCode;
}

From source file:com.blackberry.logdriver.LockedFsShell.java

License:Apache License

/**
 * Runs one or more FsShell commands while holding a write lock.
 *
 * <p>Arguments: zkConnectString, dcNumber, service, date, hour, component,
 * followed by one or more commands. Each command string is split on
 * whitespace and handed to a fresh {@code FsShell}. Execution stops at the
 * first command that returns a non-zero status, and the JVM then exits with
 * that status. Any exception terminates the JVM with status 1.
 *
 * @param args command-line arguments as described above
 */
public static void main(String[] args) {
    // The required args are zkConnectString, dcNumber, service, date, hour,
    // component and an arbitrary number of commands.
    if (args.length < 7) {
        printUsage();
        System.exit(1);
    }

    String zkConnectString = args[0];
    String dcNumber = args[1];
    String service = args[2];
    String date = args[3];
    String hour = args[4];
    String component = args[5];

    // Set the configuration correctly, so we can reach zookeeper
    Configuration conf = new Configuration();
    conf.set("zk.connect.string", zkConnectString);

    int res = 0;
    try {
        LockUtil lockUtil = new LockUtil(conf);

        PathInfo pathInfo = new PathInfo();
        pathInfo.setDcNumber(dcNumber);
        pathInfo.setService(service);
        pathInfo.setDate(date);
        pathInfo.setHour(hour);
        pathInfo.setComponent(component);

        String lockPath = lockUtil.getLockPath(pathInfo);

        // Get the write lock
        while (true) {
            try {
                lockUtil.acquireWriteLock(lockPath);
                break;
            } catch (KeeperException.ConnectionLossException e) {
                LOG.warn("Lost connection to ZooKeeper.  Retrying.", e);
            }
        }

        try {
            // Run the commands
            for (int i = 6; i < args.length; i++) {
                String[] fsShellArgs = args[i].split("\\s+");
                LOG.info("Calling FsShell with args {}", args[i]);
                FsShell shell = new FsShell();
                try {
                    res = ToolRunner.run(shell, fsShellArgs);
                } finally {
                    shell.close();
                }

                if (res != 0) {
                    break;
                }
            }
        } finally {
            // Release the write lock even when a command throws; otherwise
            // the lock would be left held when the JVM exits below.
            while (true) {
                try {
                    lockUtil.releaseWriteLock(lockPath);
                    break;
                } catch (KeeperException.ConnectionLossException e) {
                    LOG.warn("Lost connection to ZooKeeper.  Retrying.", e);
                }
            }
        }

    } catch (Exception e) {
        e.printStackTrace();
        System.exit(1);
    }

    // System.exit does not run finally blocks, so exit only after the lock
    // handling above has finished.
    if (res != 0) {
        LOG.error("Bad return value ({}) from FsShell", res);
        System.exit(res);
    }
}

From source file:com.blackberry.logdriver.util.Cat.java

License:Apache License

/**
 * Configures and launches the map-only "Cat Files" job.
 *
 * <p>Arguments: {@code input [input ...] output}. Each input is expanded as a
 * glob; inputs that do not exist are logged and skipped. Depending on the
 * {@code job.wait} setting the job is either awaited or merely submitted.
 *
 * @param args input path patterns followed by the output directory
 * @return {@code 0} on success or after submission; {@code 1} if the job
 *         fails or the output separator is not a single UTF-8 byte
 * @throws Exception if job setup, submission or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    String oozieConfXml = System.getProperty("oozie.action.conf.xml");
    if (oozieConfXml != null) {
        conf.addResource(new URL("file://" + oozieConfXml));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 2) {
        System.out.println("usage: [genericOptions] input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    for (int i = 0; i < args.length - 1; i++) {
        FileStatus[] matches = fs.globStatus(new Path(args[i]));
        if (matches == null) {
            // globStatus returns null for a non-glob path that does not
            // exist; iterating over it would throw a NullPointerException.
            LOG.warn("Input path does not exist, skipping: " + args[i]);
            continue;
        }
        for (FileStatus f : matches) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Cat.class);
    jobConf.setIfUnset("mapred.job.name", "Cat Files");

    // To propagate credentials within Oozie
    String tokenFile = System.getenv("HADOOP_TOKEN_FILE_LOCATION");
    if (tokenFile != null) {
        jobConf.set("mapreduce.job.credentials.binary", tokenFile);
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }
        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(CatMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file:com.blackberry.logdriver.util.FastSearch.java

License:Apache License

/**
 * Configures and launches the map-only "Search Files" job over Avro block
 * input.
 *
 * <p>Arguments: {@code searchString input [input ...] output}. The search
 * string is passed to the tasks Base64-encoded (UTF-8 bytes) under
 * {@code logdriver.search.string}. Each input is expanded as a glob; inputs
 * that do not exist are logged and skipped.
 *
 * @param args search string, input path patterns, then the output directory
 * @return {@code 0} on success or after submission; {@code 1} if the job
 *         fails or the output separator is not a single UTF-8 byte
 * @throws Exception if job setup, submission or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    String oozieConfXml = System.getProperty("oozie.action.conf.xml");
    if (oozieConfXml != null) {
        conf.addResource(new URL("file://" + oozieConfXml));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchString = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchString input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchString = args[0];
    for (int i = 1; i < args.length - 1; i++) {
        FileStatus[] matches = fs.globStatus(new Path(args[i]));
        if (matches == null) {
            // globStatus returns null for a non-glob path that does not
            // exist; iterating over it would throw a NullPointerException.
            LOG.warn("Input path does not exist, skipping: " + args[i]);
            continue;
        }
        for (FileStatus f : matches) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(FastSearch.class);
    jobConf.setIfUnset("mapred.job.name", "Search Files");

    // To propagate credentials within Oozie
    String tokenFile = System.getenv("HADOOP_TOKEN_FILE_LOCATION");
    if (tokenFile != null) {
        jobConf.set("mapreduce.job.credentials.binary", tokenFile);
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    // Base64 keeps arbitrary search text intact inside the job configuration.
    jobConf.set("logdriver.search.string", Base64.encodeBase64String(searchString.getBytes("UTF-8")));

    job.setInputFormatClass(AvroBlockInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    // And set the output as usual
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        AvroBlockInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file:com.blackberry.logdriver.util.Grep.java

License:Apache License

/**
 * Configures and launches the map-only "Grep Files" job.
 *
 * <p>Arguments: {@code regex input [input ...] output}. The regular
 * expression is passed to the tasks Base64-encoded (UTF-8 bytes) under
 * {@code logdriver.grep.regex}. Each input is expanded as a glob; inputs that
 * do not exist are logged and skipped.
 *
 * @param args regex, input path patterns, then the output directory
 * @return {@code 0} on success or after submission; {@code 1} if the job
 *         fails or the output separator is not a single UTF-8 byte
 * @throws Exception if job setup, submission or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    String oozieConfXml = System.getProperty("oozie.action.conf.xml");
    if (oozieConfXml != null) {
        conf.addResource(new URL("file://" + oozieConfXml));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String regex = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] regex input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    regex = args[0];
    for (int i = 1; i < args.length - 1; i++) {
        FileStatus[] matches = fs.globStatus(new Path(args[i]));
        if (matches == null) {
            // globStatus returns null for a non-glob path that does not
            // exist; iterating over it would throw a NullPointerException.
            LOG.warn("Input path does not exist, skipping: " + args[i]);
            continue;
        }
        for (FileStatus f : matches) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Grep.class);
    jobConf.setIfUnset("mapred.job.name", "Grep Files");

    // To propagate credentials within Oozie
    String tokenFile = System.getenv("HADOOP_TOKEN_FILE_LOCATION");
    if (tokenFile != null) {
        jobConf.set("mapreduce.job.credentials.binary", tokenFile);
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    // Base64 keeps arbitrary regex text intact inside the job configuration.
    jobConf.set("logdriver.grep.regex", Base64.encodeBase64String(regex.getBytes("UTF-8")));

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(GrepMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    // And set the output as usual
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }

}

From source file:com.blackberry.logdriver.util.MultiSearch.java

License:Apache License

/**
 * Configures and launches the map-only "MultiSearch" job.
 *
 * <p>Arguments: {@code searchStringDirectory input [input ...] output}. The
 * replication factor of every file in the search-string directory is raised
 * to 16 before the job starts, and the directory is passed to the tasks under
 * {@code logdriver.search.string.dir}. Each input is expanded as a glob;
 * inputs that do not exist are logged and skipped.
 *
 * @param args search-string directory, input path patterns, then the output
 *        directory
 * @return {@code 0} on success or after submission; {@code 1} if the job
 *         fails or the output separator is not a single UTF-8 byte
 * @throws Exception if job setup, submission or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    String oozieConfXml = System.getProperty("oozie.action.conf.xml");
    if (oozieConfXml != null) {
        conf.addResource(new URL("file://" + oozieConfXml));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchStringDir = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchStringDirectory input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchStringDir = args[0];
    // We are going to be reading all the files in this directory a lot. So
    // let's up the replication factor by a lot so that they're easy to read.
    for (FileStatus f : fs.listStatus(new Path(searchStringDir))) {
        fs.setReplication(f.getPath(), (short) 16);
    }

    for (int i = 1; i < args.length - 1; i++) {
        FileStatus[] matches = fs.globStatus(new Path(args[i]));
        if (matches == null) {
            // globStatus returns null for a non-glob path that does not
            // exist; iterating over it would throw a NullPointerException.
            LOG.warn("Input path does not exist, skipping: " + args[i]);
            continue;
        }
        for (FileStatus f : matches) {
            paths.add(f.getPath());
        }
    }

    outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(MultiSearch.class);
    jobConf.setIfUnset("mapred.job.name", "MultiSearch");

    // To propagate credentials within Oozie
    String tokenFile = System.getenv("HADOOP_TOKEN_FILE_LOCATION");
    if (tokenFile != null) {
        jobConf.set("mapreduce.job.credentials.binary", tokenFile);
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string.dir", searchStringDir);

    // This search is generally too fast to make good use of 128MB blocks, so
    // let's set the value to 256MB (if it's not set already)
    if (jobConf.get("mapred.max.split.size") == null) {
        jobConf.setLong("mapred.max.split.size", 256L * 1024 * 1024);
    }

    job.setInputFormatClass(AvroBlockInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        AvroBlockInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file:com.blackberry.logdriver.util.Search.java

License:Apache License

/**
 * Configures and launches the map-only "Search Files" job over Boom input.
 *
 * <p>Arguments: {@code searchString input [input ...] output}. The search
 * string is passed to the tasks unencoded under
 * {@code logdriver.search.string}. Each input is expanded as a glob; inputs
 * that do not exist are logged and skipped.
 *
 * @param args search string, input path patterns, then the output directory
 * @return {@code 0} on success or after submission; {@code 1} if the job
 *         fails or the output separator is not a single UTF-8 byte
 * @throws Exception if job setup, submission or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    String oozieConfXml = System.getProperty("oozie.action.conf.xml");
    if (oozieConfXml != null) {
        conf.addResource(new URL("file://" + oozieConfXml));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchString = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchString input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchString = args[0];
    for (int i = 1; i < args.length - 1; i++) {
        FileStatus[] matches = fs.globStatus(new Path(args[i]));
        if (matches == null) {
            // globStatus returns null for a non-glob path that does not
            // exist; iterating over it would throw a NullPointerException.
            LOG.warn("Input path does not exist, skipping: " + args[i]);
            continue;
        }
        for (FileStatus f : matches) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Search.class);
    jobConf.setIfUnset("mapred.job.name", "Search Files");

    // To propagate credentials within Oozie
    String tokenFile = System.getenv("HADOOP_TOKEN_FILE_LOCATION");
    if (tokenFile != null) {
        jobConf.set("mapreduce.job.credentials.binary", tokenFile);
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string", searchString);

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    // And set the output as usual
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}