public String get(String name) 

Returns the value of the property called `name`, or `null` if no such property exists.


From source file:ImportTsv.java

License:Apache License

/**
 * Sets up the actual job.
 *
 * @param conf  The current configuration.
 * @param args  The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
        throws IOException, ClassNotFoundException {
    // NOTE(review): this listing is truncated -- many closing braces and several
    // statements are missing below. The comments describe only what is visible.
    // Visible argument usage: args[0] = table name, args[1] = input directory,
    // args[3] / args[4] = filter settings. args[2] is not read in the visible code.
    Job job = null;
    // Connection and Admin are both opened with try-with-resources.
    try (Connection connection = ConnectionFactory.createConnection(conf)) {
        try (Admin admin = connection.getAdmin()) {
            // Support non-XML supported characters
            // by re-encoding the passed separator as a Base64 string.
            String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
            if (actualSeparator != null) {
                // NOTE(review): getBytes() without a charset uses the platform default encoding.
                conf.set(SEPARATOR_CONF_KEY, Base64.encodeBytes(actualSeparator.getBytes()));

            // See if a non-default Mapper was set
            String mapperClassName = conf.get(MAPPER_CONF_KEY);
            // Class.forName is the source of the declared ClassNotFoundException.
            // NOTE(review): raw Class type; a wildcard (Class<?>) would avoid the raw-type warning.
            Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;

            TableName tableName = TableName.valueOf(args[0]);
            Path inputDir = new Path(args[1]);

            // set filter
            // These values are written into conf before the Job is created from it below.
            // NOTE(review): no visible check that args has at least five elements.
            conf.set(EASTCOM_FILTER_PARAMS, args[3]);
            conf.set(EASTCOM_FILTER_DEFINE, args[4]);

            // Job name defaults to NAME + "_" + <table name> unless JOB_NAME_CONF_KEY is set.
            String jobName = conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName.getNameAsString());
            job = Job.getInstance(conf, jobName);
            FileInputFormat.setInputPaths(job, inputDir);
            // A non-null bulk output path selects the HFile (bulk load) branch further down.
            String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
            String columns[] = conf.getStrings(COLUMNS_CONF_KEY);
            if (StringUtils.isNotEmpty(conf.get(CREDENTIALS_LOCATION))) {
                String fileLoc = conf.get(CREDENTIALS_LOCATION);
                // NOTE(review): cred is not used in the visible code; the statement that
                // consumes it was presumably lost in extraction -- confirm against the original.
                Credentials cred = Credentials.readTokenStorageFile(new File(fileLoc), conf);

            if (hfileOutPath != null) {
                // Bulk-output branch: the table is created on demand when CREATE_TABLE_CONF_KEY
                // is "yes" (case-insensitive; "yes" is also the default), otherwise a missing
                // table is an error.
                if (!admin.tableExists(tableName)) {
                    String errorMsg = format("Table '%s' does not exist.", tableName);
                    if ("yes".equalsIgnoreCase(conf.get(CREATE_TABLE_CONF_KEY, "yes"))) {
                        // TODO: this is backwards. Instead of depending on the existence of a table,
                        // create a sane splits file for HFileOutputFormat based on data sampling.
                        createTable(admin, tableName, columns);
                    } else {
                        throw new TableNotFoundException(errorMsg);
                try (HTable table = (HTable) connection.getTable(tableName)) {
                    boolean noStrict = conf.getBoolean(NO_STRICT_COL_FAMILY, false);
                    // if no.strict is false then check column family
                    if (!noStrict) {
                        ArrayList<String> unmatchedFamilies = new ArrayList<String>();
                        Set<String> cfSet = getColumnFamilies(columns);
                        HTableDescriptor tDesc = table.getTableDescriptor();
                        // Look up each requested column family in the table descriptor.
                        // NOTE(review): the loop body that records unmatched families is missing here.
                        for (String cf : cfSet) {
                            if (tDesc.getFamily(Bytes.toBytes(cf)) == null) {
                        if (unmatchedFamilies.size() > 0) {
                            ArrayList<String> familyNames = new ArrayList<String>();
                            for (HColumnDescriptor family : table.getTableDescriptor().getFamilies()) {
                            // NOTE(review): what is done with msg (presumably thrown) is not visible.
                            String msg = "Column Families " + unmatchedFamilies + " specified in "
                                    + COLUMNS_CONF_KEY + " does not match with any of the table " + tableName
                                    + " column families " + familyNames + ".\n"
                                    + "To disable column family check, use -D" + NO_STRICT_COL_FAMILY
                                    + "=true.\n";
                    Path outputDir = new Path(hfileOutPath);
                    FileOutputFormat.setOutputPath(job, outputDir);
                    // NOTE(review): both branches of this mapper-type check are empty in the listing.
                    if (mapperClass.equals(TsvImporterTextMapper.class)) {
                    } else {
                    HFileOutputFormat2.configureIncrementalLoad(job, table, table);
            } else {
                // Non-bulk branch: the target table must already exist.
                if (!admin.tableExists(tableName)) {
                    String errorMsg = format("Table '%s' does not exist.", tableName);
                    throw new TableNotFoundException(errorMsg);
                // NOTE(review): the start of the statement that builds this message is missing;
                // the visible text says TsvImporterTextMapper must not be used without bulk loading.
                if (mapperClass.equals(TsvImporterTextMapper.class)) {
                            + " should not be used for non bulkloading case. use "
                            + TsvImporterMapper.class.toString()
                            + " or custom mapper whose value type is Put.");
                // No reducers. Just write straight to table. Call initTableReducerJob
                // to set up the TableOutputFormat.
                TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);

                    // NOTE(review): tail of a call whose beginning was lost in extraction.
                    com.google.common.base.Function.class /* Guava used by TsvParser */);
    return job;

From source file:ConfTest.java

License:Open Source License

/**
 * Creates a new Configuration and registers one additional XML resource on it.
 * NOTE(review): the listing is truncated after these two statements; nothing is
 * read from the configuration in the visible code.
 *
 * @param args command line arguments (not used in the visible code)
 */
public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Hard-coded Windows path to the extra configuration file.
    conf.addResource(new Path("d:\\test\\a.xml"));

From source file:TestRawParascaleFileSystemBase.java

License:Apache License

/**
 * Creates the file system for a remote user named "hadoop" and initializes it with
 * the URI stored in the configuration under FS_DEFAULT_NAME.
 * NOTE(review): the closing brace of this method is missing from the listing.
 *
 * @throws URISyntaxException if the configured value cannot be parsed as a URI
 * @throws IOException declared by the signature; presumably raised by fs.initialize -- confirm
 */
protected void init() throws URISyntaxException, IOException {
    groupInformation = UserGroupInformation.createRemoteUser("hadoop");
    fs = getFileSystem(groupInformation);
    final Configuration conf = getConf();
    // NOTE(review): conf.get returns null when the key is unset, and new URI(null) then
    // throws NullPointerException. getConf() is also called a second time here instead of
    // reusing the local conf.
    fs.initialize(new URI(conf.get(FS_DEFAULT_NAME)), getConf());

From source file:LungDriver.java

License:Creative Commons License

/**
 * Reads the job settings from the configuration, prints them, optionally processes
 * command line arguments, cleans the output path, creates the job and waits for it.
 * NOTE(review): the listing is truncated -- closing braces and the job setup between
 * Job.getInstance and waitForCompletion are missing.
 *
 * @param args command line arguments; only processed when non-empty
 * @return 1 if processInputs reports a non-zero status or the job does not complete
 *         successfully, 0 otherwise
 * @throws Exception declared by the signature
 */
public int run(String[] args) throws Exception {

    Configuration conf = getConf();

    // Each of these is null when the corresponding key is not configured.
    String author = conf.get("com.marcolotz.author");
    String jobName = conf.get("mapreduce.job.name");
    String inputPath = conf.get("mapred.input.dir");
    String outputPath = conf.get("mapred.output.dir");

    System.out.println("\nApplication author: " + author + "\n");

    // The configuration file location printed here is a hard-coded string.
    System.out.println("Configurations stored at: conf/lungConfiguration.xml");
    System.out.println("Input path: " + inputPath);
    System.out.println("Output path: " + outputPath);

    /* For non-standard operation (i.e. with arguments) */
    if (args.length != 0) {

        /* terminates the program if there is an incorrect input */
        if (processInputs(args, conf) != 0) {
            return 1;
        }

    // The thresholds fall back to 110 and 120 when not configured.
    System.out.println("Bottom Threshold for nodules candidates detection:"
            + conf.getInt("com.marcolotz.grayNoduleCandidates.bottomThreshold", 110));
    System.out.println("Top Threshold for nodules candidates detection:"
            + conf.getInt("com.marcolotz.grayNoduleCandidates.topThreshold", 120));

    System.out.print("Cleaning output path: ");
    cleanOutputPath(conf, outputPath);

    System.out.print("Configuring the job " + jobName + ": ");

    /* Makes a new job */
    // The classic Job constructor is deprecated.
    Job job = Job.getInstance(conf);

    // NOTE(review): the next two lines are the remains of a block comment whose
    // delimiters were lost; the statement they described is missing as well.
     * This method sets the jar file in which each node will look for the
     * Mapper and Reducer classes.


    // Submits the job to the cluster
    System.out.println("Distributing the job:");
    // Blocks until the job finishes; true enables progress output.
    return job.waitForCompletion(true) ? 0 : 1;

From source file:Importer.java

License:Open Source License

/**
 * Converts one text file into a block-compressed SequenceFile named
 * {@code <file name>.seq} under outDir. Lines are accumulated into a document until
 * a line starting with "---END.OF.DOCUMENT---" is read; each document is appended
 * with hash(content) as its key.
 * NOTE(review): the listing is truncated -- several closing braces and statements
 * are missing. outDir, seqFileWriter, totalBytes, totalRecords and hash are
 * declared outside this block.
 *
 * @param file the input file; a name ending in ".bz2" is read through CBZip2InputStream
 * @throws Exception declared by the signature
 */
public static void copyFile(File file) throws Exception {
    //    String TEST_PREFIX = "";
    File destFile = new File(outDir, file.getName() + ".seq");
    Path dest = new Path(destFile.getAbsolutePath());

    Configuration conf = new Configuration();
    // NOTE(review): conf.get returns null when "fs.default.name" is unset, which makes
    // the URI constructor throw NullPointerException.
    FileSystem fileSys = org.apache.hadoop.fs.FileSystem.get(new java.net.URI(conf.get("fs.default.name")),
            conf);
    CompressionCodec codec = new DefaultCodec();
    FSDataOutputStream outputStr = fileSys.create(dest);
    seqFileWriter = SequenceFile.createWriter(conf, outputStr, Text.class, Text.class,
            SequenceFile.CompressionType.BLOCK, codec);
    String filename = file.getName();
    // NOTE(review): no close() for in, br, outputStr or seqFileWriter is visible in this listing.
    InputStream in = new BufferedInputStream(new FileInputStream(file));
    if (filename.endsWith(".bz2")) {
        in.read(); //snarf header
        in = new CBZip2InputStream(in);
    BufferedReader br = new BufferedReader(new InputStreamReader(in, "US-ASCII"));

    System.out.println("working on file " + file);
    // NOTE(review): records is never incremented in the visible code, so the totals and
    // the summary line below would report 0 -- the increment was presumably lost.
    int records = 0;
    long bytes = 0, bytes_since_status = 0;
    long startTime = System.currentTimeMillis();
    String s = null;
    Text content = new Text();
    while ((s = br.readLine()) != null) {
        if (s.startsWith("---END.OF.DOCUMENT---")) {
            // End of a document: write it out and start a fresh buffer.
            Text name = new Text(hash(content));
            seqFileWriter.append(name, content);
            content = new Text();
        } else {
            // A space is appended in place of the line terminator that readLine strips.
            // NOTE(review): getBytes() without a charset uses the platform default encoding,
            // while the reader above decodes as US-ASCII.
            byte[] line_as_bytes = (s + " ").getBytes();
            for (byte b : line_as_bytes) {
                // NOTE(review): a Java byte is signed (-128..127), so b < 128 is always true
                // and this assertion cannot fail; a set high bit shows up as b < 0.
                assert b < 128 : "found an unexpected high-bit set";

            content.append(line_as_bytes, 0, line_as_bytes.length);
            bytes += line_as_bytes.length;
            bytes_since_status += line_as_bytes.length;
            if(bytes_since_status > 10 * 1024 * 1024) { //every 10 MB
              bytes_since_status = 0;
    } //end while
    // Write out trailing content that was not followed by an end-of-document marker.
    if (content.getLength() > 5) {
        Text name = new Text(hash(content));
        seqFileWriter.append(name, content);
    totalBytes += bytes;
    totalRecords += records;
    // Elapsed whole seconds plus one, so the division below never divides by zero.
    long time = (System.currentTimeMillis() - startTime) / 1000 + 1;
    long kbSec = bytes / 1024 / time;
    System.out.println(new java.util.Date());
    System.out.println("File " + file.getName() + " " + records + " records, " + bytes + " bytes in " + time
            + " seconds (" + kbSec + " KB/sec).");

From source file:MedianMaper.java

/**
 * Parses one input record with FlightPriceParser and emits a pair whose key is
 * year * 100 + week-of-year and whose value is the price multiplied by 100 and
 * rounded to an int.
 * NOTE(review): the listing is truncated -- closing braces are missing and the
 * statement controlled by the carrier check appears to have been lost.
 *
 * @param Key     input key (not used in the visible code)
 * @param Value   the input line handed to the parser
 * @param context supplies the configuration and receives the output pair
 * @throws IOException          declared by the signature
 * @throws InterruptedException declared by the signature
 */
public void map(LongWritable Key, Text Value, Context context) throws IOException, InterruptedException {
    int YearWeek;
    // NOTE(review): Price is declared but never used in the visible code.
    double Price;
    Configuration conf = context.getConfiguration();
    // null when "Cheapest_Carrier" is not present in the job configuration.
    String Cheapest_Carrier = conf.get("Cheapest_Carrier");
    Calendar Cal = Calendar.getInstance();
    FlightPriceParser FParser = new FlightPriceParser();
    // Records the parser rejects are skipped.
    if (!FParser.map(Value.toString())) {
        return;
    // NOTE(review): if FParser.Carrier is a String, != compares object references rather
    // than contents; equals() is presumably what is intended -- confirm. As the listing
    // stands, this if guards the Cal.set call on the next line.
    if (FParser.Carrier != Cheapest_Carrier)
    // Calendar months are zero-based, hence Month - 1.
    Cal.set(FParser.Year, FParser.Month - 1, FParser.DayOfMonth);
    YearWeek = FParser.Year * 100 + Cal.get(Calendar.WEEK_OF_YEAR);
    context.write(new IntWritable(YearWeek), new IntWritable((int) Math.round(FParser.Price * 100)));

From source file:ConfigurationTest.java

License:Apache License

/**
 * Checks that the configuration returned by CosmosWebConfiguration.get() holds the
 * expected ZooKeeper, Accumulo instance, user and password values.
 * NOTE(review): the closing brace of this method is missing from the listing.
 */
public void test() {
    Configuration conf = CosmosWebConfiguration.get();

    // Expected value first, actual value second.
    Assert.assertEquals("localhost", conf.get(CosmosWebConfiguration.ZOOKEEPERS));
    Assert.assertEquals("accumulo", conf.get(CosmosWebConfiguration.ACCUMULO_INSTANCE));
    Assert.assertEquals("root", conf.get(CosmosWebConfiguration.ACCUMULO_USER));
    Assert.assertEquals("secret", conf.get(CosmosWebConfiguration.ACCUMULO_PASSWORD));

From source file:HoopRemoteTask.java

License:Open Source License

/* NOTE: the original Javadoc for the method below was lost in extraction; only its closing delimiter survived. */
/**
 * Opens {@code <output dir>/part-r-00000} on HoopRemoteTask.hdfs, reads it line by
 * line and returns count.
 * NOTE(review): the listing is truncated -- the loop body and several closing braces
 * are missing, so count is never incremented in the visible code.
 *
 * @param conf configuration from which "mapred.output.dir" is read
 * @return the value of count; 0 in the visible code
 */
public static int countTerms(Configuration conf) {
    // NOTE(review): the log text names postProcess although this is countTerms.
    dbg("postProcess ()");

    int count = 0;

    String output = conf.get("mapred.output.dir");

    // Fall back to HoopLink.outputpath when the setting is missing or empty.
    if (output != null) {
        if (output.isEmpty() == true)
            output = HoopLink.outputpath;
    } else
        output = HoopLink.outputpath;

    Path inFile = new Path(output + "/part-r-00000");
    FSDataInputStream in = null;

    String thisLine = null;

    try {
        in = HoopRemoteTask.hdfs.open(inFile);

        // NOTE(review): InputStreamReader without a charset uses the platform default,
        // and no close() of the reader or stream is visible in this listing.
        BufferedReader reader = new BufferedReader(new InputStreamReader(in));

        while ((thisLine = reader.readLine()) != null) {

    } catch (IOException e) {
        // NOTE(review): the exception itself is not logged, only a fixed message.
        dbg("Error opening file in HDFS");

    return (count);

From source file:HoopRemoteTask.java

License:Open Source License

/* NOTE: the original Javadoc for the method below was lost in extraction; only its closing delimiter survived. */
/**
 * Post-processes the job output. When HoopLink.shardcreate is "hdfs", it counts the
 * terms, opens {@code <output dir>/part-r-00000} and creates partition output files
 * named {@code partition-<n>-00000.txt}. It then builds a HoopStatistics report.
 * NOTE(review): the listing is heavily truncated -- closing braces, early returns,
 * the write/close calls and the counter updates are missing. The comments describe
 * only what is visible.
 *
 * @param conf configuration from which "mapred.output.dir" is read
 */
public static void postProcess(Configuration conf) {
    dbg("postProcess ()");

    // NOTE(review): the log message says processing is skipped, but no return is visible here.
    if (HoopLink.nrshards == 1) {
        dbg("Only 1 shard needed, skipping post processing");

    if (HoopLink.shardcreate.equals("mos") == true) {
        dbg("We shouldn't be pos-processing since the HoopShardedOutputFormat class already did this");

    if (HoopLink.shardcreate.equals("hdfs") == true) {
        dbg("Starting shard post-process task ...");

        int termCount = countTerms(conf);

        String output = conf.get("mapred.output.dir");

        // Fall back to HoopLink.outputpath when the setting is missing or empty.
        if (output != null) {
            if (output.isEmpty() == true)
                output = HoopLink.outputpath;
        } else
            output = HoopLink.outputpath;

        dbg("Post processing " + termCount + " items in: " + output);

        Path inFile = new Path(output + "/part-r-00000");
        Path outFile = null;
        FSDataInputStream in = null;
        FSDataOutputStream out = null;

        try {
            in = HoopRemoteTask.hdfs.open(inFile);

            // NOTE(review): InputStreamReader without a charset uses the platform default.
            BufferedReader reader = new BufferedReader(new InputStreamReader(in));

            String thisLine;

            int count = 0;
            // Number of lines per partition file.
            // NOTE(review): if HoopLink.nrshards is an int, the division truncates before
            // Math.round is applied, so the rounding has no effect -- confirm the field type.
            int split = Math.round(termCount / HoopLink.nrshards);
            int partition = 0;

            outFile = new Path(output + "/partition-" + partition + "-00000.txt");
            out = HoopRemoteTask.hdfs.create(outFile);

            if (out != null) {
                while ((thisLine = reader.readLine()) != null) {
                    StringBuffer formatted = new StringBuffer();


                    // Start the next partition file once the current one holds more than split lines.
                    // NOTE(review): the statements that close the previous file and advance
                    // partition and count are not visible in this listing.
                    if (count > split) {


                        outFile = new Path(output + "/partition-" + partition + "-00000.txt");
                        out = HoopRemoteTask.hdfs.create(outFile);

                        count = 0;

                    byte[] utf8Bytes = formatted.toString().getBytes("UTF8");
                    // We get an additional 0 because of Java string encoding. leave it out!

                // NOTE(review): the statements controlled by these two null checks
                // (presumably close calls) are missing.
                if (in != null)

                if (out != null)
            } else
                dbg("Error: unable to open output file");

        } catch (IOException e) {
            // TODO Auto-generated catch block

        dbg("Starting rudimentary sharding into " + HoopLink.nrshards);

        if (in != null) {

            try {
            } catch (IOException e) {
                // TODO Auto-generated catch block


    HoopStatistics stats = new HoopStatistics();
    // NOTE(review): results is not used in the visible code.
    String results = stats.printStatistics(null);

From source file:ParascaleFileSystem.java

License:Apache License

/** {@inheritDoc} */
public void initialize(final URI uri, final Configuration conf) throws IOException {
    // Builds the underlying RawParascaleFileSystem for the configured or current user,
    // optionally wraps it in ChecksumFsWrapper, initializes it with a path-less URI and
    // then verifies the mount point and home directory.
    // NOTE(review): several closing braces are missing from this listing.
    final URI rawUri;
    final RawParascaleFileSystem rawParascaleFileSystem;
    UserGroupInformation groupInformation;
    try {
        // When "hadoop.job.ugi" is set, its first comma-separated token is taken as the
        // user name; otherwise the current user is used.
        // NOTE(review): the value is read twice, and nextToken() throws
        // NoSuchElementException if the configured value is an empty string.
        if (conf.get("hadoop.job.ugi") != null) {
            String username = new StringTokenizer(conf.get("hadoop.job.ugi"), ",").nextToken();
            groupInformation = UserGroupInformation.createRemoteUser(username);
        } else {
            groupInformation = UserGroupInformation.getCurrentUser();
        rawParascaleFileSystem = new RawParascaleFileSystem(groupInformation);
        // The checksum wrapper is used only when CRC_FILESYSTEM is true (default false).
        fs = conf.getBoolean(CRC_FILESYSTEM, false) ? new ChecksumFsWrapper(rawParascaleFileSystem)
                : rawParascaleFileSystem;
        // Keep only scheme and authority; path, query and fragment are dropped.
        rawUri = new URI(uri.getScheme(), uri.getAuthority(), null, null, null);
    } catch (final URISyntaxException e) {
        // Rethrown as IOException with the original exception attached as the cause.
        throw (IOException) new IOException().initCause(e);
    // initialize with the raw URI - RawFS expects it without a path!
    fs.initialize(rawUri, conf);
    // Each failed check below aborts initialization with an IOException.
    if (!rawParascaleFileSystem.isMountPointAbsolute()) {
        throw new IOException(
                "Mountpoint " + rawParascaleFileSystem.getMountPoint() + " is not an absolute path");
    if (!rawParascaleFileSystem.mountPointExists()) {
        throw new IOException("WorkingDirectory does not exist - can not mount Parascale " + "filesystem at "
                + rawParascaleFileSystem.getMountPath());
    if (!rawParascaleFileSystem.createHomeDirectory()) {
        throw new IOException("Can not create HomeDirectory");
