Example usage for org.apache.hadoop.conf Configuration setInt

Introduction

This page lists example usages of org.apache.hadoop.conf.Configuration.setInt.

Prototype

public void setInt(String name, int value) 

Document

Set the value of the name property to an int.
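
A minimal sketch of how the setter is typically paired with Configuration.getInt; the property name and values below are purely illustrative:

Configuration conf = new Configuration();
// Store an int under an illustrative property name.
conf.setInt("example.buffer.size", 4096);
// Read it back; the second argument is the default returned when the property is unset.
int bufferSize = conf.getInt("example.buffer.size", 1024);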

Usage

From source file:format.OverlapLengthInputFormat.java

License:Apache License

/**
 * Set the overlapping portion of adjacent records
 * @param conf configuration
 * @param overlapLength the overlapping portion of adjacent records
 **/
public static void setOverlapLength(Configuration conf, int overlapLength) {
    conf.setInt(OVERLAP_LENGTH, overlapLength);
}
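
A brief caller sketch for the setter above, assuming OverlapLengthInputFormat is the class from this source file; the job name and overlap value are illustrative:

Configuration conf = new Configuration();
Job job = new Job(conf, "overlap-example");
// Overlap adjacent records by 16 bytes (illustrative value).
OverlapLengthInputFormat.setOverlapLength(job.getConfiguration(), 16);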

From source file:FormatStorage.Head.java

License:Open Source License

public void toJobConf(Configuration conf) {
    conf.setInt(ConstVar.HD_var, var);
    conf.setInt(ConstVar.HD_compress, compress);
    conf.setInt(ConstVar.HD_compressStyle, compressStyle);
    conf.setInt(ConstVar.HD_primaryIndex, primaryIndex);
    conf.setInt(ConstVar.HD_encode, encode);
    conf.setInt(ConstVar.HD_encodeStyle, encodeStyle);
    if (key != null) {
        conf.set(ConstVar.HD_key, key);
    }

    if (fieldMap != null) {
        short fieldNum = fieldMap.fieldNum();
        String[] fieldStrings = new String[fieldNum];

        Set<Short> keySet = fieldMap.fields.keySet();
        Iterator<Short> iterator = keySet.iterator();

        int i = 0;
        while (iterator.hasNext()) {
            Field field = fieldMap.fields.get(iterator.next());

            fieldStrings[i++] = field.type + ConstVar.RecordSplit + field.len + ConstVar.RecordSplit
                    + field.index;
        }
        conf.setStrings(ConstVar.HD_fieldMap, fieldStrings);
    }

}

From source file:FormatStorage1.IHead.java

License:Open Source License

public void toJobConf(Configuration conf) {
    conf.setInt(ConstVar.HD_magic, magic);
    conf.setInt(ConstVar.HD_var, var);
    conf.setInt(ConstVar.HD_ver, ver);
    conf.setInt(ConstVar.HD_lineindex, lineindex);
    conf.setInt(ConstVar.HD_primaryIndex, primaryIndex);
    conf.setInt(ConstVar.HD_compress, compress);
    conf.setInt(ConstVar.HD_compressStyle, compressStyle);
    conf.setInt(ConstVar.HD_encode, encode);
    conf.setInt(ConstVar.HD_encodeStyle, encodeStyle);

    if (fieldMap != null) {
        int fieldNum = fieldMap.fieldtypes().size();
        String[] fieldStrings = new String[fieldNum];

        int i = 0;
        for (IRecord.IFType ft : this.fieldMap.fieldtypes().values()) {
            fieldStrings[i++] = ft.type() + ConstVar.RecordSplit + ft.len() + ConstVar.RecordSplit + ft.idx();
        }
        conf.setStrings(ConstVar.HD_fieldMap, fieldStrings);
    }

    if (udi != null && udi.infos.size() > 0) {
        String[] udistrs = new String[udi.infos.size()];
        int i = 0;
        for (Map.Entry<Integer, String> en : udi.infos.entrySet()) {
            udistrs[i++] = en.getKey() + ConstVar.RecordSplit + en.getValue();
        }
        conf.setStrings(ConstVar.HD_udi, udistrs);
    }
}

From source file:gobblin.cluster.GobblinHelixTaskTest.java

License:Apache License

@BeforeClass
public void setUp() throws IOException {
    Configuration configuration = new Configuration();
    configuration.setInt(ConfigurationKeys.TASK_EXECUTOR_THREADPOOL_SIZE_KEY, 1);
    this.taskExecutor = new TaskExecutor(configuration);

    this.helixManager = Mockito.mock(HelixManager.class);
    Mockito.when(this.helixManager.getInstanceName()).thenReturn(GobblinHelixTaskTest.class.getSimpleName());
    this.taskStateTracker = new GobblinHelixTaskStateTracker(new Properties(), this.helixManager);

    this.localFs = FileSystem.getLocal(configuration);
    this.appWorkDir = new Path(GobblinHelixTaskTest.class.getSimpleName());
    this.taskOutputDir = new Path(this.appWorkDir, "output");
}

From source file:gr.ntua.h2rdf.byteImport.HexastoreBulkImport.java

License:Open Source License

public Job createSubmittableJob(String[] args) {
    TABLE_NAME = args[1];
    Job job = null;
    try {
        Configuration conf = new Configuration();
        conf.addResource("hbase-default.xml");
        conf.addResource("hbase-site.xml");
        job = new Job(conf, NAME);
        job.setJarByClass(HexastoreBulkImport.class);
        job.setMapperClass(TotalOrderPrep.Map.class);
        job.setReducerClass(Reduce.class);//sampler.HamaReducer.class);
        job.setCombinerClass(Combiner.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(ImmutableBytesWritable.class);
        job.setPartitionerClass(TotalOrderPartitioner.class);
        //TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("/user/npapa/"+regions+"partitions/part-r-00000"));
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("partitions/part-r-00000"));
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(HFileOutputFormat.class);
        Path out = new Path("out");
        FileOutputFormat.setOutputPath(job, out);
        FileSystem fs;
        try {
            fs = FileSystem.get(conf);
            if (fs.exists(out)) {
                fs.delete(out, true);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }

        // c.addResource(new Path("/0/arcomemDB/hadoop-0.20.2-cdh3u3/conf/hbase-site.xml"));
        HBaseAdmin hadmin = new HBaseAdmin(conf);
        HTableDescriptor desc = new HTableDescriptor(TABLE_NAME + "_stats");
        HColumnDescriptor family = new HColumnDescriptor("size");
        desc.addFamily(family);
        conf.setInt("zookeeper.session.timeout", 600000);
        if (hadmin.tableExists(TABLE_NAME + "_stats")) {
            //hadmin.disableTable(TABLE_NAME+"_stats");
            //hadmin.deleteTable(TABLE_NAME+"_stats");
        } else {
            hadmin.createTable(desc);
        }

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        //job.getConfiguration().setInt("mapred.map.tasks", 18);
        job.getConfiguration().set("h2rdf.tableName", TABLE_NAME);
        job.getConfiguration().setInt("mapred.reduce.tasks", (int) TotalOrderPrep.regions);
        job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
        job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
        job.getConfiguration().setInt("io.sort.mb", 100);
        job.getConfiguration().setInt("io.file.buffer.size", 131072);
        job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);
        //job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
        job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);
        //job.getConfiguration().setInt("io.sort.mb", 100);

    } catch (IOException e2) {
        e2.printStackTrace();
    }

    return job;
}

From source file:gr.ntua.h2rdf.loadTriples.SortIds.java

License:Apache License

public static void loadHFiles(String[] args) throws Exception {
    Configuration conf = new Configuration();
    HBaseAdmin hadmin = new HBaseAdmin(conf);

    Path hfofDir = new Path(args[1]);
    FileSystem fs = hfofDir.getFileSystem(conf);
    //if (!fs.exists(hfofDir)) {
    //  throw new FileNotFoundException("HFileOutputFormat dir " +
    //      hfofDir + " not found");
    //}
    FileStatus[] familyDirStatuses = fs.listStatus(hfofDir);
    //if (familyDirStatuses == null) {
    //  throw new FileNotFoundException("No families found in " + hfofDir);
    //}
    int length = 0;
    byte[][] splits = new byte[18000][];
    for (FileStatus stat : familyDirStatuses) {
        if (!stat.isDir()) {
            continue;
        }
        Path familyDir = stat.getPath();
        // Skip _logs, etc
        if (familyDir.getName().startsWith("_"))
            continue;
        //byte[] family = familyDir.getName().getBytes();
        Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir));
        for (Path hfile : hfiles) {
            if (hfile.getName().startsWith("_"))
                continue;
            HFile.Reader hfr = HFile.createReader(fs, hfile, new CacheConfig(conf));
            //HFile.Reader hfr =    new HFile.Reader(fs, hfile, null, false);
            final byte[] first;
            try {
                hfr.loadFileInfo();
                first = hfr.getFirstRowKey();
            } finally {
                hfr.close();
            }
            splits[length] = first.clone();
            length++;
        }
    }
    //System.out.println(length);

    byte[][] splits1 = new byte[length][];

    for (int i = 0; i < splits1.length; i++) {
        splits1[i] = splits[i];
    }
    Arrays.sort(splits1, Bytes.BYTES_COMPARATOR);
    //HTableDescriptor desc = new HTableDescriptor("H2RDF");

    HTableDescriptor desc = new HTableDescriptor(TABLE_NAME + "_Index");

    HColumnDescriptor family = new HColumnDescriptor("1");
    family.setCompressionType(Algorithm.GZ);
    desc.addFamily(family);
    HColumnDescriptor family2 = new HColumnDescriptor("2");
    family2.setCompressionType(Algorithm.GZ);
    desc.addFamily(family2);
    //for (int i = 0; i < splits.length; i++) {
    //   System.out.println(Bytes.toStringBinary(splits[i]));
    //}
    conf.setInt("zookeeper.session.timeout", 600000);
    if (hadmin.tableExists(TABLE_NAME + "_Index")) {
        //hadmin.disableTable(TABLE_NAME);
        //hadmin.deleteTable(TABLE_NAME);
    } else {
        hadmin.createTable(desc, splits1);
    }
    //hadmin.createTable(desc);
    String[] args1 = new String[2];
    args1[0] = args[1];
    args1[1] = TABLE_NAME + "_Index";
    //args1[1]="new2";

    ToolRunner.run(new LoadIncrementalHFiles(HBaseConfiguration.create()), args1);

}

From source file:gr.ntua.h2rdf.loadTriples.TranslateAndImport.java

License:Apache License

private void loadHFiles() throws Exception {
    Configuration conf = HBaseConfiguration.create();
    conf.addResource("hbase-default.xml");
    conf.addResource("hbase-site.xml");
    HBaseAdmin hadmin = new HBaseAdmin(conf);
    Path hfofDir = new Path("out/I");
    FileSystem fs = hfofDir.getFileSystem(conf);
    //if (!fs.exists(hfofDir)) {
    //  throw new FileNotFoundException("HFileOutputFormat dir " +
    //      hfofDir + " not found");
    //}
    // FileStatus[] familyDirStatuses = fs.listStatus(hfofDir);
    //if (familyDirStatuses == null) {
    //  throw new FileNotFoundException("No families found in " + hfofDir);
    //}
    int length = 0;
    byte[][] splits = new byte[18000][];
    Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(hfofDir));
    for (Path hfile : hfiles) {
        if (hfile.getName().startsWith("_"))
            continue;

        HFile.Reader hfr = HFile.createReader(fs, hfile, new CacheConfig(conf));
        //HFile.Reader hfr =    new HFile.Reader(fs, hfile, null, false);
        final byte[] first;
        try {
            hfr.loadFileInfo();
            first = hfr.getFirstRowKey();
        } finally {
            hfr.close();
        }
        //System.out.println("out/I/"+hfile.getName()+" \t "+Bytes.toStringBinary(first));
        splits[length] = first.clone();
        length++;
    }
    byte[][] splits1 = new byte[length][];

    for (int i = 0; i < splits1.length; i++) {
        splits1[i] = splits[i];
    }
    Arrays.sort(splits1, Bytes.BYTES_COMPARATOR);

    HTableDescriptor desc = new HTableDescriptor(TABLE_NAME);

    HColumnDescriptor family = new HColumnDescriptor("I");
    family.setCompressionType(Algorithm.SNAPPY);
    desc.addFamily(family);
    family = new HColumnDescriptor("S");
    family.setCompressionType(Algorithm.SNAPPY);
    desc.addFamily(family);
    family = new HColumnDescriptor("T");
    family.setCompressionType(Algorithm.SNAPPY);
    desc.addFamily(family);
    //family= new HColumnDescriptor("C");
    //desc.addFamily(family); 
    //for (int i = 0; i < splits.length; i++) {
    //   System.out.println(Bytes.toStringBinary(splits[i]));
    //}
    conf.setInt("zookeeper.session.timeout", 600000);
    if (hadmin.tableExists(TABLE_NAME)) {
        //hadmin.disableTable(TABLE_NAME);
        //hadmin.deleteTable(TABLE_NAME);
    } else {
        hadmin.createTable(desc, splits1);
    }
    //hadmin.createTable(desc);
    String[] args1 = new String[2];
    args1[0] = "out";
    args1[1] = TABLE_NAME;
    //args1[1]="new2";

    ToolRunner.run(new LoadIncrementalHFiles(HBaseConfiguration.create()), args1);

}

From source file:gr.ntua.h2rdf.partialJoin.JoinPlaner.java

License:Open Source License

private static String[] printJoinV(Configuration joinConf, String ret) {
    HashMap<String, Integer> varSet = new HashMap<String, Integer>();
    String[] lines = new String[join_files.length];
    for (int i = 0; i < join_files.length; i++) {
        lines[i] = "{";
    }
    //String joinpat = query.getResultVars().get(query.getResultVars().size()-1).toString();
    //try {
    joinConf.set("input.joinvars", ret);
    //v.writeBytes(ret+"\n");
    String patId = "";
    for (int i = 0; i < join_files.length; i++) {
        String jf = join_files[i];
        //if(!varSet.contains(join_files_vars[i])){
        StringTokenizer t = new StringTokenizer(join_files_vars2[i]);
        while (t.hasMoreTokens()) {
            String s = t.nextToken();
            if (!ret.contains(s)) {
                if (!varSet.containsKey(s)) {
                    varSet.put(s, 1);
                } else {
                    Integer temp = varSet.get(s);
                    temp++;
                    varSet.put(s, temp);
                }
            }
        }

        if (jf.contains("BGP:") && joinpat.contains(jf.split("BGP:")[1])) {
            String id = "";
            if (jf.contains("BGP:")) {
                id += "P";
                id += jf.split("BGP:")[1];
                lines[i] = join_files_vars[i] + "{" + jf;
            } else {
                id += "J";
                id += jf.split("_")[2];
                lines[i] += jf;
            }
            patId += id + " " + join_files_vars[i] + " $$ ";
            //v.writeBytes(id+" "+join_files_vars[i]+" $$ ");
        } else {
            String id = "";
            if (jf.contains("BGP:")) {
                lines[i] = join_files_vars[i] + "|" + jf;
            } else {
                id += "J";
                id += jf.split("_")[2];
                lines[i] += jf;
                patId += id + " " + join_files_vars[i] + " $$ ";
                //v.writeBytes(id+" "+join_files_vars[i]+" $$ ");
            }
        }
    }
    joinConf.set("input.patId", patId);

    Iterator<String> it = varSet.keySet().iterator();
    int c = 0;
    while (it.hasNext()) {
        String s = it.next();
        int snum = varSet.get(s);
        if (snum >= 2) {
            c++;
            joinConf.set("input.double" + c, s);
            joinConf.setInt("input.double" + c + ".num", snum);
        }
    }
    joinConf.setInt("input.double", c);

    //v.writeBytes("\n");
    /*} catch (IOException e) {
       e.printStackTrace();
    }*/
    return lines;
}

From source file:hadoop.api.RecommenderJob.java

License:Apache License

/**
 * Calculate the co-occurrence matrix
 *
 * @param args          Information about the input path, numberOfColumns, similarityClassname, maxObservationsPerRow
 * @param numberOfUsers Number of Users
 * @return Similarities Per Item
 */
public int rowSimilarity(String[] args, int numberOfUsers) {
    try {
        prepareRecommender(args);
    } catch (IOException e) {
        e.printStackTrace();
    }

    try {
        numberOfUsers = HadoopUtil.readInt(new Path(prepPath, PreparePreferenceMatrixJob.NUM_USERS), getConf());
    } catch (IOException e) {
        e.printStackTrace();
    }

    int maxPrefsInItemSimilarity = Integer.parseInt(getOption("maxPrefsInItemSimilarity"));
    int maxSimilaritiesPerItem = Integer.parseInt(getOption("maxSimilaritiesPerItem"));
    String similarityClassname = getOption("similarityClassname");
    double threshold = hasOption("threshold") ? Double.parseDouble(getOption("threshold"))
            : RowSimilarityJob.NO_THRESHOLD;
    long randomSeed = hasOption("randomSeed") ? Long.parseLong(getOption("randomSeed"))
            : RowSimilarityJob.NO_FIXED_RANDOM_SEED;

    try {
        ToolRunner.run(getConf(), new RowSimilarityJob(), new String[] { "--input",
                new Path(prepPath, PreparePreferenceMatrixJob.RATING_MATRIX).toString(), "--output",
                new Path(prepPath, "similarityMatrix").toUri().toString(), "--numberOfColumns",
                String.valueOf(numberOfUsers), "--similarityClassname", similarityClassname,
                "--maxObservationsPerRow", String.valueOf(maxPrefsInItemSimilarity),
                "--maxObservationsPerColumn", String.valueOf(maxPrefsInItemSimilarity),
                "--maxSimilaritiesPerRow", String.valueOf(maxSimilaritiesPerItem), "--excludeSelfSimilarity",
                String.valueOf(Boolean.TRUE), "--threshold", String.valueOf(threshold), "--randomSeed",
                String.valueOf(randomSeed), "--tempDir", prepPath.toString() });
    } catch (Exception e) {
        e.printStackTrace();
    }

    // write out the similarity matrix if the user specified that behavior
    if (hasOption("outputPathForSimilarityMatrix")) {
        Path outputPathForSimilarityMatrix = new Path(getOption("outputPathForSimilarityMatrix"));

        Job outputSimilarityMatrix = null;
        try {
            outputSimilarityMatrix = prepareJob(getTempPath("similarityMatrix"), outputPathForSimilarityMatrix,
                    SequenceFileInputFormat.class, ItemSimilarityJob.MostSimilarItemPairsMapper.class,
                    EntityEntityWritable.class, DoubleWritable.class,
                    ItemSimilarityJob.MostSimilarItemPairsReducer.class, EntityEntityWritable.class,
                    DoubleWritable.class, TextOutputFormat.class);
        } catch (IOException e) {
            e.printStackTrace();
        }

        Configuration mostSimilarItemsConf = outputSimilarityMatrix.getConfiguration();
        mostSimilarItemsConf.set(ItemSimilarityJob.ITEM_ID_INDEX_PATH_STR,
                new Path(getTempPath(DEFAULT_PREPARE_PATH), PreparePreferenceMatrixJob.ITEMID_INDEX)
                        .toString());
        mostSimilarItemsConf.setInt(ItemSimilarityJob.MAX_SIMILARITIES_PER_ITEM, maxSimilaritiesPerItem);
        try {
            outputSimilarityMatrix.waitForCompletion(true);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
    }
    return maxSimilaritiesPerItem;
}

From source file:hadoop.api.RecommenderJob.java

License:Apache License

/**
 * Calculate the multiplication of the co-occurrence matrix by the user vectors
 *
 * @param args Information about the input path, partialMultiply, similarityClassname, maxObservationsPerRow
 * @return 0 on success, or -1 if the partialMultiply job fails
 */
public int multiplication(String[] args, String path1, String path2) {
    try {
        prepareRecommender(args);
    } catch (IOException e) {
        e.printStackTrace();
    }
    Path similarityMatrixPath = new Path(path1);
    Path partialMultiplyPath = new Path(prepPath, "partialMultiply");
    int maxPrefsPerUser = Integer.parseInt(getOption("maxPrefsPerUser"));
    String usersFile = getOption("usersFile");

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job partialMultiply = null;
        try {
            partialMultiply = new Job(getConf(), "partialMultiply");
        } catch (IOException e) {
            e.printStackTrace();
        }
        Configuration partialMultiplyConf = partialMultiply.getConfiguration();

        MultipleInputs.addInputPath(partialMultiply, similarityMatrixPath, SequenceFileInputFormat.class,
                SimilarityMatrixRowWrapperMapper.class);
        MultipleInputs.addInputPath(partialMultiply, new Path(path2), SequenceFileInputFormat.class,
                UserVectorSplitterMapper.class);
        partialMultiply.setJarByClass(org.apache.mahout.cf.taste.hadoop.item.ToVectorAndPrefReducer.class);
        partialMultiply.setMapOutputKeyClass(VarIntWritable.class);
        partialMultiply.setMapOutputValueClass(VectorOrPrefWritable.class);
        partialMultiply.setReducerClass(ToVectorAndPrefReducer.class);
        partialMultiply.setOutputFormatClass(SequenceFileOutputFormat.class);
        partialMultiply.setOutputKeyClass(VarIntWritable.class);
        partialMultiply.setOutputValueClass(VectorAndPrefsWritable.class);
        partialMultiplyConf.setBoolean("mapred.compress.map.output", true);

        partialMultiplyConf.set("mapred.output.dir", partialMultiplyPath.toString());

        if (usersFile != null) {
            partialMultiplyConf.set(UserVectorSplitterMapper.USERS_FILE, usersFile);
        }
        partialMultiplyConf.setInt(UserVectorSplitterMapper.MAX_PREFS_PER_USER_CONSIDERED, maxPrefsPerUser);

        boolean succeeded = false;
        try {
            succeeded = partialMultiply.waitForCompletion(true);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
        if (!succeeded) {
            return -1;
        }
    }
    return 0;
}