List of usage examples for org.apache.hadoop.conf.Configuration.setInt
public void setInt(String name, int value)
Set the value of the name property to an int.
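Before the source-file examples, here is a minimal, self-contained sketch of the typical pattern: setInt stores an int under a property name, and getInt reads it back with a default. The property name example.max.retries is invented purely for illustration.

import org.apache.hadoop.conf.Configuration;

public class SetIntExample {
    // Hypothetical property name, used only for this illustration.
    private static final String MAX_RETRIES = "example.max.retries";

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Store an int value under the property name.
        conf.setInt(MAX_RETRIES, 5);
        // Read it back; the second argument is the default returned if the property is unset.
        int maxRetries = conf.getInt(MAX_RETRIES, 3);
        System.out.println("max retries = " + maxRetries);
    }
}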
From source file:format.OverlapLengthInputFormat.java
License:Apache License
/**
 * Set the overlapping portion of adjacent records.
 * @param conf configuration
 * @param overlapLength the overlapping portion of adjacent records
 */
public static void setOverlapLength(Configuration conf, int overlapLength) {
    conf.setInt(OVERLAP_LENGTH, overlapLength);
}
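This example only covers the setter side; a reader on the task side would typically use the matching getInt with a default. A possible companion getter (hypothetical, assuming the same OVERLAP_LENGTH key and 0 as an acceptable default) might look like:

/** Get the overlapping portion of adjacent records (hypothetical companion to setOverlapLength). */
public static int getOverlapLength(Configuration conf) {
    // Returns 0 when the property has not been set.
    return conf.getInt(OVERLAP_LENGTH, 0);
}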
From source file:FormatStorage.Head.java
License:Open Source License
public void toJobConf(Configuration conf) {
    conf.setInt(ConstVar.HD_var, var);
    conf.setInt(ConstVar.HD_compress, compress);
    conf.setInt(ConstVar.HD_compressStyle, compressStyle);
    conf.setInt(ConstVar.HD_primaryIndex, primaryIndex);
    conf.setInt(ConstVar.HD_encode, encode);
    conf.setInt(ConstVar.HD_encodeStyle, encodeStyle);
    if (key != null) {
        conf.set(ConstVar.HD_key, key);
    }
    if (fieldMap != null) {
        short fieldNum = fieldMap.fieldNum();
        String[] fieldStrings = new String[fieldNum];
        Set<Short> keySet = fieldMap.fields.keySet();
        Iterator<Short> iterator = keySet.iterator();
        int i = 0;
        while (iterator.hasNext()) {
            Field field = fieldMap.fields.get(iterator.next());
            fieldStrings[i++] = field.type + ConstVar.RecordSplit + field.len + ConstVar.RecordSplit
                    + field.index;
        }
        conf.setStrings(ConstVar.HD_fieldMap, fieldStrings);
    }
}
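toJobConf only writes the header fields into the Configuration; a consumer running inside a map or reduce task would read them back with the matching getters. A rough sketch of the task-side read (assuming the same ConstVar keys; field types are simplified to int for illustration):

// Hypothetical task-side reader for a few of the values written by toJobConf above.
public static void readHead(org.apache.hadoop.conf.Configuration conf) {
    int var = conf.getInt(ConstVar.HD_var, 0);
    int compress = conf.getInt(ConstVar.HD_compress, 0);
    String key = conf.get(ConstVar.HD_key);                         // null if it was never set
    String[] fieldStrings = conf.getStrings(ConstVar.HD_fieldMap);  // null if no field map was written
    // Parsing fieldStrings back into Field objects is omitted here.
}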
From source file:FormatStorage1.IHead.java
License:Open Source License
public void toJobConf(Configuration conf) {
    conf.setInt(ConstVar.HD_magic, magic);
    conf.setInt(ConstVar.HD_var, var);
    conf.setInt(ConstVar.HD_ver, ver);
    conf.setInt(ConstVar.HD_lineindex, lineindex);
    conf.setInt(ConstVar.HD_primaryIndex, primaryIndex);
    conf.setInt(ConstVar.HD_compress, compress);
    conf.setInt(ConstVar.HD_compressStyle, compressStyle);
    conf.setInt(ConstVar.HD_encode, encode);
    conf.setInt(ConstVar.HD_encodeStyle, encodeStyle);
    if (fieldMap != null) {
        int fieldNum = fieldMap.fieldtypes().size();
        String[] fieldStrings = new String[fieldNum];
        int i = 0;
        for (IRecord.IFType ft : this.fieldMap.fieldtypes().values()) {
            fieldStrings[i++] = ft.type() + ConstVar.RecordSplit + ft.len() + ConstVar.RecordSplit + ft.idx();
        }
        conf.setStrings(ConstVar.HD_fieldMap, fieldStrings);
    }
    if (udi != null && udi.infos.size() > 0) {
        String[] udistrs = new String[udi.infos.size()];
        int i = 0;
        for (Map.Entry<Integer, String> en : udi.infos.entrySet()) {
            udistrs[i++] = en.getKey() + ConstVar.RecordSplit + en.getValue();
        }
        conf.setStrings(ConstVar.HD_udi, udistrs);
    }
}
From source file:gobblin.cluster.GobblinHelixTaskTest.java
License:Apache License
@BeforeClass
public void setUp() throws IOException {
    Configuration configuration = new Configuration();
    configuration.setInt(ConfigurationKeys.TASK_EXECUTOR_THREADPOOL_SIZE_KEY, 1);
    this.taskExecutor = new TaskExecutor(configuration);

    this.helixManager = Mockito.mock(HelixManager.class);
    Mockito.when(this.helixManager.getInstanceName()).thenReturn(GobblinHelixTaskTest.class.getSimpleName());
    this.taskStateTracker = new GobblinHelixTaskStateTracker(new Properties(), this.helixManager);

    this.localFs = FileSystem.getLocal(configuration);
    this.appWorkDir = new Path(GobblinHelixTaskTest.class.getSimpleName());
    this.taskOutputDir = new Path(this.appWorkDir, "output");
}
From source file:gr.ntua.h2rdf.byteImport.HexastoreBulkImport.java
License:Open Source License
public Job createSubmittableJob(String[] args) {
    TABLE_NAME = args[1];
    Job job = null;
    try {
        Configuration conf = new Configuration();
        conf.addResource("hbase-default.xml");
        conf.addResource("hbase-site.xml");
        job = new Job(conf, NAME);
        job.setJarByClass(HexastoreBulkImport.class);
        job.setMapperClass(TotalOrderPrep.Map.class);
        job.setReducerClass(Reduce.class); //sampler.HamaReducer.class);
        job.setCombinerClass(Combiner.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(ImmutableBytesWritable.class);
        job.setPartitionerClass(TotalOrderPartitioner.class);
        //TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("/user/npapa/"+regions+"partitions/part-r-00000"));
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("partitions/part-r-00000"));
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(HFileOutputFormat.class);
        Path out = new Path("out");
        FileOutputFormat.setOutputPath(job, out);
        FileSystem fs;
        try {
            fs = FileSystem.get(conf);
            if (fs.exists(out)) {
                fs.delete(out, true);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        //c.addResource(new Path("/0/arcomemDB/hadoop-0.20.2-cdh3u3/conf/hbase-site.xml"));
        HBaseAdmin hadmin = new HBaseAdmin(conf);
        HTableDescriptor desc = new HTableDescriptor(TABLE_NAME + "_stats");
        HColumnDescriptor family = new HColumnDescriptor("size");
        desc.addFamily(family);
        conf.setInt("zookeeper.session.timeout", 600000);
        if (hadmin.tableExists(TABLE_NAME + "_stats")) {
            //hadmin.disableTable(TABLE_NAME+"_stats");
            //hadmin.deleteTable(TABLE_NAME+"_stats");
        } else {
            hadmin.createTable(desc);
        }

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        //job.getConfiguration().setInt("mapred.map.tasks", 18);
        job.getConfiguration().set("h2rdf.tableName", TABLE_NAME);
        job.getConfiguration().setInt("mapred.reduce.tasks", (int) TotalOrderPrep.regions);
        job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
        job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
        job.getConfiguration().setInt("io.sort.mb", 100);
        job.getConfiguration().setInt("io.file.buffer.size", 131072);
        job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);
        //job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
        job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);
        //job.getConfiguration().setInt("io.sort.mb", 100);
    } catch (IOException e2) {
        e2.printStackTrace();
    }
    return job;
}
From source file:gr.ntua.h2rdf.loadTriples.SortIds.java
License:Apache License
public static void loadHFiles(String[] args) throws Exception {
    Configuration conf = new Configuration();
    HBaseAdmin hadmin = new HBaseAdmin(conf);
    Path hfofDir = new Path(args[1]);
    FileSystem fs = hfofDir.getFileSystem(conf);
    //if (!fs.exists(hfofDir)) {
    //    throw new FileNotFoundException("HFileOutputFormat dir " + hfofDir + " not found");
    //}
    FileStatus[] familyDirStatuses = fs.listStatus(hfofDir);
    //if (familyDirStatuses == null) {
    //    throw new FileNotFoundException("No families found in " + hfofDir);
    //}
    int length = 0;
    byte[][] splits = new byte[18000][];
    for (FileStatus stat : familyDirStatuses) {
        if (!stat.isDir()) {
            continue;
        }
        Path familyDir = stat.getPath();
        // Skip _logs, etc
        if (familyDir.getName().startsWith("_"))
            continue;
        //byte[] family = familyDir.getName().getBytes();
        Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir));
        for (Path hfile : hfiles) {
            if (hfile.getName().startsWith("_"))
                continue;
            HFile.Reader hfr = HFile.createReader(fs, hfile, new CacheConfig(conf));
            //HFile.Reader hfr = new HFile.Reader(fs, hfile, null, false);
            final byte[] first;
            try {
                hfr.loadFileInfo();
                first = hfr.getFirstRowKey();
            } finally {
                hfr.close();
            }
            splits[length] = first.clone();
            length++;
        }
    }
    //System.out.println(length);
    byte[][] splits1 = new byte[length][];
    for (int i = 0; i < splits1.length; i++) {
        splits1[i] = splits[i];
    }
    Arrays.sort(splits1, Bytes.BYTES_COMPARATOR);

    //HTableDescriptor desc = new HTableDescriptor("H2RDF");
    HTableDescriptor desc = new HTableDescriptor(TABLE_NAME + "_Index");
    HColumnDescriptor family = new HColumnDescriptor("1");
    family.setCompressionType(Algorithm.GZ);
    desc.addFamily(family);
    HColumnDescriptor family2 = new HColumnDescriptor("2");
    family2.setCompressionType(Algorithm.GZ);
    desc.addFamily(family2);
    //for (int i = 0; i < splits.length; i++) {
    //    System.out.println(Bytes.toStringBinary(splits[i]));
    //}
    conf.setInt("zookeeper.session.timeout", 600000);
    if (hadmin.tableExists(TABLE_NAME + "_Index")) {
        //hadmin.disableTable(TABLE_NAME);
        //hadmin.deleteTable(TABLE_NAME);
    } else {
        hadmin.createTable(desc, splits1);
    }
    //hadmin.createTable(desc);
    String[] args1 = new String[2];
    args1[0] = args[1];
    args1[1] = TABLE_NAME + "_Index";
    //args1[1]="new2";
    ToolRunner.run(new LoadIncrementalHFiles(HBaseConfiguration.create()), args1);
}
From source file:gr.ntua.h2rdf.loadTriples.TranslateAndImport.java
License:Apache License
private void loadHFiles() throws Exception {
    Configuration conf = HBaseConfiguration.create();
    conf.addResource("hbase-default.xml");
    conf.addResource("hbase-site.xml");
    HBaseAdmin hadmin = new HBaseAdmin(conf);
    Path hfofDir = new Path("out/I");
    FileSystem fs = hfofDir.getFileSystem(conf);
    //if (!fs.exists(hfofDir)) {
    //    throw new FileNotFoundException("HFileOutputFormat dir " + hfofDir + " not found");
    //}
    //FileStatus[] familyDirStatuses = fs.listStatus(hfofDir);
    //if (familyDirStatuses == null) {
    //    throw new FileNotFoundException("No families found in " + hfofDir);
    //}
    int length = 0;
    byte[][] splits = new byte[18000][];
    Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(hfofDir));
    for (Path hfile : hfiles) {
        if (hfile.getName().startsWith("_"))
            continue;
        HFile.Reader hfr = HFile.createReader(fs, hfile, new CacheConfig(conf));
        //HFile.Reader hfr = new HFile.Reader(fs, hfile, null, false);
        final byte[] first;
        try {
            hfr.loadFileInfo();
            first = hfr.getFirstRowKey();
        } finally {
            hfr.close();
        }
        //System.out.println("out/I/" + hfile.getName() + " \t " + Bytes.toStringBinary(first));
        splits[length] = first.clone();
        length++;
    }
    byte[][] splits1 = new byte[length][];
    for (int i = 0; i < splits1.length; i++) {
        splits1[i] = splits[i];
    }
    Arrays.sort(splits1, Bytes.BYTES_COMPARATOR);

    HTableDescriptor desc = new HTableDescriptor(TABLE_NAME);
    HColumnDescriptor family = new HColumnDescriptor("I");
    family.setCompressionType(Algorithm.SNAPPY);
    desc.addFamily(family);
    family = new HColumnDescriptor("S");
    family.setCompressionType(Algorithm.SNAPPY);
    desc.addFamily(family);
    family = new HColumnDescriptor("T");
    family.setCompressionType(Algorithm.SNAPPY);
    desc.addFamily(family);
    //family = new HColumnDescriptor("C");
    //desc.addFamily(family);
    //for (int i = 0; i < splits.length; i++) {
    //    System.out.println(Bytes.toStringBinary(splits[i]));
    //}
    conf.setInt("zookeeper.session.timeout", 600000);
    if (hadmin.tableExists(TABLE_NAME)) {
        //hadmin.disableTable(TABLE_NAME);
        //hadmin.deleteTable(TABLE_NAME);
    } else {
        hadmin.createTable(desc, splits1);
    }
    //hadmin.createTable(desc);
    String[] args1 = new String[2];
    args1[0] = "out";
    args1[1] = TABLE_NAME;
    //args1[1]="new2";
    ToolRunner.run(new LoadIncrementalHFiles(HBaseConfiguration.create()), args1);
}
From source file:gr.ntua.h2rdf.partialJoin.JoinPlaner.java
License:Open Source License
private static String[] printJoinV(Configuration joinConf, String ret) {
    HashMap<String, Integer> varSet = new HashMap<String, Integer>();
    String[] lines = new String[join_files.length];
    for (int i = 0; i < join_files.length; i++) {
        lines[i] = "{";
    }
    //String joinpat = query.getResultVars().get(query.getResultVars().size()-1).toString();
    //try {
    joinConf.set("input.joinvars", ret);
    //v.writeBytes(ret+"\n");
    String patId = "";
    for (int i = 0; i < join_files.length; i++) {
        String jf = join_files[i];
        //if(!varSet.contains(join_files_vars[i])){
        StringTokenizer t = new StringTokenizer(join_files_vars2[i]);
        while (t.hasMoreTokens()) {
            String s = t.nextToken();
            if (!ret.contains(s)) {
                if (!varSet.containsKey(s)) {
                    varSet.put(s, 1);
                } else {
                    Integer temp = varSet.get(s);
                    temp++;
                    varSet.put(s, temp);
                }
            }
        }
        if (jf.contains("BGP:") && joinpat.contains(jf.split("BGP:")[1])) {
            String id = "";
            if (jf.contains("BGP:")) {
                id += "P";
                id += jf.split("BGP:")[1];
                lines[i] = join_files_vars[i] + "{" + jf;
            } else {
                id += "J";
                id += jf.split("_")[2];
                lines[i] += jf;
            }
            patId += id + " " + join_files_vars[i] + " $$ ";
            //v.writeBytes(id+" "+join_files_vars[i]+" $$ ");
        } else {
            String id = "";
            if (jf.contains("BGP:")) {
                lines[i] = join_files_vars[i] + "|" + jf;
            } else {
                id += "J";
                id += jf.split("_")[2];
                lines[i] += jf;
                patId += id + " " + join_files_vars[i] + " $$ ";
                //v.writeBytes(id+" "+join_files_vars[i]+" $$ ");
            }
        }
    }
    joinConf.set("input.patId", patId);
    Iterator<String> it = varSet.keySet().iterator();
    int c = 0;
    while (it.hasNext()) {
        String s = it.next();
        int snum = varSet.get(s);
        if (snum >= 2) {
            c++;
            joinConf.set("input.double" + c, s);
            joinConf.setInt("input.double" + c + ".num", snum);
        }
    }
    joinConf.setInt("input.double", c);
    //v.writeBytes("\n");
    /*} catch (IOException e) {
        e.printStackTrace();
    }*/
    return lines;
}
From source file:hadoop.api.RecommenderJob.java
License:Apache License
/**
 * Calculate the co-occurrence matrix
 *
 * @param args          Information about the input path, numberOfColumns, similarityClassname, maxObservationsPerRow
 * @param numberOfUsers Number of Users
 * @return Similarities Per Item
 */
public int rowSimilarity(String[] args, int numberOfUsers) {
    try {
        prepareRecommender(args);
    } catch (IOException e) {
        e.printStackTrace();
    }
    try {
        numberOfUsers = HadoopUtil.readInt(new Path(prepPath, PreparePreferenceMatrixJob.NUM_USERS), getConf());
    } catch (IOException e) {
        e.printStackTrace();
    }

    int maxPrefsInItemSimilarity = Integer.parseInt(getOption("maxPrefsInItemSimilarity"));
    int maxSimilaritiesPerItem = Integer.parseInt(getOption("maxSimilaritiesPerItem"));
    String similarityClassname = getOption("similarityClassname");
    double threshold = hasOption("threshold") ? Double.parseDouble(getOption("threshold"))
            : RowSimilarityJob.NO_THRESHOLD;
    long randomSeed = hasOption("randomSeed") ? Long.parseLong(getOption("randomSeed"))
            : RowSimilarityJob.NO_FIXED_RANDOM_SEED;

    try {
        ToolRunner.run(getConf(), new RowSimilarityJob(), new String[] {
                "--input", new Path(prepPath, PreparePreferenceMatrixJob.RATING_MATRIX).toString(),
                "--output", new Path(prepPath, "similarityMatrix").toUri().toString(),
                "--numberOfColumns", String.valueOf(numberOfUsers),
                "--similarityClassname", similarityClassname,
                "--maxObservationsPerRow", String.valueOf(maxPrefsInItemSimilarity),
                "--maxObservationsPerColumn", String.valueOf(maxPrefsInItemSimilarity),
                "--maxSimilaritiesPerRow", String.valueOf(maxSimilaritiesPerItem),
                "--excludeSelfSimilarity", String.valueOf(Boolean.TRUE),
                "--threshold", String.valueOf(threshold),
                "--randomSeed", String.valueOf(randomSeed),
                "--tempDir", prepPath.toString() });
    } catch (Exception e) {
        e.printStackTrace();
    }

    // write out the similarity matrix if the user specified that behavior
    if (hasOption("outputPathForSimilarityMatrix")) {
        Path outputPathForSimilarityMatrix = new Path(getOption("outputPathForSimilarityMatrix"));
        Job outputSimilarityMatrix = null;
        try {
            outputSimilarityMatrix = prepareJob(getTempPath("similarityMatrix"), outputPathForSimilarityMatrix,
                    SequenceFileInputFormat.class, ItemSimilarityJob.MostSimilarItemPairsMapper.class,
                    EntityEntityWritable.class, DoubleWritable.class,
                    ItemSimilarityJob.MostSimilarItemPairsReducer.class, EntityEntityWritable.class,
                    DoubleWritable.class, TextOutputFormat.class);
        } catch (IOException e) {
            e.printStackTrace();
        }
        Configuration mostSimilarItemsConf = outputSimilarityMatrix.getConfiguration();
        mostSimilarItemsConf.set(ItemSimilarityJob.ITEM_ID_INDEX_PATH_STR,
                new Path(getTempPath(DEFAULT_PREPARE_PATH), PreparePreferenceMatrixJob.ITEMID_INDEX).toString());
        mostSimilarItemsConf.setInt(ItemSimilarityJob.MAX_SIMILARITIES_PER_ITEM, maxSimilaritiesPerItem);
        try {
            outputSimilarityMatrix.waitForCompletion(true);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
    }
    return maxSimilaritiesPerItem;
}
From source file:hadoop.api.RecommenderJob.java
License:Apache License
/**
 * Calculate the multiplication of the co-occurrence matrix by the user vectors
 *
 * @param args Information about the input path, partialMultiply, similarityClassname, maxObservationsPerRow
 * @return 0
 */
public int multiplication(String[] args, String path1, String path2) {
    try {
        prepareRecommender(args);
    } catch (IOException e) {
        e.printStackTrace();
    }
    Path similarityMatrixPath = new Path(path1);
    Path partialMultiplyPath = new Path(prepPath, "partialMultiply");
    int maxPrefsPerUser = Integer.parseInt(getOption("maxPrefsPerUser"));
    String usersFile = getOption("usersFile");

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job partialMultiply = null;
        try {
            partialMultiply = new Job(getConf(), "partialMultiply");
        } catch (IOException e) {
            e.printStackTrace();
        }
        Configuration partialMultiplyConf = partialMultiply.getConfiguration();
        MultipleInputs.addInputPath(partialMultiply, similarityMatrixPath, SequenceFileInputFormat.class,
                SimilarityMatrixRowWrapperMapper.class);
        MultipleInputs.addInputPath(partialMultiply, new Path(path2), SequenceFileInputFormat.class,
                UserVectorSplitterMapper.class);
        partialMultiply.setJarByClass(org.apache.mahout.cf.taste.hadoop.item.ToVectorAndPrefReducer.class);
        partialMultiply.setMapOutputKeyClass(VarIntWritable.class);
        partialMultiply.setMapOutputValueClass(VectorOrPrefWritable.class);
        partialMultiply.setReducerClass(ToVectorAndPrefReducer.class);
        partialMultiply.setOutputFormatClass(SequenceFileOutputFormat.class);
        partialMultiply.setOutputKeyClass(VarIntWritable.class);
        partialMultiply.setOutputValueClass(VectorAndPrefsWritable.class);
        partialMultiplyConf.setBoolean("mapred.compress.map.output", true);
        partialMultiplyConf.set("mapred.output.dir", partialMultiplyPath.toString());
        if (usersFile != null) {
            partialMultiplyConf.set(UserVectorSplitterMapper.USERS_FILE, usersFile);
        }
        partialMultiplyConf.setInt(UserVectorSplitterMapper.MAX_PREFS_PER_USER_CONSIDERED, maxPrefsPerUser);

        boolean succeeded = false;
        try {
            succeeded = partialMultiply.waitForCompletion(true);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
        if (!succeeded) {
            return -1;
        }
    }
    return 0;
}