List of usage examples for org.apache.hadoop.conf.Configuration.getLong

public long getLong(String name, long defaultValue)

Gets the value of the name property as a long. If no such property exists, the provided defaultValue is returned; if the stored value is not a valid long, an error is thrown.

Parameter: name - the property name.
Parameter: defaultValue - the value to return when the property is unset.
Returns: the property value as a long.
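Before the project examples below, here is a minimal, self-contained sketch of the typical call pattern. The property name my.sample.block.bytes and its default value are invented for illustration and do not come from any of the projects listed.

import org.apache.hadoop.conf.Configuration;

public class GetLongDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Property not set yet: getLong falls back to the supplied default.
        // "my.sample.block.bytes" is a hypothetical key used only for this demo.
        long blockBytes = conf.getLong("my.sample.block.bytes", 64L * 1024 * 1024);
        System.out.println("block bytes (default) = " + blockBytes);

        // Once the property is set, getLong parses and returns the stored value.
        conf.setLong("my.sample.block.bytes", 128L * 1024 * 1024);
        System.out.println("block bytes (set)     = " + conf.getLong("my.sample.block.bytes", 0L));
    }
}

Most of the examples below follow this same pattern: a well-known configuration key plus a sensible fallback, so the job can still run when the key is absent.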
From source file:org.mrgeo.hdfs.ingest.format.IngestImageSplittingInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(final JobContext context) throws IOException {
    final List<InputSplit> splits = new LinkedList<InputSplit>();
    // mapred.input.dir
    final Path[] inputs = FileInputFormat.getInputPaths(context);
    final Configuration conf = context.getConfiguration();

    int tilesize = -1;
    try {
        //metadata = HadoopUtils.getMetadata(conf);
        Map<String, MrsImagePyramidMetadata> meta = HadoopUtils.getMetadata(context.getConfiguration());
        if (!meta.isEmpty()) {
            MrsImagePyramidMetadata metadata = meta.values().iterator().next();
            tilesize = metadata.getTilesize();
        }
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    }

    if (tilesize < 0) {
        tilesize = conf.getInt("tilesize", -1);
        if (tilesize < 1) {
            throw new MrsImageException(
                "Error, no \"tilesize\" or \"metadata\" parameter in configuration, tilesize needs to be calculated & set before map/reduce");
        }
    }

    final int zoomlevel = conf.getInt("zoomlevel", -1);

    // get the tilesize in bytes (default to 3 band, 1 byte per band)
    final long tilebytes = conf.getLong("tilebytes", tilesize * tilesize * 3 * 1);

    if (zoomlevel < 1) {
        throw new MrsImageException(
            "Error, no \"zoomlevel\" parameter in configuration, zoomlevel needs to be calculated & set before map/reduce");
    }

    // get the spill buffer percent, then take 95% of it for extra padding...
    double spillpct = conf.getFloat("io.sort.spill.percent", (float) 0.8) * 0.95;
    long spillsize = (long) (conf.getFloat("io.sort.mb", 200) * spillpct) * 1024 * 1024;
    log.info("Spill size for splitting is: " + spillsize + "b");

    Map<String, Bounds> lookup = new HashMap<>();
    final String adhocname = conf.get(IngestImageDriver.INGEST_BOUNDS_LOCATION, null);
    if (adhocname != null) {
        AdHocDataProvider dp = DataProviderFactory.getAdHocDataProvider(adhocname,
            DataProviderFactory.AccessMode.READ, conf);
        InputStream is = dp.get(IngestImageDriver.INGEST_BOUNDS_FILE);
        BufferedReader reader = new BufferedReader(new InputStreamReader(is));

        String line;
        while ((line = reader.readLine()) != null) {
            String[] data = line.split("\\|");
            if (data.length == 2) {
                lookup.put(data[0], Bounds.fromDelimitedString(data[1]));
            }
        }
        is.close();
    }

    //log.info("Creating splits for: " + output.toString());
    for (final Path input : inputs) {
        final FileSystem fs = HadoopFileUtils.getFileSystem(conf, input);

        LongRectangle bounds = null;
        if (lookup.containsKey(input.toString())) {
            Bounds b = lookup.get(input.toString());
            bounds = TMSUtils.boundsToTile(b.getTMSBounds(), zoomlevel, tilesize).toLongRectangle();
        } else {
            log.info("  reading: " + input.toString());
            log.info("  zoomlevel: " + zoomlevel);

            final AbstractGridCoverage2DReader reader = GeotoolsRasterUtils.openImage(input.toString());
            if (reader != null) {
                try {
                    bounds = GeotoolsRasterUtils.calculateTiles(reader, tilesize, zoomlevel);
                } finally {
                    try {
                        GeotoolsRasterUtils.closeStreamFromReader(reader);
                    } catch (Exception e) {
                        e.printStackTrace();
                        throw new IOException(e);
                    }
                }
            }
        }

        if (bounds != null) {
            final long minTx = bounds.getMinX();
            final long maxTx = bounds.getMaxX();
            final long minTy = bounds.getMinY();
            final long maxTy = bounds.getMaxY();

            final long width = bounds.getWidth();
            final long height = bounds.getHeight();
            final long totaltiles = width * height;

            final FileStatus status = fs.getFileStatus(input);

            // for now, we'll just use the 1st block location for the split.
            // we can get more sophisticated later...
            final BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, 0);

            String location = null;
            if (blocks.length > 0) {
                final String hosts[] = blocks[0].getHosts();
                if (hosts.length > 0) {
                    location = hosts[0];
                }
            }

            // long filelen = status.getLen();
            final long totalbytes = totaltiles * tilebytes;

            // if uncompressed tile sizes are greater than the spillsize, break it
            // into pieces
            if (totalbytes > spillsize) {
                final long numsplits = (totalbytes / spillsize) + 1;

                final long splitrange = (totaltiles / numsplits);
                long leftovers = totaltiles - (numsplits * splitrange);

                long start = 0;
                long end = 0;
                for (int i = 0; i < numsplits; i++) {
                    end = start + splitrange;
                    if (leftovers > 0) {
                        end++;
                        leftovers--;
                    }

                    final long sy = (start / width);
                    final long sx = (start - (sy * width));

                    // since the tile range is inclusive, calculate with end-1
                    final long ey = ((end - 1) / width);
                    final long ex = ((end - 1) - (ey * width));

                    // System.out.println("start: " + start + " end: " + end);
                    // System.out.println("  sx: " + sx + " sy: " + sy);
                    // System.out.println("  ex: " + ex + " ey: " + ey);

                    splits.add(new IngestImageSplit(input.toString(), minTx + sx, minTx + ex, minTy + sy,
                        minTy + ey, (end - start), bounds, zoomlevel, tilesize, location));

                    start = end;
                }
            } else {
                splits.add(new IngestImageSplit(input.toString(), minTx, maxTx, minTy, maxTy,
                    (maxTx + 1 - minTx) * (maxTy + 1 - minTy), bounds, zoomlevel, tilesize, location));
            }
        }
    }

    return splits;
}
From source file:org.pingles.cascading.cassandra.hadoop.ColumnFamilyRecordWriter.java
License:Apache License
ColumnFamilyRecordWriter(Configuration conf) throws IOException {
    this.conf = conf;
    this.ringCache = new RingCache(ConfigHelper.getOutputKeyspace(conf), ConfigHelper.getPartitioner(conf),
        ConfigHelper.getInitialAddress(conf), ConfigHelper.getRpcPort(conf));
    this.queueSize = conf.getInt(ColumnFamilyOutputFormat.QUEUE_SIZE,
        32 * Runtime.getRuntime().availableProcessors());
    this.clients = new HashMap<Range, RangeClient>();
    batchThreshold = conf.getLong(ColumnFamilyOutputFormat.BATCH_THRESHOLD, 32);
    consistencyLevel = ConsistencyLevel.valueOf(ConfigHelper.getWriteConsistencyLevel(conf));
}
From source file:org.smartfrog.services.hadoop.mapreduce.terasort.TeraGenJob.java
License:Apache License
static long getNumberOfRows(Configuration job) {
    return job.getLong(TeraConstants.TERASORT_NUM_ROWS, 0);
}
From source file:org.testies.BypassOperations.java
License:Apache License
public FileSKVWriter openWriter(String file, FileSystem fs, Configuration conf, AccumuloConfiguration acuconf)
        throws IOException {
    int hrep = conf.getInt("dfs.replication", -1);
    int trep = acuconf.getCount(Property.TABLE_FILE_REPLICATION);
    int rep = hrep;
    if (trep > 0 && trep != hrep) {
        rep = trep;
    }
    long hblock = conf.getLong("dfs.block.size", 1 << 26);
    long tblock = acuconf.getMemoryInBytes(Property.TABLE_FILE_BLOCK_SIZE);
    long block = hblock;
    if (tblock > 0)
        block = tblock;
    int bufferSize = conf.getInt("io.file.buffer.size", 4096);

    long blockSize = acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE);
    long indexBlockSize = acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX);

    String compression = acuconf.get(Property.TABLE_FILE_COMPRESSION_TYPE);

    CachableBlockFile.Writer _cbw = new CachableBlockFile.Writer(
        fs.create(new Path(file), false, bufferSize, (short) rep, block), compression, conf);
    org.testies.RFile.Writer writer = new RFile.Writer(_cbw, (int) blockSize, (int) indexBlockSize);
    return writer;
}
From source file:org.unigram.likelike.lsh.GetRecommendationsReducer.java
License:Apache License
/**
 * setup.
 *
 * @param context contains Configuration object to get settings
 */
@Override
public final void setup(final Context context) {
    Configuration jc = null;

    if (context == null) {
        jc = new Configuration();
    } else {
        jc = context.getConfiguration();
    }

    this.maxOutputSize = jc.getLong(LikelikeConstants.MAX_OUTPUT_SIZE,
        LikelikeConstants.DEFAULT_MAX_OUTPUT_SIZE);

    this.comparator = new Comparator<Object>() {
        public int compare(final Object o1, final Object o2) {
            Map.Entry e1 = (Map.Entry) o1;
            Map.Entry e2 = (Map.Entry) o2;
            Double e1Value = (Double) e1.getValue();
            Double e2Value = (Double) e2.getValue();
            return (e2Value.compareTo(e1Value));
        }
    };

    // create writer
    String writerClassName = LikelikeConstants.DEFAULT_LIKELIKE_OUTPUT_WRITER;
    try {
        writerClassName = jc.get(LikelikeConstants.LIKELIKE_OUTPUT_WRITER,
            LikelikeConstants.DEFAULT_LIKELIKE_OUTPUT_WRITER);
        Class<? extends IWriter> extractorClass = Class.forName(writerClassName).asSubclass(IWriter.class);
        Constructor<? extends IWriter> constructor = extractorClass.getConstructor(Configuration.class);
        this.writer = constructor.newInstance(jc);
    } catch (NoSuchMethodException nsme) {
        throw new RuntimeException(nsme);
    } catch (ClassNotFoundException cnfe) {
        throw new RuntimeException(cnfe);
    } catch (InstantiationException ie) {
        throw new RuntimeException(ie);
    } catch (IllegalAccessException iae) {
        throw new RuntimeException(iae);
    } catch (InvocationTargetException ite) {
        throw new RuntimeException(ite.getCause());
    }
}
From source file:org.unigram.likelike.lsh.LSHRecommendations.java
License:Apache License
/**
 * Add the configuration information from the result of
 * extract candidates to conf.
 *
 * @param counters contains counter
 * @param conf configuration
 */
protected void setResultConf(final Counters counters, final Configuration conf) {
    conf.setLong(LikelikeConstants.LIKELIKE_INPUT_RECORDS,
        counters.findCounter(LikelikeConstants.COUNTER_GROUP, "MAP_INPUT_RECORDS").getValue());
    this.logger.logInfo("The number of record is "
        + conf.getLong(LikelikeConstants.LIKELIKE_INPUT_RECORDS, -1));
}
From source file:org.unigram.likelike.lsh.SelectClustersReducer.java
License:Apache License
/**
 * setup.
 *
 * @param context -
 */
@Override
public final void setup(final Context context) {
    Configuration jc = context.getConfiguration();
    if (context == null || jc == null) {
        jc = new Configuration();
    }

    this.maximumClusterSize = jc.getLong(LikelikeConstants.MAX_CLUSTER_SIZE,
        LikelikeConstants.DEFAULT_MAX_CLUSTER_SIZE);

    this.minimumClusterSize = jc.getLong(LikelikeConstants.MIN_CLUSTER_SIZE,
        LikelikeConstants.DEFAULT_MIN_CLUSTER_SIZE);
}
From source file:org.voltdb.hadoop.VoltConfiguration.java
License:Open Source License
/**
 * Reads volt specific configuration parameters from the
 * given {@linkplain JobConf} job configuration
 *
 * @param conf job configuration
 */
public VoltConfiguration(Configuration conf) {
    this(new Config(conf.get(TABLENAME_PROP), conf.getStrings(HOSTNAMES_PROP, new String[] {}),
        conf.get(USERNAME_PROP), conf.get(PASSWORD_PROP), conf.getInt(BATCHSIZE_PROP, BATCHSIZE_DFLT),
        conf.getLong(CLIENT_TIMEOUT_PROP, TIMEOUT_DFLT),
        conf.getInt(BULKLOADER_MAX_ERRORS_PROP, FaultCollector.MAXFAULTS),
        conf.getBoolean(BULKLOADER_UPSERT_PROP, false)));
}
From source file:parquet.hadoop.ParquetInputFormat.java
License:Apache License
/**
 * @param configuration the configuration to connect to the file system
 * @param footers the footers of the files to read
 * @return the splits for the footers
 * @throws IOException
 */
public List<ParquetInputSplit> getSplits(Configuration configuration, List<Footer> footers) throws IOException {
    final long maxSplitSize = configuration.getLong("mapred.max.split.size", Long.MAX_VALUE);
    final long minSplitSize = Math.max(getFormatMinSplitSize(),
        configuration.getLong("mapred.min.split.size", 0L));
    if (maxSplitSize < 0 || minSplitSize < 0) {
        throw new ParquetDecodingException("maxSplitSize or minSplitSize should not be negative: maxSplitSize = "
            + maxSplitSize + "; minSplitSize = " + minSplitSize);
    }
    List<ParquetInputSplit> splits = new ArrayList<ParquetInputSplit>();
    GlobalMetaData globalMetaData = ParquetFileWriter.getGlobalMetaData(footers,
        configuration.getBoolean(STRICT_TYPE_CHECKING, true));
    ReadContext readContext = getReadSupport(configuration).init(
        new InitContext(configuration, globalMetaData.getKeyValueMetaData(), globalMetaData.getSchema()));
    for (Footer footer : footers) {
        final Path file = footer.getFile();
        LOG.debug(file);
        FileSystem fs = file.getFileSystem(configuration);
        FileStatus fileStatus = fs.getFileStatus(file);
        ParquetMetadata parquetMetaData = footer.getParquetMetadata();
        List<BlockMetaData> blocks = parquetMetaData.getBlocks();
        BlockLocation[] fileBlockLocations = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
        splits.addAll(generateSplits(blocks, fileBlockLocations, fileStatus, parquetMetaData.getFileMetaData(),
            readContext.getRequestedSchema().toString(), readContext.getReadSupportMetadata(),
            minSplitSize, maxSplitSize));
    }
    return splits;
}
From source file:simsql.code_generator.MyPhysicalDatabase.java
License:Apache License
public TableStatistics<Record> loadTable(Relation loadMe, String inputFile, RuntimeParameter params,
        String sortAtt) throws IOException {

    // get the params.
    ExampleRuntimeParameter pp = (ExampleRuntimeParameter) params;
    Configuration conf = new Configuration();

    // get the DFS block size
    long dfsBlockSize = conf.getLong("dfs.blocksize", 1024 * 1024 * 128);

    // get the filesize and check.
    long fSize = inputFile.startsWith("hdfs:") ? Long.MAX_VALUE : inputFile.length();

    DataLoader dl;

    // get the position of the sorting attribute.
    int sortAttPos = -1;
    if (sortAtt != null) {
        int i = 0;
        for (simsql.compiler.Attribute a : loadMe.getAttributes()) {
            if (a.getName().equalsIgnoreCase(sortAtt)) {
                sortAttPos = i;
                break;
            }
            i++;
        }
    }

    // straight loader if (1) small relation; (2) not sorted and (3) not from HDFS.
    if (fSize < dfsBlockSize && sortAttPos == -1 && !inputFile.startsWith("hdfs:")) {
        dl = new HDFSLoader();
    } else {
        dl = new MRLoader();
        ((MRLoader) dl).setNumTasks(pp.getNumCPUs());
    }

    long newSize = dl.run(inputFile, getFileName(loadMe.getName()), getTypeCode(loadMe.getName()), loadMe,
        sortAttPos);

    System.out.println("Done! Loaded " + newSize + " raw bytes.");

    // save the file size.
    physicalInfo.get(loadMe.getName()).setSizeInBytes(newSize);

    // save the sorting attribute.
    physicalInfo.get(loadMe.getName()).setSortingAtt(sortAttPos);

    // set the num. atts
    physicalInfo.get(loadMe.getName()).setNumAtts(loadMe.getAttributes().size());

    // get the stats.
    try {
        HDFSTableStats coll = new HDFSTableStats();
        coll.load(getFileName(loadMe.getName()));
        return new TableStatisticsWrapper(coll);
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}