List of usage examples for org.apache.hadoop.fs.FileSystem.create
public FSDataOutputStream create(Path f) throws IOException
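Before the project-specific examples below, a minimal, self-contained sketch of the basic call pattern, assuming only a default Configuration on the classpath; the path and payload are illustrative. Note that create(Path) overwrites an existing file by default; the create(Path f, boolean overwrite) overload makes that choice explicit.

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();          // picks up core-site.xml / hdfs-site.xml if present
        FileSystem fs = FileSystem.get(conf);
        Path out = new Path("/tmp/hello.txt");             // hypothetical path
        try (FSDataOutputStream stream = fs.create(out)) { // overwrites the file if it already exists
            stream.write("hello hdfs\n".getBytes(StandardCharsets.UTF_8));
        }
    }
}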
From source file:com.ibm.bi.dml.runtime.io.WriterMatrixMarket.java
License:Open Source License
@Override
public void writeEmptyMatrixToHDFS(String fname, long rlen, long clen, int brlen, int bclen)
        throws IOException, DMLRuntimeException {
    Path path = new Path(fname);
    FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
    FSDataOutputStream writer = fs.create(path);
    writer.writeBytes("1 1 0");
    writer.close();
}
From source file:com.ibm.bi.dml.runtime.matrix.DataGenMR.java
License:Open Source License
/**
 * <p>Starts a Rand MapReduce job which will produce one or more random objects.</p>
 *
 * @param numRows number of rows for each random object
 * @param numCols number of columns for each random object
 * @param blockRowSize number of rows in a block for each random object
 * @param blockColSize number of columns in a block for each random object
 * @param minValue minimum of the random values for each random object
 * @param maxValue maximum of the random values for each random object
 * @param sparsity sparsity for each random object
 * @param pdf probability density function for each random object
 * @param replication file replication
 * @param inputs input file for each random object
 * @param outputs output file for each random object
 * @param outputInfos output information for each random object
 * @param instructionsInMapper instruction for each random object
 * @param resultIndexes result indexes for each random object
 * @return matrix characteristics for each random object
 * @throws Exception if an error occurred in the MapReduce phase
 */
public static JobReturn runJob(MRJobInstruction inst, String[] dataGenInstructions, String instructionsInMapper,
        String aggInstructionsInReducer, String otherInstructionsInReducer, int numReducers, int replication,
        byte[] resultIndexes, String dimsUnknownFilePrefix, String[] outputs, OutputInfo[] outputInfos)
        throws Exception {
    JobConf job = new JobConf(DataGenMR.class);
    job.setJobName("DataGen-MR");

    //whether use block representation or cell representation
    MRJobConfiguration.setMatrixValueClass(job, true);

    byte[] realIndexes = new byte[dataGenInstructions.length];
    for (byte b = 0; b < realIndexes.length; b++)
        realIndexes[b] = b;

    String[] inputs = new String[dataGenInstructions.length];
    InputInfo[] inputInfos = new InputInfo[dataGenInstructions.length];
    long[] rlens = new long[dataGenInstructions.length];
    long[] clens = new long[dataGenInstructions.length];
    int[] brlens = new int[dataGenInstructions.length];
    int[] bclens = new int[dataGenInstructions.length];

    FileSystem fs = FileSystem.get(job);
    String dataGenInsStr = "";
    int numblocks = 0;
    int maxbrlen = -1, maxbclen = -1;
    double maxsparsity = -1;

    for (int i = 0; i < dataGenInstructions.length; i++) {
        dataGenInsStr = dataGenInsStr + Lop.INSTRUCTION_DELIMITOR + dataGenInstructions[i];

        MRInstruction mrins = MRInstructionParser.parseSingleInstruction(dataGenInstructions[i]);
        MRINSTRUCTION_TYPE mrtype = mrins.getMRInstructionType();
        DataGenMRInstruction genInst = (DataGenMRInstruction) mrins;

        rlens[i] = genInst.getRows();
        clens[i] = genInst.getCols();
        brlens[i] = genInst.getRowsInBlock();
        bclens[i] = genInst.getColsInBlock();

        maxbrlen = Math.max(maxbrlen, brlens[i]);
        maxbclen = Math.max(maxbclen, bclens[i]);

        if (mrtype == MRINSTRUCTION_TYPE.Rand) {
            RandInstruction randInst = (RandInstruction) mrins;
            inputs[i] = genInst.getBaseDir() + "tmp" + _seqRandInput.getNextID() + ".randinput";
            maxsparsity = Math.max(maxsparsity, randInst.getSparsity());

            FSDataOutputStream fsOut = fs.create(new Path(inputs[i]));
            PrintWriter pw = new PrintWriter(fsOut);

            //for obj reuse and preventing repeated buffer re-allocations
            StringBuilder sb = new StringBuilder();

            //seed generation
            Well1024a bigrand = LibMatrixDatagen.setupSeedsForRand(randInst.getSeed());
            long[] nnz = LibMatrixDatagen.computeNNZperBlock(rlens[i], clens[i], brlens[i], bclens[i],
                    randInst.getSparsity());
            int nnzIx = 0;
            for (long r = 0; r < rlens[i]; r += brlens[i]) {
                long curBlockRowSize = Math.min(brlens[i], (rlens[i] - r));
                for (long c = 0; c < clens[i]; c += bclens[i]) {
                    long curBlockColSize = Math.min(bclens[i], (clens[i] - c));
                    sb.append((r / brlens[i]) + 1);
                    sb.append(',');
                    sb.append((c / bclens[i]) + 1);
                    sb.append(',');
                    sb.append(curBlockRowSize);
                    sb.append(',');
                    sb.append(curBlockColSize);
                    sb.append(',');
                    sb.append(nnz[nnzIx++]);
                    sb.append(',');
                    sb.append(bigrand.nextLong());
                    pw.println(sb.toString());
                    sb.setLength(0);
                    numblocks++;
                }
            }
            pw.close();
            fsOut.close();
            inputInfos[i] = InputInfo.TextCellInputInfo;
        } else if (mrtype == MRINSTRUCTION_TYPE.Seq) {
            SeqInstruction seqInst = (SeqInstruction) mrins;
            inputs[i] = genInst.getBaseDir() + System.currentTimeMillis() + ".seqinput";
            maxsparsity = 1.0; //always dense

            double from = seqInst.fromValue;
            double to = seqInst.toValue;
            double incr = seqInst.incrValue;

            // Correctness checks on (from, to, incr)
            boolean neg = (from > to);
            if (incr == 0)
                throw new DMLRuntimeException("Invalid value for \"increment\" in seq().");
            if (neg != (incr < 0))
                throw new DMLRuntimeException("Wrong sign for the increment in a call to seq()");

            // Compute the number of rows in the sequence
            long numrows = 1 + (long) Math.floor((to - from) / incr);
            if (rlens[i] > 0) {
                if (numrows != rlens[i])
                    throw new DMLRuntimeException(
                            "Unexpected error while processing sequence instruction. Expected number of rows does not match given number: "
                                    + rlens[i] + " != " + numrows);
            } else {
                rlens[i] = numrows;
            }
            if (clens[i] > 0 && clens[i] != 1)
                throw new DMLRuntimeException(
                        "Unexpected error while processing sequence instruction. Number of columns (" + clens[i]
                                + ") must be equal to 1.");
            else
                clens[i] = 1;

            FSDataOutputStream fsOut = fs.create(new Path(inputs[i]));
            PrintWriter pw = new PrintWriter(fsOut);
            StringBuilder sb = new StringBuilder();

            double temp = from;
            double block_from, block_to;
            for (long r = 0; r < rlens[i]; r += brlens[i]) {
                long curBlockRowSize = Math.min(brlens[i], (rlens[i] - r));

                // block (bid_i,bid_j) generates a sequence from the interval [block_from, block_to] (inclusive of both end points of the interval)
                long bid_i = ((r / brlens[i]) + 1);
                long bid_j = 1;
                block_from = temp;
                block_to = temp + (curBlockRowSize - 1) * incr;
                temp = block_to + incr; // next block starts from here

                sb.append(bid_i);
                sb.append(',');
                sb.append(bid_j);
                sb.append(',');
                /* // Need not include block size while generating seq()
                sb.append(curBlockRowSize);
                sb.append(',');
                sb.append(1);
                sb.append(','); */
                sb.append(block_from);
                sb.append(',');
                sb.append(block_to);
                sb.append(',');
                sb.append(incr);
                pw.println(sb.toString());
                //System.out.println("MapTask " + r + ": " + sb.toString());
                sb.setLength(0);
                numblocks++;
            }
            pw.close();
            fsOut.close();
            inputInfos[i] = InputInfo.TextCellInputInfo;
        } else {
            throw new DMLRuntimeException("Unexpected Data Generation Instruction Type: " + mrtype);
        }
    }
    dataGenInsStr = dataGenInsStr.substring(1); //remove the first ","

    RunningJob runjob;
    MatrixCharacteristics[] stats;
    try {
        //set up the block size
        MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);

        //set up the input files and their format information
        MRJobConfiguration.setUpMultipleInputs(job, realIndexes, inputs, inputInfos, brlens, bclens, false,
                ConvertTarget.BLOCK);

        //set up the dimensions of input matrices
        MRJobConfiguration.setMatricesDimensions(job, realIndexes, rlens, clens);
        MRJobConfiguration.setDimsUnknownFilePrefix(job, dimsUnknownFilePrefix);

        //set up the block size
        MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);

        //set up the rand Instructions
        MRJobConfiguration.setRandInstructions(job, dataGenInsStr);

        //set up unary instructions that will perform in the mapper
        MRJobConfiguration.setInstructionsInMapper(job, instructionsInMapper);

        //set up the aggregate instructions that will happen in the combiner and reducer
        MRJobConfiguration.setAggregateInstructions(job, aggInstructionsInReducer);

        //set up the instructions that will happen in the reducer, after the aggregation instructions
        MRJobConfiguration.setInstructionsInReducer(job, otherInstructionsInReducer);

        //set up the replication factor for the results
        job.setInt("dfs.replication", replication);

        //set up map/reduce memory configurations (if in AM context)
        DMLConfig config = ConfigurationManager.getConfig();
        DMLAppMasterUtils.setupMRJobRemoteMaxMemory(job, config);

        //determine degree of parallelism (nmappers: 1<=n<=capacity)
        //TODO use maxsparsity whenever we have a way of generating sparse rand data
        int capacity = InfrastructureAnalyzer.getRemoteParallelMapTasks();
        long dfsblocksize = InfrastructureAnalyzer.getHDFSBlockSize();
        //correction max number of mappers on yarn clusters
        if (InfrastructureAnalyzer.isYarnEnabled())
            capacity = (int) Math.max(capacity, YarnClusterAnalyzer.getNumCores());
        int nmapers = Math.max(Math.min((int) (8 * maxbrlen * maxbclen * (long) numblocks / dfsblocksize), capacity), 1);
        job.setNumMapTasks(nmapers);

        //set up what matrices are needed to pass from the mapper to reducer
        HashSet<Byte> mapoutputIndexes = MRJobConfiguration.setUpOutputIndexesForMapper(job, realIndexes,
                dataGenInsStr, instructionsInMapper, null, aggInstructionsInReducer, otherInstructionsInReducer,
                resultIndexes);

        MatrixChar_N_ReducerGroups ret = MRJobConfiguration.computeMatrixCharacteristics(job, realIndexes,
                dataGenInsStr, instructionsInMapper, null, aggInstructionsInReducer, null,
                otherInstructionsInReducer, resultIndexes, mapoutputIndexes, false);
        stats = ret.stats;

        //set up the number of reducers
        MRJobConfiguration.setNumReducers(job, ret.numReducerGroups, numReducers);

        // print the complete MRJob instruction
        if (LOG.isTraceEnabled())
            inst.printCompleteMRJobInstruction(stats);

        // Update resultDimsUnknown based on computed "stats"
        byte[] resultDimsUnknown = new byte[resultIndexes.length];
        for (int i = 0; i < resultIndexes.length; i++) {
            if (stats[i].getRows() == -1 || stats[i].getCols() == -1) {
                resultDimsUnknown[i] = (byte) 1;
            } else {
                resultDimsUnknown[i] = (byte) 0;
            }
        }

        boolean mayContainCtable = instructionsInMapper.contains("ctabletransform")
                || instructionsInMapper.contains("groupedagg");

        //set up the multiple output files, and their format information
        MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, resultDimsUnknown, outputs, outputInfos, true,
                mayContainCtable);

        // configure mapper and the mapper output key value pairs
        job.setMapperClass(DataGenMapper.class);
        if (numReducers == 0) {
            job.setMapOutputKeyClass(Writable.class);
            job.setMapOutputValueClass(Writable.class);
        } else {
            job.setMapOutputKeyClass(MatrixIndexes.class);
            job.setMapOutputValueClass(TaggedMatrixBlock.class);
        }

        //set up combiner
        if (numReducers != 0 && aggInstructionsInReducer != null && !aggInstructionsInReducer.isEmpty())
            job.setCombinerClass(GMRCombiner.class);

        //configure reducer
        job.setReducerClass(GMRReducer.class);
        //job.setReducerClass(PassThroughReducer.class);

        // By default, the job executes in "cluster" mode.
        // Determine if we can optimize and run it in "local" mode.
        MatrixCharacteristics[] inputStats = new MatrixCharacteristics[inputs.length];
        for (int i = 0; i < inputs.length; i++) {
            inputStats[i] = new MatrixCharacteristics(rlens[i], clens[i], brlens[i], bclens[i]);
        }

        //set unique working dir
        MRJobConfiguration.setUniqueWorkingDir(job);

        runjob = JobClient.runJob(job);

        /* Process different counters */
        Group group = runjob.getCounters().getGroup(MRJobConfiguration.NUM_NONZERO_CELLS);
        for (int i = 0; i < resultIndexes.length; i++) {
            // number of non-zeros
            stats[i].setNonZeros(group.getCounter(Integer.toString(i)));
        }

        String dir = dimsUnknownFilePrefix + "/" + runjob.getID().toString() + "_dimsFile";
        stats = MapReduceTool.processDimsFiles(dir, stats);
        MapReduceTool.deleteFileIfExistOnHDFS(dir);
    } finally {
        for (String input : inputs)
            MapReduceTool.deleteFileIfExistOnHDFS(new Path(input), job);
    }

    return new JobReturn(stats, outputInfos, runjob.isSuccessful());
}
From source file:com.ibm.bi.dml.test.utils.TestUtils.java
License:Open Source License
/**
 * <p>
 * Generates a test matrix with the specified parameters and writes it to a
 * file using the text format.
 * </p>
 * <p>
 * Set seed to -1 to use the current time as seed.
 * </p>
 *
 * @param file output file
 * @param rows number of rows
 * @param cols number of columns
 * @param min minimum value
 * @param max maximum value
 * @param sparsity sparsity
 * @param seed seed
 */
public static void generateTestMatrixToFile(String file, int rows, int cols, double min, double max,
        double sparsity, long seed) {
    try {
        FileSystem fs = FileSystem.get(conf);
        Path inFile = new Path(file);
        DataOutputStream out = fs.create(inFile);
        PrintWriter pw = new PrintWriter(out);
        Random random;
        if (seed == -1)
            random = TestUtils.random;
        else
            random = new Random(seed);
        for (int i = 1; i <= rows; i++) {
            for (int j = 1; j <= cols; j++) {
                if (random.nextDouble() > sparsity)
                    continue;
                double value = (random.nextDouble() * (max - min) + min);
                if (value != 0)
                    pw.println(i + " " + j + " " + value);
            }
        }
        pw.close();
        out.close();
    } catch (IOException e) {
        fail("unable to write test matrix: " + e.getMessage());
    }
}
From source file:com.ibm.bi.dml.test.utils.TestUtils.java
License:Open Source License
/**
 * <p>
 * Creates an empty file.
 * </p>
 *
 * @param filename name of the file to create
 */
public static void createFile(String filename) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    fs.create(new Path(filename)).close(); // close immediately so the empty file is materialized
}
From source file:com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java
License:Apache License
public void writeSequentialHeap() throws Exception {
    System.out.println("writing sequential file in heap mode " + path);

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FSDataOutputStream instream = fs.create(path);
    byte[] buf = new byte[size];
    double sumbytes = 0;
    double ops = 0;
    System.out.println("read size " + size);
    System.out.println("operations " + loop);

    long start = System.currentTimeMillis();
    while (ops < loop) {
        // System.out.println("writing data, len " + buf.length);
        instream.write(buf, 0, buf.length);
        sumbytes = sumbytes + buf.length;
        ops = ops + 1.0;
    }
    instream.flush();
    long end = System.currentTimeMillis();
    double executionTime = ((double) (end - start)) / 1000.0;

    double throughput = 0.0;
    double latency = 0.0;
    double sumbits = sumbytes * 8.0;
    if (executionTime > 0) {
        throughput = sumbits / executionTime / 1024.0 / 1024.0;
        latency = 1000000.0 * executionTime / ops;
    }
    System.out.println("execution time " + executionTime);
    System.out.println("ops " + ops);
    System.out.println("sumbytes " + sumbytes);
    System.out.println("throughput " + throughput);
    System.out.println("latency " + latency);
    System.out.println("closing stream");
    instream.close();
    fs.close();
}
From source file:com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java
License:Apache License
void createFile() throws Exception, InterruptedException {
    System.out.println("create file async hdfs, path " + path + ", size " + size + ", loop " + loop);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    int repfactor = 4;
    for (int k = 0; k < repfactor; k++) {
        LinkedBlockingQueue<Path> pathQueue = new LinkedBlockingQueue<Path>();
        fs.mkdirs(path);
        for (int i = 0; i < loop * size; i++) {
            String name = "" + i;
            Path f = new Path(path, name);
            pathQueue.add(f);
        }
        LinkedBlockingQueue<FSDataOutputStream> streamQueue = new LinkedBlockingQueue<FSDataOutputStream>();
        long start = System.currentTimeMillis();
        for (int i = 0; i < size; i++) {
            //single operation == loop
            for (int j = 0; j < loop; j++) {
                Path path = pathQueue.poll();
                fs.create(path).close();
            }
        }
        long end = System.currentTimeMillis();
        double executionTime = ((double) (end - start));
        double latency = executionTime * 1000.0 / ((double) size);
        System.out.println("execution time [ms] " + executionTime);
        System.out.println("latency [us] " + latency);

        while (!streamQueue.isEmpty()) {
            FSDataOutputStream stream = streamQueue.poll();
            stream.close();
        }

        if (k < repfactor - 1) {
            fs.delete(path, true);
            Thread.sleep(2000);
        }
    }
    fs.close();
}
From source file:com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java
License:Apache License
void keyGet() throws Exception {
    System.out.println("key get, path " + path + ", size " + size + ", loop " + loop);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path[] paths = new Path[loop];
    for (int i = 0; i < loop; i++) {
        String child = "" + i;
        paths[i] = new Path(path, child);
        System.out.println("path " + paths[i]);
    }

    byte[] outBuf = new byte[size];
    for (Path p : paths) {
        FSDataOutputStream outputStream = fs.create(p);
        outputStream.write(outBuf);
        outputStream.close();
    }

    long start = System.currentTimeMillis();
    ByteBuffer inBuf = ByteBuffer.allocateDirect(size);
    for (int i = 0; i < loop; i++) {
        Path p = paths[i];
        FSDataInputStream inputStream = fs.open(p);
        inBuf.clear();
        while (inBuf.remaining() > 0) {
            inputStream.read(inBuf);
        }
        inputStream.close();
    }
    long end = System.currentTimeMillis();
    double executionTime = ((double) (end - start));
    double latency = executionTime * 1000.0 / ((double) loop);
    System.out.println("execution time [ms] " + executionTime);
    System.out.println("latency [us] " + latency);
    fs.close();
}
From source file:com.ibm.stocator.fs.swift2d.systemtests.StreamingSwiftTest.java
License:Open Source License
@Test
public void accessObjectWithSpaceTest() throws Exception {
    FileSystem fs = new ObjectStoreFileSystem();
    Configuration conf = new Configuration();
    String uriString = conf.get("fs.swift2d.test.uri");
    Assume.assumeNotNull(uriString);
    // adding suffix with space to the container name
    String scheme = "swift2d";
    String objectName = "/a/testObject.txt";
    URI publicContainerURI = new URI(uriString + objectName);
    // initialize file system
    fs.initialize(publicContainerURI, conf);
    FileStatus objectFS = null;
    Path f = null;
    try {
        FSDataOutputStream fsDataOutputStream = null;
        String currObjName = null;
        for (int i = 0; i < 5; i++) {
            currObjName = objectName + String.valueOf(i);
            // create timer
            createObjectTimer(90000.0, currObjName);
            publicContainerURI = new URI(scheme + "://" + getHost(URI.create(uriString)) + "/" + currObjName);
            f = new Path(publicContainerURI.toString());
            fsDataOutputStream = fs.create(f);
            String line = null;
            while (!objectExpired) {
                // generates input
                byte[] bytes = new byte[0];
                line = "\"2017-7-15 3:6:43\"," + String.valueOf(Math.random()) + ",6,18" + "\n";
                ByteBuffer linesBB = ByteBuffer.wrap(line.getBytes());
                bytes = new byte[linesBB.limit()];
                linesBB.get(bytes);
                // writes to output
                fsDataOutputStream.write(bytes);
                // simulate delays in input
                Thread.sleep(50);
            }
            fsDataOutputStream.close();
            objectExpired = false;
        }
    } catch (Exception e) {
        e.printStackTrace();
        Assert.assertNotNull("Unable to access public object.", objectFS);
    } finally {
        fs.delete(f, true);
    }
}
From source file:com.idvp.platform.hdfs.HDFSDataStream.java
License:Apache License
protected void doOpen(Configuration conf, Path dstPath, FileSystem hdfs) throws IOException {
    boolean appending = false;
    if (conf.getBoolean("hdfs.append.support", false) == true && hdfs.isFile(dstPath)) {
        outStream = hdfs.append(dstPath);
        appending = true;
    } else {
        outStream = hdfs.create(dstPath);
    }

    serializer = new BodyTextEventSerializer.Builder().build(outStream);

    if (appending && !serializer.supportsReopen()) {
        outStream.close();
        serializer = null;
        throw new IOException("serializer (" + "TEXT" + ") does not support append");
    }

    // must call superclass to check for replication issues
    registerCurrentStream(outStream, hdfs, dstPath);

    if (appending) {
        serializer.afterReopen();
    } else {
        serializer.afterCreate();
    }
}
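Stripped of the serializer handling, the append-or-create decision in the example above reduces to the sketch below; it assumes an already-initialized Configuration conf, FileSystem hdfs, and destination Path dstPath, as in the method signature, and is not the library's own API.

    FSDataOutputStream out;
    if (conf.getBoolean("hdfs.append.support", false) && hdfs.isFile(dstPath)) {
        out = hdfs.append(dstPath);   // reopen the existing file and append to it
    } else {
        out = hdfs.create(dstPath);   // otherwise create it (replacing any existing file)
    }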
From source file:com.inmobi.conduit.CompressedFileReaderTest.java
License:Apache License
private void uncompress(String fileName) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs;
    fs = FileSystem.getLocal(conf);
    CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
    CompressionCodec codec = codecFactory.getCodec(new Path(fileName));
    if (codec == null) {
        System.out.println("cant find codec");
        System.exit(1);
    }
    LOG.info("Using compression codec [" + codec.toString() + "]");
    CompressionInputStream is = codec.createInputStream(fs.open(new Path(fileName)));
    OutputStream out = null;
    try {
        String outputURI = CompressionCodecFactory.removeSuffix(fileName, codec.getDefaultExtension());
        out = fs.create(new Path(outputURI + "-uncompressed"));
        org.apache.hadoop.io.IOUtils.copyBytes(is, out, conf);
    } finally {
        org.apache.hadoop.io.IOUtils.closeStream(out);
        IOUtils.closeStream(is);
    }
}
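The example above decompresses a file and writes the plain bytes through fs.create(...). The reverse direction follows the same shape: wrap the stream returned by fs.create(...) in the codec's output stream. A minimal sketch under the same assumptions (local FileSystem, GzipCodec chosen for illustration, hypothetical output path):

import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class CompressedWriteSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.getLocal(conf);                              // local FS, as in the example above
        CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);
        Path outPath = new Path("/tmp/data.txt" + codec.getDefaultExtension()); // hypothetical path
        try (OutputStream out = codec.createOutputStream(fs.create(outPath))) { // compress on the way out
            out.write("hello, compressed world\n".getBytes(StandardCharsets.UTF_8));
        }
    }
}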