List of usage examples for org.apache.hadoop.fs FileSystem create
public FSDataOutputStream create(Path f) throws IOException
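Before the project examples below, here is a minimal sketch of the basic call pattern; the configuration and the output path are illustrative assumptions, not taken from any of the listed projects. create(Path) overwrites an existing file at that path by default and returns an FSDataOutputStream that must be closed for the data to be flushed.

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Resolves to the default file system (HDFS if fs.defaultFS points at one, otherwise local).
        FileSystem fs = FileSystem.get(conf);
        Path out = new Path("/tmp/create-example.txt"); // hypothetical path, adjust as needed
        // create(Path) overwrites any existing file; close the stream to flush the data.
        try (FSDataOutputStream os = fs.create(out)) {
            os.write("hello".getBytes(StandardCharsets.UTF_8));
        }
    }
}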
From source file:com.skp.experiment.cf.als.hadoop.SolveImplicitFeedbackMultithreadedMapper.java
License:Apache License
/**
 * Creates a file lock per datanode to prevent too many map tasks from
 * running simultaneously on the same datanode.
 */
private void checkLock(Context ctx, int lockNums) throws InterruptedException, IOException {
    InetAddress thisIp = InetAddress.getLocalHost();
    String hostIp = thisIp.getHostAddress();
    // busy wait
    Configuration conf = ctx.getConfiguration();
    long totalSleep = 0;
    boolean haveLock = false;
    FileSystem fs = FileSystem.get(conf);
    while (!haveLock) {
        for (int i = 0; i < lockNums; i++) {
            Path checkPath = new Path(lockPath, hostIp + "_" + i);
            if (!fs.exists(checkPath)) {
                haveLock = true;
                currentLockPath = checkPath;
                BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(currentLockPath)));
                br.write(ctx.getTaskAttemptID().toString());
                // close the writer so the lock file content is actually flushed to HDFS
                br.close();
                break;
            }
        }
        if (!haveLock) {
            Random random = new Random();
            int diff = 1000 + random.nextInt(1000);
            totalSleep += diff + sleepPeriod;
            ctx.setStatus("sleeping: " + String.valueOf(totalSleep));
            Thread.sleep(sleepPeriod + diff);
        }
    }
}
From source file:com.splicemachine.derby.stream.function.RowKeyGenerator.java
License:Open Source License
/**
 * Sorts the keys and writes them out in HBase escaped string format.
 * @throws IOException
 */
private void outputKeys() throws IOException {
    BufferedWriter br = null;
    try {
        Configuration conf = HConfiguration.unwrapDelegate();
        FileSystem fs = FileSystem.get(URI.create(bulkImportDirectory), conf);
        Collections.sort(keys, new Comparator<byte[]>() {
            @Override
            public int compare(byte[] o1, byte[] o2) {
                return Bytes.compareTo(o1, o2);
            }
        });
        long conglom = indexConglom == -1 ? heapConglom : indexConglom;
        Path path = new Path(bulkImportDirectory, Long.toString(conglom));
        Path outFile = new Path(path, "keys");
        FSDataOutputStream os = fs.create(outFile);
        br = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
        for (byte[] key : keys) {
            br.write(Bytes.toStringBinary(key) + "\n");
        }
        br.close();
        fileNames.add(outFile.toString());
    } finally {
        if (br != null)
            br.close();
    }
}
From source file:com.splicemachine.derby.stream.spark.SparkHBaseBulkImport.java
License:Open Source License
/**
 * Outputs cut points to files.
 * @param cutPointsList
 * @throws StandardException
 */
private void dumpCutPoints(List<Tuple2<Long, byte[][]>> cutPointsList) throws StandardException {
    BufferedWriter br = null;
    try {
        Configuration conf = HConfiguration.unwrapDelegate();
        FileSystem fs = FileSystem.get(URI.create(bulkImportDirectory), conf);
        for (Tuple2<Long, byte[][]> t : cutPointsList) {
            Long conglomId = t._1;
            Path path = new Path(bulkImportDirectory, conglomId.toString());
            FSDataOutputStream os = fs.create(new Path(path, "cutpoints"));
            br = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
            byte[][] cutPoints = t._2;
            for (byte[] cutPoint : cutPoints) {
                br.write(Bytes.toStringBinary(cutPoint) + "\n");
            }
            br.close();
        }
    } catch (IOException e) {
        throw StandardException.plainWrapException(e);
    } finally {
        try {
            if (br != null)
                br.close();
        } catch (IOException e) {
            throw StandardException.plainWrapException(e);
        }
    }
}
From source file:com.splout.db.dnode.Fetcher.java
License:Open Source License
private File hdfsFetch(Path fromPath, Reporter reporter) throws IOException, InterruptedException {
    UUID uniqueId = UUID.randomUUID();
    File toFile = new File(tempDir, uniqueId.toString() + "/" + fromPath.getName());
    File toDir = new File(toFile.getParent());
    if (toDir.exists()) {
        FileUtils.deleteDirectory(toDir);
    }
    toDir.mkdirs();
    Path toPath = new Path(toFile.getCanonicalPath());
    FileSystem fS = fromPath.getFileSystem(hadoopConf);
    FileSystem tofS = FileSystem.getLocal(hadoopConf);
    Throttler throttler = new Throttler((double) bytesPerSecThrottle);
    try {
        for (FileStatus fStatus : fS.globStatus(fromPath)) {
            log.info("Copying " + fStatus.getPath() + " to " + toPath);
            long bytesSoFar = 0;
            FSDataInputStream iS = fS.open(fStatus.getPath());
            FSDataOutputStream oS = tofS.create(toPath);
            byte[] buffer = new byte[downloadBufferSize];
            int nRead;
            while ((nRead = iS.read(buffer, 0, buffer.length)) != -1) {
                // Needed to be able to be interrupted at any moment.
                if (Thread.interrupted()) {
                    iS.close();
                    oS.close();
                    cleanDirNoExceptions(toDir);
                    throw new InterruptedException();
                }
                bytesSoFar += nRead;
                oS.write(buffer, 0, nRead);
                throttler.incrementAndThrottle(nRead);
                if (bytesSoFar >= bytesToReportProgress) {
                    reporter.progress(bytesSoFar);
                    bytesSoFar = 0L;
                }
            }
            if (reporter != null) {
                reporter.progress(bytesSoFar);
            }
            oS.close();
            iS.close();
        }
        return toDir;
    } catch (ClosedByInterruptException e) {
        // This can be thrown by the read method.
        cleanDirNoExceptions(toDir);
        throw new InterruptedIOException();
    }
}
From source file:com.splout.db.dnode.TestFetcher.java
License:Open Source License
@Test
public void testHdfsFetching() throws IOException, URISyntaxException, InterruptedException {
    Configuration conf = new Configuration();
    FileSystem fS = FileSystem.getLocal(conf);
    SploutConfiguration testConfig = SploutConfiguration.getTestConfig();
    testConfig.setProperty(FetcherProperties.TEMP_DIR, "tmp-dir-" + TestFetcher.class.getName());
    Fetcher fetcher = new Fetcher(testConfig);

    Path path = new Path("tmp-" + TestFetcher.class.getName());
    OutputStream oS = fS.create(path);
    oS.write("This is what happens when you don't know what to write".getBytes());
    oS.close();

    File f = fetcher.fetch(new Path(fS.getWorkingDirectory(), path.getName()).toUri().toString());

    assertTrue(f.exists());
    assertTrue(f.isDirectory());

    File file = new File(f, "tmp-" + TestFetcher.class.getName());
    assertTrue(file.exists());
    assertEquals("This is what happens when you don't know what to write",
            Files.toString(file, Charset.defaultCharset()));

    fS.delete(path, true);
    FileUtils.deleteDirectory(f);
}
From source file:com.splout.db.dnode.TestFetcher.java
License:Open Source License
@Test
public void testHdfsFetchingInterrupted() throws IOException, URISyntaxException, InterruptedException {
    Configuration conf = new Configuration();
    final FileSystem fS = FileSystem.getLocal(conf);
    SploutConfiguration testConfig = SploutConfiguration.getTestConfig();
    testConfig.setProperty(FetcherProperties.TEMP_DIR, "tmp-dir-" + TestFetcher.class.getName());
    final Fetcher fetcher = new Fetcher(testConfig);

    final Path path = new Path("tmp-" + TestFetcher.class.getName());
    OutputStream oS = fS.create(path);
    oS.write("This is what happens when you don't know what to write".getBytes());
    oS.close();

    Thread t = new Thread() {
        @Override
        public void run() {
            try {
                try {
                    File f = fetcher.fetch(new Path(fS.getWorkingDirectory(), path.getName()).toUri().toString());
                } catch (IOException e) {
                    e.printStackTrace();
                } catch (URISyntaxException e) {
                    e.printStackTrace();
                }
                fail("An InterruptedException was expected.");
            } catch (InterruptedException e) {
                // Everything good.
            }
        }
    };
    // We interrupt the thread before starting so we are sure that the interruption check
    // will be seen even if the file to copy is very small.
    t.interrupt();
    t.start();
}
From source file:com.splout.db.dnode.TestFetcher.java
License:Open Source License
@Test
public void testHdfsFetchingAndThrottling() throws IOException, URISyntaxException, InterruptedException {
    Configuration conf = new Configuration();
    FileSystem fS = FileSystem.getLocal(conf);
    SploutConfiguration testConfig = SploutConfiguration.getTestConfig();
    testConfig.setProperty(FetcherProperties.TEMP_DIR, "tmp-dir-" + TestFetcher.class.getName());
    testConfig.setProperty(FetcherProperties.DOWNLOAD_BUFFER, 4);
    testConfig.setProperty(FetcherProperties.BYTES_PER_SEC_THROTTLE, 8);
    Fetcher fetcher = new Fetcher(testConfig);

    final String str = "This is what happens when you don't know what to write";
    Path path = new Path("tmp-" + TestFetcher.class.getName());
    OutputStream oS = fS.create(path);
    oS.write(str.getBytes());
    oS.close();

    long startTime = System.currentTimeMillis();
    File f = fetcher.fetch(new Path(fS.getWorkingDirectory(), path.getName()).toUri().toString());
    long endTime = System.currentTimeMillis();

    double bytesPerSec = (str.getBytes().length / (double) (endTime - startTime)) * 1000;
    assertEquals(8, bytesPerSec, 0.5);

    assertTrue(f.exists());
    assertTrue(f.isDirectory());

    File file = new File(f, "tmp-" + TestFetcher.class.getName());
    assertTrue(file.exists());
    assertEquals(str, Files.toString(file, Charset.defaultCharset()));

    fS.delete(path, true);
    FileUtils.deleteDirectory(f);
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.HdfsTarget.java
License:Apache License
boolean validateHadoopDir(String configName, String dirPathTemplate, List<ConfigIssue> issues) {
    boolean ok;
    if (!dirPathTemplate.startsWith("/")) {
        issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName, Errors.HADOOPFS_40));
        ok = false;
    } else {
        int firstEL = dirPathTemplate.indexOf("$");
        if (firstEL > -1) {
            int lastDir = dirPathTemplate.lastIndexOf("/", firstEL);
            dirPathTemplate = dirPathTemplate.substring(0, lastDir);
        }
        dirPathTemplate = (dirPathTemplate.isEmpty()) ? "/" : dirPathTemplate;
        try {
            Path dir = new Path(dirPathTemplate);
            FileSystem fs = getFileSystemForInitDestroy();
            if (!fs.exists(dir)) {
                try {
                    if (fs.mkdirs(dir)) {
                        ok = true;
                    } else {
                        issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName,
                                Errors.HADOOPFS_41));
                        ok = false;
                    }
                } catch (IOException ex) {
                    issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName,
                            Errors.HADOOPFS_42, ex.toString()));
                    ok = false;
                }
            } else {
                try {
                    // Probe write access by creating and then deleting a dummy file.
                    Path dummy = new Path(dir, "_sdc-dummy-" + UUID.randomUUID().toString());
                    fs.create(dummy).close();
                    fs.delete(dummy, false);
                    ok = true;
                } catch (IOException ex) {
                    issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName,
                            Errors.HADOOPFS_43, ex.toString()));
                    ok = false;
                }
            }
        } catch (Exception ex) {
            LOG.info("Validation Error: " + Errors.HADOOPFS_44.getMessage(), ex.toString(), ex);
            issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName, Errors.HADOOPFS_44,
                    ex.toString()));
            ok = false;
        }
    }
    return ok;
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.HdfsTargetConfigBean.java
License:Apache License
private boolean validateHadoopDir(final Stage.Context context, final String configName, final String configGroup,
        String dirPathTemplate, final List<Stage.ConfigIssue> issues) {
    final AtomicBoolean ok = new AtomicBoolean(true);
    if (!dirPathTemplate.startsWith("/")) {
        issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_40));
        ok.set(false);
    } else {
        dirPathTemplate = (dirPathTemplate.isEmpty()) ? "/" : dirPathTemplate;
        try {
            final Path dir = new Path(dirPathTemplate);
            final FileSystem fs = getFileSystemForInitDestroy();
            getUGI().doAs(new PrivilegedExceptionAction<Void>() {
                @Override
                public Void run() throws Exception {
                    if (!fs.exists(dir)) {
                        try {
                            if (fs.mkdirs(dir)) {
                                ok.set(true);
                            } else {
                                issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_41));
                                ok.set(false);
                            }
                        } catch (IOException ex) {
                            issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_42,
                                    ex.toString()));
                            ok.set(false);
                        }
                    } else {
                        try {
                            Path dummy = new Path(dir, "_sdc-dummy-" + UUID.randomUUID().toString());
                            fs.create(dummy).close();
                            fs.delete(dummy, false);
                            ok.set(true);
                        } catch (IOException ex) {
                            issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_43,
                                    ex.toString()));
                            ok.set(false);
                        }
                    }
                    return null;
                }
            });
        } catch (Exception ex) {
            issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_44, ex.toString()));
            ok.set(false);
        }
    }
    return ok.get();
}
From source file:com.streamsets.pipeline.stage.lib.hive.HiveMetastoreUtil.java
License:Apache License
/**
 * Returns the HDFS path where the Avro schema is stored after serializing.
 * The path is suffixed with the current time so that the files have an ordering.
 * @param rootTableLocation Root table location
 * @return HDFS path string.
 */
public static String serializeSchemaToHDFS(UserGroupInformation loginUGI, final FileSystem fs,
        final String rootTableLocation, final String schemaJson) throws StageException {
    final String folderPath = rootTableLocation + HiveMetastoreUtil.SEP
            + HiveMetastoreUtil.HDFS_SCHEMA_FOLDER_NAME;
    final Path schemasFolderPath = new Path(folderPath);
    final String path = folderPath + SEP + HiveMetastoreUtil.AVRO_SCHEMA
            + DateFormatUtils.format(new Date(System.currentTimeMillis()), "yyyy-MM-dd--HH_mm_ss");
    try {
        loginUGI.doAs(new PrivilegedExceptionAction<Void>() {
            @Override
            public Void run() throws Exception {
                if (!fs.exists(schemasFolderPath)) {
                    fs.mkdirs(schemasFolderPath);
                }
                Path schemaFilePath = new Path(path);
                // This will never happen unless two HMS targets are writing; we error out for this
                // and let the user handle it via error record handling.
                if (!fs.exists(schemaFilePath)) {
                    try (FSDataOutputStream os = fs.create(schemaFilePath)) {
                        os.writeChars(schemaJson);
                    }
                } else {
                    LOG.error(Utils.format("Schema file {} already exists in HDFS", path));
                    throw new IOException("Schema file already exists");
                }
                return null;
            }
        });
    } catch (Exception e) {
        LOG.error("Error in writing schema to HDFS: " + e.toString(), e);
        throw new StageException(Errors.HIVE_18, path, e.getMessage());
    }
    return path;
}