List of usage examples for org.apache.hadoop.fs FileSystem createNewFile
public boolean createNewFile(Path f) throws IOException
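Before the project examples below, here is a minimal, self-contained sketch of the method's contract (the class name and the marker path are illustrative assumptions, not taken from any example): createNewFile creates a zero-length file at the given path, returning true if the file was created and false if a file already existed there.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateNewFileSketch {
    public static void main(String[] args) throws Exception {
        // Local file system used for illustration; any FileSystem implementation works.
        FileSystem fs = FileSystem.getLocal(new Configuration());
        Path marker = new Path("/tmp/example.marker"); // hypothetical path
        // true only if no file existed at this path before the call
        boolean created = fs.createNewFile(marker);
        System.out.println(created ? "created " + marker : marker + " already exists");
    }
}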
From source file:org.apache.oozie.action.hadoop.TestFsActionExecutor.java
License:Apache License
public void testSubmitWithNameNode() throws Exception {
    FsActionExecutor ae = new FsActionExecutor();
    FileSystem fs = getFileSystem();
    Path mkdir = new Path(getFsTestCaseDir(), "mkdir");
    Path mkdirX = new Path(mkdir.toUri().getPath());
    Path delete = new Path(getFsTestCaseDir(), "delete");
    Path deleteX = new Path(delete.toUri().getPath());
    fs.mkdirs(delete);
    Path source = new Path(getFsTestCaseDir(), "source");
    Path sourceX = new Path(source.toUri().getPath());
    fs.mkdirs(source);
    Path target = new Path(new Path(getFsTestCaseDir(), "target").toUri().getPath());
    Path chmod1 = new Path(getFsTestCaseDir(), "chmod1");
    Path chmod1X = new Path(chmod1.toUri().getPath());
    fs.mkdirs(chmod1);
    Path child1 = new Path(chmod1, "child1");
    fs.mkdirs(child1);
    Path chmod2 = new Path(getFsTestCaseDir(), "chmod2");
    Path chmod2X = new Path(chmod2.toUri().getPath());
    fs.mkdirs(chmod2);
    Path child2 = new Path(chmod2, "child2");
    fs.mkdirs(child2);
    Path newFile1 = new Path(mkdir + "newFile1");
    Path newFile1X = new Path(newFile1.toUri().getPath());
    Path newFile2 = new Path(mkdir + "newFile2");
    Path newFile2X = new Path(newFile2.toUri().getPath());
    fs.createNewFile(newFile1);

    String actionXml = MessageFormat.format("<fs><name-node>{0}</name-node>"
            + "<mkdir path=''{1}''/>"
            + "<delete path=''{2}''/>"
            + "<move source=''{3}'' target=''{4}''/>"
            + "<chmod path=''{5}'' permissions=''-rwxrwxrwx''/>"
            + "<chmod path=''{6}'' permissions=''-rwxrwx---'' dir-files=''false''/>"
            + "<touchz path=''{7}''/>"
            + "<touchz path=''{8}''/>"
            + "</fs>",
            getNameNodeUri(), mkdirX, deleteX, sourceX, target, chmod1X, chmod2X, newFile1X, newFile2X);

    Context context = createContext(actionXml);
    WorkflowAction action = context.getAction();

    assertFalse(fs.exists(ae.getRecoveryPath(context)));

    ae.start(context, action);

    assertTrue(fs.exists(ae.getRecoveryPath(context)));

    ae.check(context, context.getAction());
    assertEquals("OK", context.getAction().getExternalStatus());
    assertNull(context.getAction().getData());
    ae.end(context, context.getAction());
    assertEquals(WorkflowAction.Status.OK, context.getAction().getStatus());

    assertFalse(fs.exists(ae.getRecoveryPath(context)));

    assertTrue(fs.exists(mkdir));
    assertFalse(fs.exists(delete));
    assertFalse(fs.exists(source));
    assertTrue(fs.exists(target));
    assertTrue(fs.exists(newFile1));
    assertTrue(fs.exists(newFile2));

    assertEquals("rwxrwxrwx", fs.getFileStatus(chmod1).getPermission().toString());
    assertNotSame("rwxrwxrwx", fs.getFileStatus(child1).getPermission().toString());

    assertEquals("rwxrwx---", fs.getFileStatus(chmod2).getPermission().toString());
    assertNotSame("rwxrwx---", fs.getFileStatus(child2).getPermission().toString());
}
From source file:org.apache.oozie.action.hadoop.TestFsActionExecutor.java
License:Apache License
private void createTestDirForChgrp(Path basePath, FileSystem fs) throws Exception {
    String testUser = getTestUser();
    String testGroup = getTestGroup();
    fs.mkdirs(basePath);
    fs.mkdirs(new Path(basePath, "10"));
    fs.mkdirs(new Path(basePath + "/10/dir1"));
    fs.createNewFile(new Path(basePath + "/10/dir1/file1"));
    fs.mkdirs(new Path(basePath + "/10/dir2"));
    fs.mkdirs(new Path(basePath, "11"));
    fs.mkdirs(new Path(basePath + "/11/dir3"));
    fs.mkdirs(new Path(basePath, "12"));
    fs.setOwner(new Path(basePath, "10"), testUser, testGroup);
    fs.setOwner(new Path(basePath + "/10/dir1"), testUser, testGroup);
    fs.setOwner(new Path(basePath + "/10/dir1/file1"), testUser, testGroup);
    fs.setOwner(new Path(basePath + "/10/dir2"), testUser, testGroup);
    fs.setOwner(new Path(basePath, "11"), testUser, testGroup);
    fs.setOwner(new Path(basePath + "/11/dir3"), testUser, testGroup);
    fs.setOwner(new Path(basePath, "12"), testUser, testGroup);
}
From source file:org.apache.oozie.action.hadoop.TestPyspark.java
License:Apache License
/**
 * @param listLibFiles list of files to be created in workflow lib/ directory
 * @return
 * @throws Exception
 */
protected WorkflowJobBean getWorkflow(ArrayList<String> listLibFiles) throws Exception {
    // add the example file as well
    listLibFiles.add(PI_EXAMPLE);
    String[] libPaths = new String[listLibFiles.size()];
    FileSystem fs = getFileSystem();
    for (int i = 0; i < listLibFiles.size(); i++) {
        libPaths[i] = new Path("lib/" + listLibFiles.get(i)).toString();
        if (listLibFiles.get(i).equals(PY4J_ZIP) || listLibFiles.get(i).equals(PYSPARK_ZIP)
                || listLibFiles.get(i).equals(PI_EXAMPLE)) {
            IOUtils.copyStream(IOUtils.getResourceAsStream(listLibFiles.get(i), -1),
                    fs.create(new Path(getAppPath(), "lib/" + listLibFiles.get(i))));
        } else {
            fs.createNewFile(new Path(getAppPath(), "lib/" + listLibFiles.get(i)));
        }
    }
    XConfiguration protoConf = new XConfiguration();
    protoConf.set(WorkflowAppService.HADOOP_USER, getTestUser());
    SharelibUtils.addToDistributedCache("spark", getFileSystem(), getFsTestCaseDir(), protoConf);
    WorkflowJobBean wf = createBaseWorkflow(protoConf, "spark-action");
    String defaultProtoConf = wf.getProtoActionConf();
    XConfiguration newProtoConf = new XConfiguration(new StringReader(defaultProtoConf));
    newProtoConf.setStrings(WorkflowAppService.APP_LIB_PATH_LIST, libPaths);
    wf.setProtoActionConf(newProtoConf.toXmlString());
    return wf;
}
From source file:org.apache.pig.test.TestPigServer.java
License:Apache License
private static void createFakeJarFile(String location, String name, FileSystem fs) throws IOException {
    System.err.println("Location: " + location + " name: " + name);
    Path dir = new Path(location);
    fs.mkdirs(dir);

    assertTrue(fs.createNewFile(new Path(dir, name)));
}
From source file:org.apache.sysml.runtime.io.WriterTextCSV.java
License:Apache License
@SuppressWarnings("unchecked")
public final void addHeaderToCSV(String srcFileName, String destFileName, long rlen, long clen)
        throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path srcFilePath = new Path(srcFileName);
    Path destFilePath = new Path(destFileName);
    FileSystem fs = IOUtilFunctions.getFileSystem(srcFilePath, conf);

    if (!_props.hasHeader()) {
        // simply move srcFile to destFile
        /*
         * TODO: Remove this roundabout way!
         * For example: destFilePath = /user/biadmin/csv/temp/out/file.csv
         * & the only path that exists already on HDFS is /user/biadmin/csv/.
         * In this case: the directory structure /user/biadmin/csv/temp/out must be created.
         * Simple hdfs.rename() does not seem to create this directory structure.
         */

        // delete the destination file, if exists already
        fs.delete(destFilePath, true);

        // Create /user/biadmin/csv/temp/out/file.csv so that ..../temp/out/ is created.
        fs.createNewFile(destFilePath);

        // delete the file "file.csv" but preserve the directory structure /user/biadmin/csv/temp/out/
        fs.delete(destFilePath, true);

        // finally, move the data to destFilePath = /user/biadmin/csv/temp/out/file.csv
        fs.rename(srcFilePath, destFilePath);

        return;
    }

    // construct the header line
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < clen; i++) {
        sb.append("C" + (i + 1));
        if (i < clen - 1)
            sb.append(_props.getDelim());
    }
    sb.append('\n');

    if (fs.isDirectory(srcFilePath)) {
        // compute sorted order among part files
        ArrayList<Path> files = new ArrayList<>();
        for (FileStatus stat : fs.listStatus(srcFilePath, CSVReblockMR.hiddenFileFilter))
            files.add(stat.getPath());
        Collections.sort(files);

        // first part file path
        Path firstpart = files.get(0);

        // create a temp file, and add header and contents of first part
        Path tmp = new Path(firstpart.toString() + ".tmp");
        OutputStream out = fs.create(tmp, true);
        out.write(sb.toString().getBytes());
        sb.setLength(0);

        // copy rest of the data from firstpart
        InputStream in = null;
        try {
            in = fs.open(firstpart);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }

        // rename tmp to firstpart
        fs.delete(firstpart, true);
        fs.rename(tmp, firstpart);

        // rename srcfile to destFile
        fs.delete(destFilePath, true);
        fs.createNewFile(destFilePath); // force the creation of directory structure
        fs.delete(destFilePath, true); // delete the file, but preserve the directory structure
        fs.rename(srcFilePath, destFilePath); // move the data
    } else if (fs.isFile(srcFilePath)) {
        // create destination file
        OutputStream out = fs.create(destFilePath, true);

        // write header
        out.write(sb.toString().getBytes());
        sb.setLength(0);

        // copy the data from srcFile
        InputStream in = null;
        try {
            in = fs.open(srcFilePath);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(srcFilePath.toString() + ": No such file or directory");
    }
}
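The TODO in this snippet works around the fact that rename does not create missing parent directories: it calls createNewFile on the destination (which forces the directory structure into existence) and immediately deletes the file before renaming. A more direct sketch of the same effect, under the assumption that the same fs, srcFilePath, and destFilePath variables are in scope, would create the parent directories explicitly:

// Alternative to the createNewFile/delete round trip: make the parent
// directories first, then rename the source into place.
Path parent = destFilePath.getParent();
if (parent != null && !fs.exists(parent)) {
    fs.mkdirs(parent);
}
fs.delete(destFilePath, true); // remove any stale destination file
fs.rename(srcFilePath, destFilePath);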
From source file:org.apache.tez.common.TestReflectionUtils.java
License:Apache License
@Test(timeout = 5000)
public void testAddResourceToClasspath() throws IOException, TezException {
    String rsrcName = "dummyfile.xml";
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    Path p = new Path(rsrcName);
    p = localFs.makeQualified(p);
    localFs.delete(p, false);

    try {
        URL loadedUrl = null;

        loadedUrl = Thread.currentThread().getContextClassLoader().getResource(rsrcName);
        assertNull(loadedUrl);

        // Add parent to classpath since we're not adding a jar
        assertTrue(localFs.createNewFile(p));
        String urlForm = p.toUri().toURL().toString();
        urlForm = urlForm.substring(0, urlForm.lastIndexOf('/') + 1);
        URL url = new URL(urlForm);

        ReflectionUtils.addResourcesToClasspath(Collections.singletonList(url));

        loadedUrl = Thread.currentThread().getContextClassLoader().getResource(rsrcName);
        assertNotNull(loadedUrl);
    } finally {
        localFs.delete(p, false);
    }
}
From source file:org.apache.tez.runtime.TestReflectionUtils.java
License:Apache License
@Test
public void testAddResourceToClasspath() throws IOException, TezException {
    String rsrcName = "dummyfile.xml";
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    Path p = new Path(rsrcName);
    p = localFs.makeQualified(p);
    localFs.delete(p, false);

    try {
        URL loadedUrl = null;

        loadedUrl = Thread.currentThread().getContextClassLoader().getResource(rsrcName);
        assertNull(loadedUrl);

        // Add parent to classpath since we're not adding a jar
        assertTrue(localFs.createNewFile(p));
        String urlForm = p.toUri().toURL().toString();
        urlForm = urlForm.substring(0, urlForm.lastIndexOf('/') + 1);
        URL url = new URL(urlForm);

        ReflectionUtils.addResourcesToClasspath(Collections.singletonList(url));

        loadedUrl = Thread.currentThread().getContextClassLoader().getResource(rsrcName);
        assertNotNull(loadedUrl);
    } finally {
        localFs.delete(p, false);
    }
}
From source file:org.apache.tez.runtime.TestRuntimeUtils.java
License:Apache License
@Test
public void testAddResourceToClasspath() throws IOException, TezException {
    String rsrcName = "dummyfile.xml";
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    Path p = new Path(rsrcName);
    p = localFs.makeQualified(p);
    localFs.delete(p, false);

    try {
        URL loadedUrl = null;

        loadedUrl = Thread.currentThread().getContextClassLoader().getResource(rsrcName);
        assertNull(loadedUrl);

        // Add parent to classpath since we're not adding a jar
        assertTrue(localFs.createNewFile(p));
        String urlForm = p.toUri().toURL().toString();
        urlForm = urlForm.substring(0, urlForm.lastIndexOf('/') + 1);
        URL url = new URL(urlForm);

        RuntimeUtils.addResourcesToClasspath(Collections.singletonList(url));

        loadedUrl = Thread.currentThread().getContextClassLoader().getResource(rsrcName);
        assertNotNull(loadedUrl);
    } finally {
        localFs.delete(p, false);
    }
}
From source file:org.commoncrawl.service.crawler.CrawlLog.java
License:Open Source License
private void purgeHDFSSegmentLogs(FileSystem hdfs, int listId, int segmentId) throws IOException {

    Path listLogDirectory = new Path(CrawlEnvironment.getCrawlSegmentDataDirectory(),
            ((Integer) listId).toString());
    Path segmentLogDirectory = new Path(listLogDirectory, ((Integer) segmentId).toString());

    Path completionLogFilePath = new Path(segmentLogDirectory,
            CrawlEnvironment.buildCrawlSegmentCompletionLogFileName(getNodeName()));

    if (!hdfs.exists(completionLogFilePath)) {
        // create a zero length completion log file on hdfs ...
        hdfs.createNewFile(completionLogFilePath);
    }

    // skip this step as history servers now manage segment logs
    /*
     * // and now ... delete all logs
     * Path segmentLogWildcardPath = new Path(segmentLogDirectory,
     *     CrawlEnvironment.buildCrawlSegmentLogCheckpointWildcardString(getNodeName()));
     * FileStatus paths[] = hdfs.globStatus(segmentLogWildcardPath);
     * if (paths != null) {
     *   for (FileStatus path : paths) {
     *     // hdfs.delete(path.getPath());
     *   }
     * }
     */
}
From source file:org.commoncrawl.service.crawlhistory.CrawlHistoryServer.java
License:Open Source License
private void startCheckpointThread(final FileSystem fs) {

    _checkpointThread = new Thread(new Runnable() {

        @Override
        public void run() {
            // ok, checkpoint thread runs in perpetuity
            while (!_shutdownFlag) {
                if (_lastCheckpointScanTime == -1 || _lastCheckpointFlushTime == -1
                        || (System.currentTimeMillis() - _lastCheckpointScanTime) >= CHECKPOINT_SCAN_INTERVAL
                        || (System.currentTimeMillis() - _lastCheckpointFlushTime) >= CHECKPOINT_FLUSH_INTERVAL) {

                    // LOG.info("Checkpoint Thread Grabbing Semaphore");
                    // grab checkpoint thread semaphore
                    _checkpointThreadSemaphore.acquireUninterruptibly();
                    // LOG.info("Checkpoint Thread Grabbed Semaphore");
                    try {
                        // create scan pattern
                        Path hdfsScanPath = new Path(CrawlEnvironment.getCrawlSegmentDataDirectory() + "/"
                                + _state.getCurrentCrawlNumber() + "/*/"
                                + CrawlEnvironment.buildCrawlSegmentLogCheckpointWildcardString(getHostName()));

                        // scan hdfs for log files
                        FileStatus candidates[];
                        try {
                            LOG.info("Checkpoint Thread Scanning For Candidates in:" + hdfsScanPath);
                            candidates = fs.globStatus(hdfsScanPath);

                            // iterate candidates
                            for (FileStatus candidate : candidates) {
                                // check candidate against processed path list ...
                                if (!_processedPaths.contains(candidate.getPath())) {
                                    int urlCountBeforeProcessing = _urlsProcessedSinceCheckpoint.get();
                                    // ok found a candidate we can work on
                                    LOG.info("Checkpoint Thread Found Candidate:" + candidate.getPath());
                                    final URLFPV2 placeHolderFP = new URLFPV2();
                                    CrawlSegmentLog.walkFingerprintsInLogFile(fs, candidate.getPath(),
                                            new CrawlSegmentLog.LogFileItemCallback() {

                                                @Override
                                                public void processItem(long domainHash, long urlFingerprint) {
                                                    placeHolderFP.setDomainHash(domainHash);
                                                    placeHolderFP.setUrlHash(urlFingerprint);
                                                    // add item for bloom filter
                                                    _bloomFilter.add(placeHolderFP);
                                                    // increment urls processed count ...
                                                    _urlsProcessedSinceCheckpoint.addAndGet(1);
                                                }
                                            });
                                    _processedPaths.add(candidate.getPath());
                                    LOG.info("Finished Processing Candidate:" + candidate.getPath());
                                }
                            }

                            // update scan time ...
                            _lastCheckpointScanTime = System.currentTimeMillis();

                            // see if we can do a full checkpoint ...
                            if (_lastCheckpointFlushTime == -1
                                    || System.currentTimeMillis() - _lastCheckpointFlushTime >= CHECKPOINT_FLUSH_INTERVAL) {

                                int approximateItemsToFlush = _urlsProcessedSinceCheckpoint.get();
                                // ok at this point we are ready to initialize a checkpoint
                                if (approximateItemsToFlush != 0) {

                                    Path checkpointMutexPath = getCheckpointMutexPath();

                                    if (fs.createNewFile(checkpointMutexPath)) {
                                        try {
                                            LOG.info("Checkpoint Thread Starting Checkpoint");

                                            // get the checkpoint path ...
                                            Path checkpointPath = getDataFileCheckpointPath();
                                            Path finalPath = getDataFileFinalPath();

                                            LOG.info("Checkpoint Thread Writing BloomFilter Data");
                                            // serialize the filter ...
                                            serializeBloomFilter(checkpointPath);

                                            LOG.info("Checkpoint Thread Deleting Old Checkpoint Data");
                                            // ok now everything seems to have gone fine ... delete existing data file
                                            fs.delete(finalPath);
                                            LOG.info("Checkpoint Thread ReWriting New Checkpoint Data");
                                            // rename checkpoint to final ...
                                            fs.rename(checkpointPath, finalPath);

                                            if (_state.getCurrentCheckpointState() != CrawlHistoryStatus.CheckpointState.TRANSITIONING) {
                                                LOG.info("Checkpoint Thread Deleting Processed Files");
                                                // ok safely delete all processed files
                                                for (Path processedFilePath : _processedPaths) {
                                                    fs.delete(processedFilePath);
                                                }
                                                _processedPaths.clear();
                                            } else {
                                                LOG.info("Skipping Processed Files Purge because we are in Transitioning State");
                                            }
                                            _urlsProcessedSinceCheckpoint.addAndGet(-approximateItemsToFlush);
                                        } finally {
                                            LOG.info("Checkpoint Thread Releasing Mutex:" + checkpointMutexPath);
                                            fs.delete(checkpointMutexPath, false);
                                        }
                                    } else {
                                        int delay = (int) (Math.random() * CHECKPOINT_MUTEX_ACQUISITON_DELAY);
                                        LOG.info("Checkpoint thread failed to acquire Mutex:" + checkpointMutexPath
                                                + " Waiting " + delay + "(MS) before retry");
                                        try {
                                            Thread.sleep(delay);
                                        } catch (InterruptedException e) {
                                        }
                                    }
                                }
                                // update last checkpoint time no matter what ...
                                _lastCheckpointFlushTime = System.currentTimeMillis();
                            }

                        } catch (IOException e) {
                            LOG.error("Checkpoint Thread Bloom Filter Checkpoint Failed with Exception:"
                                    + CCStringUtils.stringifyException(e));
                            try {
                                Thread.sleep(60000);
                            } catch (InterruptedException e1) {
                            }
                        }
                    } finally {
                        LOG.info("Checkpoint Thread Releasing Checkpoint Semaphore");
                        _checkpointThreadSemaphore.release();
                    }
                } else {
                    try {
                        // LOG.info("Checkpoint Thread IDLE");
                        Thread.sleep(100);
                    } catch (InterruptedException e) {
                    }
                }
            }
        }
    });
    _checkpointThread.start();
}
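Note the locking idiom in this example: because only one caller's createNewFile can succeed for a given path, the mutex file gates the checkpoint so that exactly one process performs it. A minimal sketch of that pattern, assuming a file system with atomic create such as HDFS (the lock path and the doCheckpoint method are hypothetical placeholders):

// Acquire an exclusive "lock" by atomically creating a marker file;
// the caller that wins the create does the work, everyone else backs off.
Path lockPath = new Path("/locks/checkpoint.lock"); // hypothetical
if (fs.createNewFile(lockPath)) {
    try {
        doCheckpoint(); // placeholder for the guarded work
    } finally {
        fs.delete(lockPath, false); // always release the lock
    }
} else {
    // another process holds the lock; retry later
}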