Example usage for org.apache.hadoop.fs FileSystem createNewFile

Introduction

This page collects usage examples for the org.apache.hadoop.fs.FileSystem method createNewFile.

Prototype

public boolean createNewFile(Path f) throws IOException 

Document

Creates the given Path as a brand-new zero-length file, returning true on success and false if the file already exists or creation fails.
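
A minimal sketch of calling it directly (the path below is purely illustrative):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateNewFileExample {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        Path marker = new Path("/tmp/example/_MARKER"); // hypothetical path
        if (fs.createNewFile(marker)) {
            System.out.println("Created zero-length file " + marker);
        } else {
            System.out.println(marker + " already exists (or create failed)");
        }
    }
}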

Usage

From source file:org.apache.oozie.action.hadoop.TestFsActionExecutor.java

License:Apache License

public void testSubmitWithNameNode() throws Exception {
    FsActionExecutor ae = new FsActionExecutor();
    FileSystem fs = getFileSystem();

    Path mkdir = new Path(getFsTestCaseDir(), "mkdir");
    Path mkdirX = new Path(mkdir.toUri().getPath());
    Path delete = new Path(getFsTestCaseDir(), "delete");
    Path deleteX = new Path(delete.toUri().getPath());
    fs.mkdirs(delete);
    Path source = new Path(getFsTestCaseDir(), "source");
    Path sourceX = new Path(source.toUri().getPath());
    fs.mkdirs(source);
    Path target = new Path(new Path(getFsTestCaseDir(), "target").toUri().getPath());
    Path chmod1 = new Path(getFsTestCaseDir(), "chmod1");
    Path chmod1X = new Path(chmod1.toUri().getPath());
    fs.mkdirs(chmod1);
    Path child1 = new Path(chmod1, "child1");
    fs.mkdirs(child1);
    Path chmod2 = new Path(getFsTestCaseDir(), "chmod2");
    Path chmod2X = new Path(chmod2.toUri().getPath());
    fs.mkdirs(chmod2);
    Path child2 = new Path(chmod2, "child2");
    fs.mkdirs(child2);
    // note: "mkdir + ..." is string concatenation, so these resolve to sibling
    // paths named mkdirnewFile1 / mkdirnewFile2 rather than children of mkdir
    Path newFile1 = new Path(mkdir + "newFile1");
    Path newFile1X = new Path(newFile1.toUri().getPath());
    Path newFile2 = new Path(mkdir + "newFile2");
    Path newFile2X = new Path(newFile2.toUri().getPath());
    fs.createNewFile(newFile1); // pre-created here, then touched again by the <touchz> in the action XML

    String actionXml = MessageFormat.format(
            "<fs><name-node>{0}</name-node>" + "<mkdir path=''{1}''/>" + "<delete path=''{2}''/>"
                    + "<move source=''{3}'' target=''{4}''/>"
                    + "<chmod path=''{5}'' permissions=''-rwxrwxrwx''/>"
                    + "<chmod path=''{6}'' permissions=''-rwxrwx---'' dir-files=''false''/>"
                    + "<touchz path=''{7}''/>" + "<touchz path=''{8}''/>" + "</fs>",
            getNameNodeUri(), mkdirX, deleteX, sourceX, target, chmod1X, chmod2X, newFile1X, newFile2X);

    Context context = createContext(actionXml);
    WorkflowAction action = context.getAction();

    assertFalse(fs.exists(ae.getRecoveryPath(context)));

    ae.start(context, action);

    assertTrue(fs.exists(ae.getRecoveryPath(context)));

    ae.check(context, context.getAction());
    assertEquals("OK", context.getAction().getExternalStatus());
    assertNull(context.getAction().getData());
    ae.end(context, context.getAction());
    assertEquals(WorkflowAction.Status.OK, context.getAction().getStatus());

    assertFalse(fs.exists(ae.getRecoveryPath(context)));

    assertTrue(fs.exists(mkdir));
    assertFalse(fs.exists(delete));
    assertFalse(fs.exists(source));
    assertTrue(fs.exists(target));
    assertTrue(fs.exists(newFile1));
    assertTrue(fs.exists(newFile2));

    assertEquals("rwxrwxrwx", fs.getFileStatus(chmod1).getPermission().toString());
    assertNotSame("rwxrwxrwx", fs.getFileStatus(child1).getPermission().toString());
    assertEquals("rwxrwx---", fs.getFileStatus(chmod2).getPermission().toString());
    assertNotSame("rwxrwx---", fs.getFileStatus(child2).getPermission().toString());

}

From source file:org.apache.oozie.action.hadoop.TestFsActionExecutor.java

License:Apache License

private void createTestDirForChgrp(Path basePath, FileSystem fs) throws Exception {
    String testUser = getTestUser();
    String testGroup = getTestGroup();
    fs.mkdirs(basePath);
    fs.mkdirs(new Path(basePath, "10"));
    fs.mkdirs(new Path(basePath + "/10/dir1"));
    fs.createNewFile(new Path(basePath + "/10/dir1/file1"));
    fs.mkdirs(new Path(basePath + "/10/dir2"));
    fs.mkdirs(new Path(basePath, "11"));
    fs.mkdirs(new Path(basePath + "/11/dir3"));
    fs.mkdirs(new Path(basePath, "12"));

    fs.setOwner(new Path(basePath, "10"), testUser, testGroup);
    fs.setOwner(new Path(basePath + "/10/dir1"), testUser, testGroup);
    fs.setOwner(new Path(basePath + "/10/dir1/file1"), testUser, testGroup);
    fs.setOwner(new Path(basePath + "/10/dir2"), testUser, testGroup);
    fs.setOwner(new Path(basePath, "11"), testUser, testGroup);
    fs.setOwner(new Path(basePath + "/11/dir3"), testUser, testGroup);
    fs.setOwner(new Path(basePath, "12"), testUser, testGroup);
}

From source file:org.apache.oozie.action.hadoop.TestPyspark.java

License:Apache License

/**
 * @param listLibFiles list of files to be created in the workflow lib/ directory
 * @return the configured WorkflowJobBean
 * @throws Exception
 */
protected WorkflowJobBean getWorkflow(ArrayList<String> listLibFiles) throws Exception {
    // add the example file as well
    listLibFiles.add(PI_EXAMPLE);
    String[] libPaths = new String[listLibFiles.size()];
    FileSystem fs = getFileSystem();
    for (int i = 0; i < listLibFiles.size(); i++) {
        libPaths[i] = new Path("lib/" + listLibFiles.get(i)).toString();
        if (listLibFiles.get(i).equals(PY4J_ZIP) || listLibFiles.get(i).equals(PYSPARK_ZIP)
                || listLibFiles.get(i).equals(PI_EXAMPLE)) {
            IOUtils.copyStream(IOUtils.getResourceAsStream(listLibFiles.get(i), -1),
                    fs.create(new Path(getAppPath(), "lib/" + listLibFiles.get(i))));
        } else {
            fs.createNewFile(new Path(getAppPath(), "lib/" + listLibFiles.get(i)));
        }
    }
    XConfiguration protoConf = new XConfiguration();
    protoConf.set(WorkflowAppService.HADOOP_USER, getTestUser());
    SharelibUtils.addToDistributedCache("spark", getFileSystem(), getFsTestCaseDir(), protoConf);
    WorkflowJobBean wf = createBaseWorkflow(protoConf, "spark-action");
    String defaultProtoConf = wf.getProtoActionConf();
    XConfiguration newProtoConf = new XConfiguration(new StringReader(defaultProtoConf));
    newProtoConf.setStrings(WorkflowAppService.APP_LIB_PATH_LIST, libPaths);
    wf.setProtoActionConf(newProtoConf.toXmlString());
    return wf;
}
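
Note the contrast in the loop above: fs.create(path) opens an output stream for writing real content, while fs.createNewFile(path) merely materializes an empty placeholder. A hedged sketch of the distinction, assuming an in-scope FileSystem fs (paths and payload are illustrative):

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.fs.FSDataOutputStream;

// fs.create: open a stream and write bytes
byte[] payload = "example".getBytes(StandardCharsets.UTF_8);
try (FSDataOutputStream out = fs.create(new Path("lib/real.zip"))) {
    out.write(payload);
}
// fs.createNewFile: zero-length placeholder, no stream involved
fs.createNewFile(new Path("lib/placeholder.py"));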

From source file:org.apache.pig.test.TestPigServer.java

License:Apache License

private static void createFakeJarFile(String location, String name, FileSystem fs) throws IOException {
    System.err.println("Location: " + location + " name: " + name);
    Path dir = new Path(location);
    fs.mkdirs(dir);

    assertTrue(fs.createNewFile(new Path(dir, name)));
}
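
The assertTrue above leans on createNewFile's return value: it returns true only when the file did not exist and was created. Calling it twice makes that visible (file name illustrative):

assertTrue(fs.createNewFile(new Path(dir, "fake.jar")));  // first call: file created
assertFalse(fs.createNewFile(new Path(dir, "fake.jar"))); // second call: already exists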

From source file:org.apache.sysml.runtime.io.WriterTextCSV.java

License:Apache License

@SuppressWarnings("unchecked")
public final void addHeaderToCSV(String srcFileName, String destFileName, long rlen, long clen)
        throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path srcFilePath = new Path(srcFileName);
    Path destFilePath = new Path(destFileName);
    FileSystem fs = IOUtilFunctions.getFileSystem(srcFilePath, conf);

    if (!_props.hasHeader()) {
        // simply move srcFile to destFile

        /*
         * TODO: Remove this roundabout way! 
         * For example: destFilePath = /user/biadmin/csv/temp/out/file.csv 
         *              & the only path that exists already on HDFS is /user/biadmin/csv/.
         * In this case: the directory structure /user/biadmin/csv/temp/out must be created. 
         * Simple hdfs.rename() does not seem to create this directory structure.
         */

        // delete the destination file, if exists already
        fs.delete(destFilePath, true);

        // Create /user/biadmin/csv/temp/out/file.csv so that ..../temp/out/ is created.
        fs.createNewFile(destFilePath);

        // delete the file "file.csv" but preserve the directory structure /user/biadmin/csv/temp/out/
        fs.delete(destFilePath, true);

        // finally, move the data to destFilePath = /user/biadmin/csv/temp/out/file.csv
        fs.rename(srcFilePath, destFilePath);

        return;
    }

    // construct the header line
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < clen; i++) {
        sb.append("C" + (i + 1));
        if (i < clen - 1)
            sb.append(_props.getDelim());
    }
    sb.append('\n');

    if (fs.isDirectory(srcFilePath)) {

        // compute sorted order among part files
        ArrayList<Path> files = new ArrayList<>();
        for (FileStatus stat : fs.listStatus(srcFilePath, CSVReblockMR.hiddenFileFilter))
            files.add(stat.getPath());
        Collections.sort(files);

        // first part file path
        Path firstpart = files.get(0);

        // create a temp file, and add header and contents of first part
        Path tmp = new Path(firstpart.toString() + ".tmp");
        OutputStream out = fs.create(tmp, true);
        out.write(sb.toString().getBytes());
        sb.setLength(0);

        // copy rest of the data from firstpart
        InputStream in = null;
        try {
            in = fs.open(firstpart);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }

        // rename tmp to firstpart
        fs.delete(firstpart, true);
        fs.rename(tmp, firstpart);

        // rename srcfile to destFile
        fs.delete(destFilePath, true);
        fs.createNewFile(destFilePath); // force the creation of directory structure
        fs.delete(destFilePath, true); // delete the file, but preserve the directory structure
        fs.rename(srcFilePath, destFilePath); // move the data 

    } else if (fs.isFile(srcFilePath)) {
        // create destination file
        OutputStream out = fs.create(destFilePath, true);

        // write header
        out.write(sb.toString().getBytes());
        sb.setLength(0);

        // copy the data from srcFile
        InputStream in = null;
        try {
            in = fs.open(srcFilePath);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(srcFilePath.toString() + ": No such file or directory");
    }
}
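
The create-then-delete sequence above exists only to materialize destFilePath's parent directories before the rename, since a bare rename will not create them. A simpler equivalent, assuming the same FileSystem handle, is to create the parents directly:

// ensure /user/biadmin/csv/temp/out/ exists, then move the data
fs.mkdirs(destFilePath.getParent());
fs.rename(srcFilePath, destFilePath);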

From source file:org.apache.tez.common.TestReflectionUtils.java

License:Apache License

@Test(timeout = 5000)
public void testAddResourceToClasspath() throws IOException, TezException {

    String rsrcName = "dummyfile.xml";
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    Path p = new Path(rsrcName);
    p = localFs.makeQualified(p);

    localFs.delete(p, false);

    try {
        URL loadedUrl = null;

        loadedUrl = Thread.currentThread().getContextClassLoader().getResource(rsrcName);
        assertNull(loadedUrl);

        // Add parent to classpath since we're not adding a jar
        assertTrue(localFs.createNewFile(p));
        String urlForm = p.toUri().toURL().toString();
        urlForm = urlForm.substring(0, urlForm.lastIndexOf('/') + 1);
        URL url = new URL(urlForm);

        ReflectionUtils.addResourcesToClasspath(Collections.singletonList(url));

        loadedUrl = Thread.currentThread().getContextClassLoader().getResource(rsrcName);

        assertNotNull(loadedUrl);
    } finally {
        localFs.delete(p, false);
    }
}

From source file:org.apache.tez.runtime.TestReflectionUtils.java

License:Apache License

@Test
public void testAddResourceToClasspath() throws IOException, TezException {

    String rsrcName = "dummyfile.xml";
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    Path p = new Path(rsrcName);
    p = localFs.makeQualified(p);

    localFs.delete(p, false);

    try {
        URL loadedUrl = null;

        loadedUrl = Thread.currentThread().getContextClassLoader().getResource(rsrcName);
        assertNull(loadedUrl);

        // Add parent to classpath since we're not adding a jar
        assertTrue(localFs.createNewFile(p));
        String urlForm = p.toUri().toURL().toString();
        urlForm = urlForm.substring(0, urlForm.lastIndexOf('/') + 1);
        URL url = new URL(urlForm);

        ReflectionUtils.addResourcesToClasspath(Collections.singletonList(url));

        loadedUrl = Thread.currentThread().getContextClassLoader().getResource(rsrcName);

        assertNotNull(loadedUrl);
    } finally {
        localFs.delete(p, false);
    }
}

From source file:org.apache.tez.runtime.TestRuntimeUtils.java

License:Apache License

@Test
public void testAddResourceToClasspath() throws IOException, TezException {

    String rsrcName = "dummyfile.xml";
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    Path p = new Path(rsrcName);
    p = localFs.makeQualified(p);

    localFs.delete(p, false);

    try {
        URL loadedUrl = null;

        loadedUrl = Thread.currentThread().getContextClassLoader().getResource(rsrcName);
        assertNull(loadedUrl);

        // Add parent to classpath since we're not adding a jar
        assertTrue(localFs.createNewFile(p));
        String urlForm = p.toUri().toURL().toString();
        urlForm = urlForm.substring(0, urlForm.lastIndexOf('/') + 1);
        URL url = new URL(urlForm);

        RuntimeUtils.addResourcesToClasspath(Collections.singletonList(url));

        loadedUrl = Thread.currentThread().getContextClassLoader().getResource(rsrcName);

        assertNotNull(loadedUrl);
    } finally {
        localFs.delete(p, false);
    }
}

From source file:org.commoncrawl.service.crawler.CrawlLog.java

License:Open Source License

private void purgeHDFSSegmentLogs(FileSystem hdfs, int listId, int segmentId) throws IOException {

    Path listLogDirectory = new Path(CrawlEnvironment.getCrawlSegmentDataDirectory(),
            ((Integer) listId).toString());
    Path segmentLogDirectory = new Path(listLogDirectory, ((Integer) segmentId).toString());
    Path completionLogFilePath = new Path(segmentLogDirectory,
            CrawlEnvironment.buildCrawlSegmentCompletionLogFileName(getNodeName()));

    if (!hdfs.exists(completionLogFilePath)) {
        // create a zero length completion log file on hdfs ...
        hdfs.createNewFile(completionLogFilePath);
    }

    // skip this step as history servers now manage segment logs
    /*
     * // and now ... delete all logs Path segmentLogWildcardPath = new
     * Path(segmentLogDirectory
     * ,CrawlEnvironment.buildCrawlSegmentLogCheckpointWildcardString
     * (getNodeName())); FileStatus paths[] =
     * hdfs.globStatus(segmentLogWildcardPath); if (paths != null) { for
     * (FileStatus path : paths) { // hdfs.delete(path.getPath()); } }
     */
}
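
createNewFile is a natural fit for zero-length marker files like the completion log above. A minimal sketch of the exists-then-touch pattern (names illustrative); the exists check is only an optimization, since createNewFile simply returns false when the file is already there:

Path marker = new Path(segmentLogDirectory, "_COMPLETED"); // hypothetical sentinel
if (!fs.exists(marker)) {
    fs.createNewFile(marker); // zero-length file visible to other readers
}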

From source file:org.commoncrawl.service.crawlhistory.CrawlHistoryServer.java

License:Open Source License

private void startCheckpointThread(final FileSystem fs) {

    _checkpointThread = new Thread(new Runnable() {

        @Override
        public void run() {

            // ok, the checkpoint thread runs in perpetuity
            while (!_shutdownFlag) {

                if (_lastCheckpointScanTime == -1 || _lastCheckpointFlushTime == -1
                        || (System.currentTimeMillis() - _lastCheckpointScanTime) >= CHECKPOINT_SCAN_INTERVAL
                        || (System.currentTimeMillis()
                                - _lastCheckpointFlushTime) >= CHECKPOINT_FLUSH_INTERVAL) {

                    //LOG.info("Checkpoint Thread Grabbing Semaphore");
                    // grab checkpoint thread semaphore 
                    _checkpointThreadSemaphore.acquireUninterruptibly();
                    //LOG.info("Checkpoint Thread Grabbed Semaphore");

                    try {
                        // create scan pattern 
                        Path hdfsScanPath = new Path(CrawlEnvironment.getCrawlSegmentDataDirectory() + "/"
                                + _state.getCurrentCrawlNumber() + "/*/"
                                + CrawlEnvironment.buildCrawlSegmentLogCheckpointWildcardString(getHostName()));

                        // scan hdfs for log files
                        FileStatus candidates[];
                        try {
                            LOG.info("Checkpoint Thread Scanning For Cadnidates in:" + hdfsScanPath);
                            candidates = fs.globStatus(hdfsScanPath);

                            // iterate candidates 
                            for (FileStatus candidate : candidates) {

                                // check candidate against processed path list ... 
                                if (!_processedPaths.contains(candidate.getPath())) {
                                    int urlCountBeforeProcessing = _urlsProcessedSinceCheckpoint.get();
                                    // ok found a candidate we can work on 
                                    LOG.info("Checkpoint Thread Found Candidate:" + candidate.getPath());
                                    final URLFPV2 placeHolderFP = new URLFPV2();
                                    CrawlSegmentLog.walkFingerprintsInLogFile(fs, candidate.getPath(),
                                            new CrawlSegmentLog.LogFileItemCallback() {

                                                @Override
                                                public void processItem(long domainHash, long urlFingerprint) {
                                                    placeHolderFP.setDomainHash(domainHash);
                                                    placeHolderFP.setUrlHash(urlFingerprint);
                                                    // add item for bloom filter 
                                                    _bloomFilter.add(placeHolderFP);
                                                    // increment urls processed count ...
                                                    _urlsProcessedSinceCheckpoint.addAndGet(1);
                                                }
                                            });
                                    _processedPaths.add(candidate.getPath());
                                    LOG.info("Finished Processing Candidate:" + candidate.getPath());
                                }
                            }

                            // update scan time ... 
                            _lastCheckpointScanTime = System.currentTimeMillis();

                            // see if can do a full checkpoint ... 
                            if (_lastCheckpointFlushTime == -1 || System.currentTimeMillis()
                                    - _lastCheckpointFlushTime >= CHECKPOINT_FLUSH_INTERVAL) {

                                int approximateItemsToFlush = _urlsProcessedSinceCheckpoint.get();
                                // ok, at this point we are ready to initiate a checkpoint
                                if (approximateItemsToFlush != 0) {

                                    Path checkpointMutexPath = getCheckpointMutexPath();

                                    if (fs.createNewFile(checkpointMutexPath)) {
                                        try {
                                            LOG.info("Checkpoint Thread Starting Checkpoint");

                                            // get the checkpoint path ... 
                                            Path checkpointPath = getDataFileCheckpointPath();
                                            Path finalPath = getDataFileFinalPath();

                                            LOG.info("Checkpoint Thread Writing BloomFilter Data");
                                            // serialize the filter ... 
                                            serializeBloomFilter(checkpointPath);

                                            LOG.info("Checkpoint Thread Deleting Old Checkpoint Data");
                                            // ok now everything seems to have gone fine ... delete existing data file 
                                            fs.delete(finalPath);
                                            LOG.info("Checkpoint Thread ReWriting New Checkpoint Data");
                                            // rename checkpoint to final ... 
                                            fs.rename(checkpointPath, finalPath);

                                            if (_state
                                                    .getCurrentCheckpointState() != CrawlHistoryStatus.CheckpointState.TRANSITIONING) {
                                                LOG.info("Checkpoint Thread Deleting Processed Files");
                                                // ok safely delete all processed files
                                                for (Path processedFilePath : _processedPaths) {
                                                    fs.delete(processedFilePath);
                                                }
                                                _processedPaths.clear();
                                            } else {
                                                LOG.info(
                                                        "Skipping Processed Files Purge because we are in Transitioning State");
                                            }
                                            _urlsProcessedSinceCheckpoint.addAndGet(-approximateItemsToFlush);
                                        } finally {
                                            LOG.info(
                                                    "Checkpoint Thread Releasing Mutex:" + checkpointMutexPath);
                                            fs.delete(checkpointMutexPath, false);
                                        }
                                    } else {
                                        int delay = (int) (Math.random() * CHECKPOINT_MUTEX_ACQUISITON_DELAY);
                                        LOG.info("Checkpoint thread failed to acquire Mutex:"
                                                + checkpointMutexPath + " Waiting " + delay
                                                + "(MS) before retry");
                                        try {
                                            Thread.sleep(delay);
                                        } catch (InterruptedException e) {
                                        }
                                    }
                                }
                                // update last checkpoint flush time no matter what ...
                                _lastCheckpointFlushTime = System.currentTimeMillis();
                            }

                        } catch (IOException e) {
                            LOG.error("Checkpoint Thread Bloom Filter Checkpoint Failed with Exception:"
                                    + CCStringUtils.stringifyException(e));
                            try {
                                Thread.sleep(60000);
                            } catch (InterruptedException e1) {
                            }
                        }
                    } finally {
                        LOG.info("Checkpoint Thread Releasing Checkpoint Semaphore");
                        _checkpointThreadSemaphore.release();
                    }
                } else {
                    try {
                        //LOG.info("Checkpoint Thread IDLE");
                        Thread.sleep(100);
                    } catch (InterruptedException e) {
                    }
                }
            }

        }

    });
    _checkpointThread.start();
}
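
Because createNewFile will not overwrite an existing file (on HDFS the check-and-create is atomic), the checkpoint thread above can use it as a crude distributed mutex: whichever process creates the file first wins, and everyone else backs off. A minimal sketch of that pattern, assuming a shared FileSystem and an agreed-on lock path:

Path lock = new Path("/locks/checkpoint.lock"); // illustrative lock path
if (fs.createNewFile(lock)) {
    try {
        // critical section: only the process that created the file gets here
    } finally {
        fs.delete(lock, false); // release by removing the lock file
    }
} else {
    // another process holds the lock; sleep and retry later
}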