Example usage for org.apache.hadoop.fs FileSystem create

List of usage examples for org.apache.hadoop.fs FileSystem create

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.FileSystem.create.

Prototype

public FSDataOutputStream create(Path f) throws IOException 

Source Link

Document

Create an FSDataOutputStream at the indicated Path.
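
With the single-argument form, the file is overwritten if it already exists, and its contents are materialized once data is written to the returned stream and the stream is closed. Below is a minimal, self-contained sketch of the call; the configuration, path, and payload are illustrative only and are not taken from the examples that follow.

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path file = new Path("/tmp/create-example.txt"); // illustrative path
        // try-with-resources guarantees the stream is flushed and closed
        try (FSDataOutputStream out = fs.create(file)) {
            out.write("hello hdfs".getBytes(StandardCharsets.UTF_8));
        }
    }
}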

Usage

From source file:com.skp.experiment.cf.als.hadoop.SolveImplicitFeedbackMultithreadedMapper.java

License:Apache License

/** Create a file lock per datanode to prevent too many map tasks
 *  from running simultaneously on the same datanode. */
private void checkLock(Context ctx, int lockNums) throws InterruptedException, IOException {
    InetAddress thisIp = InetAddress.getLocalHost();
    String hostIp = thisIp.getHostAddress();

    // busy wait
    Configuration conf = ctx.getConfiguration();
    long totalSleep = 0;
    boolean haveLock = false;
    FileSystem fs = FileSystem.get(conf);
    while (haveLock == false) {
        for (int i = 0; i < lockNums; i++) {
            Path checkPath = new Path(lockPath, hostIp + "_" + i);
            if (fs.exists(checkPath) == false) {
                haveLock = true;
                currentLockPath = checkPath;
                BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(currentLockPath)));
                br.write(ctx.getTaskAttemptID().toString());
                br.close(); // flush and close so the lock file content is actually written
                break;
            }
        }
        if (haveLock == false) {
            Random random = new Random();
            int diff = 1000 + random.nextInt(1000) % 1000;
            totalSleep += diff + sleepPeriod;
            ctx.setStatus("sleeping: " + String.valueOf(totalSleep));
            Thread.sleep(sleepPeriod + diff);
        }
    }
}
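
Note that the fs.exists() check and the subsequent fs.create() above are not atomic, so two tasks on the same host can still race for the same lock file. If a stricter lock is needed, FileSystem#createNewFile(Path) only succeeds when the file does not already exist. A minimal sketch of that variant (the lockPath field and file-naming scheme are assumed to match the example above):

    // Sketch: atomic lock acquisition with createNewFile instead of exists() + create().
    // createNewFile returns false when the file already exists, so only one caller wins.
    private boolean tryAcquireLock(FileSystem fs, Path lockPath, String hostIp, int i) throws IOException {
        Path checkPath = new Path(lockPath, hostIp + "_" + i);
        return fs.createNewFile(checkPath);
    }

The lock owner could then record its task attempt ID by re-opening the acquired path with fs.create(checkPath), which overwrites the empty file.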

From source file:com.splicemachine.derby.stream.function.RowKeyGenerator.java

License:Open Source License

/**
 * Sort keys and output in HBase escaped string format
 * @throws IOException
 */
private void outputKeys() throws IOException {
    BufferedWriter br = null;
    try {
        Configuration conf = HConfiguration.unwrapDelegate();
        FileSystem fs = FileSystem.get(URI.create(bulkImportDirectory), conf);

        Collections.sort(keys, new Comparator<byte[]>() {
            @Override
            public int compare(byte[] o1, byte[] o2) {
                return Bytes.compareTo(o1, o2);
            }
        });
        long conglom = indexConglom == -1 ? heapConglom : indexConglom;
        Path path = new Path(bulkImportDirectory, Long.toString(conglom));
        Path outFile = new Path(path, "keys");
        FSDataOutputStream os = fs.create(outFile);
        br = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
        for (byte[] key : keys) {
            br.write(Bytes.toStringBinary(key) + "\n");
        }
        br.close();
        fileNames.add(outFile.toString());

    } finally {
        if (br != null)
            br.close();
    }
}

From source file:com.splicemachine.derby.stream.spark.SparkHBaseBulkImport.java

License:Open Source License

/**
 * Output cut points to files
 * @param cutPointsList
 * @throws IOException
 */
private void dumpCutPoints(List<Tuple2<Long, byte[][]>> cutPointsList) throws StandardException {

    BufferedWriter br = null;
    try {
        Configuration conf = HConfiguration.unwrapDelegate();
        FileSystem fs = FileSystem.get(URI.create(bulkImportDirectory), conf);

        for (Tuple2<Long, byte[][]> t : cutPointsList) {
            Long conglomId = t._1;

            Path path = new Path(bulkImportDirectory, conglomId.toString());
            FSDataOutputStream os = fs.create(new Path(path, "cutpoints"));
            br = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));

            byte[][] cutPoints = t._2;

            for (byte[] cutPoint : cutPoints) {
                br.write(Bytes.toStringBinary(cutPoint) + "\n");
            }
            br.close();
        }
    } catch (IOException e) {
        throw StandardException.plainWrapException(e);
    } finally {
        try {
            if (br != null)
                br.close();
        } catch (IOException e) {
            throw StandardException.plainWrapException(e);
        }
    }
}

From source file:com.splout.db.dnode.Fetcher.java

License:Open Source License

private File hdfsFetch(Path fromPath, Reporter reporter) throws IOException, InterruptedException {
    UUID uniqueId = UUID.randomUUID();
    File toFile = new File(tempDir, uniqueId.toString() + "/" + fromPath.getName());
    File toDir = new File(toFile.getParent());
    if (toDir.exists()) {
        FileUtils.deleteDirectory(toDir);
    }
    toDir.mkdirs();
    Path toPath = new Path(toFile.getCanonicalPath());

    FileSystem fS = fromPath.getFileSystem(hadoopConf);
    FileSystem tofS = FileSystem.getLocal(hadoopConf);

    Throttler throttler = new Throttler((double) bytesPerSecThrottle);
    try {
        for (FileStatus fStatus : fS.globStatus(fromPath)) {
            log.info("Copying " + fStatus.getPath() + " to " + toPath);
            long bytesSoFar = 0;

            FSDataInputStream iS = fS.open(fStatus.getPath());
            FSDataOutputStream oS = tofS.create(toPath);

            byte[] buffer = new byte[downloadBufferSize];

            int nRead;
            while ((nRead = iS.read(buffer, 0, buffer.length)) != -1) {
                // Needed to being able to be interrupted at any moment.
                if (Thread.interrupted()) {
                    iS.close();
                    oS.close();
                    cleanDirNoExceptions(toDir);
                    throw new InterruptedException();
                }
                bytesSoFar += nRead;
                oS.write(buffer, 0, nRead);
                throttler.incrementAndThrottle(nRead);
                if (bytesSoFar >= bytesToReportProgress) {
                    reporter.progress(bytesSoFar);
                    bytesSoFar = 0l;
                }
            }

            if (reporter != null) {
                reporter.progress(bytesSoFar);
            }

            oS.close();
            iS.close();
        }

        return toDir;
    } catch (ClosedByInterruptException e) {
        // This can be thrown by the method read.
        cleanDirNoExceptions(toDir);
        throw new InterruptedIOException();
    }
}
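
The manual read/write loop above exists to support throttling and interruption checks. When neither is needed, Hadoop's own copy helper is shorter; a sketch under that assumption (class and method names here are illustrative):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class SimpleHdfsCopy {
    /** Copy a single HDFS file to the local filesystem without throttling. */
    public static void copyToLocal(Configuration conf, Path src, Path dst) throws IOException {
        FileSystem srcFs = src.getFileSystem(conf);
        FileSystem localFs = FileSystem.getLocal(conf);
        try (FSDataInputStream in = srcFs.open(src); FSDataOutputStream out = localFs.create(dst)) {
            IOUtils.copyBytes(in, out, 4096, false); // buffer size is arbitrary; streams closed by try-with-resources
        }
    }
}

FileSystem#copyToLocalFile(Path, Path) achieves the same result in a single call when no custom stream handling is required at all.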

From source file:com.splout.db.dnode.TestFetcher.java

License:Open Source License

@Test
public void testHdfsFetching() throws IOException, URISyntaxException, InterruptedException {
    Configuration conf = new Configuration();
    FileSystem fS = FileSystem.getLocal(conf);

    SploutConfiguration testConfig = SploutConfiguration.getTestConfig();
    testConfig.setProperty(FetcherProperties.TEMP_DIR, "tmp-dir-" + TestFetcher.class.getName());
    Fetcher fetcher = new Fetcher(testConfig);

    Path path = new Path("tmp-" + TestFetcher.class.getName());
    OutputStream oS = fS.create(path);
    oS.write("This is what happens when you don't know what to write".getBytes());
    oS.close();

    File f = fetcher.fetch(new Path(fS.getWorkingDirectory(), path.getName()).toUri().toString());

    assertTrue(f.exists());
    assertTrue(f.isDirectory());

    File file = new File(f, "tmp-" + TestFetcher.class.getName());
    assertTrue(file.exists());

    assertEquals("This is what happens when you don't know what to write",
            Files.toString(file, Charset.defaultCharset()));

    fS.delete(path, true);
    FileUtils.deleteDirectory(f);
}

From source file:com.splout.db.dnode.TestFetcher.java

License:Open Source License

@Test
public void testHdfsFetchingInterrupted() throws IOException, URISyntaxException, InterruptedException {
    Configuration conf = new Configuration();
    final FileSystem fS = FileSystem.getLocal(conf);

    SploutConfiguration testConfig = SploutConfiguration.getTestConfig();
    testConfig.setProperty(FetcherProperties.TEMP_DIR, "tmp-dir-" + TestFetcher.class.getName());
    final Fetcher fetcher = new Fetcher(testConfig);

    final Path path = new Path("tmp-" + TestFetcher.class.getName());
    OutputStream oS = fS.create(path);
    oS.write("This is what happens when you don't know what to write".getBytes());
    oS.close();

    Thread t = new Thread() {
        @Override
        public void run() {
            try {
                try {
                    File f = fetcher
                            .fetch(new Path(fS.getWorkingDirectory(), path.getName()).toUri().toString());
                } catch (IOException e) {
                    e.printStackTrace();
                } catch (URISyntaxException e) {
                    e.printStackTrace();
                }
                fail("An InterruptedException was expected.");
            } catch (InterruptedException e) {
                // Everything good.
            }
        }
    };
    // We interrupt the thread before starting so we are sure that the interruption check
    // will be seen even if the file to copy is very small.
    t.interrupt();
    t.start();
}

From source file:com.splout.db.dnode.TestFetcher.java

License:Open Source License

@Test
public void testHdfsFetchingAndThrottling() throws IOException, URISyntaxException, InterruptedException {
    Configuration conf = new Configuration();
    FileSystem fS = FileSystem.getLocal(conf);

    SploutConfiguration testConfig = SploutConfiguration.getTestConfig();
    testConfig.setProperty(FetcherProperties.TEMP_DIR, "tmp-dir-" + TestFetcher.class.getName());
    testConfig.setProperty(FetcherProperties.DOWNLOAD_BUFFER, 4);
    testConfig.setProperty(FetcherProperties.BYTES_PER_SEC_THROTTLE, 8);
    Fetcher fetcher = new Fetcher(testConfig);

    final String str = "This is what happens when you don't know what to write";

    Path path = new Path("tmp-" + TestFetcher.class.getName());
    OutputStream oS = fS.create(path);
    oS.write(str.getBytes());
    oS.close();

    long startTime = System.currentTimeMillis();
    File f = fetcher.fetch(new Path(fS.getWorkingDirectory(), path.getName()).toUri().toString());
    long endTime = System.currentTimeMillis();

    double bytesPerSec = (str.getBytes().length / (double) (endTime - startTime)) * 1000;
    assertEquals(8, bytesPerSec, 0.5);

    assertTrue(f.exists());
    assertTrue(f.isDirectory());

    File file = new File(f, "tmp-" + TestFetcher.class.getName());
    assertTrue(file.exists());

    assertEquals(str, Files.toString(file, Charset.defaultCharset()));

    fS.delete(path, true);
    FileUtils.deleteDirectory(f);
}

From source file:com.streamsets.pipeline.stage.destination.hdfs.HdfsTarget.java

License:Apache License

boolean validateHadoopDir(String configName, String dirPathTemplate, List<ConfigIssue> issues) {
    boolean ok;
    if (!dirPathTemplate.startsWith("/")) {
        issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName, Errors.HADOOPFS_40));
        ok = false;
    } else {
        int firstEL = dirPathTemplate.indexOf("$");
        if (firstEL > -1) {
            int lastDir = dirPathTemplate.lastIndexOf("/", firstEL);
            dirPathTemplate = dirPathTemplate.substring(0, lastDir);
        }
        dirPathTemplate = (dirPathTemplate.isEmpty()) ? "/" : dirPathTemplate;
        try {
            Path dir = new Path(dirPathTemplate);
            FileSystem fs = getFileSystemForInitDestroy();
            if (!fs.exists(dir)) {
                try {
                    if (fs.mkdirs(dir)) {
                        ok = true;
                    } else {
                        issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName,
                                Errors.HADOOPFS_41));
                        ok = false;
                    }
                } catch (IOException ex) {
                    issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName,
                            Errors.HADOOPFS_42, ex.toString()));
                    ok = false;
                }
            } else {
                try {
                    Path dummy = new Path(dir, "_sdc-dummy-" + UUID.randomUUID().toString());
                    fs.create(dummy).close();
                    fs.delete(dummy, false);
                    ok = true;
                } catch (IOException ex) {
                    issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName,
                            Errors.HADOOPFS_43, ex.toString()));
                    ok = false;
                }
            }
        } catch (Exception ex) {
            LOG.info("Validation Error: " + Errors.HADOOPFS_44.getMessage(), ex.toString(), ex);
            issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName, Errors.HADOOPFS_44,
                    ex.toString()));
            ok = false;
        }
    }
    return ok;
}
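
The "_sdc-dummy-..." file created above is a write-access probe: creating and then deleting a throwaway file via fs.create() confirms the directory is writable before the pipeline starts. On Hadoop 2.6 and later, FileSystem#access can express a similar permission check without creating a file; a sketch of that alternative (illustrative, not part of the original class):

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.security.AccessControlException;

public class WriteAccessCheck {
    /** Returns true if the current user may write to dir; other failures propagate as IOException. */
    public static boolean canWrite(FileSystem fs, Path dir) throws IOException {
        try {
            fs.access(dir, FsAction.WRITE); // throws AccessControlException when write access is denied
            return true;
        } catch (AccessControlException e) {
            return false;
        }
    }
}

The dummy-file probe remains the more direct test on filesystems that do not enforce permissions, since access() only consults metadata while create() exercises the actual write path.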

From source file:com.streamsets.pipeline.stage.destination.hdfs.HdfsTargetConfigBean.java

License:Apache License

private boolean validateHadoopDir(final Stage.Context context, final String configName,
        final String configGroup, String dirPathTemplate, final List<Stage.ConfigIssue> issues) {
    final AtomicBoolean ok = new AtomicBoolean(true);
    if (!dirPathTemplate.startsWith("/")) {
        issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_40));
        ok.set(false);
    } else {
        dirPathTemplate = (dirPathTemplate.isEmpty()) ? "/" : dirPathTemplate;
        try {
            final Path dir = new Path(dirPathTemplate);
            final FileSystem fs = getFileSystemForInitDestroy();
            getUGI().doAs(new PrivilegedExceptionAction<Void>() {
                @Override
                public Void run() throws Exception {
                    if (!fs.exists(dir)) {
                        try {
                            if (fs.mkdirs(dir)) {
                                ok.set(true);
                            } else {
                                issues.add(
                                        context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_41));
                                ok.set(false);
                            }
                        } catch (IOException ex) {
                            issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_42,
                                    ex.toString()));
                            ok.set(false);
                        }
                    } else {
                        try {
                            Path dummy = new Path(dir, "_sdc-dummy-" + UUID.randomUUID().toString());
                            fs.create(dummy).close();
                            fs.delete(dummy, false);
                            ok.set(true);
                        } catch (IOException ex) {
                            issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_43,
                                    ex.toString()));
                            ok.set(false);
                        }
                    }
                    return null;
                }
            });
        } catch (Exception ex) {
            issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_44, ex.toString()));
            ok.set(false);
        }
    }
    return ok.get();
}

From source file:com.streamsets.pipeline.stage.lib.hive.HiveMetastoreUtil.java

License:Apache License

/**
 * Returns the HDFS path where the Avro schema is stored after serialization.
 * The current time is appended to the path so that schema files have an ordering.
 * @param rootTableLocation Root Table Location
 * @return HDFS path string.
 */
public static String serializeSchemaToHDFS(UserGroupInformation loginUGI, final FileSystem fs,
        final String rootTableLocation, final String schemaJson) throws StageException {
    final String folderPath = rootTableLocation + HiveMetastoreUtil.SEP
            + HiveMetastoreUtil.HDFS_SCHEMA_FOLDER_NAME;
    final Path schemasFolderPath = new Path(folderPath);
    final String path = folderPath + SEP + HiveMetastoreUtil.AVRO_SCHEMA
            + DateFormatUtils.format(new Date(System.currentTimeMillis()), "yyyy-MM-dd--HH_mm_ss");
    try {
        loginUGI.doAs(new PrivilegedExceptionAction<Void>() {
            @Override
            public Void run() throws Exception {
                if (!fs.exists(schemasFolderPath)) {
                    fs.mkdirs(schemasFolderPath);
                }
                Path schemaFilePath = new Path(path);
                // This will never happen unless two HMS targets are writing; we error out
                // and let the user handle it via error record handling.
                if (!fs.exists(schemaFilePath)) {
                    try (FSDataOutputStream os = fs.create(schemaFilePath)) {
                        os.writeChars(schemaJson);
                    }
                } else {
                    LOG.error(Utils.format("Already schema file {} exists in HDFS", path));
                    throw new IOException("Already schema file exists");
                }
                return null;
            }
        });
    } catch (Exception e) {
        LOG.error("Error in Writing Schema to HDFS: " + e.toString(), e);
        throw new StageException(Errors.HIVE_18, path, e.getMessage());
    }
    return path;
}