Example usage for org.apache.hadoop.fs FileSystem delete

Introduction

On this page you can find example usage of org.apache.hadoop.fs FileSystem delete.

Prototype

public abstract boolean delete(Path f, boolean recursive) throws IOException;

Document

Delete a file. If the path is a directory and recursive is true, the directory and its contents are deleted; with recursive set to false, deleting a non-empty directory throws an IOException. Returns true if the delete succeeded and false otherwise.
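
Below is a minimal sketch of a direct call, for orientation only. It is not taken from any of the projects listed under Usage; the class name FileSystemDeleteSketch, the helper method deleteIfExists, and the location argument are illustrative assumptions.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemDeleteSketch {
    /** Deletes the given location if it exists; returns the result of delete(). Illustrative only. */
    public static boolean deleteIfExists(String location) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path(location);
        FileSystem fs = path.getFileSystem(conf);

        if (fs.exists(path)) {
            // recursive = true removes a directory together with its contents;
            // with recursive = false, deleting a non-empty directory throws an IOException.
            return fs.delete(path, true);
        }
        return false;
    }
}

Most of the examples below follow this exists-then-delete pattern to clear stale output before writing, or pass recursive = false to remove a single partially written file during rollback.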

Usage

From source file:com.facebook.presto.hive.RcFileFileWriterFactory.java

License:Apache License

@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames,
        StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session) {
    if (!HiveSessionProperties.isRcfileOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }

    if (!RCFileOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }

    RcFileEncoding rcFileEncoding;
    if (LazyBinaryColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = new BinaryRcFileEncoding();
    } else if (ColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = createTextVectorEncoding(schema, hiveStorageTimeZone);
    } else {
        return Optional.empty();
    }

    Optional<String> codecName = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC));

    // existing tables and partitions may have columns in a different order than the writer is providing, so build
    // an index to rearrange columns in the proper order
    List<String> fileColumnNames = Splitter.on(',').trimResults().omitEmptyStrings()
            .splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));
    List<Type> fileColumnTypes = toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream()
            .map(hiveType -> hiveType.getType(typeManager)).collect(toList());

    int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();

    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        OutputStream outputStream = fileSystem.create(path);

        Optional<Supplier<RcFileDataSource>> validationInputFactory = Optional.empty();
        if (HiveSessionProperties.isRcfileOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsRcFileDataSource(path.toString(), fileSystem.open(path),
                            fileSystem.getFileStatus(path).getLen(), stats);
                } catch (IOException e) {
                    throw new PrestoException(HIVE_WRITE_VALIDATION_FAILED, e);
                }
            });
        }

        Callable<Void> rollbackAction = () -> {
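            // Best-effort cleanup of the partially written output file on rollback;
            // recursive = false because path refers to a single file, not a directory.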
            fileSystem.delete(path, false);
            return null;
        };

        return Optional.of(new RcFileFileWriter(outputStream, rollbackAction, rcFileEncoding, fileColumnTypes,
                codecName, fileInputColumnIndexes,
                ImmutableMap.<String, String>builder()
                        .put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString())
                        .put(HiveMetadata.PRESTO_QUERY_ID_NAME, session.getQueryId()).build(),
                validationInputFactory));
    } catch (Exception e) {
        throw new PrestoException(HIVE_WRITER_OPEN_ERROR, "Error creating RCFile file", e);
    }
}

From source file:com.fanlehai.hadoop.serialize.json.multiline.ExampleJob.java

License:Apache License

/**
 * Writes the contents of {@link #JSON} into a file in the job input
 * directory in HDFS.
 *
 * @param conf
 *            the Hadoop config
 * @param inputDir
 *            the HDFS input directory where we'll write a file
 * @throws IOException
 *             if something goes wrong
 */
public static void writeInput(Configuration conf, Path inputDir) throws IOException {
    FileSystem fs = FileSystem.get(conf);

    if (fs.exists(inputDir)) {
        // throw new IOException(String.format(
        //         "Input directory '%s' exists - please remove and rerun this example", inputDir));
        fs.delete(inputDir, true);
    }

    OutputStreamWriter writer = new OutputStreamWriter(fs.create(new Path(inputDir, "input.txt")));
    writer.write(JSON);
    IOUtils.closeStream(writer);
}

From source file:com.finderbots.miner.MinerTool.java

License:Apache License

static void setupWorkingDir(FileSystem fs, Path workingDirPath, String seedUrlsfileName) throws Exception {

    // Check if we already have a crawldb
    Path crawlDbPath = null;
    Path loopDirPath = CrawlDirUtils.findLatestLoopDir(fs, workingDirPath);
    if (loopDirPath != null) {//todo: depending on -overwritecrawl erase mined data OR everything. For now everything
        // Clear out any previous loop directory, so we're always starting from scratch
        LOGGER.info("deleting existing working dir");
        while (loopDirPath != null) {
            fs.delete(loopDirPath, true);
            loopDirPath = CrawlDirUtils.findLatestLoopDir(fs, workingDirPath);
        }
    }

    // Create a "0-<timestamp>" loop sub-directory and import the seed urls
    loopDirPath = CrawlDirUtils.makeLoopDir(fs, workingDirPath, 0);
    crawlDbPath = new Path(loopDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME);
    //MinerWorkflow.importSeedUrls(crawlDbPath, seedUrlsfileName);
    importUrls(seedUrlsfileName, crawlDbPath);

}

From source file:com.finderbots.miner2.pinterest.PinterestCrawlAndMinerTool.java

License:Apache License

public static void main(String[] args) {
    Options options = new Options();
    CmdLineParser parser = new CmdLineParser(options);

    try {
        parser.parseArgument(args);
    } catch (CmdLineException e) {
        System.err.println(e.getMessage());
        printUsageAndExit(parser);
    }

    // Before we get too far along, see if the domain looks valid.
    String domain = options.getDomain();
    String urlsFile = options.getUrlsFile();
    if (domain != null) {
        validateDomain(domain, parser);
    } else {
        if (urlsFile == null) {
            System.err.println(
                    "Either a target domain should be specified or a file with a list of urls needs to be provided");
            printUsageAndExit(parser);
        }
    }

    if (domain != null && urlsFile != null) {
        System.out.println("Warning: Both domain and urls file list provided - using domain");
    }

    String outputDirName = options.getOutputDir();
    if (options.isDebugLogging()) {
        System.setProperty("bixo.root.level", "DEBUG");
    } else {
        System.setProperty("bixo.root.level", "INFO");
    }

    if (options.getLoggingAppender() != null) {
        // Set console vs. DRFA vs. something else
        System.setProperty("bixo.appender", options.getLoggingAppender());
    }

    String logsDir = options.getLogsDir();
    if (!logsDir.endsWith("/")) {
        logsDir = logsDir + "/";
    }

    try {
        JobConf conf = new JobConf();
        Path outputPath = new Path(outputDirName);
        FileSystem fs = outputPath.getFileSystem(conf);

        // First check if the user wants to clean
        if (options.isCleanOutputDir()) {
            if (fs.exists(outputPath)) {
                fs.delete(outputPath, true);
            }
        }

        // If the user is starting from scratch, set up the
        // output directory and create an initial urls subdir.
        if (!fs.exists(outputPath)) {
            fs.mkdirs(outputPath);

            // Create a "0-<timestamp>" sub-directory with just a /crawldb subdir
            // In the /crawldb dir the input file will have a single URL for the target domain.

            Path curLoopDir = CrawlDirUtils.makeLoopDir(fs, outputPath, 0);
            String curLoopDirName = curLoopDir.getName();
            setLoopLoggerFile(logsDir + curLoopDirName, 0);

            Path crawlDbPath = new Path(curLoopDir, CrawlConfig.CRAWLDB_SUBDIR_NAME);

            if (domain != null) {
                importOneDomain(domain, crawlDbPath, conf);
            } else {
                importUrls(urlsFile, crawlDbPath);
            }
        }

        Path latestDirPath = CrawlDirUtils.findLatestLoopDir(fs, outputPath);

        if (latestDirPath == null) {
            System.err.println("No previous cycle output dirs exist in " + outputDirName);
            printUsageAndExit(parser);
        }

        Path crawlDbPath = new Path(latestDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME);

        // Set up the start and end loop counts.
        int startLoop = CrawlDirUtils.extractLoopNumber(latestDirPath);
        int endLoop = startLoop + options.getNumLoops();

        // Set up the UserAgent for the fetcher.
        UserAgent userAgent = new UserAgent(options.getAgentName(), CrawlConfig.EMAIL_ADDRESS,
                CrawlConfig.WEB_ADDRESS);

        // You also get to customize the FetcherPolicy
        FetcherPolicy defaultPolicy;
        if (options.getCrawlDuration() != 0) {
            defaultPolicy = new AdaptiveFetcherPolicy(options.getEndCrawlTime(), options.getCrawlDelay());
        } else {
            defaultPolicy = new FetcherPolicy();
        }
        defaultPolicy.setMaxContentSize(CrawlConfig.MAX_CONTENT_SIZE);
        defaultPolicy.setRequestTimeout(10L * 1000L);//10 seconds

        // COMPLETE for crawling a single site, EFFICIENT for many sites
        if (options.getCrawlPolicy().equals(Options.IMPOLITE_CRAWL_POLICY)) {
            defaultPolicy.setFetcherMode(FetcherPolicy.FetcherMode.IMPOLITE);
        } else if (options.getCrawlPolicy().equals(Options.EFFICIENT_CRAWL_POLICY)) {
            defaultPolicy.setFetcherMode(FetcherPolicy.FetcherMode.EFFICIENT);
        } else if (options.getCrawlPolicy().equals(Options.COMPLETE_CRAWL_POLICY)) {
            defaultPolicy.setFetcherMode(FetcherPolicy.FetcherMode.COMPLETE);
        }

        // It is a good idea to set up a crawl duration when running long crawls as you may
        // end up in situations where the fetch slows down due to a 'long tail' and by
        // specifying a crawl duration you know exactly when the crawl will end.
        int crawlDurationInMinutes = options.getCrawlDuration();
        boolean hasEndTime = crawlDurationInMinutes != Options.NO_CRAWL_DURATION;
        long targetEndTime = hasEndTime
                ? System.currentTimeMillis() + (crawlDurationInMinutes * CrawlConfig.MILLISECONDS_PER_MINUTE)
                : FetcherPolicy.NO_CRAWL_END_TIME;

        // By setting up a url filter we only deal with the urls that we want,
        // instead of all of the urls that we extract.
        BaseUrlFilter urlFilter = null;
        List<String> patterns = null;
        String regexUrlFiltersFile = options.getRegexUrlFiltersFile();
        if (regexUrlFiltersFile != null) {
            patterns = RegexUrlDatumFilter.getUrlFilterPatterns(regexUrlFiltersFile);
        } else {
            patterns = RegexUrlDatumFilter.getDefaultUrlFilterPatterns();
            if (domain != null) {
                String domainPatterStr = "+(?i)^(http|https)://([a-z0-9]*\\.)*" + domain;
                patterns.add(domainPatterStr);
            } else {
                String protocolPatterStr = "+(?i)^(http|https)://*";
                patterns.add(protocolPatterStr);
                //Log.warn("Defaulting to basic url regex filtering (just suffix and protocol");
            }
        }
        urlFilter = new RegexUrlDatumFilter(patterns.toArray(new String[patterns.size()]));

        // get a list of patterns which tell the miner which URLs to include or exclude.
        patterns.clear();
        RegexUrlStringFilter urlsToMineFilter = null;
        String regexUrlsToMineFiltersFile = options.getRegexUrlToMineFile();
        AnalyzeHtml analyzer = null;
        if (regexUrlsToMineFiltersFile != null) {
            patterns = RegexUrlDatumFilter.getUrlFilterPatterns(regexUrlsToMineFiltersFile);
            urlsToMineFilter = new RegexUrlStringFilter(patterns.toArray(new String[patterns.size()]));
            analyzer = new AnalyzeHtml(urlsToMineFilter);
        }

        // OK, now we're ready to start looping, since we've got our current
        // settings
        for (int curLoop = startLoop + 1; curLoop <= endLoop; curLoop++) {

            // Adjust target end time, if appropriate.
            if (hasEndTime) {
                int remainingLoops = (endLoop - curLoop) + 1;
                long now = System.currentTimeMillis();
                long perLoopTime = (targetEndTime - now) / remainingLoops;
                defaultPolicy.setCrawlEndTime(now + perLoopTime);
            }

            Path curLoopDirPath = CrawlDirUtils.makeLoopDir(fs, outputPath, curLoop);
            String curLoopDirName = curLoopDirPath.getName();
            setLoopLoggerFile(logsDir + curLoopDirName, curLoop);

            Flow flow = PinterestCrawlAndMinerWorkflow.createFlow(curLoopDirPath, crawlDbPath, defaultPolicy,
                    userAgent, urlFilter, analyzer, options);
            flow.complete();

            // Writing out .dot files is a good way to verify your flows.
            flow.writeDOT("valid-flow.dot");

            // Update crawlDbPath to point to the latest crawl db
            crawlDbPath = new Path(curLoopDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME);
        }
    } catch (PlannerException e) {
        e.writeDOT("failed-flow.dot");
        System.err.println("PlannerException: " + e.getMessage());
        e.printStackTrace(System.err);
        System.exit(-1);
    } catch (Throwable t) {
        System.err.println("Exception running tool: " + t.getMessage());
        t.printStackTrace(System.err);
        System.exit(-1);
    }
}

From source file:com.finderbots.miner2.tomatoes.RTCriticsCrawlAndMinerTool.java

License:Apache License

public static void main(String[] args) {
    Options options = new Options();
    CmdLineParser parser = new CmdLineParser(options);

    try {
        parser.parseArgument(args);
    } catch (CmdLineException e) {
        System.err.println(e.getMessage());
        printUsageAndExit(parser);
    }

    // Before we get too far along, see if the domain looks valid.
    String domain = options.getDomain();
    String urlsFile = options.getUrlsFile();
    if (domain != null) {
        validateDomain(domain, parser);
    } else {
        if (urlsFile == null) {
            System.err.println(
                    "Either a target domain should be specified or a file with a list of urls needs to be provided");
            printUsageAndExit(parser);
        }
    }

    if (domain != null && urlsFile != null) {
        System.out.println("Warning: Both domain and urls file list provided - using domain");
    }

    String outputDirName = options.getOutputDir();
    if (options.isDebugLogging()) {
        System.setProperty("bixo.root.level", "DEBUG");
    } else {
        System.setProperty("bixo.root.level", "INFO");
    }

    if (options.getLoggingAppender() != null) {
        // Set console vs. DRFA vs. something else
        System.setProperty("bixo.appender", options.getLoggingAppender());
    }

    String logsDir = options.getLogsDir();
    if (!logsDir.endsWith("/")) {
        logsDir = logsDir + "/";
    }

    try {
        JobConf conf = new JobConf();
        Path outputPath = new Path(outputDirName);
        FileSystem fs = outputPath.getFileSystem(conf);

        // First check if the user wants to clean
        if (options.isCleanOutputDir()) {
            if (fs.exists(outputPath)) {
                fs.delete(outputPath, true);
            }
        }

        // If the user is starting from scratch, set up the
        // output directory and create an initial urls subdir.
        if (!fs.exists(outputPath)) {
            fs.mkdirs(outputPath);

            // Create a "0-<timestamp>" sub-directory with just a /crawldb subdir
            // In the /crawldb dir the input file will have a single URL for the target domain.

            Path curLoopDir = CrawlDirUtils.makeLoopDir(fs, outputPath, 0);
            String curLoopDirName = curLoopDir.getName();
            setLoopLoggerFile(logsDir + curLoopDirName, 0);

            Path crawlDbPath = new Path(curLoopDir, CrawlConfig.CRAWLDB_SUBDIR_NAME);

            if (domain != null) {
                importOneDomain(domain, crawlDbPath, conf);
            } else {
                importUrls(urlsFile, crawlDbPath);
            }
        }

        Path latestDirPath = CrawlDirUtils.findLatestLoopDir(fs, outputPath);

        if (latestDirPath == null) {
            System.err.println("No previous cycle output dirs exist in " + outputDirName);
            printUsageAndExit(parser);
        }

        Path crawlDbPath = new Path(latestDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME);

        // Set up the start and end loop counts.
        int startLoop = CrawlDirUtils.extractLoopNumber(latestDirPath);
        int endLoop = startLoop + options.getNumLoops();

        // Set up the UserAgent for the fetcher.
        UserAgent userAgent = new UserAgent(options.getAgentName(), CrawlConfig.EMAIL_ADDRESS,
                CrawlConfig.WEB_ADDRESS);

        // You also get to customize the FetcherPolicy
        FetcherPolicy defaultPolicy;
        if (options.getCrawlDuration() != 0) {
            defaultPolicy = new AdaptiveFetcherPolicy(options.getEndCrawlTime(), options.getCrawlDelay());
        } else {
            defaultPolicy = new FetcherPolicy();
        }
        defaultPolicy.setMaxContentSize(CrawlConfig.MAX_CONTENT_SIZE);
        defaultPolicy.setRequestTimeout(10L * 1000L);//10 seconds

        // COMPLETE for crawling a single site, EFFICIENT for many sites
        if (options.getCrawlPolicy().equals(Options.IMPOLITE_CRAWL_POLICY)) {
            defaultPolicy.setFetcherMode(FetcherPolicy.FetcherMode.IMPOLITE);
        } else if (options.getCrawlPolicy().equals(Options.EFFICIENT_CRAWL_POLICY)) {
            defaultPolicy.setFetcherMode(FetcherPolicy.FetcherMode.EFFICIENT);
        } else if (options.getCrawlPolicy().equals(Options.COMPLETE_CRAWL_POLICY)) {
            defaultPolicy.setFetcherMode(FetcherPolicy.FetcherMode.COMPLETE);
        }

        // It is a good idea to set up a crawl duration when running long crawls as you may
        // end up in situations where the fetch slows down due to a 'long tail' and by
        // specifying a crawl duration you know exactly when the crawl will end.
        int crawlDurationInMinutes = options.getCrawlDuration();
        boolean hasEndTime = crawlDurationInMinutes != Options.NO_CRAWL_DURATION;
        long targetEndTime = hasEndTime
                ? System.currentTimeMillis() + (crawlDurationInMinutes * CrawlConfig.MILLISECONDS_PER_MINUTE)
                : FetcherPolicy.NO_CRAWL_END_TIME;

        // By setting up a url filter we only deal with the urls that we want,
        // instead of all of the urls that we extract.
        BaseUrlFilter urlFilter = null;
        List<String> patterns = null;
        String regexUrlFiltersFile = options.getRegexUrlFiltersFile();
        if (regexUrlFiltersFile != null) {
            patterns = RegexUrlDatumFilter.getUrlFilterPatterns(regexUrlFiltersFile);
        } else {
            patterns = RegexUrlDatumFilter.getDefaultUrlFilterPatterns();
            if (domain != null) {
                String domainPatterStr = "+(?i)^(http|https)://([a-z0-9]*\\.)*" + domain;
                patterns.add(domainPatterStr);
            } else {
                String protocolPatterStr = "+(?i)^(http|https)://*";
                patterns.add(protocolPatterStr);
                //Log.warn("Defaulting to basic url regex filtering (just suffix and protocol");
            }
        }
        urlFilter = new RegexUrlDatumFilter(patterns.toArray(new String[patterns.size()]));

        // get a list of patterns which tell the miner which URLs to include or exclude.
        patterns.clear();
        RegexUrlStringFilter urlsToMineFilter = null;
        String regexUrlsToMineFiltersFile = options.getRegexUrlToMineFile();
        MineRTCriticsPreferences prefsAnalyzer = null;
        if (regexUrlsToMineFiltersFile != null) {
            patterns = RegexUrlDatumFilter.getUrlFilterPatterns(regexUrlsToMineFiltersFile);
            urlsToMineFilter = new RegexUrlStringFilter(patterns.toArray(new String[patterns.size()]));
            prefsAnalyzer = new MineRTCriticsPreferences(urlsToMineFilter);
        }

        // OK, now we're ready to start looping, since we've got our current
        // settings
        for (int curLoop = startLoop + 1; curLoop <= endLoop; curLoop++) {

            // Adjust target end time, if appropriate.
            if (hasEndTime) {
                int remainingLoops = (endLoop - curLoop) + 1;
                long now = System.currentTimeMillis();
                long perLoopTime = (targetEndTime - now) / remainingLoops;
                defaultPolicy.setCrawlEndTime(now + perLoopTime);
            }

            Path curLoopDirPath = CrawlDirUtils.makeLoopDir(fs, outputPath, curLoop);
            String curLoopDirName = curLoopDirPath.getName();
            setLoopLoggerFile(logsDir + curLoopDirName, curLoop);

            Flow flow = RTCriticsCrawlAndMinerWorkflow.createFlow(curLoopDirPath, crawlDbPath, defaultPolicy,
                    userAgent, urlFilter, prefsAnalyzer, options);
            flow.complete();

            // Writing out .dot files is a good way to verify your flows.
            flow.writeDOT("valid-flow.dot");

            // Update crawlDbPath to point to the latest crawl db
            crawlDbPath = new Path(curLoopDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME);
        }
    } catch (PlannerException e) {
        e.writeDOT("failed-flow.dot");
        System.err.println("PlannerException: " + e.getMessage());
        e.printStackTrace(System.err);
        System.exit(-1);
    } catch (Throwable t) {
        System.err.println("Exception running tool: " + t.getMessage());
        t.printStackTrace(System.err);
        System.exit(-1);
    }
}

From source file:com.firewallid.util.FIFile.java

public static void deleteExistHDFSPath(String fullPath) throws IOException {
    Configuration hadoopConf = new Configuration();
    FileSystem fileSystem = FileSystem.get(hadoopConf);
    Path path = new Path(fullPath);

    if (fileSystem.exists(path)) {
        fileSystem.delete(path, true);
        LOG.info("Deleted " + fullPath);
    }
}

From source file:com.firewallid.util.FIFile.java

public static void writeStringToHDFSFile(String pathFile, String text) throws IOException {
    Configuration hadoopConf = new Configuration();
    FileSystem fileSystem = FileSystem.get(hadoopConf);
    Path path = new Path(pathFile);

    if (fileSystem.exists(path)) {
        fileSystem.delete(path, true);
    }

    try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fileSystem.create(path)))) {
        bw.write(text);
    }
    LOG.info("Created file: " + pathFile);
}

From source file:com.fullcontact.sstable.index.SSTableIndexIndex.java

License:Apache License

/**
 * Create and write an index index based on the input Cassandra Index.db file. Read the Index.db and generate chunks
 * (splits) based on the configured chunk size.
 *
 * @param fileSystem Hadoop file system.
 * @param sstablePath SSTable Index.db.
 * @throws IOException
 */
public static void writeIndex(final FileSystem fileSystem, final Path sstablePath) throws IOException {

    final Configuration configuration = fileSystem.getConf();

    final long splitSize = configuration.getLong(HadoopSSTableConstants.HADOOP_SSTABLE_SPLIT_MB,
            HadoopSSTableConstants.DEFAULT_SPLIT_MB) * 1024 * 1024;

    final Closer closer = Closer.create();

    final Path outputPath = sstablePath.suffix(SSTABLE_INDEX_SUFFIX);
    final Path inProgressOutputPath = sstablePath.suffix(SSTABLE_INDEX_IN_PROGRESS_SUFFIX);

    boolean success = false;
    try {
        final FSDataOutputStream os = closer.register(fileSystem.create(inProgressOutputPath));

        final TLongArrayList splitOffsets = new TLongArrayList();
        long currentStart = 0;
        long currentEnd = 0;
        final IndexOffsetScanner index = new IndexOffsetScanner(sstablePath, fileSystem);

        while (index.hasNext()) {
            // NOTE: This does not give an exact size of this split in bytes but a rough estimate.
            // This should be good enough since it's only used for sorting splits by size in hadoop land.
            while (currentEnd - currentStart < splitSize && index.hasNext()) {
                currentEnd = index.next();
                splitOffsets.add(currentEnd);
            }

            // Record the split
            final long[] offsets = splitOffsets.toArray();
            os.writeLong(offsets[0]); // Start
            os.writeLong(offsets[offsets.length - 1]); // End

            // Clear the offsets
            splitOffsets.clear();

            if (index.hasNext()) {
                currentStart = index.next();
                currentEnd = currentStart;
                splitOffsets.add(currentStart);
            }
        }

        success = true;
    } finally {
        closer.close();

        if (!success) {
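            // On failure, remove the partial in-progress index; recursive = false since it is a single file.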
            fileSystem.delete(inProgressOutputPath, false);
        } else {
            fileSystem.rename(inProgressOutputPath, outputPath);
        }
    }
}

From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.HdfsSortedOplogOrganizer.java

License:Apache License

private void cleanupTmpFiles() throws IOException {
    if (oldTmpFiles == null && tmpFiles == null) {
        return;
    }

    if (oldTmpFiles != null) {
        FileSystem fs = store.getFileSystem();
        long now = System.currentTimeMillis();
        for (Iterator<FileStatus> itr = oldTmpFiles.iterator(); itr.hasNext();) {
            FileStatus file = itr.next();
            if (file.getModificationTime() + TMP_FILE_EXPIRATION_TIME_MS > now) {
                if (logger.isDebugEnabled()) {
                    logger.debug("{}Deleting temporary file:" + file.getPath(), logPrefix);
                }
                fs.delete(file.getPath(), false);
                itr.remove();
            }
        }
    }
    if (tmpFiles != null) {
        for (Hoplog so : tmpFiles.keySet()) {
            if (logger.isDebugEnabled()) {
                logger.debug("{}Deleting temporary file:" + so.getFileName(), logPrefix);
            }
            deleteTmpFile(null, so);
        }
    }
}

From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.HoplogUtil.java

License:Apache License

public static void exposeCleanupIntervalMillis(FileSystem fs, Path path, long intervalDurationMillis) {
    FSDataInputStream input = null;
    FSDataOutputStream output = null;
    try {
        if (fs.exists(path)) {
            input = new FSDataInputStream(fs.open(path));
            if (intervalDurationMillis == input.readLong()) {
                input.close();
                return;
            }
            input.close();
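            // Stored interval differs from the requested one: remove the old marker file and rewrite it below.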
            fs.delete(path, true);
        }
        output = fs.create(path);
        output.writeLong(intervalDurationMillis);
        output.close();
    } catch (IOException e) {
        return;
    } finally {
        try {
            if (input != null) {
                input.close();
            }
            if (output != null) {
                output.close();
            }
        } catch (IOException e2) {
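            // ignore secondary failures while closing the streams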

        }
    }
}