com.addthis.hydra.task.map.DataPurgeServiceImpl.java Source code

Java tutorial

Introduction

Here is the source code for com.addthis.hydra.task.map.DataPurgeServiceImpl.java

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.addthis.hydra.task.map;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

import com.addthis.basis.util.LessStrings;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.WildcardFileFilter;

import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link DataPurgeService} implementation that deletes date-stamped directories
 * or files once the date encoded in their path/name is older than a configured
 * maximum age.
 *
 * <p>Two modes are supported, selected by {@code DataPurgeConfig.isFileBasedPurge()}:
 * <ul>
 *   <li>directory mode: the path of each subdirectory, relative to the expanded
 *       prefix directory, is parsed as a date using the configured date path format;</li>
 *   <li>file mode: a fixed-position substring of each regular file's name is
 *       parsed as a date.</li>
 * </ul>
 * Paths or names that cannot be parsed as a date are left untouched.
 */
public class DataPurgeServiceImpl implements DataPurgeService {

    private static final Logger logger = LoggerFactory.getLogger(DataPurgeServiceImpl.class);

    private static final String dirSeperator = File.separator;
    // the separator escaped so it can be handed to a regex-style split (a lone backslash is not a valid regex)
    private static final String dirRegexSeperator = File.separator.equals("\\") ? "\\\\" : File.separator;

    /** Accepts regular files only. */
    private static final FileFilter regularFileFilter = new FileFilter() {
        @Override
        public boolean accept(File file) {
            return file.isFile();
        }
    };

    /** Accepts directories only. */
    private static final FileFilter directoryFilter = new FileFilter() {
        @Override
        public boolean accept(File file) {
            return file.isDirectory();
        }
    };

    public DataPurgeServiceImpl() {
    }

    /**
     * Purges data under every configured directory prefix that is older than the
     * configured maximum age, measured back from {@code currentTime}.
     *
     * <p>The maximum age in days takes precedence; the maximum age in hours is only
     * used when the day value is not positive.
     *
     * @param dataPurgeConfig purge settings (prefixes, date format, max age, mode)
     * @param currentTime     the instant the maximum age is measured from
     * @return {@code false} if the parameters fail validation (nothing is deleted),
     *         {@code true} once the purge pass has completed
     * @throws IllegalArgumentException if the configured date path format is not a
     *                                  valid pattern for {@code DateTimeFormat.forPattern}
     */
    @Override
    public boolean purgeData(DataPurgeConfig dataPurgeConfig, DateTime currentTime) {
        if (!validatePurgeParameters(dataPurgeConfig, currentTime)) {
            return false;
        }
        DateTimeFormatter dateTimeFormatter = DateTimeFormat.forPattern(dataPurgeConfig.getDatePathFormat());
        DateTime oldestDataAllowed;
        if (dataPurgeConfig.getMaxAgeInDays() > 0) {
            oldestDataAllowed = currentTime.plusDays(-dataPurgeConfig.getMaxAgeInDays());
        } else {
            oldestDataAllowed = currentTime.plusHours(-dataPurgeConfig.getMaxAgeInHours());
        }
        logger.debug("Oldest data allowed {} , current time is {}", oldestDataAllowed, currentTime);
        for (String directoryPrefix : dataPurgeConfig.getDirectoryPrefix()) {
            if (directoryPrefix == null || directoryPrefix.isEmpty()) {
                // a blank entry cannot name a directory; skip it rather than abort the whole purge
                logger.warn("Skipping null or empty directory prefix");
                continue;
            }
            for (File prefixDirectory : expandPrefix(directoryPrefix)) {
                List<File> subdirectories = getSubdirectoryList(prefixDirectory, null);
                for (File subdirectory : subdirectories) {
                    logger.trace("Considering directory {} for purge", subdirectory);
                    safeDelete(prefixDirectory.getPath(), dateTimeFormatter, oldestDataAllowed, subdirectory,
                            dataPurgeConfig.isFileBasedPurge(), dataPurgeConfig.getDateStartIndex(),
                            dataPurgeConfig.getDateStringLength());
                }
                if (dataPurgeConfig.getCleanEmptyParents()) {
                    for (File directory : subdirectories) {
                        // list() returns null for paths that no longer exist or are not directories;
                        // list once so the emptiness check is made against a single snapshot
                        String[] children = directory.list();
                        if (children != null && children.length == 0) {
                            try {
                                FileUtils.deleteDirectory(directory);
                            } catch (IOException e) {
                                logger.warn("Failed to delete empty directory {}", directory, e);
                            }
                        }
                    }
                }
            }
        }

        return true;

    }

    /**
     * Expands {@code prefixDirectory} (which may contain {@code *} wildcards) and
     * collects every expanded directory together with all of its subdirectories.
     *
     * @param prefixDirectory directory prefix, optionally containing wildcards
     * @return the expanded directories and their recursive subdirectories
     */
    protected List<File> generateDirectoryList(String prefixDirectory) {
        List<File> directoryList = new LinkedList<>();
        for (File directory : expandPrefix(prefixDirectory)) {
            logger.trace("prefix expanded {} to {}", prefixDirectory, directory);
            getSubdirectoryList(directory, directoryList);
        }
        return directoryList;
    }

    /**
     * Deletes expired content of a single directory.
     *
     * <p>In file mode each regular file directly inside {@code directory} is examined:
     * the characters {@code [dateStartIndex, dateStartIndex + dateStringLength)} of its
     * name are parsed as a date, and the file is deleted when that date is before
     * {@code oldestDataAllowed}. Files whose names are too short to contain that range
     * are skipped.
     *
     * <p>In directory mode the path of {@code directory} with the leading
     * {@code directoryPrefix} (and one leading separator) removed is parsed as a date,
     * and the whole directory is deleted when that date is before {@code oldestDataAllowed}.
     *
     * @param directoryPrefix   path of the prefix directory that {@code directory} lives under
     * @param dateTimeFormatter formatter used to parse the date string
     * @param oldestDataAllowed cutoff; anything dated before it is deleted
     * @param directory         directory to examine
     * @param fileBasedPurge    {@code true} for file mode, {@code false} for directory mode
     * @param dateStartIndex    file mode only: index in the file name where the date starts
     * @param dateStringLength  file mode only: number of characters making up the date
     */
    protected void safeDelete(String directoryPrefix, DateTimeFormatter dateTimeFormatter,
            DateTime oldestDataAllowed, File directory, boolean fileBasedPurge, int dateStartIndex,
            int dateStringLength) {
        if (fileBasedPurge) {
            File[] fileList = directory.listFiles(regularFileFilter);
            if (fileList == null) {
                // not a directory (or no longer exists / unreadable)
                return;
            }
            for (File file : fileList) {
                String fileName = file.getName();
                // long arithmetic so a very large configured length cannot overflow into a valid-looking index
                long dateEndIndex = (long) dateStartIndex + dateStringLength;
                if (dateStartIndex < 0 || dateStringLength < 0 || dateEndIndex > fileName.length()) {
                    // the name cannot hold the date substring; leave the file alone instead of
                    // letting substring() throw and abort the rest of the purge
                    logger.trace("Skipping {}: name too short to contain a date at [{}, {})",
                            new Object[] { file, dateStartIndex, dateEndIndex });
                    continue;
                }
                String dateString = fileName.substring(dateStartIndex, (int) dateEndIndex);
                if (shouldDelete(dateTimeFormatter, oldestDataAllowed, dateString)) {
                    delete(file);
                }
            }
        } else {
            String directoryStr = directory.getPath();
            // strip only the leading prefix; replacing every occurrence could mangle the date portion
            if (directoryPrefix != null && directoryStr.startsWith(directoryPrefix)) {
                directoryStr = directoryStr.substring(directoryPrefix.length());
            }
            if (directoryStr.startsWith(dirSeperator)) {
                directoryStr = directoryStr.substring(1);
            }
            if (shouldDelete(dateTimeFormatter, oldestDataAllowed, directoryStr)) {
                delete(directory);
            }
        }
    }

    /**
     * Deletes a file or, recursively, a directory. Failures are logged, never thrown.
     *
     * @param file the file or directory to remove
     */
    private void delete(File file) {
        if (logger.isDebugEnabled()) {
            // resolving the canonical path touches the file system, so only do it when it will be logged
            String name;
            try {
                name = file.getCanonicalPath();
            } catch (IOException e) {
                name = "(unk-path)" + file.getName();
            }
            logger.debug("Deleting: {}", name);
        }
        try {
            if (file.isDirectory()) {
                FileUtils.deleteDirectory(file);
            } else if (!FileUtils.deleteQuietly(file) && file.exists()) {
                // deleteQuietly never throws; surface the failure so it is not silently lost
                logger.warn("unable to purge : {}", file);
            }
        } catch (IOException e) {
            logger.error("error purging : " + file, e);
        }
    }

    /**
     * Decides whether data stamped with {@code dateString} has expired.
     *
     * @param dateTimeFormatter formatter used to parse {@code dateString}
     * @param oldestDataAllowed cutoff instant
     * @param dateString        candidate date text taken from a path or file name
     * @return {@code true} only if {@code dateString} parses successfully and the
     *         resulting time is before {@code oldestDataAllowed}; {@code false} when
     *         it is {@code null} or cannot be parsed
     */
    protected boolean shouldDelete(DateTimeFormatter dateTimeFormatter, DateTime oldestDataAllowed,
            String dateString) {
        if (dateString == null) {
            return false;
        }
        DateTime time;
        try {
            time = dateTimeFormatter.parseDateTime(dateString);
        } catch (IllegalArgumentException | UnsupportedOperationException e) {
            // not a date in the expected format: this path is not purge-managed, so ignore it
            logger.trace("Ignoring {}: not parseable as a date", dateString);
            return false;
        }
        return time != null && time.isBefore(oldestDataAllowed);
    }

    /**
     * Expands a directory path that may contain {@code *} wildcards into the list of
     * matching directories.
     *
     * <p>A path without any {@code *} is returned as a single {@link File} without
     * checking that it exists. A path with wildcards is resolved segment by segment,
     * starting from the file system root when the path starts with the separator and
     * from {@code "."} otherwise; only existing directories are returned.
     *
     * @param path directory path, optionally containing wildcards
     * @return the matching directories; empty if {@code path} is {@code null} or
     *         nothing matches
     */
    protected List<File> expandPrefix(String path) {
        LinkedList<File> list = new LinkedList<>();
        if (path == null) {
            return list;
        }
        if (path.indexOf('*') == -1) {
            list.add(new File(path));
            return list;
        }
        File cur = path.startsWith(dirSeperator) ? new File(dirSeperator) : new File(".");
        String[] tokens = LessStrings.splitArray(path, dirRegexSeperator);
        expandPrefix(list, cur, tokens, 0);
        return list;
    }

    /**
     * Recursive step of wildcard expansion: resolves {@code tokens[index]} relative to
     * {@code cur} and descends until all tokens are consumed.
     *
     * @param list   accumulator that receives every fully-resolved existing directory
     * @param cur    directory resolved so far
     * @param tokens path segments of the original prefix
     * @param index  index of the segment to resolve next
     */
    protected void expandPrefix(List<File> list, File cur, String[] tokens, int index) {
        if (index == tokens.length) {
            // isDirectory() is already false for paths that do not exist
            if (cur.isDirectory()) {
                list.add(cur);
            }
            return;
        }
        String tok = tokens[index];
        if (tok.indexOf('*') >= 0) {
            FileFilter fileFilter = new WildcardFileFilter(tok);
            File[] find = cur.listFiles(fileFilter);

            if (find != null) {
                for (File found : find) {
                    if (found.isDirectory()) {
                        expandPrefix(list, found, tokens, index + 1);
                    }
                }
            }
        } else {
            expandPrefix(list, new File(cur, tok), tokens, index + 1);
        }
    }

    /**
     * Recursively adds {@code current} and all of its subdirectories to
     * {@code directoryList}, parents before their children.
     *
     * <p>{@code current} is always added, even when it is not an existing directory.
     *
     * @param current       starting point of the walk
     * @param directoryList list to append to; a new list is created when {@code null}
     * @return the list that was appended to
     */
    protected List<File> getSubdirectoryList(File current, List<File> directoryList) {
        if (directoryList == null) {
            directoryList = new ArrayList<>();
        }
        directoryList.add(current);
        if (current.isDirectory()) {
            File[] fileArray = current.listFiles(directoryFilter);
            if (fileArray != null) {
                for (File directory : fileArray) {
                    getSubdirectoryList(directory, directoryList);
                }
            }
        }
        return directoryList;
    }

    /**
     * Checks that the purge request is complete enough to act on, logging the first
     * problem found.
     *
     * @param dataPurgeConfig purge settings to validate
     * @param currentTime     reference time for the purge
     * @return {@code true} if the purge may proceed, {@code false} otherwise
     */
    private boolean validatePurgeParameters(DataPurgeConfig dataPurgeConfig, DateTime currentTime) {
        if (dataPurgeConfig == null) {
            logger.error("Data purge config can not be null");
            return false;
        }
        if (dataPurgeConfig.getDirectoryPrefix() == null || dataPurgeConfig.getDirectoryPrefix().length == 0) {
            logger.error("Directory prefix can not be null or blank");
            return false;
        }
        if (dataPurgeConfig.getDatePathFormat() == null || dataPurgeConfig.getDatePathFormat().isEmpty()) {
            logger.error("Date path format can not be null or blank");
            return false;
        }
        if (currentTime == null) {
            logger.error("Current time can not be null");
            return false;
        }
        if (dataPurgeConfig.getMaxAgeInDays() <= 0 && dataPurgeConfig.getMaxAgeInHours() <= 0) {
            logger.error("max age must be > 0");
            return false;
        }
        if (dataPurgeConfig.isFileBasedPurge() && dataPurgeConfig.getDateStartIndex() < 0) {
            logger.error("File based purges require the dataStartIndex to be set");
            return false;
        }
        if (dataPurgeConfig.getDateStartIndex() >= 0 && dataPurgeConfig.getDateStringLength() < 0) {
            logger.error("Date start index was set but date string length was not defined");
            return false;
        }
        return true;
    }
}