com.flicklib.folderscanner.MovieNameExtractor.java Source code

Java tutorial

Introduction

Here is the source code for com.flicklib.folderscanner.MovieNameExtractor.java

Source

/*
 * This file is part of Flicklib.
 *
 * Copyright (C) Francis De Brabandere
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.flicklib.folderscanner;

import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.Locale;
import java.util.regex.Pattern;

import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.FileSystemException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Turns release-style file or folder names (for example
 * {@code Some.Movie.2005.DVDRip.XviD-GRP.avi}) into a plain, lower-case title
 * that can be used as a search term, and guesses the language of a release
 * from markers in its name.
 *
 * @author francisdb
 */
public class MovieNameExtractor {

    private static final Logger LOGGER = LoggerFactory.getLogger(MovieNameExtractor.class);

    /** First year (inclusive) that is stripped from release names. */
    private static final int FIRST_REMOVABLE_YEAR = 1800;

    /*
     * Release tags that are stripped from the name. They are matched as plain
     * substrings, in this order, so more specific items must come first!
     */
    private static final String[] TO_REMOVE = { ".5.1", ".ws.dvdrip", ".fs.dvdrip", ".fs.internal", ".se.internal",
            ".extended.dvdrip", ".real.repack", ".real.proper", ".real.retail", ".limited.dvdrip",
            ".limited.color.fixed", ".read.nfo", ".dvdrip", ".samplefix", ".dvdivx4", ".dvdivx5", ".dvdivx",
            ".dvdr", ".divx", ".dual", ".r5.xvid", ".xvid", ".limited", ".internal", ".special.edition", ".proper",
            ".dc", ".ac3", ".unrated", ".uncut", ".stv", ".dutch", // keep this one?
            ".hundub", ".hund", ".hun", ".rerip", ".nfofix", ".full.subpack", ".subpack", ".subfix", ".syncfix",
            ".ts", ".cd1", ".cd2", ".screener", ".dvd", ".pal", ".1080p",
            // the misspelled variant is kept so names it used to match are still cleaned
            ".directors.cut", ".direcors.cut", ".extended.cut",
            ".repack", ".disc", ".retail"//".ws"
    };

    /**
     * Associates a regular expression with the language it hints at.
     */
    static class LanguageSuggestion {
        final Pattern pattern;
        final Locale language;

        /**
         * @param pattern  regular expression, applied to the lower-cased name
         * @param language locale reported when the pattern is found
         */
        public LanguageSuggestion(String pattern, Locale language) {
            this.pattern = Pattern.compile(pattern);
            this.language = language;
        }

        /**
         * Checks whether the pattern occurs anywhere in the given name.
         *
         * @param name the name to inspect; lower-cased before matching
         * @return the associated language, or {@code null} when the pattern is not found
         */
        public Locale match(String name) {
            // Locale.ROOT keeps the lower-casing independent of the JVM default locale
            // (for example the Turkish dotless i would otherwise break the match).
            if (pattern.matcher(name.toLowerCase(Locale.ROOT)).find()) {
                return language;
            }
            return null;
        }

    }

    // Locale takes (language, country); the former three-argument form put
    // "hu_HU"/"nl_NL" in the language slot and produced an invalid language code.
    static final Locale HUNGARIAN = new Locale("hu", "HU");
    static final Locale DUTCH = new Locale("nl", "NL");

    /** Language hints, evaluated in order; the first matching entry wins. */
    private static final LanguageSuggestion[] SUGGESTIONS = new LanguageSuggestion[] {
            new LanguageSuggestion("\\.hungarian", HUNGARIAN), new LanguageSuggestion("\\.hun\\.", HUNGARIAN),
            new LanguageSuggestion("\\.dutch", DUTCH), new LanguageSuggestion("\\.en\\.", Locale.ENGLISH) };

    public MovieNameExtractor() {
    }

    /**
     * Cleans the base name of the given file or folder.
     *
     * @param file the file or folder whose base name is cleaned; it is treated as a
     *             directory when its type reports that it can have children
     * @return the cleaned, lower-case name
     * @throws FileSystemException if the type of the file cannot be determined
     */
    public String removeCrap(FileObject file) throws FileSystemException {
        return removeCrap(file.getName().getBaseName(), file.getType().hasChildren());
    }

    /**
     * Cleans a file or folder name: lower-cases it, drops a known video extension
     * (files only), strips the tags listed in {@code TO_REMOVE} and finally turns
     * dots and dashes into spaces. When at least one tag was found the name is
     * treated as a release name, and additionally everything after the last dash
     * and all year numbers are removed.
     *
     * @param fileName  the raw name, without any parent path
     * @param directory {@code true} if the name belongs to a directory, in which
     *                  case no extension is removed
     * @return the cleaned, trimmed, lower-case name
     */
    public String removeCrap(String fileName, boolean directory) {
        // Locale.ROOT: the tags are ASCII, so lower-casing must not depend on the default locale.
        String movieName = fileName.toLowerCase(Locale.ROOT).replace('_', '.');
        if (!directory) {
            movieName = clearMovieExtension(movieName);
        }

        boolean release = false;
        for (String tag : TO_REMOVE) {
            // Literal replace on purpose: with replaceAll the dots in a tag act as regex
            // wildcards, so ".ts" would also eat the "ats" of "cats".
            if (movieName.contains(tag)) {
                // these strings should not be available in non-release movies
                release = true;
                movieName = movieName.replace(tag, "");
            }
            // same tag for names that use spaces instead of dots as separator
            String spacedTag = tag.replace('.', ' ');
            if (movieName.contains(spacedTag)) {
                release = true;
                movieName = movieName.replace(spacedTag, "");
            }
        }

        if (release) {
            movieName = stripAfterLastDash(movieName);
            // this actually yields better results in imdb
            // TODO make this optional or depending on the main service
            movieName = stripYears(movieName);
        }

        // clean up dashes for normal movies, then turn every separator into a space
        movieName = movieName.replace('-', '.').replace('.', ' ').trim();
        // the name is passed as an argument so that it is never interpreted as a format string
        LOGGER.trace("{}", movieName);
        return movieName;
    }

    /** Drops the last dash and everything after it; returns the name unchanged when it has no dash. */
    private static String stripAfterLastDash(String name) {
        int dashPos = name.lastIndexOf('-');
        return dashPos == -1 ? name : name.substring(0, dashPos);
    }

    /**
     * Removes every occurrence of a year between {@code FIRST_REMOVABLE_YEAR} and the
     * current year (inclusive, so releases from the running year are handled too).
     */
    private static String stripYears(String name) {
        Calendar calendar = new GregorianCalendar();
        int thisYear = calendar.get(Calendar.YEAR);
        String result = name;
        // TODO recup the movie year!
        for (int i = FIRST_REMOVABLE_YEAR; i <= thisYear; i++) {
            String year = Integer.toString(i);
            // the contains() guard avoids a replace pass for the vast majority of years
            if (result.contains(year)) {
                result = result.replace(year, "");
            }
        }
        return result;
    }

    /**
     * Removes the extension from the name if it is one of
     * {@code MovieFileFilter.VIDEO_EXT_EXTENSIONS}. The extension is lower-cased
     * before the lookup; names that start with their only dot, or end with a
     * dot, are returned unchanged.
     *
     * @param name the file name
     * @return the name without its video extension, or the unchanged name
     */
    public String clearMovieExtension(String name) {
        int lastDotPos = name.lastIndexOf('.');
        if (lastDotPos > 0 && lastDotPos < name.length() - 1) {
            String ext = name.substring(lastDotPos + 1).toLowerCase(Locale.ROOT);
            if (MovieFileFilter.VIDEO_EXT_EXTENSIONS.contains(ext)) {
                return name.substring(0, lastDotPos);
            }
        }
        return name;
    }

    /**
     * Guesses the language of a release from the base name of the given file.
     *
     * @param file the file or folder to inspect
     * @return the suggested language, {@link Locale#ENGLISH} when nothing matches
     */
    public Locale getLanguageSuggestion(FileObject file) {
        return getLanguageSuggestion(file.getName().getBaseName());
    }

    /**
     * Guesses the language of a release from markers in its name, such as
     * {@code .hun.} or {@code .dutch}.
     *
     * @param filename the name to inspect
     * @return the language of the first matching suggestion,
     *         {@link Locale#ENGLISH} when nothing matches
     */
    public Locale getLanguageSuggestion(String filename) {
        for (LanguageSuggestion s : SUGGESTIONS) {
            Locale l = s.match(filename);
            if (l != null) {
                return l;
            }
        }
        return Locale.ENGLISH;
    }

}