Java tutorial
/* * Copyright (c) 2004-2013 YAMJ Members * https://github.com/organizations/YAMJ/teams * * This file is part of the Yet Another Media Jukebox (YAMJ). * * YAMJ is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * any later version. * * YAMJ is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with YAMJ. If not, see <http://www.gnu.org/licenses/>. * * Web: https://github.com/YAMJ/yamj-v3 * */ package org.yamj.core.service.mediaimport; import static java.util.regex.Pattern.CASE_INSENSITIVE; import static org.springframework.util.StringUtils.tokenizeToStringArray; import org.yamj.common.util.KeywordMap; import org.yamj.common.util.PatternUtils; import org.yamj.common.util.TokensPatternMap; import org.yamj.core.database.model.type.FileType; import org.yamj.core.tools.LanguageTools; import org.yamj.common.tools.PropertyTools; import org.yamj.core.tools.StringTools; import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.io.FilenameUtils; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; @Service("filenameScanner") public class FilenameScanner { private static final Logger LOG = LoggerFactory.getLogger(FilenameScanner.class); // Allow the use of [IMDB tt123456] to define the IMDB reference private static final Pattern ID_PATTERN = PatternUtils.patt("\\[ID ([^\\[\\]]*)\\]"); // Search for tt followed by 6 or 7 digits and then a word boundary private static final Pattern IMDB_PATTERN = PatternUtils.patt("(?i)(tt\\d{6,7})\\b"); // Everything in format [SET something] (case insensitive) private static final Pattern SET_PATTERN = PatternUtils.ipatt("\\[SET(?:\\s|-)([^\\[\\]]*)\\]"); // Number at the end of string preceded with '-' private static final Pattern SET_INDEX_PATTERN = PatternUtils.patt("-\\s*(\\d+)\\s*$"); private static final Pattern TV_PATTERN = PatternUtils .ipatt("(?<![0-9])((s[0-9]{1,4})|[0-9]{1,2})(?:(\\s|\\.|x))??((?:(e|x)\\s??[0-9]+)+)"); private static final Pattern SEASON_PATTERN = PatternUtils.ipatt("s{0,1}([0-9]+)(\\s|\\.)??[ex-]"); private static final Pattern EPISODE_PATTERN = PatternUtils.ipatt("[ex]\\s??([0-9]+)"); // Last 4 digits or last 4 digits in parenthesis private static final Pattern MOVIE_YEAR_PATTERN = PatternUtils .patt("\\({0,1}(\\d{4})(?:/|\\\\|\\||-){0,1}(I*)\\){0,1}$"); // One or more '.[]_ ' private static final Pattern TITLE_CLEANUP_DIV_PATTERN = PatternUtils.patt("([\\. _\\[\\]]+)"); // '-' or '(' at the end private static final Pattern TITLE_CLEANUP_CUT_PATTERN = PatternUtils.patt("-$|\\($"); // All symbols between '-' and '/' but not after '/TVSHOW/' or '/PART/' private static final Pattern SECOND_TITLE_PATTERN = PatternUtils.patt("(?<!/TVSHOW/|/PART/)-([^/]+)"); /** * Parts/disks markers. * * CAUTION: Grouping is used for part number detection/parsing. */ private static final List<Pattern> PART_PATTERNS = new ArrayList<Pattern>() { private static final long serialVersionUID = 2534565160759765860L; { add(PatternUtils.iwpatt("CD ([0-9]+)")); add(PatternUtils.iwpatt("(?:(?:CD)|(?:DISC)|(?:DISK)|(?:PART))([0-9]+)")); add(PatternUtils.tpatt("([0-9]{1,2})[ \\.]{0,1}DVD")); } }; /** * Detect if the file/folder name is incomplete and additional info must be taken from parent folder. * * CAUTION: Grouping is used for part number detection/parsing. */ private static final List<Pattern> PARENT_FOLDER_PART_PATTERNS = new ArrayList<Pattern>() { private static final long serialVersionUID = 6125546333783004357L; { for (Pattern p : PART_PATTERNS) { add(Pattern.compile("^" + p, CASE_INSENSITIVE)); } add(Pattern.compile("^" + TV_PATTERN, CASE_INSENSITIVE)); } }; private static final Map<Integer, Pattern> FPS_MAP = new HashMap<Integer, Pattern>() { private static final long serialVersionUID = -514057952318403685L; { for (int i : new int[] { 23, 24, 25, 29, 30, 50, 59, 60 }) { put(i, PatternUtils.iwpatt("p" + i + "|" + i + "p")); } } }; private static final Map<String, Pattern> AUDIO_CODEC_MAP = new HashMap<String, Pattern>() { private static final long serialVersionUID = 8916278631320047158L; { for (String s : new String[] { "AC3", "DTS", "DD", "AAC", "FLAC" }) { put(s, PatternUtils.iwpatt(s)); } } }; private static final Map<String, Pattern> VIDEO_CODEC_MAP = new HashMap<String, Pattern>() { private static final long serialVersionUID = 7370884465939448891L; { put("XviD", PatternUtils.iwpatt("XVID")); put("DivX", PatternUtils.iwpatt("DIVX|DIVX6")); put("H.264", PatternUtils.iwpatt("H264|H\\.264|X264")); } }; private static final Map<String, Pattern> HD_RESOLUTION_MAP = new HashMap<String, Pattern>() { private static final long serialVersionUID = 3476960701738952741L; { for (String s : new String[] { "720p", "1080i", "1080p", "HD", "1280x720", "1920x1080" }) { put(s, PatternUtils.iwpatt(s)); } } }; private final TokensPatternMap videoSourceMap = new TokensPatternMap() { private static final long serialVersionUID = 4166458100829813911L; @Override public void put(String key, Collection<String> tokens) { StringBuilder patt = new StringBuilder(key); for (String token : tokens) { patt.append("|"); patt.append(token); } put(key, PatternUtils.iwpatt(patt.toString())); } }; private Collection<String> videoExtensions = new HashSet<String>(); private Collection<String> subtitleExtensions = new HashSet<String>(); private Collection<String> imageExtensions = new HashSet<String>(); private final Collection<Pattern> skipPatterns = new ArrayList<Pattern>(); private final Collection<Pattern> movieVersionPatterns = new ArrayList<Pattern>(); private final Collection<Pattern> extraPatterns = new ArrayList<Pattern>(); private final boolean languageDetection; private final boolean skipEpisodeTitle; private boolean useParentRegex; private Pattern useParentPattern; private LanguageTools languageTools; public FilenameScanner() { // resolve extensions videoExtensions = StringTools.tokenize(PropertyTools.getProperty("filename.scanner.video.extensions", "avi,divx,xvid,mkv,wmv,m2ts,ts,rm,qt,iso,vob,mpg,mov,mp4,m1v,m2v,m4v,m2p,top,trp,m2t,mts,asf,rmp4,img,mk3d,rar,001"), ",;|"); subtitleExtensions = StringTools.tokenize( PropertyTools.getProperty("filename.scanner.subtitle.extensions", "srt,sub,ssa,smi,pgs"), ",;|"); imageExtensions = StringTools.tokenize( PropertyTools.getProperty("filename.scanner.image.extensions", "jpg,jpeg,gif,bmp,png"), ",;|"); // other properties languageDetection = PropertyTools.getBooleanProperty("filename.scanner.language.detection", Boolean.TRUE); skipEpisodeTitle = PropertyTools.getBooleanProperty("filename.scanner.skip.episodeTitle", Boolean.FALSE); // parent patterns useParentRegex = PropertyTools.getBooleanProperty("filename.scanner.useParentRegex", Boolean.FALSE); String patternString = PropertyTools.getProperty("filename.scanner.parentRegex", ""); if (StringUtils.isNotBlank(patternString)) { useParentPattern = PatternUtils.ipatt(patternString); } else { useParentRegex = Boolean.FALSE; } // build the skip patterns boolean caseSensitive = PropertyTools.getBooleanProperty("filename.scanner.skip.caseSensitive", Boolean.TRUE); for (String token : tokenizeToStringArray(PropertyTools.getProperty("filename.scanner.skip.keywords", ""), ",;| ")) { if (caseSensitive) { skipPatterns.add(PatternUtils.wpatt(Pattern.quote(token))); } else { skipPatterns.add(PatternUtils.iwpatt(Pattern.quote(token))); } } caseSensitive = PropertyTools.getBooleanProperty("filename.scanner.skip.caseSensitive.regex", Boolean.TRUE); for (String token : tokenizeToStringArray( PropertyTools.getProperty("filename.scanner.skip.keywords.regex", ""), ",;| ")) { if (caseSensitive) { skipPatterns.add(PatternUtils.patt(token)); } else { skipPatterns.add(PatternUtils.ipatt(token)); } } // build version keywords pattern for (String token : tokenizeToStringArray(PropertyTools.getProperty("filename.scanner.version.keywords", "director's cut,directors cut,extended cut,final cut,remastered,extended version,special edition"), ",;|")) { movieVersionPatterns.add( PatternUtils.iwpatt(token.replace(" ", PatternUtils.WORD_DELIMITERS_MATCH_PATTERN.pattern()))); } // build extra keywords pattern for (String token : tokenizeToStringArray( PropertyTools.getProperty("filename.scanner.extra.keywords", "trailer,extra,bonus"), ",;|")) { extraPatterns.add(PatternUtils.pattInSBrackets(Pattern.quote(token))); } // set source keywords KeywordMap sourceKeywords = PropertyTools.getKeywordMap("filename.scanner.source.keywords", "HDTV,PDTV,DVDRip,DVDSCR,DSRip,CAM,R5,LINE,HD2DVD,DVD,DVD5,DVD9,HRHDTV,MVCD,VCD,TS,VHSRip,BluRay,BDRip,HDDVD,D-THEATER,SDTV"); videoSourceMap.putAll(sourceKeywords.getKeywords(), sourceKeywords); } @Autowired public void setLanguageTools(LanguageTools languageTools) { this.languageTools = languageTools; } public FileType determineFileType(final String extension) { String ext = extension.toLowerCase(); try { if ("nfo".equals(ext)) { return FileType.NFO; } if (videoExtensions.contains(ext)) { return FileType.VIDEO; } if (subtitleExtensions.contains(ext)) { return FileType.SUBTITLE; } if (imageExtensions.contains(ext)) { return FileType.IMAGE; } } catch (Exception error) { LOG.error("Failed to determine file type for extension {}", extension); LOG.warn("File type detection error", error); } return FileType.UNKNOWN; } public void scan(FilenameDTO dto) { // CHECK FOR USE_PARENT_PATTERN matches if (useParentRegex && useParentPattern.matcher(dto.getName()).find()) { // Just go up one parent dto.setRest(dto.getParentName()); LOG.debug("UseParentPattern matched for {} - Using parent folder name: {}", dto.getName(), dto.getParentName()); } else { dto.setRest(dto.getName()); } // EXTENSION AND CONTAINER if (dto.isDirectory()) { dto.setContainer("DVD"); dto.setVideoSource("DVD"); } else { // Extract and strip extension String ext = FilenameUtils.getExtension(dto.getRest()); if (ext.length() > 0) { dto.setRest(FilenameUtils.removeExtension(dto.getRest())); dto.setContainer(ext.toUpperCase()); } } dto.setRest(cleanUp(dto.getRest())); // Detect incomplete filenames and add parent folder name to parser for (Pattern pattern : PARENT_FOLDER_PART_PATTERNS) { final Matcher matcher = pattern.matcher(dto.getRest()); if (matcher.find()) { final String parentName = dto.getParentName(); if (parentName == null) { break; } dto.setRest(cleanUp(parentName) + "./." + dto.getRest()); break; } } // Remove version info for (Pattern pattern : movieVersionPatterns) { Matcher matcher = pattern.matcher(dto.getRest()); if (matcher.find()) { dto.setMovieVersion(matcher.group(0)); } dto.setRest(pattern.matcher(dto.getRest()).replaceAll("./.")); } // EXTRAS (Including Trailers) for (Pattern pattern : extraPatterns) { Matcher matcher = pattern.matcher(dto.getRest()); if (matcher.find()) { dto.setExtra(Boolean.TRUE); dto.setPartTitle(matcher.group(1)); dto.setRest(cutMatch(dto.getRest(), matcher, "./EXTRA/.")); break; } } dto.setFps(seekPatternAndUpdateRest(FPS_MAP, dto.getFps(), dto)); dto.setAudioCodec(seekPatternAndUpdateRest(AUDIO_CODEC_MAP, dto.getAudioCodec(), dto)); dto.setVideoCodec(seekPatternAndUpdateRest(VIDEO_CODEC_MAP, dto.getVideoCodec(), dto)); dto.setHdResolution(seekPatternAndUpdateRest(HD_RESOLUTION_MAP, dto.getHdResolution(), dto)); dto.setVideoSource(seekPatternAndUpdateRest(videoSourceMap, dto.getVideoSource(), dto, PART_PATTERNS)); // SEASON + EPISODES { final Matcher matcher = TV_PATTERN.matcher(dto.getRest()); if (matcher.find()) { // logger.finest("It's a TV Show: " + group0); dto.setRest(cutMatch(dto.getRest(), matcher, "./TVSHOW/.")); final Matcher smatcher = SEASON_PATTERN.matcher(matcher.group(0)); smatcher.find(); int season = Integer.parseInt(smatcher.group(1)); dto.setSeason(season); final Matcher ematcher = EPISODE_PATTERN.matcher(matcher.group(0)); while (ematcher.find()) { dto.getEpisodes().add(Integer.parseInt(ematcher.group(1))); } } } // PART { for (Pattern pattern : PART_PATTERNS) { final Matcher matcher = pattern.matcher(dto.getRest()); if (matcher.find()) { dto.setRest(cutMatch(dto.getRest(), matcher, " /PART/ ")); dto.setPart(Integer.parseInt(matcher.group(1))); break; } } } // SETS { for (;;) { final Matcher matcher = SET_PATTERN.matcher(dto.getRest()); if (!matcher.find()) { break; } dto.setRest(cutMatch(dto.getRest(), matcher, PatternUtils.SPACE_SLASH_SPACE)); FilenameDTO.SetDTO set = new FilenameDTO.SetDTO(); dto.getSets().add(set); String n = matcher.group(1); Matcher nmatcher = SET_INDEX_PATTERN.matcher(n); if (nmatcher.find()) { set.setIndex(Integer.parseInt(nmatcher.group(1))); n = cutMatch(n, nmatcher); } set.setTitle(n.trim()); } } // Movie ID detection { Matcher matcher = ID_PATTERN.matcher(dto.getRest()); if (matcher.find()) { dto.setRest(cutMatch(dto.getRest(), matcher, " /ID/ ")); String idString[] = matcher.group(1).split("[-\\s+]"); if (idString.length == 2) { dto.setId(idString[0].toLowerCase(), idString[1]); } else { LOG.debug("Error decoding ID from filename: {}", matcher.group(1)); } } else { matcher = IMDB_PATTERN.matcher(dto.getRest()); if (matcher.find()) { dto.setRest(cutMatch(dto.getRest(), matcher, " /ID/ ")); dto.setId("imdb", matcher.group(1)); } } } // LANGUAGES if (languageDetection) { for (;;) { String language = seekPatternAndUpdateRest(this.languageTools.getStrictLanguageMap(), null, dto); if (language == null) { break; } dto.getLanguages().add(language); } } // TITLE { String rest = dto.getRest(); int iextra = dto.isExtra() ? rest.indexOf("/EXTRA/") : rest.length(); int itvshow = dto.getSeason() >= 0 ? rest.indexOf("/TVSHOW/") : rest.length(); int ipart = dto.getPart() >= 0 ? rest.indexOf("/PART/") : rest.length(); { int min = iextra < itvshow ? iextra : itvshow; min = min < ipart ? min : ipart; // Find first token before trailer, TV show and part // Name should not start with '-' (exclude wrongly marked part/episode titles) String title = ""; StringTokenizer t = new StringTokenizer(rest.substring(0, min), "/[]"); while (t.hasMoreElements()) { String token = t.nextToken(); token = cleanUpTitle(token); if (token.length() >= 1 && token.charAt(0) != '-') { title = token; break; } } boolean first = Boolean.TRUE; while (t.hasMoreElements()) { String token = t.nextToken(); token = cleanUpTitle(token); // Search year (must be next to a non-empty token) if (first) { if (token.length() > 0) { try { int year = Integer.parseInt(token); if (year >= 1800 && year <= 3000) { dto.setYear(year); } } catch (NumberFormatException error) { } } first = Boolean.FALSE; } if (!languageDetection) { break; } // Loose language search if (token.length() >= 2 && token.indexOf('-') < 0) { for (Map.Entry<String, Pattern> e : this.languageTools.getLooseLanguageMap().entrySet()) { Matcher matcher = e.getValue().matcher(token); if (matcher.find()) { dto.getLanguages().add(e.getKey()); } } } } // Search year within title (last 4 digits or 4 digits in parenthesis) if (dto.getYear() < 0) { Matcher ymatcher = MOVIE_YEAR_PATTERN.matcher(title); if (ymatcher.find()) { int year = Integer.parseInt(ymatcher.group(1)); if (year >= 1919 && year <= 2099) { dto.setYear(year); title = cutMatch(title, ymatcher); } } } dto.setTitle(title); } // EPISODE TITLE if (dto.getSeason() >= 0) { itvshow += 8; Matcher matcher = SECOND_TITLE_PATTERN.matcher(rest.substring(itvshow)); while (matcher.find()) { String title = cleanUpTitle(matcher.group(1)); if (title.length() > 0) { if (!skipEpisodeTitle) { dto.setEpisodeTitle(title); } break; } } } // PART TITLE // Just do this for no extra, already named. if ((dto.getPart() >= 0) && !dto.isExtra()) { ipart += 6; Matcher matcher = SECOND_TITLE_PATTERN.matcher(rest.substring(ipart)); while (matcher.find()) { String title = cleanUpTitle(matcher.group(1)); if (title.length() > 0) { dto.setPartTitle(title); break; } } } } } private String cleanUp(final String filename) { String rFilename = filename; for (Pattern p : skipPatterns) { rFilename = p.matcher(rFilename).replaceAll("./."); } return rFilename; } /** * Replace all dividers with spaces and trim trailing spaces and redundant braces/minuses at the end. * * @param token String to clean up. * @return Prepared title. */ private static String cleanUpTitle(String token) { String title = TITLE_CLEANUP_DIV_PATTERN.matcher(token).replaceAll(" ").trim(); return TITLE_CLEANUP_CUT_PATTERN.matcher(title).replaceAll("").trim(); } private static <T> T seekPatternAndUpdateRest(Map<T, Pattern> map, T oldValue, FilenameDTO dto) { for (Map.Entry<T, Pattern> e : map.entrySet()) { Matcher matcher = e.getValue().matcher(dto.getRest()); if (matcher.find()) { dto.setRest(cutMatch(dto.getRest(), matcher, "./.")); return e.getKey(); } } return oldValue; } private static <T> T seekPatternAndUpdateRest(Map<T, Pattern> map, T oldValue, FilenameDTO dto, Collection<Pattern> protectPatterns) { for (Map.Entry<T, Pattern> e : map.entrySet()) { Matcher matcher = e.getValue().matcher(dto.getRest()); if (matcher.find()) { String restCut = cutMatch(dto.getRest(), matcher, "./."); for (Pattern protectPattern : protectPatterns) { if (protectPattern.matcher(dto.getRest()).find() && !protectPattern.matcher(restCut).find()) { return e.getKey(); } } dto.setRest(restCut); return e.getKey(); } } return oldValue; } private static String cutMatch(String rest, Matcher matcher) { return (rest.substring(0, matcher.start()) + rest.substring(matcher.end())).trim(); } private static String cutMatch(String rest, Matcher matcher, String divider) { return rest.substring(0, matcher.start()) + divider + rest.substring(matcher.end()); } }