// Java tutorial
package com.inmobi.databus.readers;

/*
 * #%L
 * messaging-client-databus
 * %%
 * Copyright (C) 2012 - 2014 InMobi
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.ReflectionUtils;

import com.inmobi.databus.files.FileMap;
import com.inmobi.databus.files.StreamFile;
import com.inmobi.databus.partition.PartitionId;
import com.inmobi.messaging.Message;
import com.inmobi.messaging.consumer.InvalidCheckpointException;
import com.inmobi.messaging.metrics.PartitionReaderStatsExposer;

/**
 * Base reader for stream files laid out under
 * {@code <streamDir>/yyyy/MM/dd/HH/mm} directories.
 *
 * <p>Files are read through a Hadoop {@link InputFormat} that is instantiated
 * reflectively from a class name. Each record value is handed back as a
 * {@link Message}: values that are {@link Writable} are serialized into a new
 * {@code Message}; any other value is expected to already be a
 * {@code Message}.
 *
 * <p>Only {@link #closeCurrentFile()} is synchronized; no other thread-safety
 * guarantee is made by this class.
 *
 * @param <T> the stream file type tracked by the file map
 */
public abstract class DatabusStreamReader<T extends StreamFile>
    extends StreamReader<T> {

  private static final Log LOG =
      LogFactory.getLog(DatabusStreamReader.class);

  /** Number of directory levels below the stream dir: yyyy/MM/dd/HH/mm. */
  private static final int MINUTE_DIR_DEPTH = 5;

  private final InputFormat<Object, Object> input;
  private final Configuration conf;
  /** Scratch buffer reused for serializing {@link Writable} values. */
  private final ByteArrayOutputStream baos = new ByteArrayOutputStream();
  private FileSplit currentFileSplit;
  private RecordReader<Object, Object> recordReader;
  private Object msgKey;
  private Object msgValue;
  /** True when record values are {@link Writable} and must be serialized. */
  private boolean needsSerialize;
  protected Date buildTimestamp;
  protected Date startTimestamp;

  /**
   * Creates the reader and instantiates the input format.
   *
   * @param inputFormatClass fully qualified name of the {@link InputFormat}
   *        implementation, resolved through {@code conf}
   * @throws IllegalArgumentException if {@code inputFormatClass} cannot be
   *         found
   * @throws IOException declared for the superclass constructor
   */
  protected DatabusStreamReader(PartitionId partitionId, FileSystem fs,
      Path streamDir, String inputFormatClass, Configuration conf,
      long waitTimeForFileCreate, PartitionReaderStatsExposer metrics,
      boolean noNewFiles, Date stopTime) throws IOException {
    super(partitionId, fs, streamDir, waitTimeForFileCreate, metrics,
        noNewFiles, stopTime);
    this.conf = conf;
    try {
      // The configured class is only known by name, so the generic
      // parameters cannot be verified at compile time.
      @SuppressWarnings("unchecked")
      InputFormat<Object, Object> format =
          (InputFormat<Object, Object>) ReflectionUtils.newInstance(
              conf.getClassByName(inputFormatClass), conf);
      input = format;
    } catch (ClassNotFoundException e) {
      throw new IllegalArgumentException("Input format class "
          + inputFormatClass + " not found", e);
    }
  }

  /**
   * Records {@code date} as the build timestamp and then runs
   * {@code build()}.
   */
  public void build(Date date) throws IOException {
    this.buildTimestamp = date;
    build();
  }

  /**
   * Sets both the build timestamp and the start timestamp to the given value.
   */
  public void initializeBuildTimeStamp(Date buildTimestamp)
      throws IOException {
    this.buildTimestamp = buildTimestamp;
    this.startTimestamp = buildTimestamp;
  }

  /** Populates {@code fmap} with the files accepted by {@code pathFilter}. */
  protected abstract void buildListing(FileMap<T> fmap, PathFilter pathFilter)
      throws IOException;

  /**
   * Walks {@code dir} depth-first and adds every non-directory entry accepted
   * by {@code pathFilter} to {@code fmap}.
   */
  protected void doRecursiveListing(Path dir, PathFilter pathFilter,
      FileMap<T> fmap) throws IOException {
    FileStatus[] fileStatuses = fsListFileStatus(dir, pathFilter);
    if (fileStatuses == null || fileStatuses.length == 0) {
      LOG.debug("No files in directory:" + dir);
      return;
    }
    for (FileStatus file : fileStatuses) {
      if (file.isDir()) {
        doRecursiveListing(file.getPath(), pathFilter, fmap);
      } else {
        fmap.addPath(file);
      }
    }
  }

  /**
   * Closes whatever is open and opens a record reader on the current file,
   * skipping {@code currentLineNum} lines.
   *
   * @param next when true, the per-file settings are reset before opening
   * @return false if there is no current file; true otherwise, including when
   *         the current file turns out not to exist (no record reader is
   *         created in that case)
   */
  protected boolean openCurrentFile(boolean next) throws IOException {
    closeCurrentFile();
    if (getCurrentFile() == null) {
      return false;
    }
    if (next) {
      resetCurrentFileSettings();
    }
    LOG.info("Opening file:" + getCurrentFile() + " NumLinesTobeSkipped when"
        + " opening:" + currentLineNum);
    try {
      FileStatus status = fsGetFileStatus(getCurrentFile());
      if (status != null) {
        currentFileSplit = new FileSplit(getCurrentFile(), 0L,
            status.getLen(), new String[0]);
        recordReader = input.getRecordReader(currentFileSplit,
            new JobConf(conf), Reporter.NULL);
        metrics.incrementNumberRecordReaders();
        msgKey = recordReader.createKey();
        msgValue = recordReader.createValue();
        if (msgValue instanceof Writable) {
          needsSerialize = true;
        } else {
          assert (msgValue instanceof Message);
          needsSerialize = false;
        }
        skipLines(currentLineNum);
      } else {
        LOG.info("CurrentFile:" + getCurrentFile() + " does not exist");
      }
    } catch (FileNotFoundException fnfe) {
      LOG.info("CurrentFile:" + getCurrentFile() + " does not exist");
    }
    return true;
  }

  /**
   * Closes the current record reader, if any, and forgets the current split.
   *
   * <p>The reader and split references are cleared even when {@code close()}
   * throws, so a failing close is not retried on the same reader by the next
   * {@link #openCurrentFile(boolean)}.
   */
  protected synchronized void closeCurrentFile() throws IOException {
    try {
      if (recordReader != null) {
        recordReader.close();
      }
    } finally {
      recordReader = null;
      currentFileSplit = null;
    }
  }

  /**
   * Reads the next record from the open file.
   *
   * @return the next message, or null if no file is open or the record
   *         reader has no more records
   */
  protected Message readRawLine() throws IOException {
    if (recordReader == null) {
      return null;
    }
    if (!needsSerialize) {
      // The value object itself is returned to the caller, so a fresh one is
      // needed for every record.
      msgValue = recordReader.createValue();
    }
    if (!recordReader.next(msgKey, msgValue)) {
      return null;
    }
    if (needsSerialize) {
      baos.reset();
      ((Writable) msgValue).write(new DataOutputStream(baos));
      return new Message(baos.toByteArray());
    }
    return (Message) msgValue;
  }

  /**
   * Looks up the file returned by {@code getHigherValue(currentFile)} and, if
   * there is one, moves to it and opens it.
   *
   * @return true if such a file was found
   */
  protected boolean setNextHigherAndOpen(FileStatus currentFile)
      throws IOException, InterruptedException {
    LOG.debug("finding next higher for " + getCurrentFile());
    FileStatus nextHigherFile = getHigherValue(currentFile);
    return setNextAndOpen(currentFile, nextHigherFile);
  }

  /**
   * Looks up the file returned by {@code getHigherValue(file)} and, if there
   * is one, moves to it and opens it. No current file status is passed on to
   * {@code prepareMoveToNext}.
   *
   * @return true if such a file was found
   */
  protected boolean setNextHigherAndOpen(T file)
      throws IOException, InterruptedException {
    LOG.debug("finding next higher for " + file);
    FileStatus nextHigherFile = getHigherValue(file);
    return setNextAndOpen(null, nextHigherFile);
  }

  /**
   * Moves to {@code nextHigherFile} and opens it.
   *
   * @return false if {@code nextHigherFile} is null, true otherwise
   */
  private boolean setNextAndOpen(FileStatus currentFile,
      FileStatus nextHigherFile) throws IOException, InterruptedException {
    if (nextHigherFile == null) {
      return false;
    }
    boolean next = prepareMoveToNext(currentFile, nextHigherFile);
    openCurrentFile(next);
    return true;
  }

  /**
   * Parses the minute-level date encoded in {@code dir}, taken from the
   * characters that follow the {@code streamDir} prefix and one separator.
   *
   * @throws IllegalArgumentException if {@code dir} is too short to hold a
   *         minute directory or the date portion cannot be parsed
   */
  public static Date getDateFromStreamDir(Path streamDir, Path dir) {
    String pathStr = dir.toString();
    int startIndex = streamDir.toString().length() + 1;
    int endIndex = startIndex + minDirFormatStr.length();
    if (pathStr.length() < endIndex) {
      throw new IllegalArgumentException("Directory " + dir
          + " does not contain a " + minDirFormatStr + " path under "
          + streamDir);
    }
    String dirString = pathStr.substring(startIndex, endIndex);
    try {
      return minDirFormat.get().parse(dirString);
    } catch (ParseException e) {
      LOG.warn("Could not get date from directory passed", e);
      throw new IllegalArgumentException(e);
    }
  }

  /**
   * Parses the minute-level date at the start of a checkpoint path.
   *
   * @throws InvalidCheckpointException if the path is null, shorter than the
   *         minute directory format, or does not parse as a date
   */
  public static Date getDateFromCheckpointPath(String checkpointPath) {
    if (checkpointPath == null
        || checkpointPath.length() < minDirFormatStr.length()) {
      throw new InvalidCheckpointException("Invalid checkpoint",
          new IllegalArgumentException("Checkpoint path does not start with "
              + minDirFormatStr + ": " + checkpointPath));
    }
    String dirString =
        checkpointPath.substring(0, minDirFormatStr.length());
    try {
      return minDirFormat.get().parse(dirString);
    } catch (ParseException e) {
      LOG.warn("Could not get date from directory passed", e);
      throw new InvalidCheckpointException("Invalid checkpoint", e);
    }
  }

  /** Pattern of a minute directory relative to the stream directory. */
  static String minDirFormatStr = "yyyy" + File.separator + "MM"
      + File.separator + "dd" + File.separator + "HH" + File.separator + "mm";

  /** Per-thread formatter, since {@link SimpleDateFormat} is not thread-safe. */
  static final ThreadLocal<DateFormat> minDirFormat =
      new ThreadLocal<DateFormat>() {
        @Override
        protected SimpleDateFormat initialValue() {
          return new SimpleDateFormat(minDirFormatStr);
        }
      };

  /** Per-thread formatter for hour directories. */
  static final ThreadLocal<DateFormat> hhDirFormat =
      new ThreadLocal<DateFormat>() {
        @Override
        protected SimpleDateFormat initialValue() {
          return new SimpleDateFormat("yyyy" + File.separator + "MM"
              + File.separator + "dd" + File.separator + "HH");
        }
      };

  /** Returns the hour directory for {@code date} under {@code streamDir}. */
  public static Path getHourDirPath(Path streamDir, Date date) {
    return new Path(streamDir, hhDirFormat.get().format(date));
  }

  /** Returns the minute directory for {@code date} under {@code streamDir}. */
  public static Path getMinuteDirPath(Path streamDir, Date date) {
    return new Path(streamDir, minDirFormat.get().format(date));
  }

  /** Hour of day of the first existing hour directory; -1 until found. */
  private int startHour = -1;

  /**
   * Steps forward hour by hour from {@code buildTimestamp} until an existing
   * hour directory is found or the current time is reached. On success,
   * {@code startHour} is set and {@code buildTimestamp} is moved to the
   * position that was found.
   */
  private void calculateStartHour() throws IOException {
    Calendar current = Calendar.getInstance();
    Date now = current.getTime();
    current.setTime(buildTimestamp);
    while (current.getTime().before(now)) {
      Path hhDir = getHourDirPath(streamDir, current.getTime());
      if (fsIsPathExists(hhDir)) {
        startHour = current.get(Calendar.HOUR_OF_DAY);
        break;
      }
      // go to next hour
      LOG.info("Hour directory " + hhDir + " does not exist");
      current.add(Calendar.HOUR_OF_DAY, 1);
      current.set(Calendar.MINUTE, 0);
    }
    if (startHour != -1) {
      buildTimestamp = current.getTime();
      LOG.info("Starts listing from " + buildTimestamp);
    }
  }

  /**
   * Makes sure a build timestamp and a start hour are available, deriving the
   * timestamp from the earliest directory in the stream when none is set.
   *
   * @return true once a start hour has been determined, false if the start
   *         directory or an existing hour directory could not be found yet
   */
  protected boolean setBuildTimeStamp(PathFilter pathFilter)
      throws IOException {
    if (buildTimestamp == null) {
      Date tmp = getTimestampFromStartOfStream(pathFilter);
      if (tmp == null) {
        LOG.info("Could not find start directory yet");
        return false;
      }
      this.buildTimestamp = tmp;
    }
    if (startHour == -1) {
      calculateStartHour();
    }
    return startHour != -1;
  }

  /**
   * Descends five directory levels from the stream directory, each time
   * following the entry whose path compares lowest, and parses the date of
   * the directory reached.
   *
   * @return the date of that directory, or null if any level has no entries
   *         accepted by {@code pathFilter}
   */
  public Date getTimestampFromStartOfStream(PathFilter pathFilter)
      throws IOException {
    FileStatus leastTimeStampFileStatus = null;
    Path dir = streamDir;
    for (int d = 0; d < MINUTE_DIR_DEPTH; d++) {
      FileStatus[] filestatuses = fsListFileStatus(dir, pathFilter);
      if (filestatuses == null || filestatuses.length == 0) {
        return null;
      }
      leastTimeStampFileStatus = filestatuses[0];
      for (int i = 1; i < filestatuses.length; i++) {
        if (leastTimeStampFileStatus.getPath().compareTo(
            filestatuses[i].getPath()) > 0) {
          leastTimeStampFileStatus = filestatuses[i];
        }
      }
      dir = leastTimeStampFileStatus.getPath();
    }
    LOG.info("Starting dir in the stream "
        + leastTimeStampFileStatus.getPath());
    return getDateFromStreamDir(streamDir,
        leastTimeStampFileStatus.getPath());
  }
}