Java tutorial
/** * Copyright (C) 2015 DataTorrent, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.datatorrent.stram.util; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; /** * <p>FSPartFileCollection class.</p> * * @since 0.3.2 */ public class FSPartFileCollection { private transient FileSystem fs; private transient FSDataOutputStream partOutStr; private transient FSDataOutputStream indexOutStr; private transient FSDataOutputStream metaOs; private transient String localBasePath; public static final String INDEX_FILE = "index.txt"; public static final String META_FILE = "meta.txt"; protected int bytesPerPartFile = 1024 * 1024; protected long millisPerPartFile = 60 * 60 * 1000; // 60 minutes protected int fileParts = 0; protected int partFileItemCount = 0; protected int partFileBytes = 0; protected long currentPartFileTimeStamp = 0; protected String basePath = "."; protected String hdfsFile; private boolean isLocalMode = false; private boolean syncRequested = false; public void setBytesPerPartFile(int bytes) { this.bytesPerPartFile = bytes; } public void setMillisPerPartFile(long millis) { this.millisPerPartFile = millis; } public void setLocalMode(boolean isLocalMode) { this.isLocalMode = isLocalMode; } public void setBasePath(String basePath) { this.basePath = basePath; } public String getBasePath() { return this.basePath; } public void setup() throws IOException { if (basePath.startsWith("file:")) { isLocalMode = true; localBasePath = basePath.substring(5); (new File(localBasePath)).mkdirs(); } fs = FileSystem.newInstance(new Path(basePath).toUri(), new Configuration()); Path pa = new Path(basePath, META_FILE); if (isLocalMode) { metaOs = new FSDataOutputStream(new FileOutputStream(localBasePath + "/" + META_FILE), null); } else { metaOs = fs.create(pa); } pa = new Path(basePath, INDEX_FILE); if (isLocalMode) { indexOutStr = new FSDataOutputStream(new FileOutputStream(localBasePath + "/" + INDEX_FILE), null); } else { indexOutStr = fs.create(pa); } } public void teardown() { logger.info("Closing hdfs part collection."); try { if (metaOs != null) { metaOs.close(); } if (partOutStr != null) { logger.debug("Closing part file"); partOutStr.close(); if (indexOutStr != null) { writeIndex(); } } if (indexOutStr != null) { writeIndexEnd(); indexOutStr.close(); } fs.close(); } catch (IOException ex) { logger.error(ex.toString()); } } private void openNewPartFile() throws IOException { hdfsFile = "part" + fileParts + ".txt"; Path path = new Path(basePath, hdfsFile); logger.debug("Opening new part file: {}", hdfsFile); if (isLocalMode) { partOutStr = new FSDataOutputStream(new FileOutputStream(localBasePath + "/" + hdfsFile), null); } else { partOutStr = fs.create(path); } fileParts++; currentPartFileTimeStamp = System.currentTimeMillis(); partFileItemCount = 0; partFileBytes = 0; } public void writeMetaData(byte[] bytes) throws IOException { metaOs.write(bytes); metaOs.hflush(); } public void writeDataItem(byte[] bytes, boolean incrementItemCount) throws IOException { if (partOutStr == null) { openNewPartFile(); } partOutStr.write(bytes); partFileBytes += bytes.length; if (incrementItemCount) { partFileItemCount++; } } public void requestSync() { syncRequested = true; } public boolean isReadyTurnoverPartFile() { try { return (syncRequested || (partOutStr.getPos() > bytesPerPartFile) || (currentPartFileTimeStamp + millisPerPartFile < System.currentTimeMillis())) && partOutStr.getPos() > 0; } catch (IOException ex) { return true; } } public boolean flushData() throws IOException { if (partOutStr != null) { partOutStr.hflush(); if (isReadyTurnoverPartFile()) { turnover(); return true; } } return false; } private void turnover() throws IOException { partOutStr.close(); partOutStr = null; writeIndex(); syncRequested = false; } private void writeIndex() { if (partFileBytes <= 0) { return; } try { String line = getLatestIndexLine(); resetIndexExtraInfo(); indexOutStr.write(line.getBytes()); indexOutStr.hflush(); indexOutStr.hsync(); } catch (IOException ex) { logger.error(ex.toString()); } } public String getLatestIndexLine() { String extraInfo = getIndexExtraInfo(); String line = "F:" + hdfsFile + ":" + currentPartFileTimeStamp + "-" + System.currentTimeMillis() + ":" + partFileItemCount; if (extraInfo != null) { line += ":T:" + extraInfo; } line += "\n"; return line; } private void writeIndexEnd() { try { indexOutStr.write(("E\n").getBytes()); indexOutStr.hflush(); indexOutStr.hsync(); } catch (IOException ex) { logger.error(ex.toString()); } } // to be overrided if user wants to include extra meta info for the current part file protected String getIndexExtraInfo() { return null; } // to be overrided if user wants to reset extra meta info for the current part file protected void resetIndexExtraInfo() { } private static final Logger logger = LoggerFactory.getLogger(FSPartFileCollection.class); }