Java tutorial

This walkthrough is built around a single, complete JUnit test from the Apache Apex Malhar library: FSInputModuleAppTest. The test wires FSInputModule into a small DAG, runs it on an embedded cluster via LocalMode, and then asserts that both the file contents and the per-file metadata (block counts, relative paths) land in the expected output files.
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.datatorrent.lib.io.fs;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;

import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestWatcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.WildcardFileFilter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import com.datatorrent.api.DAG;
import com.datatorrent.api.LocalMode;
import com.datatorrent.api.StreamingApplication;
import com.datatorrent.lib.io.block.AbstractBlockReader.ReaderRecord;
import com.datatorrent.lib.io.block.BlockMetadata.FileBlockMetadata;
import com.datatorrent.lib.io.fs.AbstractFileSplitter.FileMetadata;
import com.datatorrent.lib.stream.DevNull;
import com.datatorrent.netlet.util.Slice;

/**
 * End-to-end test for {@link FSInputModule}: reads files from a local input
 * directory in fixed-size blocks and verifies the emitted file data and
 * file metadata.
 */
public class FSInputModuleAppTest
{
  private String inputDir;
  static String outputDir;
  private StreamingApplication app;
  private static final String FILE_1 = "file1.txt";
  private static final String FILE_2 = "file2.txt";
  private static final String FILE_1_DATA = "File one data";
  private static final String FILE_2_DATA = "File two data. This has more data hence more blocks.";
  static final String OUT_DATA_FILE = "fileData.txt";
  static final String OUT_METADATA_FILE = "fileMetaData.txt";

  // Gives each test method its own working directory under target/.
  public static class TestMeta extends TestWatcher
  {
    public String baseDirectory;

    @Override
    protected void starting(org.junit.runner.Description description)
    {
      this.baseDirectory = "target/" + description.getClassName() + "/" + description.getMethodName();
    }
  }

  @Rule
  public TestMeta testMeta = new TestMeta();

  @Before
  public void setup() throws Exception
  {
    inputDir = testMeta.baseDirectory + File.separator + "input";
    outputDir = testMeta.baseDirectory + File.separator + "output";
    FileUtils.writeStringToFile(new File(inputDir + File.separator + FILE_1), FILE_1_DATA);
    FileUtils.writeStringToFile(new File(inputDir + File.separator + FILE_2), FILE_2_DATA);
    FileUtils.forceMkdir(new File(inputDir + File.separator + "dir"));
    FileUtils.writeStringToFile(new File(inputDir + File.separator + "dir/inner.txt"), FILE_1_DATA);
  }

  @After
  public void tearDown() throws IOException
  {
    FileUtils.deleteDirectory(new File(inputDir));
  }

  @Test
  public void testApplication() throws Exception
  {
    app = new Application();
    Configuration conf = new Configuration(false);
    conf.set("dt.operator.hdfsInputModule.prop.files", inputDir);
    // A 10-byte block size forces multi-block files, so block splitting is exercised.
    conf.set("dt.operator.hdfsInputModule.prop.blockSize", "10");
    conf.set("dt.operator.hdfsInputModule.prop.blocksThreshold", "4");
    conf.set("dt.operator.hdfsInputModule.prop.scanIntervalMillis", "10000");
    conf.set("dt.attr.CHECKPOINT_WINDOW_COUNT", "10");

    LocalMode lma = LocalMode.newInstance();
    lma.prepareDAG(app, conf);
    LocalMode.Controller lc = lma.getController();
    lc.setHeartbeatMonitoringEnabled(true);
    lc.runAsync();

    // Wait up to 20 seconds for the output directory to appear, then give
    // the writers time to flush before shutting the local cluster down.
    long now = System.currentTimeMillis();
    Path outDir = new Path("file://" + new File(outputDir).getAbsolutePath());
    FileSystem fs = FileSystem.newInstance(outDir.toUri(), new Configuration());
    while (!fs.exists(outDir) && System.currentTimeMillis() - now < 20000) {
      Thread.sleep(500);
      LOG.debug("Waiting for {}", outDir);
    }
    Thread.sleep(10000);
    lc.shutdown();

    Assert.assertTrue("output dir does not exist", fs.exists(outDir));

    File dir = new File(outputDir);
    FileFilter fileFilter = new WildcardFileFilter(OUT_METADATA_FILE + "*");
    verifyFileContents(dir.listFiles(fileFilter),
        "[fileName=file1.txt, numberOfBlocks=2, isDirectory=false, relativePath=input/file1.txt]");
    verifyFileContents(dir.listFiles(fileFilter),
        "[fileName=file2.txt, numberOfBlocks=6, isDirectory=false, relativePath=input/file2.txt]");
    verifyFileContents(dir.listFiles(fileFilter),
        "[fileName=dir, numberOfBlocks=0, isDirectory=true, relativePath=input/dir]");
    verifyFileContents(dir.listFiles(fileFilter),
        "[fileName=inner.txt, numberOfBlocks=2, isDirectory=false, relativePath=input/dir/inner.txt]");

    fileFilter = new WildcardFileFilter(OUT_DATA_FILE + "*");
    verifyFileContents(dir.listFiles(fileFilter), FILE_1_DATA);
    verifyFileContents(dir.listFiles(fileFilter), FILE_2_DATA);
  }

  private void verifyFileContents(File[] files, String expectedData) throws IOException
  {
    StringBuilder filesData = new StringBuilder();
    for (File file : files) {
      filesData.append(FileUtils.readFileToString(file));
    }
    Assert.assertTrue("File data doesn't contain expected text", filesData.indexOf(expectedData) > -1);
  }

  private static final Logger LOG = LoggerFactory.getLogger(FSInputModuleAppTest.class);

  private static class Application implements StreamingApplication
  {
    @Override
    public void populateDAG(DAG dag, Configuration conf)
    {
      FSInputModule module = dag.addModule("hdfsInputModule", FSInputModule.class);
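      // The module's three output ports are consumed as follows: per-file
      // metadata goes to one file writer, the raw data records go to a second
      // file writer, and the block metadata is not needed by this test, so it
      // is drained into a DevNull sink to keep the DAG fully connected.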
      AbstractFileOutputOperator<FileMetadata> metadataWriter = new MetadataWriter(FSInputModuleAppTest.OUT_METADATA_FILE);
      metadataWriter.setFilePath(FSInputModuleAppTest.outputDir);
      dag.addOperator("FileMetadataWriter", metadataWriter);

      AbstractFileOutputOperator<ReaderRecord<Slice>> dataWriter = new HDFSFileWriter(FSInputModuleAppTest.OUT_DATA_FILE);
      dataWriter.setFilePath(FSInputModuleAppTest.outputDir);
      dag.addOperator("FileDataWriter", dataWriter);

      DevNull<FileBlockMetadata> devNull = dag.addOperator("devNull", DevNull.class);

      dag.addStream("FileMetaData", module.filesMetadataOutput, metadataWriter.input);
      dag.addStream("data", module.messages, dataWriter.input);
      dag.addStream("blockMetadata", module.blocksMetadataOutput, devNull.data);
    }
  }

  // Writes the string form of each FileMetadata tuple to a single named file.
  private static class MetadataWriter extends AbstractFileOutputOperator<FileMetadata>
  {
    String fileName;

    @SuppressWarnings("unused")
    private MetadataWriter()
    {
    }

    public MetadataWriter(String fileName)
    {
      this.fileName = fileName;
    }

    @Override
    protected String getFileName(FileMetadata tuple)
    {
      return fileName;
    }

    @Override
    protected byte[] getBytesForTuple(FileMetadata tuple)
    {
      return tuple.toString().getBytes();
    }
  }

  // Writes the raw bytes of each block record to a single named file.
  private static class HDFSFileWriter extends AbstractFileOutputOperator<ReaderRecord<Slice>>
  {
    String fileName;

    @SuppressWarnings("unused")
    private HDFSFileWriter()
    {
    }

    public HDFSFileWriter(String fileName)
    {
      this.fileName = fileName;
    }

    @Override
    protected String getFileName(ReaderRecord<Slice> tuple)
    {
      return fileName;
    }

    @Override
    protected byte[] getBytesForTuple(ReaderRecord<Slice> tuple)
    {
      return tuple.getRecord().buffer;
    }
  }
}
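The test drives everything through LocalMode, and the same few calls are all you need to experiment with FSInputModule outside of JUnit. The sketch below is a hypothetical standalone launcher, not part of the test above: the class name, package, input path, and run duration are illustrative placeholders, and the DAG wiring simply mirrors Application.populateDAG() with DevNull sinks on all three output ports.

package com.example.fsinput; // hypothetical package, not part of the test above

import org.apache.hadoop.conf.Configuration;

import com.datatorrent.api.DAG;
import com.datatorrent.api.LocalMode;
import com.datatorrent.api.StreamingApplication;
import com.datatorrent.lib.io.block.AbstractBlockReader.ReaderRecord;
import com.datatorrent.lib.io.block.BlockMetadata.FileBlockMetadata;
import com.datatorrent.lib.io.fs.AbstractFileSplitter.FileMetadata;
import com.datatorrent.lib.io.fs.FSInputModule;
import com.datatorrent.lib.stream.DevNull;
import com.datatorrent.netlet.util.Slice;

// Minimal local-mode launcher sketch: the same LocalMode calls as the test,
// with every FSInputModule output port drained into a DevNull sink.
public class FSInputLocalRunner
{
  public static void main(String[] args) throws Exception
  {
    StreamingApplication app = new StreamingApplication()
    {
      @Override
      public void populateDAG(DAG dag, Configuration conf)
      {
        FSInputModule module = dag.addModule("hdfsInputModule", FSInputModule.class);
        DevNull<FileMetadata> fileMetaSink = dag.addOperator("fileMetaSink", new DevNull<FileMetadata>());
        DevNull<FileBlockMetadata> blockMetaSink = dag.addOperator("blockMetaSink", new DevNull<FileBlockMetadata>());
        DevNull<ReaderRecord<Slice>> dataSink = dag.addOperator("dataSink", new DevNull<ReaderRecord<Slice>>());
        dag.addStream("FileMetaData", module.filesMetadataOutput, fileMetaSink.data);
        dag.addStream("blockMetadata", module.blocksMetadataOutput, blockMetaSink.data);
        dag.addStream("data", module.messages, dataSink.data);
      }
    };

    Configuration conf = new Configuration(false);
    conf.set("dt.operator.hdfsInputModule.prop.files", "/tmp/fsinput-demo"); // placeholder input directory
    conf.set("dt.operator.hdfsInputModule.prop.blockSize", "10");

    LocalMode lma = LocalMode.newInstance();
    lma.prepareDAG(app, conf);
    lma.getController().run(30000); // run the embedded cluster for 30 seconds, then exit
  }
}

Compared with the test, this launcher uses the blocking Controller.run(millis) instead of runAsync()/shutdown(), since there are no assertions to make while the DAG is running.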