/*
 * Licensed to Metamarkets Group Inc. (Metamarkets) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Metamarkets licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package io.druid.storage.hdfs;

import com.google.common.base.Predicate;
import com.google.common.base.Throwables;
import com.google.common.io.ByteSource;
import com.google.inject.Inject;
import io.druid.java.util.common.CompressionUtils;
import io.druid.java.util.common.FileUtils;
import io.druid.java.util.common.IAE;
import io.druid.java.util.common.RetryUtils;
import io.druid.java.util.common.StringUtils;
import io.druid.java.util.common.UOE;
import io.druid.java.util.common.io.NativeIO;
import io.druid.java.util.common.logger.Logger;
import io.druid.segment.loading.SegmentLoadingException;
import io.druid.segment.loading.URIDataPuller;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

import javax.tools.FileObject;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.io.Writer;
import java.net.URI;

/**
 * Pulls segment files out of HDFS and onto local disk.
 */
public class HdfsDataSegmentPuller implements URIDataPuller
{
  public static final int DEFAULT_RETRY_COUNT = 3;

  /**
   * FileObject.getLastModified and FileObject.delete don't throw IOException. This allows us to wrap those calls.
   */
  public static class HdfsIOException extends RuntimeException
  {
    private final IOException cause;

    public HdfsIOException(IOException ex)
    {
      super(ex);
      this.cause = ex;
    }

    protected IOException getIOException()
    {
      return cause;
    }
  }

  public static FileObject buildFileObject(final URI uri, final Configuration config)
  {
    return buildFileObject(uri, config, false);
  }

  public static FileObject buildFileObject(final URI uri, final Configuration config, final Boolean overwrite)
  {
    return new FileObject()
    {
      final Path path = new Path(uri);

      @Override
      public URI toUri()
      {
        return uri;
      }

      @Override
      public String getName()
      {
        return path.getName();
      }

      @Override
      public InputStream openInputStream() throws IOException
      {
        final FileSystem fs = path.getFileSystem(config);
        return fs.open(path);
      }

      @Override
      public OutputStream openOutputStream() throws IOException
      {
        final FileSystem fs = path.getFileSystem(config);
        return fs.create(path, overwrite);
      }

      @Override
      public Reader openReader(boolean ignoreEncodingErrors)
      {
        throw new UOE("HDFS Reader not supported");
      }

      @Override
      public CharSequence getCharContent(boolean ignoreEncodingErrors)
      {
        throw new UOE("HDFS CharSequence not supported");
      }

      @Override
      public Writer openWriter()
      {
        throw new UOE("HDFS Writer not supported");
      }

      @Override
      public long getLastModified()
      {
        try {
          final FileSystem fs = path.getFileSystem(config);
          return fs.getFileStatus(path).getModificationTime();
        }
        catch (IOException ex) {
          throw new HdfsIOException(ex);
        }
      }

      @Override
      public boolean delete()
      {
        try {
          final FileSystem fs = path.getFileSystem(config);
          return fs.delete(path, false);
        }
        catch (IOException ex) {
          throw new HdfsIOException(ex);
        }
      }
    };
  }

  private static final Logger log = new Logger(HdfsDataSegmentPuller.class);

  protected final Configuration config;

  @Inject
  public HdfsDataSegmentPuller(final Configuration config)
  {
    this.config = config;
  }

  FileUtils.FileCopyResult getSegmentFiles(final Path path, final File outDir) throws SegmentLoadingException
  {
    try {
      org.apache.commons.io.FileUtils.forceMkdir(outDir);
    }
    catch (IOException e) {
      throw new SegmentLoadingException(e, "");
    }
    try {
      final FileSystem fs = path.getFileSystem(config);
      if (fs.isDirectory(path)) {

        // -------- directory ---------

        try {
          return RetryUtils.retry(
              () -> {
                if (!fs.exists(path)) {
                  throw new SegmentLoadingException("No files found at [%s]", path.toString());
                }

                final RemoteIterator<LocatedFileStatus> children = fs.listFiles(path, false);
                final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
                while (children.hasNext()) {
                  final LocatedFileStatus child = children.next();
                  final Path childPath = child.getPath();
                  final String fname = childPath.getName();
                  if (fs.isDirectory(childPath)) {
                    log.warn("[%s] is a child directory, skipping", childPath.toString());
                  } else {
                    final File outFile = new File(outDir, fname);
                    try (final FSDataInputStream in = fs.open(childPath)) {
                      NativeIO.chunkedCopy(in, outFile);
                    }
                    result.addFile(outFile);
                  }
                }
                log.info(
                    "Copied %d bytes from [%s] to [%s]",
                    result.size(),
                    path.toString(),
                    outDir.getAbsolutePath()
                );
                return result;
              },
              shouldRetryPredicate(),
              DEFAULT_RETRY_COUNT
          );
        }
        catch (Exception e) {
          throw Throwables.propagate(e);
        }
      } else if (CompressionUtils.isZip(path.getName())) {

        // -------- zip ---------

        final FileUtils.FileCopyResult result = CompressionUtils.unzip(
            new ByteSource()
            {
              @Override
              public InputStream openStream() throws IOException
              {
                return getInputStream(path);
              }
            },
            outDir,
            shouldRetryPredicate(),
            false
        );

        log.info(
            "Unzipped %d bytes from [%s] to [%s]",
            result.size(),
            path.toString(),
            outDir.getAbsolutePath()
        );

        return result;
      } else if (CompressionUtils.isGz(path.getName())) {

        // -------- gzip ---------

        final String fname = path.getName();
        final File outFile = new File(outDir, CompressionUtils.getGzBaseName(fname));
        final FileUtils.FileCopyResult result = CompressionUtils.gunzip(
            new ByteSource()
            {
              @Override
              public InputStream openStream() throws IOException
              {
                return getInputStream(path);
              }
            },
            outFile
        );

        log.info(
            "Gunzipped %d bytes from [%s] to [%s]",
            result.size(),
            path.toString(),
            outFile.getAbsolutePath()
        );

        return result;
      } else {
        throw new SegmentLoadingException("Do not know how to handle file type at [%s]", path.toString());
      }
    }
    catch (IOException e) {
      throw new SegmentLoadingException(e, "Error loading [%s]", path.toString());
    }
  }

  public InputStream getInputStream(Path path) throws IOException
  {
    return buildFileObject(path.toUri(), config).openInputStream();
  }

  @Override
  public InputStream getInputStream(URI uri) throws IOException
  {
    if (!uri.getScheme().equalsIgnoreCase(HdfsStorageDruidModule.SCHEME)) {
      throw new IAE("Don't know how to load SCHEME [%s] for URI [%s]", uri.getScheme(), uri.toString());
    }
    return buildFileObject(uri, config).openInputStream();
  }

  /**
   * Return the "version" (aka last modified timestamp) of the URI
   *
   * @param uri The URI of interest
   *
   * @return The last modified timestamp of the uri in String format
   *
   * @throws IOException if the file status cannot be read from HDFS
   */
  @Override
  public String getVersion(URI uri) throws IOException
  {
    try {
      return StringUtils.format("%d", buildFileObject(uri, config).getLastModified());
    }
    catch (HdfsIOException ex) {
      throw ex.getIOException();
    }
  }

  @Override
  public Predicate<Throwable> shouldRetryPredicate()
  {
    return new Predicate<Throwable>()
    {
      @Override
      public boolean apply(Throwable input)
      {
        if (input == null) {
          return false;
        }
        if (input instanceof HdfsIOException) {
          return true;
        }
        if (input instanceof IOException) {
          return true;
        }
        // Walk the cause chain so wrapped IOExceptions (e.g. inside
        // RuntimeExceptions thrown by the retry lambda) are still retryable.
        return apply(input.getCause());
      }
    };
  }
}