Java tutorial
/* (c) 2014 LinkedIn Corp. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use * this file except in compliance with the License. You may obtain a copy of the * License at http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. */ package com.linkedin.cubert.examples; import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Map; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.pig.data.Tuple; import org.apache.pig.data.TupleFactory; import org.codehaus.jackson.JsonNode; import com.linkedin.cubert.block.Block; import com.linkedin.cubert.block.BlockProperties; import com.linkedin.cubert.block.BlockSchema; import com.linkedin.cubert.operator.PhaseContext; import com.linkedin.cubert.operator.PostCondition; import com.linkedin.cubert.operator.PreconditionException; import com.linkedin.cubert.operator.PreconditionExceptionType; import com.linkedin.cubert.operator.TupleOperator; import com.linkedin.cubert.utils.CommonUtils; import com.linkedin.cubert.utils.JsonUtils; public class ListFiles implements TupleOperator { private Iterator<String> iterator; private Tuple output; @Override public void setInput(Map<String, Block> input, JsonNode json, BlockProperties props) throws IOException, InterruptedException { List<String> files = new ArrayList<String>(); String dirsStr = JsonUtils.getText(json.get("args"), "dirs"); String[] dirs = CommonUtils.trim(dirsStr.split(",")); for (String dir : dirs) { Path path = new Path(dir); FileSystem fs = path.getFileSystem(PhaseContext.getConf()); FileStatus[] allStatus = fs.globStatus(path); if (allStatus == null || allStatus.length == 0) continue; for (FileStatus status : allStatus) { if (status.isDir()) { listFiles(fs, status.getPath(), files); } else { files.add(status.getPath().toUri().getPath()); } } } iterator = files.iterator(); output = TupleFactory.getInstance().newTuple(1); } private void listFiles(FileSystem fs, Path path, List<String> files) throws IOException { FileStatus[] allStatus = fs.listStatus(path); if (allStatus == null) return; for (FileStatus status : allStatus) { if (status.isDir()) { listFiles(fs, status.getPath(), files); } else { files.add(status.getPath().toUri().getPath()); } } } @Override public Tuple next() throws IOException, InterruptedException { if (!iterator.hasNext()) return null; output.set(0, iterator.next()); return output; } @Override public PostCondition getPostCondition(Map<String, PostCondition> preConditions, JsonNode json) throws PreconditionException { if (!json.has("args") || !json.get("args").has("dirs")) { throw new PreconditionException(PreconditionExceptionType.INVALID_CONFIG, "dirs parameter not specified"); } BlockSchema schema = new BlockSchema("STRING filename"); return new PostCondition(schema, null, null); } }