Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.accumulo.core.file.rfile; import java.util.ArrayList; import org.apache.accumulo.core.conf.DefaultConfiguration; import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.core.data.ByteSequence; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile; import org.apache.accumulo.core.file.rfile.RFile.Reader; import org.apache.accumulo.core.file.rfile.RFile.Writer; import org.apache.accumulo.core.util.CachedConfiguration; import org.apache.commons.cli.BasicParser; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; /** * Split an RFile into large and small key/value files. * */ public class SplitLarge { public static void main(String[] args) throws Exception { Configuration conf = CachedConfiguration.getInstance(); FileSystem fs = FileSystem.get(conf); long maxSize = 10 * 1024 * 1024; Options opts = new Options(); Option maxSizeOption = new Option("m", "", true, "the maximum size of the key/value pair to shunt to the small file"); opts.addOption(maxSizeOption); CommandLine commandLine = new BasicParser().parse(opts, args); if (commandLine.hasOption(maxSizeOption.getOpt())) { maxSize = Long.parseLong(commandLine.getOptionValue(maxSizeOption.getOpt())); } for (String arg : commandLine.getArgs()) { Path path = new Path(arg); CachableBlockFile.Reader rdr = new CachableBlockFile.Reader(fs, path, conf, null, null); Reader iter = new RFile.Reader(rdr); if (!arg.endsWith(".rf")) { throw new IllegalArgumentException("File must end with .rf"); } String smallName = arg.substring(0, arg.length() - 3) + "_small.rf"; String largeName = arg.substring(0, arg.length() - 3) + "_large.rf"; int blockSize = (int) DefaultConfiguration.getDefaultConfiguration() .getMemoryInBytes(Property.TABLE_FILE_BLOCK_SIZE); Writer small = new RFile.Writer(new CachableBlockFile.Writer(fs, new Path(smallName), "gz", conf), blockSize); small.startDefaultLocalityGroup(); Writer large = new RFile.Writer(new CachableBlockFile.Writer(fs, new Path(largeName), "gz", conf), blockSize); large.startDefaultLocalityGroup(); iter.seek(new Range(), new ArrayList<ByteSequence>(), false); while (iter.hasTop()) { Key key = iter.getTopKey(); Value value = iter.getTopValue(); if (key.getSize() + value.getSize() < maxSize) { small.append(key, value); } else { large.append(key, value); } iter.next(); } iter.close(); large.close(); small.close(); } } }