Java tutorial
/** * (c) Copyright 2012 WibiData, Inc. * * See the NOTICE file distributed with this work for additional * information regarding copyright ownership. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.kiji.schema.tools; import static org.kiji.schema.tools.ToolUtils.parseRowKeyFlag; import java.io.IOException; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.NavigableMap; import java.util.Set; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.odiago.common.flags.Flag; import org.apache.avro.Schema; import org.apache.commons.io.IOUtils; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.TableNotFoundException; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.util.ToolRunner; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.kiji.schema.EntityId; import org.kiji.schema.EntityIdFactory; import org.kiji.schema.KijiAdmin; import org.kiji.schema.KijiColumnName; import org.kiji.schema.KijiCounter; import org.kiji.schema.KijiDataRequest; import org.kiji.schema.KijiMetaTable; import org.kiji.schema.KijiRowData; import org.kiji.schema.KijiRowScanner; import org.kiji.schema.KijiTable; import org.kiji.schema.KijiTableReader; import org.kiji.schema.KijiURI; import org.kiji.schema.avro.SchemaType; import org.kiji.schema.layout.KijiTableLayout; import org.kiji.schema.layout.KijiTableLayout.LocalityGroupLayout.FamilyLayout; import org.kiji.schema.layout.KijiTableLayout.LocalityGroupLayout.FamilyLayout.ColumnLayout; /** * Command-line tool to explore kiji table data like the 'ls' command of a unix shell. * * List all kiji tables: * kiji ls * * List all kiji instances: * kiji ls --instances * * List all families in a kiji table foo: * kiji ls --table=foo * * List all data in the info:email and derived:domain columns of a table foo: * kiji ls --table=foo --columns=info:email,derived:domain * * List all data in the info:email and derived:domain columns of a table foo in row bar: * kiji ls --table=foo --columns=info:email,derived:domain --entity=bar */ public class LsTool extends VersionValidatedTool { private static final Logger LOG = LoggerFactory.getLogger(LsTool.class); @Flag(name = "table", usage = "kiji table name") private String mTableName = ""; @Flag(name = "columns", usage = "Comma-delimited columns (family:qualifier), or * for all columns") private String mColumns = "*"; @Flag(name = "entity-id", usage = "(Unhashed) entity to look up") private String mEntityId = ""; @Flag(name = "entity-hash", usage = "Already-hashed entity to look up") private String mEntityHash = ""; @Flag(name = "start-row", usage = "The row to start scanning at (inclusive), " + "e.g. --start-row='hex:0088deadbeef', or --start-row='utf8:the row key in UTF8'.") private String mStartRow = null; @Flag(name = "limit-row", usage = "The row to stop scanning at (exclusive), " + "e.g. --limit-row='hex:0088deadbeef', or --limit-row='utf8:the row key in UTF8'.") private String mLimitRow = null; @Flag(name = "max-rows", usage = "Max number of rows to scan") private int mMaxRows = 0; @Flag(name = "max-versions", usage = "Max number of versions per cell to display") private int mMaxVersions = 1; @Flag(name = "min-timestamp", usage = "Min timestamp of versions to display") private long mMinTimestamp = 0; @Flag(name = "max-timestamp", usage = "Max timestamp of versions to display") private long mMaxTimestamp = Long.MAX_VALUE; @Flag(name = "instances", usage = "List all kiji instances installed on the cluster") private boolean mInstances = false; private HBaseAdmin mHBaseAdmin; /** * Lists all kiji instances. * @return A program exit code (zero on success). * @throws IOException If there is an error. */ private int listInstances() throws IOException { Set<String> instanceNames = getInstanceNames(); for (String instanceName : instanceNames) { getPrintStream().println(instanceName); } return 0; } /** * Returns a set of instance names. * * @return The set of instance names * @throws IOException if there is an error retrieving the HBase tables */ protected Set<String> getInstanceNames() throws IOException { Set<String> instanceNames = new HashSet<String>(); HBaseAdmin hbaseAdmin = new HBaseAdmin(getConf()); HTableDescriptor[] hTableDescriptors = hbaseAdmin.listTables(); for (HTableDescriptor hTableDescriptor : hTableDescriptors) { String instanceName = parseInstanceName(hTableDescriptor.getNameAsString()); if (null != instanceName) { instanceNames.add(parseInstanceName(hTableDescriptor.getNameAsString())); } } return instanceNames; } /** * Parses a table name for a kiji instance name. * * @param kijiTableName The table name to parse * @return instance name (or null if none found) */ protected static String parseInstanceName(String kijiTableName) { String[] parts = org.apache.hadoop.util.StringUtils.split(kijiTableName, '.'); if (parts.length < 3 || !KijiURI.KIJI_SCHEME.equals(parts[0])) { return null; } return parts[1]; } /** * Lists all the tables in a kiji instance. * * @param admin The kiji admin utility. * @return A program exit code (zero on success). * @throws IOException If there is an error. */ private int listTables(KijiAdmin admin) throws IOException { getPrintStream().println("Listing tables in kiji instance: " + getURI().toString()); for (String name : admin.getTableNames()) { getPrintStream().println(name); } return 0; } /** * Scans a table, displaying the data in the given columns, or all data if columns is null. * * @param reader The reader. * @param request The data request. * @param startRow The first row to include in this scan. * @param limitRow The last row to include in this scan. * @param mapTypeFamilies The map type families to print. * @param groupTypeColumns The group type columns to print. * @return A program exit code (zero on success). * @throws IOException If there is an IO error. */ private int scan(KijiTableReader reader, KijiDataRequest request, EntityId startRow, EntityId limitRow, Map<FamilyLayout, List<String>> mapTypeFamilies, Map<FamilyLayout, List<ColumnLayout>> groupTypeColumns) throws IOException { getPrintStream().println("Scanning kiji table: " + getURI().toString()); KijiRowScanner scanner = reader.getScanner(request, startRow, limitRow); try { int rowsOutput = 0; for (KijiRowData row : scanner) { if (mMaxRows != 0 && ++rowsOutput > mMaxRows) { break; } printRow(row, mapTypeFamilies, groupTypeColumns); } } finally { scanner.close(); } return 0; } /** * Prints the data for a single entity id. * * @param reader The reader. * @param request The data request. * @param entityId The entity id to lookup. * @param mapTypeFamilies The map type families to print. * @param groupTypeColumns The group type columns to print. * @return A program exit code (zero on success). */ private int lookup(KijiTableReader reader, KijiDataRequest request, EntityId entityId, Map<FamilyLayout, List<String>> mapTypeFamilies, Map<FamilyLayout, List<ColumnLayout>> groupTypeColumns) { getPrintStream().println("Looking up entity: " + Bytes.toStringBinary(entityId.getHBaseRowKey()) + " from kiji table: " + getURI().toString()); try { final KijiRowData row = reader.get(entityId, request); printRow(row, mapTypeFamilies, groupTypeColumns); } catch (IOException ioe) { LOG.error(ioe.getMessage()); return 1; } return 0; } /** * Prints cell data from the <code>row</code> for each column specified on the * <code>request</code>. * * @param row The row to read from. * @param mapTypeFamilies The map type families to print. * @param groupTypeColumns The group type columns to print. * @throws IOException if there is an error retrieving data from the KijiRowData. */ private void printRow(KijiRowData row, Map<FamilyLayout, List<String>> mapTypeFamilies, Map<FamilyLayout, List<ColumnLayout>> groupTypeColumns) throws IOException { // Unpack and print result for the map type families. for (Entry<FamilyLayout, List<String>> entry : mapTypeFamilies.entrySet()) { final FamilyLayout family = entry.getKey(); if (family.getDesc().getMapSchema().getType() == SchemaType.COUNTER) { // If this map family of counters has no qualifiers, print entire family. if (entry.getValue().isEmpty()) { for (String key : row.getQualifiers(family.getName())) { KijiCounter counter = row.getCounter(family.getName(), key); if (null != counter) { printCell(row.getEntityId(), counter.getTimestamp(), family.getName(), key, Long.valueOf(counter.getValue())); } } // If this map family of counters has been qualified, print only the given columns. } else { for (String key : entry.getValue()) { KijiCounter counter = row.getCounter(family.getName(), key); if (null != counter) { printCell(row.getEntityId(), counter.getTimestamp(), family.getName(), key, Long.valueOf(counter.getValue())); } } } } else { // If this map family of non-counters has no qualifiers, print entire family. if (entry.getValue().isEmpty()) { NavigableMap<String, NavigableMap<Long, Object>> keyTimeseriesMap = row .getValues(family.getName(), (Schema) null); for (String key : keyTimeseriesMap.keySet()) { for (Entry<Long, Object> timestampedCell : keyTimeseriesMap.get(key).entrySet()) { long timestamp = timestampedCell.getKey(); printCell(row.getEntityId(), timestamp, family.getName(), key, timestampedCell.getValue()); } } // If this map family of non-counters has been qualified, print only the given columns. } else { for (String key : entry.getValue()) { NavigableMap<Long, Object> timeseriesMap = row.getValues(family.getName(), key, (Schema) null); for (Entry<Long, Object> timestampedCell : timeseriesMap.entrySet()) { long timestamp = timestampedCell.getKey(); printCell(row.getEntityId(), timestamp, family.getName(), key, timestampedCell.getValue()); } } } } } // Unpack and print result for the group type families. for (Entry<FamilyLayout, List<ColumnLayout>> entry : groupTypeColumns.entrySet()) { String familyName = entry.getKey().getName(); for (ColumnLayout column : entry.getValue()) { final KijiColumnName colName = new KijiColumnName(familyName, column.getName()); if (column.getDesc().getColumnSchema().getType() == SchemaType.COUNTER) { final KijiCounter counter = row.getCounter(colName.getFamily(), colName.getQualifier()); if (null != counter) { printCell(row.getEntityId(), counter.getTimestamp(), colName.getFamily(), colName.getQualifier(), Long.valueOf(counter.getValue())); } } else { for (Entry<Long, Object> timestampedCell : row .getValues(colName.getFamily(), colName.getQualifier(), (Schema) null).entrySet()) { long timestamp = timestampedCell.getKey(); printCell(row.getEntityId(), timestamp, colName.getFamily(), colName.getQualifier(), timestampedCell.getValue()); } } } } getPrintStream().println(""); } /** * Prints the contents of a single kiji cell to the printstream. * * @param entityId The entity id. * @param timestamp This timestamp of a KijiCell. * @param family The family of a KijiCell. * @param qualifier The qualifier of a KijiCell. * @param cellData The contents of a KijiCell. */ private void printCell(EntityId entityId, Long timestamp, String family, String qualifier, Object cellData) { getPrintStream().printf("%s [%d] %s:%s%n %s%n", Bytes.toStringBinary(entityId.getHBaseRowKey()), timestamp, family, qualifier, "" + cellData); } @Override protected void validateFlags() throws Exception { if (mMaxRows < 0) { throw new RuntimeException("--max-rows must be positive"); } if (!mEntityId.isEmpty() && !mEntityHash.isEmpty()) { if (mStartRow != null) { throw new RuntimeException("--start-row is only relevant when scanning"); } if (mLimitRow != null) { throw new RuntimeException("--limit-row is only relevant when scanning"); } if (mMaxRows != 0) { throw new RuntimeException("--max-rows is only relevant when scanning"); } } } @Override protected int run(List<String> nonFlagArgs) throws Exception { if (mInstances) { return listInstances(); } KijiMetaTable metaTable; try { metaTable = getKiji().getMetaTable(); } catch (TableNotFoundException e) { LOG.error("Could not open the kiji meta table. Has kiji been installed?", e); return 1; } if (mTableName.isEmpty()) { // List tables in this kiji instance. mHBaseAdmin = new HBaseAdmin(getConf()); final KijiAdmin admin = new KijiAdmin(mHBaseAdmin, getKiji()); return listTables(admin); } setURI(getURI().setTableName(mTableName)); // else, list row(s) from this specific table. final KijiTableLayout tableLayout = metaTable.getTableLayout(mTableName); final String[] rawColumnNames = (mColumns.equals("*")) ? null : StringUtils.split(mColumns, ","); final Map<FamilyLayout, List<String>> mapTypeFamilies = getMapTypeFamilies(rawColumnNames, tableLayout); final Map<FamilyLayout, List<ColumnLayout>> groupTypeColumns = getGroupTypeColumns(rawColumnNames, tableLayout); final KijiDataRequest request = getDataRequest(mapTypeFamilies, groupTypeColumns, mMaxVersions, mMinTimestamp, mMaxTimestamp); KijiTable table = null; KijiTableReader reader = null; try { table = getKiji().openTable(mTableName); final EntityIdFactory eidFactory = table.getEntityIdFactory(); reader = table.openTableReader(); if (mEntityId.isEmpty() && mEntityHash.isEmpty()) { // Scan from startRow to limitRow. final EntityId startRow = (mStartRow == null) ? null : eidFactory.fromHBaseRowKey(parseRowKeyFlag(mStartRow)); final EntityId limitRow = (mLimitRow == null) ? null : eidFactory.fromHBaseRowKey(parseRowKeyFlag(mLimitRow)); return scan(reader, request, startRow, limitRow, mapTypeFamilies, groupTypeColumns); } else { // Return the specified entity. final EntityId entityId = ToolUtils.createEntityIdFromUserInputs(mEntityId, mEntityHash, tableLayout.getDesc().getKeysFormat()); return lookup(reader, request, entityId, mapTypeFamilies, groupTypeColumns); } } finally { IOUtils.closeQuietly(reader); IOUtils.closeQuietly(table); } } /** * Returns the list of map-type families specified by <code>rawColumns</code>. * If <code>rawColumns</code> is null, then all map-type families are returned. * * @param rawColumns The raw columns supplied by the user. * @param layout The KijiTableLayout. * @return A list of map type families specified by the raw columns. */ private Map<FamilyLayout, List<String>> getMapTypeFamilies(String[] rawColumns, KijiTableLayout layout) { final Map<FamilyLayout, List<String>> familyMap = Maps.newHashMap(); if (null == rawColumns) { for (FamilyLayout family : layout.getFamilies()) { if (family.isMapType()) { familyMap.put(family, new ArrayList<String>()); } } } else { for (String rawColumn : rawColumns) { final KijiColumnName colName = new KijiColumnName(rawColumn); final FamilyLayout family = layout.getFamilyMap().get(colName.getFamily()); if (null == family) { throw new RuntimeException( String.format("No family '%s' in table '%s'.", colName.getFamily(), layout.getName())); } if (family.isMapType()) { addColumn(family, colName.getQualifier(), familyMap); } } } return familyMap; } /** * Returns the list of group-type columns specified by <code>rawColumns</code>. * If <code>rawColumns</code> is null, then all columns in all group-type families are returned. * If a raw column specifies a group-type family, but no qualifier, then each column in that * family is returned. * * @param rawColumns The raw columns supplied by the user. * @param layout The KijiTableLayout. * @return The fully qualified columns specified by the raw columns. */ private Map<FamilyLayout, List<ColumnLayout>> getGroupTypeColumns(String[] rawColumns, KijiTableLayout layout) { final Map<FamilyLayout, List<ColumnLayout>> familyMap = Maps.newHashMap(); if (null == rawColumns) { for (FamilyLayout family : layout.getFamilies()) { if (family.isGroupType()) { familyMap.put(family, Lists.newArrayList(family.getColumns())); } } } else { for (String rawColumn : rawColumns) { final KijiColumnName colName = new KijiColumnName(rawColumn); final FamilyLayout family = layout.getFamilyMap().get(colName.getFamily()); if (null == family) { throw new RuntimeException( String.format("No family '%s' in table '%s'.", colName.getFamily(), layout.getName())); } if (family.isGroupType()) { // We'll include it. Is it fully qualified? if (!colName.isFullyQualified()) { // User specified a group-type family, but no qualifier. Include all qualifiers. for (ColumnLayout column : family.getColumns()) { addColumn(family, column, familyMap); } } else { final ColumnLayout column = family.getColumnMap().get(colName.getQualifier()); if (null == column) { throw new RuntimeException( String.format("No column '%s' in table '%s'.", colName, layout.getName())); } addColumn(family, column, familyMap); } } } } return familyMap; } /** * Adds a column to the list of columns mapped from <code>family</code>. * * @param family The family as a key. * @param column The column to add to the list of columns for this family. * @param familyColumnMap The map between families and lists of columns. */ private static void addColumn(FamilyLayout family, ColumnLayout column, Map<FamilyLayout, List<ColumnLayout>> familyColumnMap) { if (!familyColumnMap.containsKey(family)) { familyColumnMap.put(family, new ArrayList<ColumnLayout>()); } familyColumnMap.get(family).add(column); } /** * Adds a column to the list of columns mapped from <code>family</code>. * * @param mapFamily The map family as a key. * @param qualifier The qualifier to add to the list of qualifiers for this map family. * @param familyQualifierMap The map between map families and lists of qualifiers. */ private static void addColumn(FamilyLayout mapFamily, String qualifier, Map<FamilyLayout, List<String>> familyQualifierMap) { if (!familyQualifierMap.containsKey(mapFamily)) { familyQualifierMap.put(mapFamily, new ArrayList<String>()); } if (null != qualifier) { familyQualifierMap.get(mapFamily).add(qualifier); } } /** * Returns a KijiDataRequest for the specified columns. If columns is null, * returns a request for all columns. * * @param mapTypeFamilies The list of map type families to include. * @param groupTypeColumns The family:qualifier map of group type columns to include. * @param maxVersions The max versions to include. * @param minTimestamp The min timestamp. * @param maxTimestamp The max timestamp. * @return The KijiDataRequest. */ private static KijiDataRequest getDataRequest(Map<FamilyLayout, List<String>> mapTypeFamilies, Map<FamilyLayout, List<ColumnLayout>> groupTypeColumns, int maxVersions, long minTimestamp, long maxTimestamp) { final KijiDataRequest request = new KijiDataRequest().withTimeRange(minTimestamp, maxTimestamp); for (Entry<FamilyLayout, List<String>> entry : mapTypeFamilies.entrySet()) { String familyName = entry.getKey().getName(); // If the map family is without qualifiers, add entire family. if (entry.getValue().isEmpty()) { LOG.debug("Adding family to data request: " + familyName); request.addColumn( new KijiDataRequest.Column(new KijiColumnName(familyName)).withMaxVersions(maxVersions)); // If the map family is with qualifiers, add only the columns of interest. } else { for (String qualifier : entry.getValue()) { LOG.debug("Adding column to data request: " + familyName + ":" + qualifier); request.addColumn( new KijiDataRequest.Column(familyName, qualifier).withMaxVersions(maxVersions)); } } } for (Entry<FamilyLayout, List<ColumnLayout>> entry : groupTypeColumns.entrySet()) { String familyName = entry.getKey().getName(); for (ColumnLayout column : entry.getValue()) { LOG.debug("Adding column to data request: " + column.getName()); request.addColumn( new KijiDataRequest.Column(familyName, column.getName()).withMaxVersions(maxVersions)); } } return request; } @Override protected void cleanup() throws IOException { IOUtils.closeQuietly(mHBaseAdmin); super.cleanup(); } /** * Program entry point. * * @param args The command-line arguments. * @throws Exception If there is an error. */ public static void main(String[] args) throws Exception { System.exit(ToolRunner.run(new LsTool(), args)); } }