org.apache.accumulo.examples.dirlist.QueryUtil.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.accumulo.examples.dirlist.QueryUtil.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.accumulo.examples.dirlist;

import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;
import java.util.regex.Pattern;

import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.user.RegExFilter;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.accumulo.examples.cli.ClientOnRequiredTable;
import org.apache.hadoop.io.Text;

import com.beust.jcommander.Parameter;

/**
 * Provides utility methods for getting the info for a file, listing the contents of a directory, and performing single wild card searches on file or directory
 * names.
 *
 * <p>
 * Row keys are built from the UTF-8 encoding of path names, which is the encoding Hadoop's {@link Text} uses for its own string conversions.
 */
public class QueryUtil {
    private final Connector conn;
    private final String tableName;
    private final Authorizations auths;
    public static final Text DIR_COLF = new Text("dir");
    public static final Text FORWARD_PREFIX = new Text("f");
    public static final Text REVERSE_PREFIX = new Text("r");
    public static final Text INDEX_COLF = new Text("i");
    public static final Text COUNTS_COLQ = new Text("counts");

    /**
     * Creates a query helper bound to the connector, table and authorizations carried by the parsed options.
     *
     * @param opts
     *          the parsed command line options
     */
    public QueryUtil(Opts opts) throws AccumuloException, AccumuloSecurityException {
        this.conn = opts.getConnector();
        this.tableName = opts.getTableName();
        this.auths = opts.auths;
    }

    /**
     * Calculates the depth of a path, i.e. the number of forward slashes in the path name.
     *
     * @param path
     *          the full path of a file or directory
     * @return the depth of the path
     */
    public static int getDepth(String path) {
        int numSlashes = 0;
        int index = path.indexOf('/');
        while (index >= 0) {
            numSlashes++;
            index = path.indexOf('/', index + 1);
        }
        return numSlashes;
    }

    /**
     * Given a path, construct an accumulo row prepended with the path's depth for the directory table.
     *
     * @param path
     *          the full path of a file or directory
     * @return the accumulo row associated with this path
     */
    public static Text getRow(String path) {
        // Locale.ROOT keeps the three-digit depth prefix in ASCII digits regardless of the default locale.
        Text row = new Text(String.format(Locale.ROOT, "%03d", getDepth(path)));
        // Append by byte count, not char count: the two differ for non-ASCII path names.
        byte[] pathBytes = path.getBytes(StandardCharsets.UTF_8);
        row.append(pathBytes, 0, pathBytes.length);
        return row;
    }

    /**
     * Given a path, construct an accumulo row prepended with the {@link #FORWARD_PREFIX} for the index table.
     *
     * @param path
     *          the full path of a file or directory
     * @return the accumulo row associated with this path, or null if the path has no name after its last forward slash
     */
    public static Text getForwardIndex(String path) {
        String part = path.substring(path.lastIndexOf("/") + 1);
        if (part.length() == 0) {
            return null;
        }
        byte[] partBytes = part.getBytes(StandardCharsets.UTF_8);
        Text row = new Text(FORWARD_PREFIX);
        row.append(partBytes, 0, partBytes.length);
        return row;
    }

    /**
     * Given a path, construct an accumulo row prepended with the {@link #REVERSE_PREFIX} with the path reversed for the index table.
     *
     * <p>
     * The name is reversed byte by byte, so a suffix of the name becomes a byte prefix of the resulting row.
     *
     * @param path
     *          the full path of a file or directory
     * @return the accumulo row associated with this path, or null if the path has no name after its last forward slash
     */
    public static Text getReverseIndex(String path) {
        String part = path.substring(path.lastIndexOf("/") + 1);
        if (part.length() == 0) {
            return null;
        }
        byte[] forward = part.getBytes(StandardCharsets.UTF_8);
        // Size the reversed buffer from the encoded bytes; the char count is smaller for multi-byte names.
        byte[] rev = new byte[forward.length];
        for (int i = 0; i < forward.length; i++) {
            rev[forward.length - 1 - i] = forward[i];
        }
        Text row = new Text(REVERSE_PREFIX);
        row.append(rev, 0, rev.length);
        return row;
    }

    /**
     * Returns either the {@link #DIR_COLF} or a decoded string version of the colf, followed by a colon.
     *
     * @param colf
     *          the column family
     * @return the type label for the column family, ending in ":"
     */
    public static String getType(Text colf) {
        if (colf.equals(DIR_COLF)) {
            return colf.toString() + ":";
        }
        // NOTE(review): Text.getBytes() exposes the backing array, which may be longer than getLength();
        // this relies on Ingest.encoder only reading the leading bytes it needs - confirm.
        return Long.toString(Ingest.encoder.decode(colf.getBytes())) + ":";
    }

    /**
     * Scans over the directory table and pulls out stat information about a path.
     *
     * @param path
     *          the full path of a file or directory
     * @return a sorted map holding a "fullname" entry plus one "type:qualifier:visibility" entry per scanned key; empty if nothing was found
     */
    public Map<String, String> getData(String path) throws TableNotFoundException {
        if (path.endsWith("/")) {
            path = path.substring(0, path.length() - 1);
        }
        Map<String, String> data = new TreeMap<>();
        Scanner scanner = conn.createScanner(tableName, auths);
        try {
            scanner.setRange(new Range(getRow(path)));
            for (Entry<Key, Value> e : scanner) {
                Key key = e.getKey();
                String type = getType(key.getColumnFamily());
                // Drop the three-character depth prefix added by getRow.
                data.put("fullname", key.getRow().toString().substring(3));
                data.put(type + key.getColumnQualifier().toString() + ":" + key.getColumnVisibility().toString(),
                        new String(e.getValue().get(), StandardCharsets.UTF_8));
            }
        } finally {
            // The results are fully copied into the map, so the scanner can be released here.
            scanner.close();
        }
        return data;
    }

    /**
     * Uses the directory table to list the contents of a directory.
     *
     * @param path
     *          the full path of a directory
     * @return a sorted map from entry name to that entry's attributes, each including a "fullname" entry
     */
    public Map<String, Map<String, String>> getDirList(String path) throws TableNotFoundException {
        if (!path.endsWith("/")) {
            path = path + "/";
        }
        Map<String, Map<String, String>> fim = new TreeMap<>();
        Scanner scanner = conn.createScanner(tableName, auths);
        try {
            scanner.setRange(Range.prefix(getRow(path)));
            for (Entry<Key, Value> e : scanner) {
                Key key = e.getKey();
                String row = key.getRow().toString();
                String name = row.substring(row.lastIndexOf("/") + 1);
                String type = getType(key.getColumnFamily());
                Map<String, String> attrs = fim.get(name);
                if (attrs == null) {
                    attrs = new TreeMap<>();
                    // Drop the three-character depth prefix added by getRow.
                    attrs.put("fullname", row.substring(3));
                    fim.put(name, attrs);
                }
                attrs.put(type + key.getColumnQualifier().toString() + ":" + key.getColumnVisibility().toString(),
                        new String(e.getValue().get(), StandardCharsets.UTF_8));
            }
        } finally {
            // The results are fully copied into the map, so the scanner can be released here.
            scanner.close();
        }
        return fim;
    }

    /**
     * Scans over the index table for files or directories with a given name.
     *
     * <p>
     * The returned scanner is handed to the caller unconsumed, so it is not closed here.
     *
     * @param term
     *          the name a file or directory to search for
     * @return the scanner positioned on the forward index row for the term
     */
    public Iterable<Entry<Key, Value>> exactTermSearch(String term) throws Exception {
        System.out.println("executing exactTermSearch for " + term);
        Scanner scanner = conn.createScanner(tableName, auths);
        // NOTE(review): getForwardIndex returns null for a term with no name part, and a null row is
        // documented by Range as covering all rows - confirm that a full scan is intended in that case.
        scanner.setRange(new Range(getForwardIndex(term)));
        return scanner;
    }

    /**
     * Scans over the index table for files or directories with a given name, prefix, or suffix (indicated by a wildcard '*' at the beginning or end of the term).
     *
     * @param exp
     *          the name a file or directory to search for with an optional wildcard '*' at the beginning or end
     * @return the scanner over the matching index rows
     * @throws IllegalArgumentException
     *           if the expression contains a forward slash or a wildcard that is neither first nor last
     */
    public Iterable<Entry<Key, Value>> singleRestrictedWildCardSearch(String exp) throws Exception {
        if (exp.indexOf("/") >= 0) {
            throw new IllegalArgumentException("this method only works with unqualified names");
        }

        Text rowPrefix;
        if (exp.startsWith("*")) {
            System.out.println("executing beginning wildcard search for " + exp);
            Text reversed = getReverseIndex(exp.substring(1));
            // A bare "*" leaves nothing to index; fall back to every reverse-index row instead of passing null.
            rowPrefix = reversed == null ? REVERSE_PREFIX : reversed;
        } else if (exp.endsWith("*")) {
            System.out.println("executing ending wildcard search for " + exp);
            Text forward = getForwardIndex(exp.substring(0, exp.length() - 1));
            rowPrefix = forward == null ? FORWARD_PREFIX : forward;
        } else if (exp.indexOf("*") >= 0) {
            throw new IllegalArgumentException("this method only works for beginning or ending wild cards");
        } else {
            return exactTermSearch(exp);
        }

        // Create the scanner only once a range is known, so the exact-match and error paths do not leak one.
        Scanner scanner = conn.createScanner(tableName, auths);
        scanner.setRange(Range.prefix(rowPrefix));
        return scanner;
    }

    /**
     * Scans over the index table for files or directories with a given name that can contain a single wildcard '*' anywhere in the term.
     *
     * @param exp
     *          the name a file or directory to search for with one optional wildcard '*'
     * @return the scanner over the matching index rows
     * @throws IllegalArgumentException
     *           if the expression contains more than one wildcard
     */
    public Iterable<Entry<Key, Value>> singleWildCardSearch(String exp) throws Exception {
        int starIndex = exp.indexOf("*");
        if (exp.indexOf("*", starIndex + 1) >= 0) {
            throw new IllegalArgumentException("only one wild card for search");
        }

        if (starIndex < 0) {
            return exactTermSearch(exp);
        } else if (starIndex == 0 || starIndex == exp.length() - 1) {
            return singleRestrictedWildCardSearch(exp);
        }

        String firstPart = exp.substring(0, starIndex);
        String lastPart = exp.substring(starIndex + 1);
        // Quote the literal pieces so characters such as '.' in the search term are not treated as regex operators.
        String regexString = ".*/" + Pattern.quote(firstPart) + "[^/]*" + Pattern.quote(lastPart);

        Scanner scanner = conn.createScanner(tableName, auths);
        // Scan whichever index has the longer literal part, then filter the candidates with the regex.
        if (firstPart.length() >= lastPart.length()) {
            System.out.println("executing middle wildcard search for " + regexString
                    + " from entries starting with " + firstPart);
            scanner.setRange(Range.prefix(getForwardIndex(firstPart)));
        } else {
            System.out.println("executing middle wildcard search for " + regexString + " from entries ending with "
                    + lastPart);
            scanner.setRange(Range.prefix(getReverseIndex(lastPart)));
        }
        IteratorSetting regex = new IteratorSetting(50, "regex", RegExFilter.class);
        // Only the column qualifier term is set; row, column family and value are left unfiltered.
        RegExFilter.setRegexs(regex, null, null, regexString, null, false);
        scanner.addScanIterator(regex);
        return scanner;
    }

    /**
     * Command line options: the path to list or search for, and whether to search instead of list.
     */
    public static class Opts extends ClientOnRequiredTable {
        @Parameter(names = "--path", description = "the directory to list")
        String path = "/";
        @Parameter(names = "--search", description = "find a file or directory with the given name")
        boolean search = false;
    }

    /**
     * Lists the contents of a directory using the directory table, or searches for file or directory names (if the --search flag is included).
     */
    public static void main(String[] args) throws Exception {
        Opts opts = new Opts();
        opts.parseArgs(QueryUtil.class.getName(), args);
        QueryUtil q = new QueryUtil(opts);
        if (opts.search) {
            for (Entry<Key, Value> e : q.singleWildCardSearch(opts.path)) {
                System.out.println(e.getKey().getColumnQualifier());
            }
        } else {
            for (Entry<String, Map<String, String>> e : q.getDirList(opts.path).entrySet()) {
                System.out.println(e);
            }
        }
    }
}