package com.avira.couchdoop.imp;

import com.avira.couchdoop.ArgsException;
import com.avira.couchdoop.CouchbaseArgs;
import org.apache.hadoop.conf.Configuration;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * {@link com.avira.couchdoop.CouchbaseArgs} implementation which holds Couchbase view import feature settings.
 */
public class ImportViewArgs extends CouchbaseArgs {

    /** Design document name; read from the {@code ARG_DESIGNDOC_NAME} property in {@code loadFromHadoopConfiguration}. */
    private String designDocumentName;

    /** View name; read from the {@code ARG_VIEW_NAME} property in {@code loadFromHadoopConfiguration}. */
    private String viewName;

    /** View keys produced by {@code parseViewKeys} from the {@code ARG_VIEW_KEYS} property. */
    private String[] viewKeys;

    /** Value of the {@code ARG_OUTPUT} property. */
    private String output;

    /** Value of the {@code ARG_DOCS_PER_PAGE} property; 1024 when the property is not set. */
    private int documentsPerPage;

    /** Value of the {@code ARG_NUM_MAPPERS} property; the number of view keys when the property is not set. */
    private int numMappers;

    // Argument definitions. ArgDef is declared elsewhere; its constructor arguments are presumably
    // (short option, property name, has-argument flag, required flag, description) — TODO confirm.
    // The property name is what loadFromHadoopConfiguration looks up via getPropertyName().

    // NOTE(review): ARG_DESIGNDOC_NAME and ARG_VIEW_NAME both declare an empty property name, so both
    // settings are looked up under the same (empty) configuration key — confirm the intended names.
    public static final ArgDef ARG_DESIGNDOC_NAME = new ArgDef('d', "", true, true,
            "(required) name of the design document");
    public static final ArgDef ARG_VIEW_NAME = new ArgDef('v', "", true, true,
            "(required) name of the view");
    public static final ArgDef ARG_VIEW_KEYS = new ArgDef('k', "couchbase.view.keys", true, true,
            "(required) semicolon separated list of view keys (in JSON format) which are going to be distributed to mappers");
    public static final ArgDef ARG_OUTPUT = new ArgDef('o', "output", true, true,
            "(required) HDFS output directory");
    public static final ArgDef ARG_DOCS_PER_PAGE = new ArgDef('P', "couchbase.view.docsPerPage", true, false,
            "buffer of documents which are going to be retrieved at once at a mapper; defaults to 1024");
    public static final ArgDef ARG_NUM_MAPPERS = new ArgDef('m', "hadoop.mappers", true, false,
            "number of mappers to be used by Hadoop; by default it will be equal to the number of couchbase view keys passed to the job");

    // Character that separates individual keys inside the view-keys string (see splitViewKeys).
    private static final char KEYS_STRING_SEPARATOR = ';';
    // Regex-escaped opening "((" and closing "))" markers that delimit a numeric key range (see parseViewKeys).
    private static final String[] KEY_RANGE_ESCAPE_SEQUENCE = new String[] { "\\(\\(", "\\)\\)" };

    // List handed out by getArgsList(). The constructor argument 5 is only an initial capacity hint.
    // NOTE(review): no code visible in this listing adds elements to this list — confirm where it is populated.
    public static final List<ArgDef> ARGS_LIST = new ArrayList<>(5);

    // NOTE(review): the body of this static initializer is not visible in this listing; presumably it
    // registers the ARG_* definitions in ARGS_LIST — confirm against the full source.
    static {


    /**
     * Constructs an instance from the given Hadoop configuration.
     * NOTE(review): the constructor body is not visible in this listing; presumably it delegates to the
     * {@code CouchbaseArgs} constructor — confirm against the full source.
     *
     * @param conf Hadoop configuration passed by the caller
     * @throws ArgsException declared by the signature; the triggering conditions are not visible here
     */
    public ImportViewArgs(Configuration conf) throws ArgsException {

    public List<ArgDef> getArgsList() {
        return ImportViewArgs.ARGS_LIST;

    public void loadFromHadoopConfiguration(Configuration conf) throws ArgsException {

        designDocumentName = conf.get(ARG_DESIGNDOC_NAME.getPropertyName());
        viewName = conf.get(ARG_VIEW_NAME.getPropertyName());
        viewKeys = parseViewKeys(conf);
        output = conf.get(ARG_OUTPUT.getPropertyName());
        documentsPerPage = conf.getInt(ARG_DOCS_PER_PAGE.getPropertyName(), 1024);
        //numMappers default to the number of viewKeys
        numMappers = conf.getInt(ARG_NUM_MAPPERS.getPropertyName(), viewKeys.length);

    public String getDesignDocumentName() {
        return designDocumentName;

    public String getViewName() {
        return viewName;

    public String[] getViewKeys() {
        return viewKeys;

    public int getNumMappers() {
        return numMappers;

    protected static String[] parseViewKeys(Configuration hadoopConf) {
        return parseViewKeys(hadoopConf.get(ARG_VIEW_KEYS.getPropertyName()));

    protected static String[] parseViewKeys(String viewKeysString) {
        List<String> splits = splitViewKeys(viewKeysString);
        List<String> keys = new ArrayList<>(splits.size());

        //Look for KEY_RANGE_ESCAPE_SEQUENCE in each split
        Pattern multiKeyPattern = Pattern.compile(
                "(.*)" + KEY_RANGE_ESCAPE_SEQUENCE[0] + "(\\d+)-(\\d+)" + KEY_RANGE_ESCAPE_SEQUENCE[1] + "(.*)");
        for (String split : splits) {
            Matcher kMatch = multiKeyPattern.matcher(split);
            if (kMatch.matches()) {
            } else {
                //If we did not find KEY_RANGE_ESCAPE_SEQUENCE, use the split as it is

        return keys.toArray(new String[keys.size()]);

     * Correctly split a string containing Couchbase view keys. Uses {@code KEYS_STRING_SEPARATOR} to split the
     * string, but not if the separator is found between double-quotes or square braces.
     * @param viewKeysString string containing all Couchbase keys separated by {@code KEYS_STRING_SEPARATOR}
     * @return all Couchbase keys
    protected static List<String> splitViewKeys(String viewKeysString) {
        List<String> splits = new ArrayList<>();
        char[] chars = viewKeysString.toCharArray();

        boolean betweenSquareBraces = false;
        boolean betweenQuotes = false;
        int lastMatch = 0;

        for (int i = 0; i < chars.length; i++) {
            if ((chars[i] == KEYS_STRING_SEPARATOR) && !betweenQuotes && !betweenSquareBraces) {
                splits.add(viewKeysString.substring(lastMatch, i));
                lastMatch = i + 1;

            if ((chars[i] == '"') && (i == 0 || chars[i - 1] != '\\')) {
                betweenQuotes = !betweenQuotes; //toggle betweenQuotes

            if ((chars[i] == '[') && (i == 0 || chars[i - 1] != '\\')) {
                betweenSquareBraces = true;

            if ((chars[i] == ']') && (i == 0 || chars[i - 1] != '\\')) {
                betweenSquareBraces = false;

        //add last key if needed
        if (lastMatch < chars.length) {
            splits.add(viewKeysString.substring(lastMatch, chars.length));

        return splits;

     * Automatically generate keys containing a numeric range. Autodetect padding necessity if
     * {@code rangeStart} and {@code rangeEnd} have the same length
     * @param prefix     part of key that comes before the numeric range
     * @param rangeStart start of the numeric range; must be parsable as int
     * @param rangeEnd   end of the numeric range; must be parsable as int
     * @param suffix     part of the key that comes after the numeric range
     * @return all generated keys
    protected static List<String> getKeysByRange(String prefix, String rangeStart, String rangeEnd, String suffix) {
        int rangeStartInt = Integer.parseInt(rangeStart);
        int rangeEndInt = Integer.parseInt(rangeEnd);

        String numberFormat;
        if (rangeStart.length() == rangeEnd.length()) {
            numberFormat = "%0" + rangeEnd.length() + "d";
        } else {
            numberFormat = "%d";

        List<String> keys = new ArrayList<>();
        for (int i = rangeStartInt; i <= rangeEndInt; i++) {
            keys.add(prefix + String.format(numberFormat, i) + suffix);

        return keys;

    public String getOutput() {
        return output;

    public int getDocumentsPerPage() {
        return documentsPerPage;