Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.mahout.clustering.minhash; import org.apache.commons.cli2.builder.ArgumentBuilder; import org.apache.commons.cli2.builder.DefaultOptionBuilder; public final class MinhashOptionCreator { public static final String NUM_HASH_FUNCTIONS = "numHashFunctions"; public static final String KEY_GROUPS = "keyGroups"; public static final String HASH_TYPE = "hashType"; public static final String MIN_CLUSTER_SIZE = "minClusterSize"; public static final String MIN_VECTOR_SIZE = "minVectorSize"; public static final String NUM_REDUCERS = "numReducers"; public static final String DEBUG_OUTPUT = "debugOutput"; private MinhashOptionCreator() { } public static DefaultOptionBuilder debugOutputOption() { return new DefaultOptionBuilder().withLongName(DEBUG_OUTPUT).withShortName("debug") .withDescription("Output the whole vectors for debugging"); } public static DefaultOptionBuilder numReducersOption() { return new DefaultOptionBuilder().withLongName(NUM_REDUCERS).withRequired(false).withShortName("r") .withArgument(new ArgumentBuilder().withName(NUM_REDUCERS).withDefault("2").withMinimum(1) .withMaximum(1).create()) .withDescription("The number of reduce tasks. Defaults to 2"); } /** * Returns a default command line option for specifying the minimum cluster * size in MinHash clustering */ public static DefaultOptionBuilder minClusterSizeOption() { return new DefaultOptionBuilder().withLongName(MIN_CLUSTER_SIZE).withRequired(false) .withArgument(new ArgumentBuilder().withName(MIN_CLUSTER_SIZE).withDefault("10").withMinimum(1) .withMaximum(1).create()) .withDescription("Minimum points inside a cluster").withShortName("mcs"); } /** * Returns a default command line option for specifying the type of hash to * use in MinHash clustering: Should one out of * ("linear","polynomial","murmur") */ public static DefaultOptionBuilder hashTypeOption() { return new DefaultOptionBuilder().withLongName(HASH_TYPE).withRequired(false) .withArgument(new ArgumentBuilder().withName(HASH_TYPE).withDefault("murmur").withMinimum(1) .withMaximum(1).create()) .withDescription("Type of hash function to use. Available types: (linear, polynomial, murmur) ") .withShortName("ht"); } /** * Returns a default command line option for specifying the min size of the * vector to hash Should one out of ("linear","polynomial","murmur") */ public static DefaultOptionBuilder minVectorSizeOption() { return new DefaultOptionBuilder().withLongName(MIN_VECTOR_SIZE).withRequired(false) .withArgument(new ArgumentBuilder().withName(MIN_VECTOR_SIZE).withDefault("5").withMinimum(1) .withMaximum(1).create()) .withDescription("Minimum size of vector to be hashed").withShortName("mvs"); } /** * Returns a default command line option for specifying the number of hash * functions to be used in MinHash clustering */ public static DefaultOptionBuilder numHashFunctionsOption() { return new DefaultOptionBuilder().withLongName(NUM_HASH_FUNCTIONS).withRequired(false) .withArgument(new ArgumentBuilder().withName(NUM_HASH_FUNCTIONS).withDefault("10").withMinimum(1) .withMaximum(1).create()) .withDescription("Number of hash functions to be used").withShortName("nh"); } /** * Returns a default command line option for specifying the number of key * groups to be used in MinHash clustering */ public static DefaultOptionBuilder keyGroupsOption() { return new DefaultOptionBuilder().withLongName(KEY_GROUPS).withRequired(false) .withArgument(new ArgumentBuilder().withName(KEY_GROUPS).withDefault("2").withMinimum(1) .withMaximum(1).create()) .withDescription("Number of key groups to be used").withShortName("kg"); } }