Example usage for org.apache.hadoop.conf Configuration getInt

Introduction

This page collects usage examples for org.apache.hadoop.conf.Configuration#getInt.

Prototype

public int getInt(String name, int defaultValue) 

Document

Get the value of the name property as an int. If no such property exists, the provided default value is returned; if the configured value is not a valid int, an error is thrown.
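
As a quick illustration of the call pattern before the examples below, here is a minimal, self-contained sketch that reads an integer property with a fallback default (the property name my.app.num.threads is made up for this illustration):

import org.apache.hadoop.conf.Configuration;

public class GetIntExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Returns 4 unless "my.app.num.threads" is set in the loaded configuration resources
        int numThreads = conf.getInt("my.app.num.threads", 4);
        System.out.println("numThreads = " + numThreads);
    }
}

The default value keeps the caller working even when the property is absent from the configuration files or job settings, which is the pattern used throughout the examples below.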

Usage

From source file:eastcircle.terasort.TotalOrderPartitioner.java

License:Apache License

/**
 * Read the cut points from the given sequence file.
 * @param fs the file system
 * @param p the path to read
 * @param conf the job configuration
 * @return the strings to split the partitions on
 * @throws IOException
 */
private static Text[] readPartitions(FileSystem fs, Path p, Configuration conf) throws IOException {
    int reduces = conf.getInt(MRJobConfig.NUM_REDUCES, 1);
    Text[] result = new Text[reduces - 1];
    DataInputStream reader = fs.open(p);
    for (int i = 0; i < reduces - 1; ++i) {
        result[i] = new Text();
        result[i].readFields(reader);
    }
    reader.close();
    return result;
}

From source file:edu.indiana.d2i.htrc.corpus.analysis.LDAAnalysisMapper.java

License:Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    String mappingTableFileName = conf.get("user.args.mapping.table.filename");
    String topicsFileName = conf.get("user.args.topics.filename");

    BufferedReader reader = null;
    String line = null;

    // load mapping table
    try {
        reader = new BufferedReader(new InputStreamReader(new FileInputStream(mappingTableFileName)));

        /*
         * each line is a mapping: <word> <index of the word in full word
         * set>
         */
        while ((line = reader.readLine()) != null) {
            String trimmedLine = line.trim();
            int idx = trimmedLine.lastIndexOf(' ');
            mappingTable.add(new MappingTableEntry(trimmedLine.substring(0, idx),
                    Integer.parseInt(trimmedLine.substring(idx + 1))));

        }

    } finally {
        if (reader != null)
            reader.close();
    }

    mappingIndices = CorpusProcessingUtils.extractIdxFromMappingTable(mappingTable);

    reader = null;
    line = null;

    // load topics
    try {
        reader = new BufferedReader(new InputStreamReader(new FileInputStream(topicsFileName)));

        /* Each line is a topic */
        while ((line = reader.readLine()) != null) {
            topics.add(line.trim());
        }

    } finally {
        if (reader != null)
            reader.close();
    }

    // load LDA state, stateFilePath is the path in HDFS
    String stateFilePath = conf.get("user.args.lda.state.filepath");
    int stepSize = conf.getInt("user.args.topdoctable.capacity.stepsize",
            Integer.parseInt(Constants.LDA_ANALYSIS_DEFAULT_STEP_SIZE));

    if (stateFilePath == null) {
        // No previous state for initialization (first iteration)
        ldaAnalyzer = new LDAAnalyzer(mappingTable, topics, stepSize);
    } else {
        // second and following iterations
        FileSystem fs = FileSystem.get(conf);
        SequenceFile.Reader seqFileReader = null;

        try {
            seqFileReader = new SequenceFile.Reader(fs, new Path(stateFilePath), conf);

            Text key = (Text) ReflectionUtils.newInstance(seqFileReader.getKeyClass(), conf);
            LDAState ldaState = (LDAState) ReflectionUtils.newInstance(seqFileReader.getValueClass(), conf);

            // the sequence file should only have one record
            seqFileReader.next(key, ldaState);

            ldaAnalyzer = new LDAAnalyzer(ldaState.getWordsTopicsTable(), ldaState.getTopicsDocumentsTable(),
                    mappingTable, topics);
        } finally {
            IOUtils.closeStream(seqFileReader);
        }

    }

}

From source file:edu.indiana.d2i.htrc.io.index.lucene.LuceneClient.java

License:Apache License

private LuceneClient(Configuration conf) throws IOException {
    String directory = conf.get(HTRCConstants.LUCENE_INDEX_PATH);
    // String directory = conf.get("htrc.lucene.index.path");
    FileSystem fs = FileSystem.get(conf);
    Path indexPath = new Path(directory);
    Directory dir = new FileSystemDirectory(fs, indexPath, false, conf);
    indexSearcher = new IndexSearcher(dir);
    indexReader = IndexReader.open(dir);

    dictionary = new Dictionary(conf);

    // dynamic load the filter ??
    //      filter = new StopWordFilter();
    filter = new StopWordFilter("stopwords.txt"); // found in the classpath
    filter.addNextFilter(new DictionaryFilter(dictionary));
    filter.addNextFilter(new FrequencyFilter(conf.getInt(HTRCConstants.FILTER_WORD_MIN_FREQUENCE, 2)));
    filter.addNextFilter(new WordLengthFilter(conf.getInt(HTRCConstants.FILTER_WORD_MIN_LENGTH, 2)));
}

From source file:edu.indiana.d2i.htrc.io.index.solr.SolrClient.java

License:Apache License

private void initFilters(Configuration conf) throws IOException {
    dictionary = new Dictionary(conf);
    filter = new StopWordFilter("stopwords.txt"); // found in the classpath
    filter.addNextFilter(new DictionaryFilter(dictionary));
    filter.addNextFilter(new FrequencyFilter(conf.getInt(HTRCConstants.FILTER_WORD_MIN_FREQUENCE, 2)));
    filter.addNextFilter(new WordLengthFilter(conf.getInt(HTRCConstants.FILTER_WORD_MIN_LENGTH, 2)));
}

From source file:edu.indiana.d2i.htrc.io.mem.MemCachedRecordWriter.java

License:Apache License

public MemCachedRecordWriter(Configuration conf) {
    // read configuration
    MAX_EXPIRE = conf.getInt(HTRCConstants.MEMCACHED_MAX_EXPIRE, -1);
    int numClients = conf.getInt(HTRCConstants.MEMCACHED_CLIENT_NUM, -1);
    String[] hostArray = conf.getStrings(HTRCConstants.MEMCACHED_HOSTS);
    List<String> hosts = Arrays.asList(hostArray);
    Class<?> writableClass = conf.getClass("mapred.output.value.class", Writable.class);

    String namespace = conf.get(MemKMeansConfig.KEY_NS);
    if (namespace != null)
        NameSpace = namespace;

    client = ThreadedMemcachedClient.getThreadedMemcachedClient(numClients, hosts);
    transcoder = new HadoopWritableTranscoder<V>(conf, writableClass);
}

From source file:edu.indiana.d2i.htrc.io.mem.ThreadedMemcachedClient.java

License:Apache License

public static ThreadedMemcachedClient getThreadedMemcachedClient(Configuration conf) {
    int numClients = conf.getInt(HTRCConstants.MEMCACHED_CLIENT_NUM, 1);
    String[] hostArray = conf.getStrings(HTRCConstants.MEMCACHED_HOSTS);
    List<String> hosts = Arrays.asList(hostArray);
    return getThreadedMemcachedClient(numClients, hosts);
}

From source file:edu.indiana.d2i.htrc.io.SparseVectorsToMemcached.java

License:Apache License

private void sequentialTransform() throws Exception {
    Configuration conf = getConf();
    setupConfiguration(conf);

    HTRCDataAPIClient client = Utilities.creatDataAPIClient(conf);

    // set up analyzer, filter
    Analyzer analyzer = ClassUtils.instantiateAs(
            conf.get(DocumentProcessor.ANALYZER_CLASS, DefaultAnalyzer.class.getName()), Analyzer.class);
    HTRCFilter filter = new StopWordFilter("stopwords.txt"); // found in the classpath
    Dictionary dictionary = new Dictionary(conf);
    filter.addNextFilter(new DictionaryFilter(dictionary));
    filter.addNextFilter(new WordLengthFilter(conf.getInt(HTRCConstants.FILTER_WORD_MIN_LENGTH, 2)));

    // memcached client
    ThreadedMemcachedClient memcachedClient = ThreadedMemcachedClient.getThreadedMemcachedClient(conf);
    MemcachedClient cache = memcachedClient.getCache();
    int maxExpir = conf.getInt(HTRCConstants.MEMCACHED_MAX_EXPIRE, -1);
    Transcoder<VectorWritable> transcoder = new HadoopWritableTranscoder<VectorWritable>(conf,
            VectorWritable.class);

    //
    Path input = new Path(idListDir);
    FileSystem fs = input.getFileSystem(conf);
    DataInputStream fsinput = new DataInputStream(fs.open(input));
    BufferedReader reader = new BufferedReader(new InputStreamReader(fsinput));
    String line = null;
    int idNumThreshold = maxIdsPerReq;
    int idNum = 0;
    StringBuilder idList = new StringBuilder();
    VectorWritable vectorWritable = new VectorWritable();
    while ((line = reader.readLine()) != null) {
        idList.append(line + "|");
        if ((++idNum) >= idNumThreshold) {
            // <id, content>
            Iterable<Entry<String, String>> content = client.getID2Content(idList.toString());
            for (Entry<String, String> entry : content) {
                Vector result = transform2Vector(entry.getValue(), entry.getKey(), analyzer, filter,
                        dictionary);
                vectorWritable.set(result);
                cache.set(entry.getKey(), maxExpir, vectorWritable, transcoder);

                // validate
                VectorWritable vecWritable = cache.get(entry.getKey(), transcoder);
                if (vecWritable == null) {
                    throw new RuntimeException(entry.getKey() + " is not written to Memcached.");
                } else {
                    System.out.println(entry.getKey());
                }
            }

            idList = new StringBuilder();
            idNum = 0;
        }
    }
    if (idList.length() > 0) {
        Iterable<Entry<String, String>> content = client.getID2Content(idList.toString());
        for (Entry<String, String> entry : content) {
            Vector result = transform2Vector(entry.getValue(), entry.getKey(), analyzer, filter, dictionary);
            vectorWritable.set(result);
            cache.set(entry.getKey(), maxExpir, vectorWritable, transcoder);

            // validate
            VectorWritable vecWritable = cache.get(entry.getKey(), transcoder);
            if (vecWritable == null) {
                throw new RuntimeException(entry.getKey() + " is not written to Memcached.");
            } else {
                System.out.println(entry.getKey());
            }
        }
    }
}

From source file:edu.indiana.d2i.htrc.kmeans.MemKMeansUtil.java

License:Apache License

public static void loadClusterInfo(Configuration conf, Collection<Cluster> clusters) {
    int k = conf.getInt(MemKMeansConfig.CLUSTER_NUM, -1);
    if (k == -1)
        throw new IllegalArgumentException("Number of cluster is -1!");

    ThreadedMemcachedClient client = ThreadedMemcachedClient.getThreadedMemcachedClient(conf);
    MemcachedClient cache = client.getCache();
    Transcoder<Cluster> clusterTranscoder = new HadoopWritableTranscoder<Cluster>(conf, Cluster.class);

    for (int i = 0; i < k; i++) {
        Cluster cluster = cache.get(toClusterName(i), clusterTranscoder);
        if (cluster != null) {
            clusters.add(cluster);
        } else {
            //            logger.error("cannot find VectorWritable for " + id);
            client.close();
            throw new RuntimeException("can't find cluster " + toClusterName(i));
        }
    }
    client.close();
}

From source file:edu.indiana.d2i.htrc.kmeans.MemKMeansUtil.java

License:Apache License

public static boolean isConverged(Configuration conf) {
    int k = conf.getInt(MemKMeansConfig.CLUSTER_NUM, -1);
    if (k == -1)
        throw new IllegalArgumentException("Number of cluster is -1!");

    ThreadedMemcachedClient client = ThreadedMemcachedClient.getThreadedMemcachedClient(conf);
    MemcachedClient cache = client.getCache();
    Transcoder<Cluster> clusterTranscoder = new HadoopWritableTranscoder<Cluster>(conf, Cluster.class);

    for (int i = 0; i < k; i++) {
        Cluster cluster = cache.get(toClusterName(i), clusterTranscoder);
        if (cluster != null) {
            if (!cluster.isConverged())
                return false;
        } else {
            throw new RuntimeException("can't find cluster " + toClusterName(i));
        }
    }
    client.close();

    return true;
}

From source file:edu.indiana.d2i.htrc.kmeans.MemKMeansUtil.java

License:Apache License

public static void writeClusters2HDFS(Configuration conf, Path des) throws IOException {
    int k = conf.getInt(MemKMeansConfig.CLUSTER_NUM, -1);
    if (k == -1)
        throw new IllegalArgumentException("Number of cluster is -1!");

    ThreadedMemcachedClient client = ThreadedMemcachedClient.getThreadedMemcachedClient(conf);
    MemcachedClient cache = client.getCache();
    Transcoder<Cluster> clusterTranscoder = new HadoopWritableTranscoder<Cluster>(conf, Cluster.class);

    SequenceFile.Writer writer = new SequenceFile.Writer(FileSystem.get(conf), conf, des, Text.class,
            Cluster.class);
    Text key = new Text();
    for (int i = 0; i < k; i++) {
        Cluster cluster = cache.get(toClusterName(i), clusterTranscoder);
        key.set(cluster.getIdentifier());
        writer.append(key, cluster);
    }
    writer.close();
    client.close();
}