joshelser.as2015.query.Query.java Source code

Java tutorial

Introduction

Here is the source code for joshelser.as2015.query.Query.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package joshelser.as2015.query;

import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.SortedMap;

import org.apache.accumulo.core.client.BatchScanner;
import org.apache.accumulo.core.client.ClientConfiguration;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.ZooKeeperInstance;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.user.WholeRowIterator;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.hadoop.io.Text;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.ParameterException;

/**
 * 
 */
public class Query {
    private static final Logger log = LoggerFactory.getLogger(Query.class);

    private static class Opts {
        @Parameter(names = { "-i", "--instance" }, description = "Accumulo instance name", required = true)
        private String instanceName;

        @Parameter(names = { "-z" }, description = "Zookeepers", required = true)
        private String zookeepers;

        @Parameter(names = { "-u" }, description = "Accumulo user", required = true)
        private String user;

        @Parameter(names = { "-p" }, description = "Accumulo password", required = false)
        private String password;

        @Parameter(names = { "--clientConf" }, description = "Accumulo client configuration file", required = false)
        private File clientConfFile;

        @Parameter(names = { "-t", "--table" }, description = "Accumulo table to write to", required = true)
        private String table;
    }

    public static void main(String[] args) throws Exception {
        JCommander commander = new JCommander();
        final Opts options = new Opts();
        commander.addObject(options);

        commander.setProgramName("Query");
        try {
            commander.parse(args);
        } catch (ParameterException ex) {
            commander.usage();
            System.err.println(ex.getMessage());
            System.exit(1);
        }

        ClientConfiguration conf = ClientConfiguration.loadDefault();
        if (null != options.clientConfFile) {
            conf = new ClientConfiguration(new PropertiesConfiguration(options.clientConfFile));
        }
        conf.withInstance(options.instanceName).withZkHosts(options.zookeepers);

        ZooKeeperInstance inst = new ZooKeeperInstance(conf);
        Connector conn = inst.getConnector(options.user, new PasswordToken(options.password));

        BatchScanner bs = conn.createBatchScanner(options.table, Authorizations.EMPTY, 16);
        try {
            bs.setRanges(Collections.singleton(new Range()));
            final Text categoryText = new Text("category");
            bs.fetchColumn(categoryText, new Text("name"));
            bs.fetchColumn(new Text("review"), new Text("score"));
            bs.fetchColumn(new Text("review"), new Text("userId"));

            bs.addScanIterator(new IteratorSetting(50, "wri", WholeRowIterator.class));
            final Text colf = new Text();
            Map<String, List<Integer>> scoresByUser = new HashMap<>();
            for (Entry<Key, Value> entry : bs) {
                SortedMap<Key, Value> row = WholeRowIterator.decodeRow(entry.getKey(), entry.getValue());
                Iterator<Entry<Key, Value>> iter = row.entrySet().iterator();
                if (!iter.hasNext()) {
                    // row was empty
                    continue;
                }
                Entry<Key, Value> categoryEntry = iter.next();
                categoryEntry.getKey().getColumnFamily(colf);
                if (!colf.equals(categoryText)) {
                    throw new IllegalArgumentException("Unknown!");
                }
                if (!categoryEntry.getValue().toString().equals("books")) {
                    // not a book review
                    continue;
                }

                if (!iter.hasNext()) {
                    continue;
                }
                Entry<Key, Value> reviewScore = iter.next();
                if (!iter.hasNext()) {
                    continue;
                }
                Entry<Key, Value> reviewUserId = iter.next();

                String userId = reviewUserId.getValue().toString();
                if (userId.equals("unknown")) {
                    // filter unknow user id
                    continue;
                }

                List<Integer> scores = scoresByUser.get(userId);
                if (null == scores) {
                    scores = new ArrayList<>();
                    scoresByUser.put(userId, scores);
                }
                scores.add(Float.valueOf(reviewScore.getValue().toString()).intValue());
            }

            for (Entry<String, List<Integer>> entry : scoresByUser.entrySet()) {
                int sum = 0;
                for (Integer val : entry.getValue()) {
                    sum += val;
                }

                System.out.println(entry.getKey() + " => " + new Float(sum) / entry.getValue().size());
            }
        } finally {
            bs.close();
        }
    }

}