org.apache.usergrid.tools.UniqueValueScanner.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.usergrid.tools.UniqueValueScanner.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.usergrid.tools;

import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;

import com.datastax.driver.core.ConsistencyLevel;
import com.netflix.astyanax.connectionpool.exceptions.ConnectionException;
import com.netflix.astyanax.model.Column;
import com.netflix.astyanax.util.RangeBuilder;
import org.apache.usergrid.persistence.Entity;
import org.apache.usergrid.persistence.EntityManager;
import org.apache.usergrid.persistence.collection.serialization.MvccEntitySerializationStrategy;
import org.apache.usergrid.persistence.collection.serialization.UniqueValueSerializationStrategy;
import org.apache.usergrid.persistence.collection.serialization.UniqueValueSet;
import org.apache.usergrid.persistence.collection.serialization.impl.*;
import org.apache.usergrid.persistence.core.scope.ApplicationScopeImpl;
import org.apache.usergrid.persistence.model.entity.SimpleId;
import org.apache.usergrid.persistence.model.field.StringField;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;

import org.apache.usergrid.persistence.core.astyanax.MultiTenantColumnFamily;
import org.apache.usergrid.persistence.core.astyanax.ScopedRowKey;
import org.apache.usergrid.persistence.core.astyanax.ScopedRowKeySerializer;

public class UniqueValueScanner extends ToolBase {

    private static final Logger logger = LoggerFactory.getLogger(UniqueValueScanner.class);

    private static final String APPLICATION_ARG = "app";

    private static final String ENTITY_TYPE_ARG = "entityType";

    private static final String ENTITY_NAME_ARG = "entityName";

    private static final String ENTITY_FIELD_TYPE_ARG = "fieldType";

    //copied shamelessly from unique value serialization strat.
    private static final ScopedRowKeySerializer<TypeField> ROW_KEY_SER = new ScopedRowKeySerializer<>(
            UniqueTypeFieldRowKeySerializer.get());

    private final EntityVersionSerializer ENTITY_VERSION_SER = new EntityVersionSerializer();

    private final MultiTenantColumnFamily<ScopedRowKey<TypeField>, EntityVersion> CF_UNIQUE_VALUES = new MultiTenantColumnFamily<>(
            "Unique_Values_V2", ROW_KEY_SER, ENTITY_VERSION_SER);

    private com.netflix.astyanax.Keyspace keyspace;

    private MvccEntitySerializationStrategy mvccEntitySerializationStrategy;

    private UniqueValueSerializationStrategy uniqueValueSerializationStrategy;

    private EntityManager em;

    @Override
    @SuppressWarnings("static-access")
    public Options createOptions() {

        Options options = super.createOptions();

        Option appOption = OptionBuilder.withArgName(APPLICATION_ARG).hasArg().isRequired(true)
                .withDescription("application id").create(APPLICATION_ARG);

        options.addOption(appOption);

        Option collectionOption = OptionBuilder.withArgName(ENTITY_TYPE_ARG).hasArg().isRequired(true)
                .withDescription("collection name").create(ENTITY_TYPE_ARG);

        options.addOption(collectionOption);

        Option specificEntityNameOption = OptionBuilder.withArgName(ENTITY_NAME_ARG).hasArg().isRequired(false)
                .withDescription("specific entity name").create(ENTITY_NAME_ARG);

        options.addOption(specificEntityNameOption);

        Option fieldTypeOption = OptionBuilder.withArgName(ENTITY_FIELD_TYPE_ARG).hasArg().isRequired(false)
                .withDescription("field type").create(ENTITY_FIELD_TYPE_ARG);

        options.addOption(fieldTypeOption);

        return options;
    }

    /*
     * (non-Javadoc)
     *
     * @see
     * org.apache.usergrid.tools.ToolBase#runTool(org.apache.commons.cli.CommandLine)
     */
    @Override
    public void runTool(CommandLine line) throws Exception {

        startSpring();

        UUID appToFilter = null;
        if (!line.getOptionValue(APPLICATION_ARG).isEmpty()) {
            appToFilter = UUID.fromString(line.getOptionValue(APPLICATION_ARG));
        }

        logger.info("Staring Tool: UniqueValueScanner");
        logger.info("Using Cassandra consistency level: {}",
                System.getProperty("usergrid.read.cl", "CL_LOCAL_QUORUM"));

        keyspace = injector.getInstance(com.netflix.astyanax.Keyspace.class);
        mvccEntitySerializationStrategy = injector.getInstance(MvccEntitySerializationStrategy.class);
        uniqueValueSerializationStrategy = injector.getInstance(UniqueValueSerializationStrategy.class);

        String fieldType = line.getOptionValue(ENTITY_FIELD_TYPE_ARG) != null
                ? line.getOptionValue(ENTITY_FIELD_TYPE_ARG)
                : "name";
        String entityType = line.getOptionValue(ENTITY_TYPE_ARG);
        String entityName = line.getOptionValue(ENTITY_NAME_ARG);

        AtomicInteger count = new AtomicInteger(0);

        if (entityName != null && !entityName.isEmpty()) {

            if (appToFilter == null) {
                throw new RuntimeException("Cannot execute UniqueValueScanner with specific entity without the "
                        + "application UUID for which the entity should exist.");
            }

            if (entityType == null) {
                throw new RuntimeException("Cannot execute UniqueValueScanner without the entity type (singular "
                        + "collection name).");
            }

            logger.info("Running entity unique load only");

            //do stuff w/o read repair
            UniqueValueSet uniqueValueSet = uniqueValueSerializationStrategy.load(
                    new ApplicationScopeImpl(new SimpleId(appToFilter, "application")),
                    ConsistencyLevel.valueOf(System.getProperty("usergrid.read.cl", "LOCAL_QUORUM")), entityType,
                    Collections.singletonList(new StringField(fieldType, entityName)), false);

            StringBuilder stringBuilder = new StringBuilder();

            stringBuilder.append("[");

            uniqueValueSet.forEach(uniqueValue -> {

                String entry = "fieldName=" + uniqueValue.getField().getName() + ", fieldValue="
                        + uniqueValue.getField().getValue() + ", uuid=" + uniqueValue.getEntityId().getUuid()
                        + ", type=" + uniqueValue.getEntityId().getType() + ", version="
                        + uniqueValue.getEntityVersion();
                stringBuilder.append("{").append(entry).append("},");
            });

            stringBuilder.deleteCharAt(stringBuilder.length() - 1);
            stringBuilder.append("]");

            logger.info("Returned unique value set from serialization load = {}", stringBuilder.toString());

        } else {

            logger.info("Running entity unique scanner only");

            // scan through all unique values and log some info

            Iterator<com.netflix.astyanax.model.Row<ScopedRowKey<TypeField>, EntityVersion>> rows = null;
            try {

                rows = keyspace.prepareQuery(CF_UNIQUE_VALUES)
                        .setConsistencyLevel(com.netflix.astyanax.model.ConsistencyLevel
                                .valueOf(System.getProperty("usergrid.read.cl", "CL_LOCAL_QUORUM")))
                        .getAllRows().withColumnRange(new RangeBuilder().setLimit(1000).build()).execute()
                        .getResult().iterator();

            } catch (ConnectionException e) {

                logger.error("Error connecting to cassandra", e);
            }

            UUID finalAppToFilter = appToFilter;

            if (rows != null) {
                rows.forEachRemaining(row -> {

                    count.incrementAndGet();

                    if (count.get() % 1000 == 0) {
                        logger.info("Scanned {} rows in {}", count.get(), CF_UNIQUE_VALUES.getName());
                    }

                    final String fieldName = row.getKey().getKey().getField().getName();
                    final String fieldValue = row.getKey().getKey().getField().getValue().toString();
                    final String scopeType = row.getKey().getScope().getType();
                    final UUID scopeUUID = row.getKey().getScope().getUuid();

                    if (!fieldName.equalsIgnoreCase(fieldType)
                            || (finalAppToFilter != null && !finalAppToFilter.equals(scopeUUID))) {
                        // do nothing

                    } else {

                        // if we have more than 1 column, let's check for a duplicate
                        if (row.getColumns() != null && row.getColumns().size() > 1) {

                            final List<EntityVersion> values = new ArrayList<>(row.getColumns().size());

                            Iterator<Column<EntityVersion>> columns = row.getColumns().iterator();
                            columns.forEachRemaining(column -> {

                                final EntityVersion entityVersion = column.getName();

                                logger.trace(scopeType + ": " + scopeUUID + ", " + fieldName + ": " + fieldValue
                                        + ", " + "entity type: " + entityVersion.getEntityId().getType() + ", "
                                        + "entity uuid: " + entityVersion.getEntityId().getUuid());

                                if (entityType != null
                                        && entityVersion.getEntityId().getType().equalsIgnoreCase(entityType)) {

                                    // add the first value into the list
                                    if (values.size() == 0) {

                                        values.add(entityVersion);

                                    } else {

                                        if (!values.get(0).getEntityId().getUuid()
                                                .equals(entityVersion.getEntityId().getUuid())) {

                                            values.add(entityVersion);

                                            logger.error(
                                                    "Duplicate found for field [{}={}].  Entry 1: [{}], Entry 2: [{}]",
                                                    fieldName, fieldValue, values.get(0).getEntityId(),
                                                    entityVersion.getEntityId());

                                        }

                                    }

                                }

                            });
                        }
                    }

                });
            } else {

                logger.warn("No rows returned from table: {}", CF_UNIQUE_VALUES.getName());

            }

        }
    }
}