Java tutorial
/******************************************************************************* * Copyright 2012 Apigee Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ package org.usergrid.tools; import static me.prettyprint.hector.api.factory.HFactory.createMutator; import static org.usergrid.persistence.Schema.DICTIONARY_COLLECTIONS; import static org.usergrid.persistence.Schema.getDefaultSchema; import static org.usergrid.persistence.cassandra.ApplicationCF.ENTITY_INDEX; import static org.usergrid.persistence.cassandra.ApplicationCF.ENTITY_INDEX_ENTRIES; import static org.usergrid.persistence.cassandra.CassandraPersistenceUtils.addDeleteToMutator; import static org.usergrid.persistence.cassandra.CassandraPersistenceUtils.key; import static org.usergrid.persistence.cassandra.CassandraService.INDEX_ENTRY_LIST_COUNT; import static org.usergrid.utils.CompositeUtils.setEqualityFlag; import static org.usergrid.utils.UUIDUtils.getTimestampInMicros; import static org.usergrid.utils.UUIDUtils.newTimeUUID; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; import java.util.Map.Entry; import java.util.Set; import java.util.UUID; import me.prettyprint.cassandra.serializers.ByteBufferSerializer; import me.prettyprint.hector.api.Keyspace; import me.prettyprint.hector.api.beans.AbstractComposite.ComponentEquality; import me.prettyprint.hector.api.beans.DynamicComposite; import me.prettyprint.hector.api.beans.HColumn; import me.prettyprint.hector.api.mutation.Mutator; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.Option; import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.usergrid.persistence.Entity; import org.usergrid.persistence.IndexBucketLocator; import org.usergrid.persistence.IndexBucketLocator.IndexType; import org.usergrid.persistence.cassandra.CassandraService; import org.usergrid.persistence.cassandra.EntityManagerImpl; import org.usergrid.persistence.cassandra.index.IndexScanner; import org.usergrid.persistence.entities.Application; import org.usergrid.persistence.query.ir.result.SliceIterator; import org.usergrid.persistence.query.ir.result.UUIDIndexSliceParser; import org.usergrid.persistence.schema.CollectionInfo; /** * This is a utility to audit all available entity ids in the secondary index. * It then checks to see if any index value is not present in the * Entity_Index_Entries. If it is not, the value from the index is removed, and * a forced re-index is triggered * * USERGRID-323 * * @author tnine * */ public class EntityIndexCleanup extends ToolBase { /** * */ private static final int PAGE_SIZE = 100; public static final ByteBufferSerializer be = new ByteBufferSerializer(); private static final Logger logger = LoggerFactory.getLogger(EntityIndexCleanup.class); @Override @SuppressWarnings("static-access") public Options createOptions() { Option hostOption = OptionBuilder.withArgName("host").hasArg().isRequired(true) .withDescription("Cassandra host").create("host"); Options options = new Options(); options.addOption(hostOption); return options; } /* * (non-Javadoc) * * @see * org.usergrid.tools.ToolBase#runTool(org.apache.commons.cli.CommandLine) */ @Override public void runTool(CommandLine line) throws Exception { startSpring(); logger.info("Starting entity cleanup"); // List<UUID> ids = null; // Query query = new Query(); // query.setLimit(PAGE_SIZE); // String lastCursor = null; for (Entry<String, UUID> app : emf.getApplications().entrySet()) { logger.info("Starting cleanup for app {}", app.getKey()); UUID applicationId = app.getValue(); EntityManagerImpl em = (EntityManagerImpl) emf.getEntityManager(applicationId); //sanity check for corrupt apps Application appEntity = em.getApplication(); if (appEntity == null) { logger.warn("Application does not exist in data. {}", app.getKey()); continue; } CassandraService cass = em.getCass(); IndexBucketLocator indexBucketLocator = em.getIndexBucketLocator(); Keyspace ko = cass.getApplicationKeyspace(applicationId); UUID timestampUuid = newTimeUUID(); long timestamp = getTimestampInMicros(timestampUuid); Set<String> collectionNames = em.getApplicationCollections(); // go through each collection and audit the values for (String collectionName : collectionNames) { IndexScanner scanner = cass.getIdList(cass.getApplicationKeyspace(applicationId), key(applicationId, DICTIONARY_COLLECTIONS, collectionName), null, null, PAGE_SIZE, false, indexBucketLocator, applicationId, collectionName); SliceIterator<UUID> itr = new SliceIterator<UUID>(scanner, null, new UUIDIndexSliceParser()); while (itr.hasNext()) { Set<UUID> ids = itr.next(); CollectionInfo collection = getDefaultSchema().getCollection("application", collectionName); //We shouldn't have to do this, but otherwise the cursor won't work Set<String> indexed = collection.getPropertiesIndexed(); // what's left needs deleted, do so logger.info("Auditing {} entities for collection {} in app {}", new Object[] { ids.size(), collectionName, app.getValue() }); for (UUID id : ids) { boolean reIndex = false; Mutator<ByteBuffer> m = createMutator(ko, be); for (String prop : indexed) { String bucket = indexBucketLocator.getBucket(applicationId, IndexType.COLLECTION, id, prop); Object rowKey = key(applicationId, collection.getName(), prop, bucket); List<HColumn<ByteBuffer, ByteBuffer>> indexCols = scanIndexForAllTypes(ko, indexBucketLocator, applicationId, rowKey, id, prop); // loop through the indexed values and verify them as present in // our entity_index_entries. If they aren't, we need to delete the // from the secondary index, and mark // this object for re-index via n update for (HColumn<ByteBuffer, ByteBuffer> index : indexCols) { DynamicComposite secondaryIndexValue = DynamicComposite .fromByteBuffer(index.getName().duplicate()); Object code = secondaryIndexValue.get(0); Object propValue = secondaryIndexValue.get(1); UUID timestampId = (UUID) secondaryIndexValue.get(3); DynamicComposite existingEntryStart = new DynamicComposite(prop, code, propValue, timestampId); DynamicComposite existingEntryFinish = new DynamicComposite(prop, code, propValue, timestampId); setEqualityFlag(existingEntryFinish, ComponentEquality.GREATER_THAN_EQUAL); // now search our EntityIndexEntry for previous values, see if // they don't match this one List<HColumn<ByteBuffer, ByteBuffer>> entries = cass.getColumns(ko, ENTITY_INDEX_ENTRIES, id, existingEntryStart, existingEntryFinish, INDEX_ENTRY_LIST_COUNT, false); // we wouldn't find this column in our entity_index_entries // audit. Delete it, then mark this entity for update if (entries.size() == 0) { logger.info( "Could not find reference to value '{}' for property '{}' on entity {} in collection {}. Forcing reindex", new Object[] { propValue, prop, id, collectionName }); addDeleteToMutator(m, ENTITY_INDEX, rowKey, index.getName().duplicate(), timestamp); reIndex = true; } if (entries.size() > 1) { reIndex = true; } } } //force this entity to be updated if (reIndex) { Entity entity = em.get(id); //entity may not exist, but we should have deleted rows from the index if (entity == null) { logger.warn("Entity with id {} did not exist in app {}", id, applicationId); //now execute the cleanup. In this case the entity is gone, so we'll want to remove references from the secondary index m.execute(); continue; } em.update(entity); //now execute the cleanup. This way if the above update fails, we still have enough data to run again later m.execute(); } } } } } logger.info("Completed audit of apps"); } private List<HColumn<ByteBuffer, ByteBuffer>> scanIndexForAllTypes(Keyspace ko, IndexBucketLocator indexBucketLocator, UUID applicationId, Object rowKey, UUID entityId, String prop) throws Exception { //TODO Determine the index bucket. Scan the entire index for properties with this entityId. DynamicComposite start = null; List<HColumn<ByteBuffer, ByteBuffer>> cols; List<HColumn<ByteBuffer, ByteBuffer>> results = new ArrayList<HColumn<ByteBuffer, ByteBuffer>>(); do { cols = cass.getColumns(ko, ENTITY_INDEX, rowKey, start, null, 100, false); for (HColumn<ByteBuffer, ByteBuffer> col : cols) { DynamicComposite secondaryIndexValue = DynamicComposite.fromByteBuffer(col.getName().duplicate()); UUID storedId = (UUID) secondaryIndexValue.get(2); //add it to the set. We can't short circuit due to property ordering if (entityId.equals(storedId)) { results.add(col); } start = secondaryIndexValue; } } while (cols.size() == 100); return results; } }