org.apache.usergrid.tools.WarehouseExport.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.usergrid.tools.WarehouseExport.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.usergrid.tools;

import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.FilenameFilter;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
import java.util.TreeMap;
import java.util.UUID;
import java.util.concurrent.TimeUnit;

import org.jclouds.ContextBuilder;
import org.jclouds.blobstore.AsyncBlobStore;
import org.jclouds.blobstore.BlobStoreContext;
import org.jclouds.blobstore.domain.Blob;
import org.jclouds.blobstore.domain.BlobBuilder;
import org.jclouds.blobstore.options.PutOptions;
import org.jclouds.http.config.JavaUrlHttpCommandExecutorServiceModule;
import org.jclouds.logging.log4j.config.Log4JLoggingModule;
import org.jclouds.netty.config.NettyPayloadModule;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.usergrid.management.OrganizationInfo;
import org.apache.usergrid.persistence.Entity;
import org.apache.usergrid.persistence.EntityManager;
import org.apache.usergrid.persistence.Query;
import org.apache.usergrid.persistence.Results;
import org.apache.usergrid.persistence.Results.Level;
import org.apache.usergrid.persistence.Schema;
import org.apache.usergrid.persistence.entities.Application;
import org.apache.usergrid.persistence.schema.CollectionInfo;
import org.apache.usergrid.utils.InflectionUtils;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;

import com.google.common.collect.BiMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.inject.Module;

import au.com.bytecode.opencsv.CSVReader;
import au.com.bytecode.opencsv.CSVWriter;

import static org.apache.usergrid.persistence.Schema.getDefaultSchema;

/**
 * Exports all known (non-Dynamic) fields from Schema that are primitive, Date, or String into a pipe-delimited file.
 * Also includes (hard-coded for now) fields from Notification, Notifier, and Receipt.  With no -startTime, scans the
 * existing *.csv files in the output directory and starts from last end date found. With no -endTime, ends at current
 * time - 1 hour. Explicitly sets "cassandra.readcl=ONE" for efficiency.
 */
public class WarehouseExport extends ExportingToolBase {

    private static final Logger LOG = LoggerFactory.getLogger(WarehouseExport.class);
    private static final char SEPARATOR = '|';

    public static final String BUCKET_PROPNAME = "usergrid.warehouse-export-bucket";
    public static final String ACCESS_ID_PROPNAME = "usergrid.warehouse-export-access-id";
    public static final String SECRET_KEY_PROPNAME = "usergrid.warehouse-export-secret-key";

    private static final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");

    private static final String[] BASE_ATTRIBUTES = { "uuid", "organization", "application", "type", "created",
            "modified" };

    private static final String START_TIME = "startTime";
    private static final String END_TIME = "endTime";
    private static final String UPLOAD = "upload";

    private static final String[] NOTIFICATION_ATTRIBUTES = { "payloads", "queued", "started", "finished",
            "deliver", "expire", "canceled", "errorMessage", "statistics" };

    private static final String[] NOTIFIER_ATTRIBUTES = { "provider", "environment" };
    private static final String[] RECEIPT_ATTRIBUTES = { "payload", "sent", "errorCode", "errorMessage",
            "notifierId", "notificationUUID" };

    private static final Map<String, String[]> URAP_ATTRIBUTES = new HashMap<String, String[]>();

    static {
        URAP_ATTRIBUTES.put("notification", NOTIFICATION_ATTRIBUTES);
        URAP_ATTRIBUTES.put("notifier", NOTIFIER_ATTRIBUTES);
        URAP_ATTRIBUTES.put("receipt", RECEIPT_ATTRIBUTES);
    }

    private CSVWriter writer;
    private String[] collectionNames;
    private Map<String, String[]> collectionFieldMap;
    private Date startTime, endTime;

    @Override
    public void runTool(CommandLine line) throws Exception {

        // keep it light and fast
        System.setProperty("cassandra.readcl", "ONE");

        startSpring();
        setVerbose(line);

        applyOrgId(line);
        prepareBaseOutputFileName(line);
        outputDir = createOutputParentDir();
        LOG.info("Export directory: {}", outputDir.getAbsolutePath());

        // create writer
        applyStartTime(line);
        applyEndTime(line);
        LOG.error("startTime: {}, endTime: {}", startTime, endTime);
        if (startTime.getTime() >= endTime.getTime()) {
            LOG.error("startTime must be before endTime. exiting.");
            System.exit(1);
        }

        // create "modified" query to select data
        StringBuilder builder = new StringBuilder();
        builder.append("modified >= ").append(startTime.getTime()).append(" and ");
        builder.append("modified <= ").append(endTime.getTime());
        String queryString = builder.toString();

        // create writer
        String dateString = DATE_FORMAT.format(new Date());
        String fileName = outputDir.getAbsolutePath() + "/" + dateString + ".csv";
        FileWriter fw = new FileWriter(fileName);
        writer = new CSVWriter(fw, SEPARATOR, CSVWriter.NO_QUOTE_CHARACTER, '\'');

        try {
            writeMetadata();
            writeHeaders();

            // Loop through the organizations
            Map<UUID, String> organizations = getOrganizations();
            for (Entry<UUID, String> orgIdAndName : organizations.entrySet()) {
                exportApplicationsForOrg(orgIdAndName, queryString);
            }
        } finally {
            writer.close();
        }

        // now that file is written, copy it to S3
        if (line.hasOption("upload")) {
            LOG.info("Copy to S3");
            copyToS3(fileName);
        }
    }

    private void copyToS3(String fileName) {

        String bucketName = (String) properties.get(BUCKET_PROPNAME);
        String accessId = (String) properties.get(ACCESS_ID_PROPNAME);
        String secretKey = (String) properties.get(SECRET_KEY_PROPNAME);

        Properties overrides = new Properties();
        overrides.setProperty("s3" + ".identity", accessId);
        overrides.setProperty("s3" + ".credential", secretKey);

        final Iterable<? extends Module> MODULES = ImmutableSet.of(new JavaUrlHttpCommandExecutorServiceModule(),
                new Log4JLoggingModule(), new NettyPayloadModule());

        BlobStoreContext context = ContextBuilder.newBuilder("s3").credentials(accessId, secretKey).modules(MODULES)
                .overrides(overrides).buildView(BlobStoreContext.class);

        // Create Container (the bucket in s3)
        try {
            AsyncBlobStore blobStore = context.getAsyncBlobStore(); // it can be changed to sync
            // BlobStore (returns false if it already exists)
            ListenableFuture<Boolean> container = blobStore.createContainerInLocation(null, bucketName);
            if (container.get()) {
                LOG.info("Created bucket " + bucketName);
            }
        } catch (Exception ex) {
            logger.error("Could not start binary service: {}", ex.getMessage());
            throw new RuntimeException(ex);
        }

        try {
            File file = new File(fileName);
            AsyncBlobStore blobStore = context.getAsyncBlobStore();
            BlobBuilder blobBuilder = blobStore.blobBuilder(file.getName()).payload(file).calculateMD5()
                    .contentType("text/plain").contentLength(file.length());

            Blob blob = blobBuilder.build();

            ListenableFuture<String> futureETag = blobStore.putBlob(bucketName, blob,
                    PutOptions.Builder.multipart());

            LOG.info("Uploaded file etag=" + futureETag.get());
        } catch (Exception e) {
            LOG.error("Error uploading to blob store", e);
        }
    }

    @Override
    @SuppressWarnings("static-access")
    public Options createOptions() {

        Options options = super.createOptions();

        Option startTime = OptionBuilder.hasArg().withDescription("minimum modified time -startTime")
                .create(START_TIME);

        Option endTime = OptionBuilder.hasArg().withDescription("maximum modified time -endTime").create(END_TIME);

        Option upload = OptionBuilder.withDescription("upload files to blob-store").create(UPLOAD);

        options.addOption(startTime);
        options.addOption(endTime);
        options.addOption(upload);

        return options;
    }

    private void applyStartTime(CommandLine line) throws Exception {

        if (line.hasOption(START_TIME)) {
            startTime = new Date(Long.parseLong(line.getOptionValue(START_TIME)));
        } else {
            // attempt to read last end time from directory
            File[] files = outputDir.listFiles(new FilenameFilter() {
                @Override
                public boolean accept(File dir, String name) {
                    return name.endsWith(".csv");
                }
            });
            long lastEndTime = 0;
            for (File file : files) {
                long endTime = readEndTime(file);
                if (endTime > lastEndTime) {
                    lastEndTime = endTime;
                }
            }
            startTime = new Date(lastEndTime + 1);
        }
    }

    private void applyEndTime(CommandLine line) {
        if (line.hasOption(END_TIME)) {
            endTime = new Date(Long.parseLong(line.getOptionValue(END_TIME)));
        } else {
            endTime = new Date(System.currentTimeMillis() - TimeUnit.MILLISECONDS.convert(1L, TimeUnit.HOURS));
        }
    }

    private long readEndTime(File file) throws Exception {
        CSVReader reader = new CSVReader(new FileReader(file), SEPARATOR, CSVWriter.NO_QUOTE_CHARACTER, '\'');
        try {
            String[] firstLine = reader.readNext();
            if ("start".equals(firstLine[0]) && "end".equals(firstLine[2])) {
                return Long.parseLong(firstLine[3]);
            }
        } finally {
            reader.close();
        }
        return 0;
    }

    private void writeMetadata() {
        writer.writeNext(new String[] { "start", "" + startTime.getTime(), "end", "" + endTime.getTime() });
    }

    private void writeHeaders() {
        writer.writeNext(getHeaders());
    }

    private String[] getHeaders() {

        List<String> headers = new ArrayList<String>();
        headers.addAll(Arrays.asList(BASE_ATTRIBUTES));

        Map<String, String[]> cfm = getCollectionFieldMap();
        for (Map.Entry<String, String[]> entry : cfm.entrySet()) {
            String collection = entry.getKey();
            String[] attributes = entry.getValue();
            for (String attribute : attributes) {
                headers.add(collection + "_" + attribute);
            }
        }

        String[] stringHeaders = new String[headers.size()];
        return headers.toArray(stringHeaders);
    }

    private Map<String, String[]> getCollectionFieldMap() {

        if (collectionFieldMap != null) {
            return collectionFieldMap;
        }

        // get basic stuff from Schema
        String[] collectionTypes = getCollectionTypes();
        collectionFieldMap = new TreeMap<String, String[]>();
        for (String type : collectionTypes) {
            Set<String> propertyNames = Schema.getDefaultSchema().getPropertyNames(type);
            for (String attr : BASE_ATTRIBUTES) {
                propertyNames.remove(attr);
            }

            Iterator<String> i = propertyNames.iterator();
            while (i.hasNext()) {
                String property = i.next();
                Class cls = Schema.getDefaultSchema().getPropertyType(type, property);
                if (!cls.isPrimitive() && cls != String.class && cls != Date.class) {
                    i.remove();
                }
            }
            String[] props = new String[propertyNames.size()];
            propertyNames.toArray(props);
            Arrays.sort(props);
            collectionFieldMap.put(type, props);
        }

        // add URAP stuff that's not visible to usergrid-stack
        for (Map.Entry<String, String[]> entry : URAP_ATTRIBUTES.entrySet()) {
            String type = entry.getKey();
            String[] attributes = entry.getValue();
            Arrays.sort(attributes);
            collectionFieldMap.put(type, attributes);
        }

        return collectionFieldMap;
    }

    /** @return Map of Organization UUID -> Name */
    private Map<UUID, String> getOrganizations() throws Exception {

        Map<UUID, String> organizationNames;

        if (orgId == null) {
            organizationNames = managementService.getOrganizations();
        } else {

            OrganizationInfo info = managementService.getOrganizationByUuid(orgId);

            if (info == null) {
                LOG.error("Organization info is null!");
                System.exit(1);
            }

            organizationNames = new HashMap<UUID, String>();
            organizationNames.put(orgId, info.getName());
        }

        return organizationNames;
    }

    private String[] getCollectionTypes() {

        if (collectionNames != null) {
            return collectionNames;
        }

        Collection<CollectionInfo> system_collections = getDefaultSchema().getCollections(Application.ENTITY_TYPE)
                .values();

        ArrayList<String> collections = new ArrayList<String>(system_collections.size());
        for (CollectionInfo collection : system_collections) {
            if (!Schema.isAssociatedEntityType(collection.getType())) {
                collections.add(collection.getType());
            }
        }

        collectionNames = new String[collections.size()];
        Collections.sort(collections);
        return collections.toArray(collectionNames);
    }

    private void exportApplicationsForOrg(Entry<UUID, String> orgIdAndName, String queryString) throws Exception {

        LOG.info("organization: {} / {}", orgIdAndName.getValue(), orgIdAndName.getKey());

        String orgName = orgIdAndName.getValue();

        BiMap<UUID, String> applications = managementService.getApplicationsForOrganization(orgIdAndName.getKey());
        for (Entry<UUID, String> appIdAndName : applications.entrySet()) {

            String appName = appIdAndName.getValue();
            appName = appName.substring(appName.indexOf('/') + 1);

            LOG.info("application {} / {}", appName, appIdAndName.getKey());

            EntityManager em = emf.getEntityManager(appIdAndName.getKey());
            Map<String, String[]> cfm = getCollectionFieldMap();

            // Loop through the collections of the Application
            Set<String> collections = em.getApplicationCollections();
            for (String collectionName : collections) {

                // set up for retrieving only the necessary properties
                String entityType = InflectionUtils.singularize(collectionName);
                String[] props = cfm.get(entityType);
                Collection<String> properties = new ArrayList<String>(
                        BASE_ATTRIBUTES.length + (props != null ? props.length : 0));
                properties.addAll(Arrays.asList(BASE_ATTRIBUTES));
                if (props != null) {
                    properties.addAll(Arrays.asList(props));
                }

                Query query = Query.fromQL(queryString);
                query.setLimit(MAX_ENTITY_FETCH);
                query.setResultsLevel(Level.REFS);
                Results results = em.searchCollection(em.getApplicationRef(), collectionName, query);

                while (results.size() > 0) {

                    List<Entity> entities = em.getPartialEntities(results.getIds(), properties);

                    for (Entity entity : entities) {
                        write(orgName, appName, entity, em);
                    }

                    if (results.getCursor() == null) {
                        break;
                    }

                    query.setCursor(results.getCursor());
                    results = em.searchCollection(em.getApplicationRef(), collectionName, query);
                }
            }
        }
    }

    private void write(String orgName, String appName, Entity entity, EntityManager em) throws Exception {

        Map<String, String[]> cfm = getCollectionFieldMap();

        String uuid = entity.getUuid().toString();
        String created = DATE_FORMAT.format(entity.getCreated());
        String modified = DATE_FORMAT.format(entity.getModified());
        String type = entity.getType();

        List<String> values = new ArrayList<String>(30);
        values.add(uuid);
        values.add(orgName);
        values.add(appName);
        values.add(entity.getType());
        values.add(created);
        values.add(modified);

        for (Map.Entry<String, String[]> entry : cfm.entrySet()) {
            String collection = entry.getKey();
            String[] attributes = entry.getValue();
            if (collection.equals(type)) {
                for (String attribute : attributes) {
                    Object prop = entity.getProperty(attribute);
                    values.add(prop != null ? prop.toString() : null);
                }
            } else {
                for (String attribute : attributes) {
                    values.add(null);
                }
            }
        }

        String[] stringValues = new String[values.size()];
        values.toArray(stringValues);
        writer.writeNext(stringValues);
    }
}