extract.ExtractorImpl.java Source code

Java tutorial

Introduction

Here is the source code for extract.ExtractorImpl.java

Source

/*
 * Copyright 2012-2013 inBloom, Inc. and its affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package extract;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import com.mongodb.util.JSON;
import com.mongodb.util.ThreadUtil;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.data.mongodb.core.query.Criteria;
import org.springframework.data.mongodb.core.query.Query;

import org.slc.sli.common.util.tenantdb.TenantContext;
import org.slc.sli.domain.Entity;
import org.slc.sli.domain.Repository;

import util.OutstreamZipFile;

/**
 * Extractor pulls data from mongo and writes it to file.
 *
 * @author tshewchuk
 *
 */
public class ExtractorImpl implements Extractor {

    private static final Logger LOG = LoggerFactory.getLogger(ExtractorImpl.class);

    /** Worker pool size used unless {@link #setExecutorThreads(int)} overrides it. */
    private static final int DEFAULT_EXECUTOR_THREADS = 3;

    /** Maximum time, in seconds, to wait for a single tenant's extract job. */
    private static final int DEFAULT_EXTRACTOR_JOB_TIME = 600;

    /** Key added to each exported record body that carries the record id. */
    private static final String ID_STRING = "id";

    /** Key added to each exported record body that carries the record type. */
    private static final String TYPE_STRING = "entityType";

    /** Names of the entities to extract; each one becomes a {@code <name>.json} archive entry. */
    private List<String> entities;

    /** Entity name to collection name, for entities that are selected from a collection by type. */
    private Map<String, String> queriedEntities;

    /** Entity (archive) name to the list of record types that are exported together under it. */
    private Map<String, List<String>> combinedEntities;

    /** Directory into which the per-tenant zip files are written. */
    private String extractDir;

    /** Worker pool that runs one {@link ExtractWorker} per tenant; created by {@link #init()}. */
    private ExecutorService executor;

    private int executorThreads = DEFAULT_EXECUTOR_THREADS;

    /** When {@code true}, {@link #init()} kicks off a full extract immediately. */
    private boolean runOnStartup = false;

    /** Tenants processed by {@link #execute()}. */
    private List<String> tenants;

    private Repository<Entity> entityRepository;

    /**
     * Shuts down the worker pool. Already submitted jobs are allowed to finish.
     * Safe to call when {@link #init()} has not (successfully) run.
     */
    public void destroy() {
        if (executor != null) {
            executor.shutdown();
        }
    }

    /**
     * Creates the extract directory and the worker pool and, if configured to
     * run on startup, starts a full extract in the background.
     *
     * @throws FileNotFoundException
     *             declared for interface compatibility; not thrown by the code in this method
     */
    public void init() throws FileNotFoundException {
        createExtractDir();
        // create thread pool to process files
        executor = Executors.newFixedThreadPool(executorThreads);
        if (runOnStartup) {
            // The coordinating execute() call blocks until every tenant job is done.
            // Running it on the worker pool itself would take a worker away from the
            // tenant jobs, and with a single-threaded pool those jobs could not start
            // until every wait had timed out. Use a dedicated thread instead.
            ExecutorService startup = Executors.newSingleThreadExecutor();
            startup.execute(new Runnable() {
                @Override
                public void run() {
                    execute();
                }
            });
            // The task submitted above still runs; this only lets the thread exit afterwards.
            startup.shutdown();
        }
    }

    /**
     * Creates the extract directory, including any missing parent directories.
     */
    public void createExtractDir() {
        new File(extractDir).mkdirs();
    }

    /**
     * Extracts all configured tenants in parallel on the worker pool, waits for
     * each job (up to {@value #DEFAULT_EXTRACTOR_JOB_TIME} seconds per job) and
     * then shuts the pool down.
     *
     * Because the pool is shut down at the end, this method can only be run once
     * per {@link #init()}.
     */
    @Override
    public void execute() {
        List<Future<String>> futures = new LinkedList<Future<String>>();
        try {
            for (String tenant : tenants) {
                futures.add(executor.submit(new ExtractWorker(tenant)));
            }

            // Wait for job to be finished.
            for (Future<String> future : futures) {
                processFuture(future);
            }
        } finally {
            // Shutdown, even if submitting or waiting failed, so the worker threads are released.
            destroy();
        }
    }

    /**
     * Extracts every configured entity of one tenant into that tenant's zip
     * file and, once all entities are written, renames the temporary zip file to
     * its permanent name.
     *
     * @param tenant
     *            id of the tenant to extract
     */
    @Override
    public void execute(String tenant) {
        // TODO: implement isRunning flag to make sure only one extract is
        // running at a time
        LOG.info("Extracting data from tenant " + tenant);
        OutstreamZipFile zipFile;
        try {
            zipFile = new OutstreamZipFile(extractDir, tenant);
        } catch (IOException e) {
            LOG.error("Error while extracting data from tenant " + tenant, e);
            // Without an archive there is nothing to write to; continuing would only
            // fail with a NullPointerException on the first use of the zip file.
            return;
        }
        TenantContext.setTenantId(tenant);

        for (String entity : entities) {
            extractEntity(tenant, zipFile, entity);
        }

        // Rename temp zip file to permanent.
        try {
            zipFile.renameTempZipFile();
        } catch (IOException e) {
            LOG.error("Error attempting to create zipfile " + zipFile.getZipFile().getPath(), e);
        }
    }

    /**
     * Reports the number of files in the extract directory together with the
     * worker pool's active and completed task counts.
     *
     * @return a one-line, human readable status string
     */
    @Override
    public String getHealth() {
        ThreadPoolExecutor tpe = (ThreadPoolExecutor) executor;
        // File.list() returns null when the path does not exist, is not a directory
        // or cannot be read; report that as an empty directory instead of failing.
        String[] extractedFiles = new File(extractDir).list();
        int fileCount = (extractedFiles == null) ? 0 : extractedFiles.length;
        return getClass() + ": {" + extractDir + " size:" + fileCount + ", active count:"
                + tpe.getActiveCount() + ", completed count:" + tpe.getCompletedTaskCount() + "}";
    }

    /**
     * Waits for one extract job to finish, logging (not propagating) any
     * failure or timeout.
     *
     * @param future
     *            handle of the submitted extract job
     */
    protected void processFuture(Future<String> future) {
        try {
            future.get(DEFAULT_EXTRACTOR_JOB_TIME, TimeUnit.SECONDS);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can still see the interruption.
            Thread.currentThread().interrupt();
            LOG.error("Error while waiting for extractor job to be finished", e);
        } catch (Exception e) {
            LOG.error("Error while waiting for extractor job to be finished", e);
        }
    }

    /**
     * Opens a {@code <entityName>.json} entry in the archive and writes all
     * records of that entity into it. I/O errors are logged, not propagated.
     *
     * @param tenant
     *            id of the tenant being extracted
     * @param zipFile
     *            archive to write into
     * @param entityName
     *            name of the entity to extract
     * @return the zip file backing {@code zipFile}
     */
    public File extractEntity(String tenant, OutstreamZipFile zipFile, String entityName) {
        try {
            zipFile.createArchiveEntry(entityName + ".json");
            extractEntity(tenant, zipFile, entityName, 0);
        } catch (IOException e) {
            LOG.error("Error while extracting " + entityName + " from " + tenant, e);
        }
        return zipFile.getZipFile();
    }

    /**
     * Queries the records of one entity and writes each as JSON to the current
     * archive entry. On an {@link IOException} the extract is retried after one
     * second, for at most three attempts in total.
     *
     * NOTE(review): a retry runs the query again from the first record, so
     * records written before the failure appear to be written a second time -
     * confirm whether that is acceptable for consumers of the archive.
     *
     * @param retryCount
     *            number of retries already performed for this entity
     * @return the zip file backing {@code zipFile}
     */
    private File extractEntity(String tenant, OutstreamZipFile zipFile, String entityName, int retryCount) {

        LOG.debug("Extracting {} from {}", entityName, tenant);
        Iterable<Entity> records = null;
        String collectionName = entityName;
        Query query = new Query();
        if (queriedEntities.containsKey(entityName)) {
            // The entity lives in a differently named collection; select it there by type.
            collectionName = queriedEntities.get(entityName);
            query.addCriteria(Criteria.where("type").is(entityName));
        }
        if (combinedEntities.containsKey(entityName)) {
            // A combined entity replaces any single-type criteria with a match on all of its types.
            query = new Query();
            query.addCriteria(Criteria.where("type").in(combinedEntities.get(entityName)));
        }
        try {
            TenantContext.setTenantId(tenant);
            records = entityRepository.findByQuery(collectionName, query, 0, 0);
            // write each record to file
            for (Entity record : records) {
                addAPIFields(entityName, record);
                zipFile.writeData(toJSON(record));
            }
            LOG.debug("Finished extracting {} from {}", entityName, tenant);
        } catch (IOException e) {
            LOG.error("Error while extracting " + entityName + " from " + tenant, e);
            if (retryCount <= 1) {
                LOG.error("Retrying extract for " + entityName + " from " + tenant);
                ThreadUtil.sleep(1000);
                extractEntity(tenant, zipFile, entityName, retryCount + 1);
            }
        } finally {
            TenantContext.setTenantId(null);
        }
        return zipFile.getZipFile();
    }

    /** Serializes the body of a record to JSON. */
    private String toJSON(Entity record) {
        return JSON.serialize(record.getBody());
    }

    /**
     * Adds the type and id fields to the record body before it is exported.
     * Records of a combined entity get the archive name as their id; all other
     * records get their own entity id.
     */
    private void addAPIFields(String archiveName, Entity entity) {
        entity.getBody().put(TYPE_STRING, entity.getType());
        if (combinedEntities.containsKey(archiveName)) {
            entity.getBody().put(ID_STRING, archiveName);
        } else {
            entity.getBody().put(ID_STRING, entity.getEntityId());
        }
    }

    public void setExtractDir(String extractDir) {
        this.extractDir = extractDir;
    }

    public void setExecutorThreads(int executorThreads) {
        this.executorThreads = executorThreads;
    }

    public void setRunOnStartup(boolean runOnStartup) {
        this.runOnStartup = runOnStartup;
    }

    public void setEntityRepository(Repository<Entity> entityRepository) {
        this.entityRepository = entityRepository;
    }

    public void setTenants(List<String> tenants) {
        this.tenants = tenants;
    }

    public void setEntities(List<String> entities) {
        this.entities = entities;
    }

    public void setQueriedEntities(Map<String, String> queriedEntities) {
        this.queriedEntities = queriedEntities;
    }

    public void setCombinedEntities(Map<String, List<String>> combinedEntities) {
        this.combinedEntities = combinedEntities;
    }

    /**
     * Worker-pool job that runs the extract for a single tenant and returns the
     * tenant id when done.
     *
     * @author tosako
     *
     */
    private class ExtractWorker implements Callable<String> {

        private final String tenant;

        public ExtractWorker(String tenant) {
            this.tenant = tenant;
        }

        @Override
        public String call() throws Exception {
            execute(tenant);
            return tenant;
        }
    }

}