org.waterforpeople.mapping.dataexport.RawDataSpreadsheetImporter.java Source code

Java tutorial

Introduction

Here is the source code for org.waterforpeople.mapping.dataexport.RawDataSpreadsheetImporter.java

Source

/*
 *  Copyright (C) 2010-2012 Stichting Akvo (Akvo Foundation)
 *
 *  This file is part of Akvo FLOW.
 *
 *  Akvo FLOW is free software: you can redistribute it and modify it under the terms of
 *  the GNU Affero General Public License (AGPL) as published by the Free Software Foundation,
 *  either version 3 of the License or any later version.
 *
 *  Akvo FLOW is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *  See the GNU Affero General Public License included below for more details.
 *
 *  The full license text can also be seen at <http://www.gnu.org/licenses/agpl.html>.
 */

package org.waterforpeople.mapping.dataexport;

import java.awt.GraphicsEnvironment;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.security.MessageDigest;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import javax.swing.SwingUtilities;

import org.apache.log4j.Logger;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.waterforpeople.mapping.app.gwt.client.survey.QuestionDto;
import org.waterforpeople.mapping.app.gwt.client.survey.QuestionDto.QuestionType;
import org.waterforpeople.mapping.app.web.dto.RawDataImportRequest;
import org.waterforpeople.mapping.dataexport.service.BulkDataServiceClient;

import com.gallatinsystems.common.util.StringUtil;
import com.gallatinsystems.framework.dataexport.applet.DataImporter;
import com.gallatinsystems.framework.dataexport.applet.ProgressDialog;

public class RawDataSpreadsheetImporter implements DataImporter {

    private static final Logger log = Logger.getLogger(RawDataSpreadsheetImporter.class);

    private static final String SERVLET_URL = "/rawdatarestapi";
    private static final String DEFAULT_LOCALE = "en";
    public static final String SURVEY_CONFIG_KEY = "surveyId";
    protected static final String KEY_PARAM = "apiKey";
    private static final Map<String, String> SAVING_DATA;
    private static final Map<String, String> COMPLETE;
    private Long surveyId;
    private InputStream stream;
    private ProgressDialog progressDialog;
    private String locale = DEFAULT_LOCALE;
    private ThreadPoolExecutor threadPool;
    private BlockingQueue<Runnable> jobQueue;
    private List<String> errorIds;
    private volatile int currentStep;

    private static final ThreadLocal<DateFormat> DATE_FMT = new ThreadLocal<DateFormat>() {
        @Override
        protected DateFormat initialValue() {
            return new SimpleDateFormat("dd-MM-yyyy HH:mm:ss z");
        };
    };
    private static final int SIZE_THRESHOLD = 2000 * 400;

    static {
        SAVING_DATA = new HashMap<String, String>();
        SAVING_DATA.put("en", "Saving Data");

        COMPLETE = new HashMap<String, String>();
        COMPLETE.put("en", "Complete");
    }

    /**
     * opens a file input stream using the file passed in and tries to return the first worksheet in
     * that file
     *
     * @param file
     * @return
     * @throws Exception
     */
    protected Sheet getDataSheet(File file) throws Exception {
        stream = new PushbackInputStream(new FileInputStream(file));
        Workbook wb = null;
        try {
            wb = WorkbookFactory.create(stream);
        } catch (Exception e) {
        }
        return wb.getSheetAt(0);
    }

    /**
     * closes open input streams
     */
    protected void cleanup() {
        if (stream != null) {
            try {
                stream.close();
            } catch (IOException e) {
                e.printStackTrace();
            }

        }
    }

    protected void setSurveyId(Map<String, String> criteria) {
        if (criteria != null && criteria.get(SURVEY_CONFIG_KEY) != null) {
            setSurveyId(new Long(criteria.get(SURVEY_CONFIG_KEY).trim()));
        }
    }

    @SuppressWarnings("unchecked")
    @Override
    public void executeImport(File file, String serverBase, Map<String, String> criteria) {
        try {

            int rows = 0;
            errorIds = new ArrayList<String>();
            jobQueue = new LinkedBlockingQueue<Runnable>();
            threadPool = new ThreadPoolExecutor(5, 5, 10, TimeUnit.SECONDS, jobQueue);
            DateFormat df = new SimpleDateFormat("dd-MM-yyyy HH:mm:ss z");
            setSurveyId(criteria);

            Sheet sheet1 = getDataSheet(file);
            if (!GraphicsEnvironment.isHeadless()) {
                progressDialog = new ProgressDialog(sheet1.getLastRowNum(), locale);
                progressDialog.setVisible(true);
            }
            HashMap<Integer, String> questionIDColMap = new HashMap<Integer, String>();
            Object[] results = BulkDataServiceClient.loadQuestions(getSurveyId().toString(), serverBase,
                    criteria.get("apiKey"));
            Map<String, QuestionDto> questionMap = null;

            if (results != null) {
                questionMap = (Map<String, QuestionDto>) results[1];

            }

            boolean hasDurationCol = true;
            boolean setFirstQuestionColumnIdx = true;
            int firstQuestionCol = 0;

            currentStep = 0;
            MessageDigest digest = MessageDigest.getInstance("MD5");
            for (Row row : sheet1) {
                rows++;
                if (row.getRowNum() == 0) {
                    // Process headers
                    for (Cell cell : row) {
                        if (cell.getStringCellValue().indexOf("|") > -1) {
                            if (setFirstQuestionColumnIdx) {
                                firstQuestionCol = cell.getColumnIndex();
                                setFirstQuestionColumnIdx = false;
                            }

                            String[] parts = cell.getStringCellValue().split("\\|");
                            if (parts[0].trim().length() > 0) {
                                questionIDColMap.put(cell.getColumnIndex(), parts[0].trim());
                            }
                        }
                    }
                    continue; // move to next row (data)
                }
                digest.reset();

                String instanceId = null;
                String dateString = null;
                String submitter = null;
                String duration = null;
                String durationSeconds = null;
                StringBuilder sb = new StringBuilder();

                // Monitoring headers
                // [identifier, displayName, instanceId, date, submitter, duration, questions...]

                // Non-monitoring headers
                // [instanceId, date, submitter, duration, questions...]

                int instanceIdx = firstQuestionCol - 4;
                int dateIdx = firstQuestionCol - 3;
                int submitterIdx = firstQuestionCol - 2;
                int durationIdx = firstQuestionCol - 1;

                sb.append("action=" + RawDataImportRequest.SAVE_SURVEY_INSTANCE_ACTION + "&"
                        + RawDataImportRequest.SURVEY_ID_PARAM + "=" + getSurveyId() + "&");
                boolean needUpload = true;

                for (Cell cell : row) {
                    if (cell.getColumnIndex() == instanceIdx) {
                        if (cell.getCellType() == Cell.CELL_TYPE_NUMERIC) {
                            instanceId = new Double(cell.getNumericCellValue()).intValue() + "";
                        } else if (cell.getCellType() == Cell.CELL_TYPE_STRING) {
                            instanceId = cell.getStringCellValue();

                        }
                        if (instanceId != null) {
                            sb.append(RawDataImportRequest.SURVEY_INSTANCE_ID_PARAM + "=" + instanceId + "&");
                        }
                    }
                    if (cell.getColumnIndex() == dateIdx) {
                        if (cell.getCellType() == Cell.CELL_TYPE_STRING) {
                            dateString = cell.getStringCellValue();
                        } else if (cell.getCellType() == Cell.CELL_TYPE_NUMERIC) {
                            Date date = HSSFDateUtil.getJavaDate(cell.getNumericCellValue());
                            dateString = df.format(date);
                        }
                        if (dateString != null) {
                            sb.append(RawDataImportRequest.COLLECTION_DATE_PARAM + "="
                                    + URLEncoder.encode(dateString, "UTF-8") + "&");
                        }
                    }
                    if (cell.getColumnIndex() == submitterIdx) {
                        if (cell.getCellType() == Cell.CELL_TYPE_STRING) {
                            submitter = cell.getStringCellValue();
                            sb.append("submitter=" + URLEncoder.encode(submitter, "UTF-8") + "&");
                        }
                    }
                    // Survey Duration
                    if (cell.getColumnIndex() == durationIdx) {
                        if (hasDurationCol) {
                            switch (cell.getCellType()) {
                            // if the cell type is string, we expect hh:mm:ss format
                            case Cell.CELL_TYPE_STRING:
                                duration = cell.getStringCellValue();
                                durationSeconds = String.valueOf(durationToSeconds(duration));
                                digest.update(duration.getBytes());
                                break;
                            // if the cell type if numeric, we expect a single seconds value
                            case Cell.CELL_TYPE_NUMERIC:
                                durationSeconds = String.valueOf(cell.getNumericCellValue());
                                digest.update(durationSeconds.getBytes());
                                break;
                            default:
                                durationSeconds = "0";
                                // don't update the digest, because we want this value to be
                                // saved.
                                break;
                            }
                            sb.append("duration=" + URLEncoder.encode(durationSeconds, "UTF-8") + "&");
                        }
                    }

                    boolean hasValue = false;
                    String qId = questionIDColMap.get(cell.getColumnIndex());

                    if (cell.getColumnIndex() >= firstQuestionCol && qId != null && !qId.trim().equals("")) {
                        QuestionDto question = questionMap.get(questionIDColMap.get(cell.getColumnIndex()));
                        QuestionType type = null;
                        // VALUE is default, it is valid for NUMBER, FREE_TEXT, SCAN, OPTION
                        String typeString = "VALUE";
                        if (question != null) {
                            type = question.getType();
                            if (QuestionType.GEO == type) {
                                typeString = "GEO";
                            } else if (QuestionType.PHOTO == type) {
                                typeString = "IMAGE";
                            } else if (QuestionType.VIDEO == type) {
                                typeString = "VIDEO";
                            } else if (QuestionType.DATE == type) {
                                typeString = "DATE";
                            }
                        } else if (questionIDColMap.get(cell.getColumnIndex()).startsWith("--")) {
                            continue;
                        }

                        String cellVal = parseCellAsString(cell);
                        if (cellVal != null) {
                            cellVal = cellVal.trim();
                            // need to update digest before manipulating the
                            // data
                            digest.update(cellVal.getBytes());
                            if (cellVal.contains("|")) {
                                cellVal = cellVal.replaceAll("\\|", "^^");
                            }
                            if (cellVal.endsWith(".jpg")) {
                                if (cellVal.contains("/")) {
                                    cellVal = cellVal.substring(cellVal.lastIndexOf("/"));
                                }
                                cellVal = "/sdcard" + cellVal;
                            }
                            if (cellVal.endsWith("UTC")) {
                                try {
                                    cellVal = DATE_FMT.get().parse(cellVal).getTime() + "";
                                } catch (Exception e) {
                                    log.error("bad date format: " + cellVal + "\n" + e.getMessage(), e);
                                }
                            }
                        }
                        if (cellVal == null) {
                            cellVal = "";
                        }

                        if (type != QuestionType.GEO) {
                            hasValue = true;
                            sb.append("questionId=" + questionIDColMap.get(cell.getColumnIndex()) + "|value=")
                                    .append(cellVal != null ? URLEncoder.encode(cellVal, "UTF-8") : "");
                        } else {
                            hasValue = true;
                            sb.append("questionId=" + questionIDColMap.get(cell.getColumnIndex()) + "|value=");
                            if (questionIDColMap.get(cell.getColumnIndex() + 1) != null
                                    && questionIDColMap.get(cell.getColumnIndex() + 1).startsWith("--")) {

                                for (int i = 1; i < 4; i++) {
                                    String nextVal = parseCellAsString(row.getCell(cell.getColumnIndex() + i));
                                    cellVal += "|" + (nextVal != null ? nextVal : "");
                                }
                                // if the length of the cellVal is too small, which means there is
                                // no valid info, skip.
                                if (cellVal.length() < 5) {
                                    cellVal = "";
                                }
                                sb.append(cellVal != null ? URLEncoder.encode(cellVal, "UTF-8") : "");
                            } else {
                                sb.append(cellVal != null ? URLEncoder.encode(cellVal, "UTF-8") : "");
                            }
                        }

                        if (hasValue) {
                            sb.append("|type=").append(typeString).append("&");
                        }
                    } else if (cell.getColumnIndex() >= firstQuestionCol) {
                        // we should only get here if we have a column that
                        // isn't in the header
                        // as long as the user hasn't messed up the sheet, this
                        // is the md5 digest of the original data
                        try {
                            String md5 = parseCellAsString(cell);
                            String digestVal = StringUtil.toHexString(digest.digest());
                            if (md5 != null && md5.equals(digestVal)) {
                                needUpload = false;
                            } else if (md5 != null && log.isDebugEnabled()) {
                                log.debug("Row: " + row.getRowNum() + " MD5: " + digestVal + " orig md5: " + md5);
                            }
                        } catch (Exception e) {
                            // if we can't handle the md5, then just assume we
                            // need to update the row
                            log.error("Couldn't process md5 for row: " + row.getRowNum() + " - " + e.getMessage(),
                                    e);
                        }
                    }
                }
                if (needUpload) {
                    sendDataToServer(serverBase,
                            instanceId == null ? null
                                    : getResetUrlString(instanceId, dateString, submitter, durationSeconds),
                            sb.toString(), criteria.get(KEY_PARAM));

                } else {
                    // if we didn't need to upload, then just increment our
                    // progress counter
                    SwingUtilities.invokeLater(new StatusUpdater(currentStep++, SAVING_DATA.get(locale)));
                }
            }
            while (!jobQueue.isEmpty() && threadPool.getActiveCount() > 0) {
                Thread.sleep(5000);
            }
            if (errorIds.size() > 0) {
                log.error("There were ERRORS: ");
                for (String line : errorIds) {
                    log.error(line);
                }
            }
            Thread.sleep(5000);
            log.debug("Updating summaries");
            // now update the summaries
            if ((questionIDColMap.size() * rows) < SIZE_THRESHOLD) {
                invokeUrl(serverBase,
                        "action=" + RawDataImportRequest.UPDATE_SUMMARIES_ACTION + "&"
                                + RawDataImportRequest.SURVEY_ID_PARAM + "=" + surveyId,
                        true, criteria.get(KEY_PARAM));
            }

            invokeUrl(serverBase, "action=" + RawDataImportRequest.SAVE_MESSAGE_ACTION + "&"
                    + RawDataImportRequest.SURVEY_ID_PARAM + "=" + surveyId, true, criteria.get(KEY_PARAM));

            SwingUtilities.invokeLater(new StatusUpdater(currentStep++, COMPLETE.get(locale), true));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            cleanup();
        }
    }

    private Integer durationToSeconds(String duration) {
        if (duration == null || duration.length() == 0)
            return 0;

        // try to parse as integer
        if (!duration.contains(":")) {
            try {
                int seconds = Integer.parseInt(duration);
                return seconds;
            } catch (Exception e) {
                return 0;
            }
        }

        // try do parse as hh:mm:ss
        String[] tokens = duration.split(":");
        if (tokens.length != 3)
            return 0;
        try {
            int hours = Integer.parseInt(tokens[0]);
            int minutes = Integer.parseInt(tokens[1]);
            int seconds = Integer.parseInt(tokens[2]);
            return 3600 * hours + 60 * minutes + seconds;
        } catch (Exception e) {
            return 0;
        }
    }

    private String getResetUrlString(String instanceId, String dateString, String submitter, String durationSeconds)
            throws UnsupportedEncodingException {
        String url = "action=" + RawDataImportRequest.RESET_SURVEY_INSTANCE_ACTION + "&"
                + RawDataImportRequest.SURVEY_INSTANCE_ID_PARAM + "=" + instanceId + "&"
                + RawDataImportRequest.SURVEY_ID_PARAM + "=" + getSurveyId() + "&"
                + RawDataImportRequest.COLLECTION_DATE_PARAM + "=" + URLEncoder.encode(dateString, "UTF-8") + "&"
                + RawDataImportRequest.SUBMITTER_PARAM + "=" + URLEncoder.encode(submitter, "UTF-8");

        // Duration might be missing in old reports
        if (durationSeconds != null) {
            url += "&" + RawDataImportRequest.DURATION_PARAM + "=" + URLEncoder.encode(durationSeconds, "UTF-8");
        }

        return url;
    }

    /**
     * handles calling invokeURL twice (once to reset the instance and again to save the new one) as
     * a separate job submitted to the thread pool
     *
     * @param serverBase
     * @param resetUrlString
     * @param saveUrlString
     */
    private void sendDataToServer(final String serverBase, final String resetUrlString, final String saveUrlString,
            final String key) {
        threadPool.execute(new Runnable() {

            @Override
            public void run() {
                try {
                    SwingUtilities.invokeLater(new StatusUpdater(currentStep++, SAVING_DATA.get(locale)));
                    if (resetUrlString != null) {
                        invokeUrl(serverBase, resetUrlString, true, key);
                    }
                    invokeUrl(serverBase, saveUrlString, true, key);
                } catch (Exception e) {
                    errorIds.add(saveUrlString);
                    System.err.println("Could not invoke rest services: " + e);
                    e.printStackTrace(System.err);
                }
            }
        });
    }

    /**
     * calls a remote api by posting to the url passed in.
     *
     * @param serverBase
     * @param urlString
     * @throws Exception
     */
    protected void invokeUrl(String serverBase, String urlString, boolean shouldSign, String key) throws Exception {

        BulkDataServiceClient.fetchDataFromServer(serverBase + SERVLET_URL, urlString, shouldSign, key);
    }

    @Override
    public Map<Integer, String> validate(File file) {
        Map<Integer, String> errorMap = new HashMap<Integer, String>();

        try {
            Sheet sheet = getDataSheet(file);
            Row headerRow = sheet.getRow(0);
            boolean firstQuestionFound = false;

            for (Cell cell : headerRow) {
                String cellValue = cell.getStringCellValue();
                if (firstQuestionFound && !cellValue.matches(".+\\|.+")) {
                    errorMap.put(cell.getColumnIndex(),
                            String.format("The header \"%s\" can not be imported", cellValue));
                    break;
                } else {
                    if (!firstQuestionFound && cellValue.matches("[0-9]+\\|.+")) {
                        firstQuestionFound = true;
                        int idx = cell.getColumnIndex();
                        if (!(idx == 4 || idx == 6)) {
                            errorMap.put(idx, "Found the first question at the wrong column index");
                            break;
                        }
                    }
                }
            }
            if (!firstQuestionFound) {
                errorMap.put(-1, "A question could not be found");
            }

        } catch (Exception e) {
            errorMap.put(-1, e.getMessage());
        }

        return errorMap;
    }

    public static void main(String[] args) {
        if (args.length != 4) {
            log.error(
                    "Error.\nUsage:\n\tjava org.waterforpeople.mapping.dataexport.RawDataSpreadsheetImporter <file> <serverBase> <surveyId>");
            System.exit(1);
        }
        File file = new File(args[0].trim());
        String serverBaseArg = args[1].trim();
        RawDataSpreadsheetImporter r = new RawDataSpreadsheetImporter();
        Map<String, String> configMap = new HashMap<String, String>();
        configMap.put(SURVEY_CONFIG_KEY, args[2].trim());
        configMap.put("apiKey", args[3].trim());
        r.executeImport(file, serverBaseArg, configMap);
    }

    public Long getSurveyId() {
        return surveyId;
    }

    public void setSurveyId(Long surveyId) {
        this.surveyId = surveyId;
    }

    private String parseCellAsString(Cell cell) {
        String val = null;
        if (cell != null) {
            switch (cell.getCellType()) {
            case Cell.CELL_TYPE_BOOLEAN:
                val = cell.getBooleanCellValue() + "";
                break;
            case Cell.CELL_TYPE_NUMERIC:
                val = cell.getNumericCellValue() + "";
                break;
            default:
                val = cell.getStringCellValue();
                break;
            }
        }
        return val;
    }

    /**
     * Private class to handle updating of the UI thread from our worker thread
     */
    private class StatusUpdater implements Runnable {

        private int step;
        private String msg;
        private boolean isComplete;

        public StatusUpdater(int step, String message) {
            this(step, message, false);
        }

        public StatusUpdater(int step, String message, boolean isComplete) {
            msg = message;
            this.step = step;
            this.isComplete = isComplete;
        }

        @Override
        public void run() {
            if (progressDialog != null) {
                progressDialog.update(step, msg, isComplete);
            }
        }
    }

}