solidbase.core.plugins.DumpJSON.java Source code

Introduction

Here is the source code for solidbase.core.plugins.DumpJSON.java, a SolidBase CommandListener plugin that handles DUMP JSON commands and exports the result of a query to a JSON record-stream dump file.

Source

/*--
 * Copyright 2011 René M. de Bloois
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package solidbase.core.plugins;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.sql.Blob;
import java.sql.Clob;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Types;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPOutputStream;

import org.apache.commons.lang.StringUtils;

import solidbase.core.Command;
import solidbase.core.CommandListener;
import solidbase.core.CommandProcessor;
import solidbase.core.FatalException;
import solidbase.core.SourceException;
import solidbase.core.SystemException;
import solidbase.util.Assert;
import solidbase.util.Counter;
import solidbase.util.FixedCounter;
import solidbase.util.JDBCSupport;
import solidbase.util.JSONArray;
import solidbase.util.JSONObject;
import solidbase.util.JSONWriter;
import solidbase.util.SQLTokenizer;
import solidbase.util.SQLTokenizer.Token;
import solidbase.util.TimedCounter;
import solidstack.io.DeferringWriter;
import solidstack.io.FileResource;
import solidstack.io.Resource;
import solidstack.io.Resources;
import solidstack.io.SourceReaders;
import solidstack.script.scopes.AbstractScope;

/**
 * This plugin executes DUMP JSON statements.
 *
 * @author René M. de Bloois
 * @since Aug 12, 2011
 */
// TODO To compressed file
public class DumpJSON implements CommandListener {
    static private final Pattern triggerPattern = Pattern.compile("\\s*DUMP\\s+JSON\\s+.*",
            Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

    //@Override
    // TODO Escape dynamic file names, because illegal characters may be generated
    // TODO Export multiple tables to a single file. If no PK than sort on all columns. Schema name for import or not?
    public boolean execute(CommandProcessor processor, Command command, boolean skip) throws SQLException {
        if (!triggerPattern.matcher(command.getCommand()).matches())
            return false;

        if (command.isTransient()) {
            /* DUMP JSON DATE_CREATED ON | OFF */

            SQLTokenizer tokenizer = new SQLTokenizer(
                    SourceReaders.forString(command.getCommand(), command.getLocation()));

            // TODO Maybe DUMP JSON CONFIG or DUMP JSON SET
            // TODO What about other configuration settings?
            tokenizer.get("DUMP");
            tokenizer.get("JSON");
            tokenizer.get("DATE_CREATED"); // FIXME This should be CREATED_DATE
            Token t = tokenizer.get("ON", "OFF");
            tokenizer.get((String) null);

            // TODO I think we should have a scope that is restricted to the current file and a scope that gets inherited when running or including another file.
            AbstractScope scope = processor.getContext().getScope();
            scope.set("solidbase.dump_json.dateCreated", t.eq("ON")); // TODO Make this a constant

            return true;
        }

        if (skip)
            return true;

        Parsed parsed = parse(command);

        AbstractScope scope = processor.getContext().getScope();
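        // The createdDate header field is written by default; it is only suppressed when an earlier
        // transient DUMP JSON DATE_CREATED OFF command stored FALSE in the scope.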
        Object object = scope.get("solidbase.dump_json.dateCreated");
        boolean dateCreated = object == null || object instanceof Boolean && (Boolean) object;

        Resource jsvResource = new FileResource(new File(parsed.fileName)); // Relative to current folder

        try {
            OutputStream out = jsvResource.getOutputStream();
            if (parsed.gzip)
                out = new BufferedOutputStream(new GZIPOutputStream(out, 65536), 65536); // TODO Ctrl-C, close the outputstream?

            JSONWriter jsonWriter = new JSONWriter(out);
            try {
                Statement statement = processor.createStatement();
                try {
                    ResultSet result = statement.executeQuery(parsed.query);
                    ResultSetMetaData metaData = result.getMetaData();

                    // Define locals

                    int columns = metaData.getColumnCount();
                    int[] types = new int[columns];
                    String[] names = new String[columns];
                    boolean[] ignore = new boolean[columns];
                    FileSpec[] fileSpecs = new FileSpec[columns];
                    String[] schemaNames = new String[columns];
                    String[] tableNames = new String[columns];

                    // Analyze metadata

                    for (int i = 0; i < columns; i++) {
                        int col = i + 1;
                        String name = metaData.getColumnName(col).toUpperCase();
                        types[i] = metaData.getColumnType(col);
                        if (types[i] == Types.DATE && parsed.dateAsTimestamp)
                            types[i] = Types.TIMESTAMP;
                        names[i] = name;
                        if (parsed.columns != null) {
                            ColumnSpec columnSpec = parsed.columns.get(name);
                            if (columnSpec != null)
                                if (columnSpec.skip)
                                    ignore[i] = true;
                                else
                                    fileSpecs[i] = columnSpec.toFile;
                        }
                        if (parsed.coalesce != null && parsed.coalesce.notFirst(name))
                            ignore[i] = true;
                        // TODO STRUCT serialize
                        // TODO This must be optional and not the default
                        else if (types[i] == Types.STRUCT /* 2002 */ || JDBCSupport.toTypeName(types[i]) == null)
                            ignore[i] = true;
                        tableNames[i] = StringUtils
                                .upperCase(StringUtils.defaultIfEmpty(metaData.getTableName(col), null));
                        schemaNames[i] = StringUtils
                                .upperCase(StringUtils.defaultIfEmpty(metaData.getSchemaName(col), null));
                    }

                    if (parsed.coalesce != null)
                        parsed.coalesce.bind(names);

                    // Write header

                    JSONObject properties = new JSONObject();
                    properties.set("version", "1.0");
                    properties.set("format", "record-stream");
                    properties.set("description", "SolidBase JSON Data Dump File");
                    properties.set("createdBy", new JSONObject("product", "SolidBase", "version", "2.0.0"));

                    if (dateCreated) {
                        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                        properties.set("createdDate", format.format(new Date()));
                    }

                    if (parsed.binaryFileName != null) {
                        // TODO FIXME Should be wrapped in a SourceException: solidbase.solidstack.io.FatalURISyntaxException: java.net.URISyntaxException: Illegal character in path at index 1: &{folder}/JIADHOCCH
                        Resource binResource = Resources.getResource(parsed.binaryFileName);
                        Resource resource = Resources.getResource(parsed.fileName);
                        properties.set("binaryFile", binResource.getPathFrom(resource).toString());
                    }

                    JSONArray fields = new JSONArray();
                    properties.set("fields", fields);
                    for (int i = 0; i < columns; i++)
                        if (!ignore[i]) {
                            JSONObject field = new JSONObject();
                            field.set("schemaName", schemaNames[i]);
                            field.set("tableName", tableNames[i]);
                            field.set("name", names[i]);
                            field.set("type", JDBCSupport.toTypeName(types[i])); // TODO Better error message when type is not recognized, for example Oracle's 2007 for a user type
                            FileSpec spec = fileSpecs[i];
                            if (spec != null && !spec.generator.isDynamic()) {
                                Resource fileResource = new FileResource(spec.generator.fileName);
                                field.set("file", fileResource.getPathFrom(jsvResource).toString());
                            }
                            fields.add(field);
                        }

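                    // When BINARY FILE is specified, binary values that are not redirected per column
                    // are appended to this single shared file and referenced by index and length.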
                    FileSpec binaryFile = parsed.binaryFileName != null
                            ? new FileSpec(true, parsed.binaryFileName, 0)
                            : null;

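                    // The dump starts with one formatted header object, followed by one JSON array
                    // per exported row: the "record-stream" format declared above.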
                    jsonWriter.writeFormatted(properties, 120);
                    jsonWriter.getWriter().write('\n');

                    Counter counter = null;
                    if (parsed.logRecords > 0)
                        counter = new FixedCounter(parsed.logRecords);
                    else if (parsed.logSeconds > 0)
                        counter = new TimedCounter(parsed.logSeconds);

                    try {
                        while (result.next()) {
                            Object[] values = new Object[columns];
                            for (int i = 0; i < values.length; i++)
                                values[i] = JDBCSupport.getValue(result, types, i);

                            if (parsed.coalesce != null)
                                parsed.coalesce.coalesce(values);

                            JSONArray array = new JSONArray();
                            for (int i = 0; i < columns; i++)
                                if (!ignore[i]) {
                                    Object value = values[i];
                                    if (value == null) {
                                        array.add(null);
                                        continue;
                                    }

                                    // TODO 2 columns can't be written to the same dynamic filename

                                    FileSpec spec = fileSpecs[i];
                                    if (spec != null) // The column is redirected to its own file
                                    {
                                        String relFileName = null;
                                        int startIndex;
                                        if (spec.binary) {
                                            if (spec.generator.isDynamic()) {
                                                String fileName = spec.generator.generateFileName(result);
                                                Resource fileResource = new FileResource(fileName);
                                                spec.out = fileResource.getOutputStream();
                                                spec.index = 0;
                                                relFileName = fileResource.getPathFrom(jsvResource).toString();
                                            } else if (spec.out == null) {
                                                String fileName = spec.generator.generateFileName(result);
                                                Resource fileResource = new FileResource(fileName);
                                                spec.out = fileResource.getOutputStream();
                                            }
                                            if (value instanceof Blob) {
                                                InputStream in = ((Blob) value).getBinaryStream();
                                                startIndex = spec.index;
                                                byte[] buf = new byte[4096];
                                                for (int read = in.read(buf); read >= 0; read = in.read(buf)) {
                                                    spec.out.write(buf, 0, read);
                                                    spec.index += read;
                                                }
                                                in.close();
                                            } else if (value instanceof byte[]) {
                                                startIndex = spec.index;
                                                spec.out.write((byte[]) value);
                                                spec.index += ((byte[]) value).length;
                                            } else
                                                throw new SourceException(names[i] + " ("
                                                        + value.getClass().getName()
                                                        + ") is not a binary column. Only binary columns like BLOB, RAW, BINARY VARYING can be written to a binary file",
                                                        command.getLocation());
                                            if (spec.generator.isDynamic()) {
                                                spec.out.close();
                                                JSONObject ref = new JSONObject();
                                                ref.set("file", relFileName);
                                                ref.set("size", spec.index - startIndex);
                                                array.add(ref);
                                            } else {
                                                JSONObject ref = new JSONObject();
                                                ref.set("index", startIndex);
                                                ref.set("length", spec.index - startIndex);
                                                array.add(ref);
                                            }
                                        } else {
                                            if (spec.generator.isDynamic()) {
                                                String fileName = spec.generator.generateFileName(result);
                                                Resource fileResource = new FileResource(fileName);
                                                spec.writer = new DeferringWriter(spec.threshold, fileResource,
                                                        jsonWriter.getEncoding());
                                                spec.index = 0;
                                                relFileName = fileResource.getPathFrom(jsvResource).toString();
                                            } else if (spec.writer == null) {
                                                String fileName = spec.generator.generateFileName(result);
                                                Resource fileResource = new FileResource(fileName);
                                                spec.writer = new OutputStreamWriter(fileResource.getOutputStream(),
                                                        jsonWriter.getEncoding());
                                            }
                                            if (value instanceof Blob || value instanceof byte[])
                                                throw new SourceException(names[i]
                                                        + " is a binary column. Binary columns like BLOB, RAW, BINARY VARYING cannot be written to a text file",
                                                        command.getLocation());
                                            if (value instanceof Clob) {
                                                Reader in = ((Clob) value).getCharacterStream();
                                                startIndex = spec.index;
                                                char[] buf = new char[4096];
                                                for (int read = in.read(buf); read >= 0; read = in.read(buf)) {
                                                    spec.writer.write(buf, 0, read);
                                                    spec.index += read;
                                                }
                                                in.close();
                                            } else {
                                                String val = value.toString();
                                                startIndex = spec.index;
                                                spec.writer.write(val);
                                                spec.index += val.length();
                                            }
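                                            // For dynamic text files the DeferringWriter buffers values up to the
                                            // THRESHOLD: buffered values are emitted inline, larger ones end up in
                                            // the generated file and are referenced by name and size.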
                                            if (spec.generator.isDynamic()) {
                                                DeferringWriter writer = (DeferringWriter) spec.writer;
                                                if (writer.isBuffered())
                                                    array.add(writer.clearBuffer());
                                                else {
                                                    JSONObject ref = new JSONObject();
                                                    ref.set("file", relFileName);
                                                    ref.set("size", spec.index - startIndex);
                                                    array.add(ref);
                                                }
                                                writer.close();
                                            } else {
                                                JSONObject ref = new JSONObject();
                                                ref.set("index", startIndex);
                                                ref.set("length", spec.index - startIndex);
                                                array.add(ref);
                                            }
                                        }
                                    } else if (value instanceof Clob)
                                        array.add(((Clob) value).getCharacterStream());
                                    else if (binaryFile != null
                                            && (value instanceof Blob || value instanceof byte[])) {
                                        if (binaryFile.out == null) {
                                            String fileName = binaryFile.generator.generateFileName(null);
                                            Resource fileResource = new FileResource(fileName);
                                            binaryFile.out = fileResource.getOutputStream();
                                            if (parsed.binaryGzip)
                                                binaryFile.out = new BufferedOutputStream(
                                                        new GZIPOutputStream(binaryFile.out, 65536), 65536); // TODO Ctrl-C, close the outputstream?
                                        }
                                        int startIndex = binaryFile.index;
                                        if (value instanceof Blob) {
                                            InputStream in = ((Blob) value).getBinaryStream();
                                            byte[] buf = new byte[4096];
                                            for (int read = in.read(buf); read >= 0; read = in.read(buf)) {
                                                binaryFile.out.write(buf, 0, read);
                                                binaryFile.index += read;
                                            }
                                            in.close();
                                        } else {
                                            binaryFile.out.write((byte[]) value);
                                            binaryFile.index += ((byte[]) value).length;
                                        }
                                        JSONObject ref = new JSONObject();
                                        ref.set("index", startIndex);
                                        ref.set("length", binaryFile.index - startIndex);
                                        array.add(ref);
                                    } else
                                        array.add(value);
                                }

                            for (ListIterator<Object> i = array.iterator(); i.hasNext();) {
                                Object value = i.next();
                                if (value instanceof java.sql.Date || value instanceof java.sql.Time
                                        || value instanceof java.sql.Timestamp || value instanceof java.sql.RowId)
                                    i.set(value.toString());
                            }
                            jsonWriter.write(array);
                            jsonWriter.getWriter().write('\n');

                            if (counter != null && counter.next())
                                processor.getProgressListener()
                                        .println("Exported " + counter.total() + " records.");
                        }
                        if (counter != null && counter.needFinal())
                            processor.getProgressListener().println("Exported " + counter.total() + " records.");
                    } finally {
                        // Close files that have been left open
                        for (FileSpec fileSpec : fileSpecs)
                            if (fileSpec != null) {
                                if (fileSpec.out != null)
                                    fileSpec.out.close();
                                if (fileSpec.writer != null)
                                    fileSpec.writer.close();
                            }
                        if (binaryFile != null && binaryFile.out != null)
                            binaryFile.out.close();
                    }
                } finally {
                    processor.closeStatement(statement, true);
                }
            } finally {
                jsonWriter.close();
            }
        } catch (IOException e) {
            throw new SystemException(e);
        }

        return true;
    }

    /**
     * Parses the given command.
     *
     * @param command The command to be parsed.
     * @return A structure representing the parsed command.
     */
    static protected Parsed parse(Command command) {
        /*
        DUMP JSON
        DATE AS TIMESTAMP
        COALESCE "<col1>", "<col2>"
        LOG EVERY n RECORDS|SECONDS
        FILE "file" GZIP
        BINARY FILE "file" GZIP
        COLUMN col1, col2 TO BINARY|TEXT FILE "file" THRESHOLD n
        */
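        // An illustrative statement matching the grammar above (not taken from the original sources):
        //
        //   DUMP JSON
        //   DATE AS TIMESTAMP
        //   LOG EVERY 10000 RECORDS
        //   FILE "export/users.json.gz" GZIP
        //   COLUMN PHOTO TO BINARY FILE "export/photo_?1.bin"
        //   SELECT * FROM USERS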

        Parsed result = new Parsed();

        SQLTokenizer tokenizer = new SQLTokenizer(
                SourceReaders.forString(command.getCommand(), command.getLocation()));

        tokenizer.get("DUMP");
        tokenizer.get("JSON");

        Token t = tokenizer.get("DATE", "COALESCE", "LOG", "FILE");

        if (t.eq("DATE")) {
            tokenizer.get("AS");
            tokenizer.get("TIMESTAMP");

            result.dateAsTimestamp = true;

            t = tokenizer.get("COALESCE", "LOG", "FILE");
        }

        while (t.eq("COALESCE")) {
            if (result.coalesce == null)
                result.coalesce = new Coalescer();

            t = tokenizer.get();
            if (!t.isString())
                throw new SourceException("Expecting column name enclosed in double quotes, not [" + t + "]",
                        tokenizer.getLocation());
            result.coalesce.first(t.stripQuotes());

            t = tokenizer.get(",");
            do {
                t = tokenizer.get();
                if (!t.isString())
                    throw new SourceException("Expecting column name enclosed in double quotes, not [" + t + "]",
                            tokenizer.getLocation());
                result.coalesce.next(t.stripQuotes());

                t = tokenizer.get();
            } while (t.eq(","));

            result.coalesce.end();
        }

        tokenizer.expect(t, "LOG", "FILE");

        if (t.eq("LOG")) {
            tokenizer.get("EVERY");
            t = tokenizer.get();
            if (!t.isNumber())
                throw new SourceException("Expecting a number, not [" + t + "]", tokenizer.getLocation());

            int interval = Integer.parseInt(t.getValue());
            t = tokenizer.get("RECORDS", "SECONDS");
            if (t.eq("RECORDS"))
                result.logRecords = interval;
            else
                result.logSeconds = interval;

            tokenizer.get("FILE");
        }

        t = tokenizer.get();
        if (!t.isString())
            throw new SourceException("Expecting filename enclosed in double quotes, not [" + t + "]",
                    tokenizer.getLocation());
        result.fileName = t.stripQuotes();

        t = tokenizer.get();
        if (t.eq("GZIP")) {
            result.gzip = true;
            t = tokenizer.get();
        }

        if (t.eq("BINARY")) {
            tokenizer.get("FILE");
            t = tokenizer.get();
            if (!t.isString())
                throw new SourceException("Expecting filename enclosed in double quotes, not [" + t + "]",
                        tokenizer.getLocation());
            result.binaryFileName = t.stripQuotes();

            t = tokenizer.get();
            if (t.eq("GZIP")) {
                result.binaryGzip = true;
                t = tokenizer.get();
            }
        }

        if (t.eq("COLUMN")) {
            result.columns = new HashMap<String, ColumnSpec>();
            while (t.eq("COLUMN")) {
                List<Token> columns = new ArrayList<Token>();
                columns.add(tokenizer.get());
                t = tokenizer.get();
                while (t.eq(",")) {
                    columns.add(tokenizer.get());
                    t = tokenizer.get();
                }
                tokenizer.push(t);

                ColumnSpec columnSpec;
                t = tokenizer.get("TO", "SKIP");
                if (t.eq("TO")) {
                    t = tokenizer.get("BINARY", "TEXT");
                    boolean binary = t.eq("BINARY");
                    tokenizer.get("FILE");
                    t = tokenizer.get();
                    String fileName = t.getValue();
                    if (!fileName.startsWith("\""))
                        throw new SourceException("Expecting filename enclosed in double quotes, not [" + t + "]",
                                tokenizer.getLocation());
                    fileName = fileName.substring(1, fileName.length() - 1);

                    t = tokenizer.get();
                    int threshold = 0;
                    if (t.eq("THRESHOLD")) {
                        threshold = Integer.parseInt(tokenizer.get().getValue());
                        t = tokenizer.get();
                    }

                    columnSpec = new ColumnSpec(false, new FileSpec(binary, fileName, threshold));
                } else {
                    columnSpec = new ColumnSpec(true, null);
                    t = tokenizer.get();
                }

                for (Token column : columns)
                    result.columns.put(column.getValue().toUpperCase(), columnSpec);
            }
        }
        tokenizer.push(t);

        result.query = tokenizer.getRemaining();

        return result;
    }

    //@Override
    public void terminate() {
        // Nothing to clean up
    }

    /**
     * A parsed command.
     *
     * @author René M. de Bloois
     */
    static protected class Parsed {
        /** The file path to export to */
        protected String fileName;
        /** Compress the main file with GZIP */
        protected boolean gzip;

        /** The file that binary column values are appended to */
        protected String binaryFileName;
        /** Compress the binary file with GZIP */
        protected boolean binaryGzip;

        /** The query */
        protected String query;

        /** Export DATE columns as TIMESTAMP */
        protected boolean dateAsTimestamp;

        /** Which columns need to be coalesced */
        protected Coalescer coalesce;

        /** Log progress every this many records (0 = disabled) */
        protected int logRecords;
        /** Log progress every this many seconds (0 = disabled) */
        protected int logSeconds;

        /** Per column: skip it or redirect it to a separate file */
        protected Map<String, ColumnSpec> columns;
    }

    static protected class FileSpec {
        protected boolean binary;
        protected int threshold;

        protected FileNameGenerator generator;
        protected OutputStream out;
        protected Writer writer;
        protected int index;

        protected FileSpec(boolean binary, String fileName, int threshold) {
            this.binary = binary;
            this.threshold = threshold;
            this.generator = new FileNameGenerator(fileName);
        }
    }

    static protected class ColumnSpec {
        protected boolean skip;
        protected FileSpec toFile;

        protected ColumnSpec(boolean skip, FileSpec toFile) {
            this.skip = skip;
            this.toFile = toFile;
        }
    }

    static protected class FileNameGenerator {
        protected final Pattern pattern = Pattern.compile("\\?(\\d+)");
        protected String fileName;
        protected boolean generic;

        protected FileNameGenerator(String fileName) {
            this.fileName = fileName;
            this.generic = this.pattern.matcher(fileName).find();
        }

        protected boolean isDynamic() {
            return this.generic;
        }

        protected String generateFileName(ResultSet resultSet) {

            Matcher matcher = this.pattern.matcher(this.fileName);
            StringBuffer result = new StringBuffer();
            while (matcher.find()) {
                int index = Integer.parseInt(matcher.group(1));
                try {
                    matcher.appendReplacement(result, resultSet.getString(index));
                } catch (SQLException e) {
                    throw new SystemException(e);
                }
            }
            matcher.appendTail(result);
            return result.toString();
        }
    }

    static protected class Coalescer {
        //      protected Set< String > first = new HashSet();
        protected Set<String> next = new HashSet<String>();
        protected List<List<String>> names = new ArrayList<List<String>>();
        protected List<List<Integer>> indexes = new ArrayList<List<Integer>>();
        protected List<String> temp;
        protected List<Integer> temp2;

        public void first(String name) {
            //         this.first.add( name );

            Assert.isNull(this.temp);
            this.temp = new ArrayList<String>();
            this.temp.add(name);
            this.temp2 = new ArrayList<Integer>();
            this.temp2.add(null);
        }

        public void next(String name) {
            this.next.add(name);

            Assert.notNull(this.temp);
            this.temp.add(name);
            this.temp2.add(null);
        }

        public void end() {
            this.names.add(this.temp);
            this.indexes.add(this.temp2);
            this.temp = null;
            this.temp2 = null;
        }

        public boolean notFirst(String name) {
            return this.next.contains(name);
        }

        public void bind(String[] names) {
            for (int i = 0; i < this.names.size(); i++) {
                List<String> nams = this.names.get(i);
                List<Integer> indexes = this.indexes.get(i);
                for (int j = 0; j < nams.size(); j++) {
                    String name = nams.get(j);
                    int found = -1;
                    for (int k = 0; k < names.length; k++)
                        if (name.equals(names[k])) {
                            found = k;
                            break;
                        }
                    if (found < 0)
                        throw new FatalException("Coalesce column " + name + " not in result set");
                    indexes.set(j, found);
                }
            }
        }

        public void coalesce(Object[] values) {
            for (int i = 0; i < this.indexes.size(); i++) {
                List<Integer> indexes = this.indexes.get(i);
                int firstIndex = indexes.get(0);
                if (values[firstIndex] == null) {
                    Object found = null;
                    for (int j = 1; j < indexes.size(); j++) {
                        found = values[indexes.get(j)];
                        if (found != null)
                            break;
                    }
                    values[firstIndex] = found;
                }
            }
        }
    }
}
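
Example

The FileNameGenerator near the bottom of the listing replaces "?N" placeholders in a COLUMN ... TO ... FILE name with the value of result-set column N, which is how every row can be redirected to its own file. The following standalone sketch mirrors that substitution logic; the class name, the generate method and the String[] row standing in for the JDBC ResultSet are illustrative, not part of SolidBase:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class FileNamePatternDemo {
    // Same placeholder syntax as FileNameGenerator: "?" followed by a 1-based column index
    static final Pattern PLACEHOLDER = Pattern.compile("\\?(\\d+)");

    static String generate(String template, String[] row) {
        Matcher matcher = PLACEHOLDER.matcher(template);
        StringBuffer result = new StringBuffer();
        while (matcher.find()) {
            int index = Integer.parseInt(matcher.group(1));
            // quoteReplacement guards against '$' or '\' in the column value
            matcher.appendReplacement(result, Matcher.quoteReplacement(row[index - 1]));
        }
        matcher.appendTail(result);
        return result.toString();
    }

    public static void main(String[] args) {
        // Prints "photo_42_invoice.bin"
        System.out.println(generate("photo_?1_?2.bin", new String[] { "42", "invoice" }));
    }
}

In the plugin itself the same substitution is driven by ResultSet.getString(index) for each exported row, so a clause like COLUMN PHOTO TO BINARY FILE "export/photo_?1.bin" produces one file per row, named after the value in column 1.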