fr.inria.oak.paxquery.pact.io.XmlConsTreePatternOutputFormat.java Source code

Java tutorial

Introduction

Here is the source code for fr.inria.oak.paxquery.pact.io.XmlConsTreePatternOutputFormat.java

Source

/*******************************************************************************
 * Copyright (C) 2013, 2014, 2015 by Inria and Paris-Sud University
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package fr.inria.oak.paxquery.pact.io;

import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;

import javax.xml.bind.DatatypeConverter;

import org.apache.commons.lang.SerializationUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.flink.api.java.record.io.FileOutputFormat;
import org.apache.flink.api.java.record.operators.FileDataSink;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.FileSystem.WriteMode;
import org.apache.flink.types.Record;
import org.apache.flink.types.StringValue;

import fr.inria.oak.paxquery.common.datamodel.metadata.NestedMetadata;
import fr.inria.oak.paxquery.common.xml.construction.ConstructionTreePattern;
import fr.inria.oak.paxquery.common.xml.construction.ConstructionTreePatternEdge;
import fr.inria.oak.paxquery.common.xml.construction.ConstructionTreePatternNode;
import fr.inria.oak.paxquery.common.xml.construction.ConstructionTreePatternNode.ContentType;
import fr.inria.oak.paxquery.pact.configuration.PACTOperatorsConfiguration;
import fr.inria.oak.paxquery.pact.datamodel.type.RecordList;

/**
 * Output format that generates XML results out of records based on
 * a construction tree pattern.
 *
 */
public class XmlConsTreePatternOutputFormat extends FileOutputFormat {

    private static final Log logger = LogFactory.getLog(XmlConsTreePatternOutputFormat.class);

    private Writer wrt;

    private NestedMetadata signature;

    private ConstructionTreePattern ctp;

    @Override
    public void configure(Configuration parameters) {
        super.configure(parameters);

        this.setWriteMode(WriteMode.OVERWRITE);

        // read your own parameters
        String recordsSignatureEncoded = parameters.getString(PACTOperatorsConfiguration.NRSMD1_BINARY.toString(),
                null);
        byte[] recordsSignatureBytes = DatatypeConverter.parseBase64Binary(recordsSignatureEncoded);
        final NestedMetadata signature = (NestedMetadata) SerializationUtils.deserialize(recordsSignatureBytes);
        this.signature = signature;

        String ctpEncoded = parameters.getString(PACTOperatorsConfiguration.CTP_BINARY.toString(), null);
        byte[] ctpBytes = DatatypeConverter.parseBase64Binary(ctpEncoded);
        final ConstructionTreePattern ctp = (ConstructionTreePattern) SerializationUtils.deserialize(ctpBytes);
        this.ctp = ctp;
    }

    /*
    @Override
    public void open(int taskNumber) throws IOException {
       super.open(taskNumber);
            
       this.wrt = new OutputStreamWriter(new BufferedOutputStream(this.stream, 4096));
       this.wrt.write(this.apply.getBefore());
    }
    */
    @Override
    public void open(int taskNumber, int numTasks) throws IOException {
        super.open(taskNumber, numTasks);

        this.wrt = new OutputStreamWriter(new BufferedOutputStream(this.stream, 4096));
    }

    @Override
    public void close() throws IOException {
        this.wrt.close();
        super.close();
    }

    @Override
    public void writeRecord(Record record) throws IOException {
        RecordList recordList = new RecordList();
        recordList.add(record);
        StringBuilder sb = writeRecord(recordList, this.signature, new ConstructionTreePattern[] { this.ctp },
                new AtomicBoolean[] { new AtomicBoolean() })[0][0];
        this.wrt.append(sb);
    }

    private void printRecord(Record record) {
        System.out.println("RECORD: ");
        for (int i = 0; i < record.getNumFields(); i++) {
            System.out.print(i + ": " + record.getField(i, StringValue.class));
        }
        System.out.println();
    }

    private StringBuilder[][] writeRecord(RecordList listRecords, NestedMetadata signature,
            ConstructionTreePattern[] ctps, AtomicBoolean[] nullResults) throws IOException {
        StringBuilder[][] result = new StringBuilder[listRecords.size()][ctps.length];
        for (int i = 0; i < listRecords.size(); i++) {
            for (int j = 0; j < ctps.length; j++) {
                result[i][j] = new StringBuilder();
            }
        }

        for (int i = 0; i < listRecords.size(); i++) { //For each record
            Record record = listRecords.get(i);

            //printRecord(record);

            for (int j = 0; j < ctps.length; j++) { //For each CTP
                ConstructionTreePattern ctp = ctps[j];
                ConstructionTreePatternNode ctpNode = ctp.getRoot();
                List<ConstructionTreePatternEdge> childrenEdges = ctp.getChildrenEdges().get(ctpNode);

                StringBuilder[][] resultChildren = null;
                AtomicBoolean[] nullResultChildren = null;
                if (childrenEdges != null && childrenEdges.size() != 0) {
                    //Create list CTPs from child nodes
                    ConstructionTreePattern[] newCtps = new ConstructionTreePattern[childrenEdges.size()];
                    for (int k = 0; k < newCtps.length; k++) {
                        newCtps[k] = ConstructionTreePattern.deepCopySubtree(childrenEdges.get(k).getChild());
                    }
                    //Holder for booleans for null results
                    nullResultChildren = new AtomicBoolean[childrenEdges.size()];
                    for (int k = 0; k < nullResultChildren.length; k++) {
                        nullResultChildren[k] = new AtomicBoolean();
                    }
                    //Create list records
                    RecordList newListRecords;
                    NestedMetadata newSignature;
                    if (ctpNode.getContentType() == ContentType.VARIABLE_PATH) {
                        newListRecords = record.getField(ctpNode.getVarPath().get(0), RecordList.class);
                        newSignature = signature.getNestedChild(ctpNode.getVarPath().get(0));
                    } else {
                        newListRecords = new RecordList();
                        newListRecords.add(record);
                        newSignature = signature;
                    }
                    //Obtain result children
                    resultChildren = writeRecord(newListRecords, newSignature, newCtps, nullResultChildren);
                }

                //Construct the subtree starting at this node
                StringBuilder ctpNodeResult = new StringBuilder();
                boolean allNull = allNullUnderNode(nullResultChildren);
                if (!ctpNode.isOptional() || !allNull) {
                    //
                    if (ctpNode.getContentType() == ContentType.ELEMENT) {
                        ctpNodeResult.append("<" + ctpNode.getValue());
                        int k;
                        for (k = 0; childrenEdges != null && k < childrenEdges.size()
                                && childrenEdges.get(k).getChild().getContentType() == ContentType.ATTRIBUTE; k++) {
                            ctpNodeResult.append(" " + resultChildren[0][k].toString());
                        }
                        if (childrenEdges == null || k == childrenEdges.size()) {
                            ctpNodeResult.append("/>");
                        } else {
                            ctpNodeResult.append(">");
                            for (; k < childrenEdges.size(); k++) {
                                ctpNodeResult.append(resultChildren[0][k].toString());
                            }
                            ctpNodeResult.append("</" + ctpNode.getValue() + ">");
                        }
                    } else if (ctpNode.getContentType() == ContentType.ATTRIBUTE) {
                        ctpNodeResult.append(ctpNode.getValue() + "=\"" + resultChildren[0][0].toString() + "\"");
                    } else if (ctpNode.getContentType() == ContentType.ELEMENT_VALUE) {
                        ctpNodeResult.append(ctpNode.getValue());
                    } else if (ctpNode.getContentType() == ContentType.ATTRIBUTE_VALUE) {
                        ctpNodeResult.append(ctpNode.getValue());
                    } else if (ctpNode.getContentType() == ContentType.VARIABLE_PATH
                            && (childrenEdges == null || childrenEdges.size() == 0)) {
                        allNull = true;
                        //Create content from the record
                        List<Integer> varPath = ctpNode.getVarPath();
                        if (varPath.size() == 1) {
                            StringValue v = record.getField(varPath.get(0), StringValue.class);
                            if (!v.getValue().equals("\0")) {
                                ctpNodeResult.append(v);
                                allNull = false;
                            }
                        } else {
                            RecordList list = record.getField(varPath.get(0), RecordList.class);
                            for (int k = 1; k < varPath.size() - 1; k++) {
                                RecordList newList = new RecordList();
                                for (Record nestedRecord : list) {
                                    newList.addAll(nestedRecord.getField(varPath.get(k), RecordList.class));
                                }
                                list = newList;
                            }
                            for (Record nestedRecord : list) {
                                StringValue v = nestedRecord.getField(varPath.get(varPath.size() - 1),
                                        StringValue.class);
                                if (!v.getValue().equals("\0")) {
                                    ctpNodeResult.append(v);
                                    allNull = false;
                                }
                            }
                        }
                    } else { //childrenEdges != null
                        //Copy content from children
                        for (int x = 0; x < resultChildren.length; x++) {
                            for (int k = 0; k < resultChildren[x].length; k++) {
                                ctpNodeResult.append(resultChildren[x][k].toString());
                            }
                        }
                    }
                }
                result[i][j].append(ctpNodeResult);
                nullResults[j].set(allNull);
            }
        }

        return result;
    }

    //Return true if all elements in the array are true
    private boolean allNullUnderNode(AtomicBoolean[] nullResults) {
        if (nullResults == null) {
            return false;
        }

        for (AtomicBoolean nullResult : nullResults) {
            if (!nullResult.get()) {
                return false;
            }
        }

        return true;
    }

    // ============================================================================================

    /**
     * Creates a configuration builder that can be used to set the input format's parameters to the config in a fluent
     * fashion.
     * 
     * @return A config builder for setting parameters.
     */
    public static ConfigBuilder configureRecordFormat(FileDataSink target) {
        return new ConfigBuilder(target.getParameters());
    }

    /**
     * Abstract builder used to set parameters to the input format's configuration in a fluent way.
     */
    protected static abstract class AbstractConfigBuilder<T> extends FileOutputFormat.AbstractConfigBuilder<T> {
        // --------------------------------------------------------------------

        /**
         * Creates a new builder for the given configuration.
         * 
         * @param targetConfig The configuration into which the parameters will be written.
         */
        protected AbstractConfigBuilder(Configuration config) {
            super(config);
        }

        // --------------------------------------------------------------------

        public T setSignature(NestedMetadata signature) {
            final String encodedSignature = DatatypeConverter
                    .printBase64Binary(SerializationUtils.serialize(signature));
            this.config.setString(PACTOperatorsConfiguration.NRSMD1_BINARY.toString(),
                    String.valueOf(encodedSignature));

            @SuppressWarnings("unchecked")
            T ret = (T) this;
            return ret;
        }

        public T setConstructionTreePattern(ConstructionTreePattern ctp) {
            final String encodedCtp = DatatypeConverter.printBase64Binary(SerializationUtils.serialize(ctp));
            this.config.setString(PACTOperatorsConfiguration.CTP_BINARY.toString(), String.valueOf(encodedCtp));

            @SuppressWarnings("unchecked")
            T ret = (T) this;
            return ret;
        }
    }

    /**
     * A builder used to set parameters to the input format's configuration in a fluent way.
     */
    public static final class ConfigBuilder extends AbstractConfigBuilder<ConfigBuilder> {
        /**
         * Creates a new builder for the given configuration.
         * 
         * @param targetConfig The configuration into which the parameters will be written.
         */
        protected ConfigBuilder(Configuration targetConfig) {
            super(targetConfig);
        }

    }
}