com.ge.research.semtk.load.utility.ImportSpecHandler.java Source code

Java tutorial

Introduction

Here is the source code for com.ge.research.semtk.load.utility.ImportSpecHandler.java

Source

/**
 ** Copyright 2016 General Electric Company
 **
 **
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** 
 **     http://www.apache.org/licenses/LICENSE-2.0
 ** 
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 */

package com.ge.research.semtk.load.utility;

import java.math.BigInteger;
import java.net.URI;
import java.sql.Time;
import java.time.Duration;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import java.util.UUID;

import org.apache.commons.lang.StringUtils;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;

import com.ge.research.semtk.belmont.Node;
import com.ge.research.semtk.belmont.NodeGroup;
import com.ge.research.semtk.belmont.PropertyItem;
import com.ge.research.semtk.load.transform.Transform;
import com.ge.research.semtk.load.transform.TransformInfo;
import com.ge.research.semtk.load.utility.UriResolver;
import com.ge.research.semtk.ontologyTools.OntologyInfo;
import com.ge.research.semtk.sparqlX.SparqlToXUtils;
import com.ge.research.semtk.utility.Utility;

public class ImportSpecHandler {

    JSONObject importspec = null; // ultimately, this should be replaced by a more complex object
    // that does not endlessly reprocess the same json.
    HashMap<String, Integer> headerPositioningInfo = new HashMap<String, Integer>();
    HashMap<String, Transform> transformsAvailable = new HashMap<String, Transform>();
    HashMap<String, String> textsAvailable = new HashMap<String, String>();
    HashMap<String, String> colsAvailable = new HashMap<String, String>();
    HashMap<String, Integer> colsUsed = new HashMap<String, Integer>(); // count of cols used.  Only includes counts > 0

    UriResolver uriResolver;

    /**
     * Builds a handler from an import spec. Column headers are not known yet; they must be
     * registered through {@link #setHeaders(ArrayList)} before {@code importRecord} is called.
     *
     * @param spec import spec JSON; its "columns", "nodes", "transforms", "texts" and "baseURI"
     *             entries are read here
     * @param oInf ontology info handed to the {@link UriResolver}
     * @throws Exception if one of the setup steps or the UriResolver construction fails
     */
    public ImportSpecHandler(JSONObject spec, OntologyInfo oInf) throws Exception {
        this.importspec = spec;

        // digest the individual spec sections into the lookup tables
        this.setupColumns();
        this.setupColsUsed();
        this.setupTransforms();
        this.setupTexts();

        // the resolver receives the user-supplied URI prefix exactly as given in the spec
        Object baseUri = this.importspec.get("baseURI");
        this.uriResolver = new UriResolver((String) baseUri, oInf);
    }

    /**
     * Builds a handler from an import spec and registers the column headers in the same step.
     *
     * @param spec    import spec JSON
     * @param headers column headers, in the order the columns appear in each record
     * @param oInf    ontology info handed to the {@link UriResolver}
     * @throws Exception if the two-argument constructor fails
     */
    public ImportSpecHandler(JSONObject spec, ArrayList<String> headers, OntologyInfo oInf) throws Exception {
        this(spec, oInf);
        setHeaders(headers);
    }

    /**
     * Records the zero-based position of every header.
     * Entries are added to the existing position map (nothing is cleared first); a header name
     * that occurs more than once ends up mapped to its last position.
     *
     * @param headers column headers, in record order
     */
    public void setHeaders(ArrayList<String> headers) {
        for (int position = 0; position < headers.size(); position++) {
            this.headerPositioningInfo.put(headers.get(position), position);
        }
    }

    /**
     * @return the URI prefix as reported by this handler's {@link UriResolver}
     */
    public String getUriPrefix() {
        return this.uriResolver.getUriPrefix();
    }

    /**
     * Build a Transform instance for every entry of the import spec's "transforms" array and
     * register it in transformsAvailable under its "transId".
     * Does nothing when the spec has no "transforms" entry.
     * @throws Exception 
     */
    private void setupTransforms() throws Exception {
        JSONArray specTransforms = (JSONArray) this.importspec.get("transforms");
        if (specTransforms == null) {
            // no transform section in the JSON: leave the lookup table empty
            return;
        }

        for (Object entry : specTransforms) {
            JSONObject transformJson = (JSONObject) entry;
            String transId = (String) transformJson.get("transId");
            String transType = (String) transformJson.get("transType");

            // collect "arg1" .. "argN", where N is the argument count TransformInfo reports for this type
            int argCount = TransformInfo.getArgCount(transType);
            HashMap<String, String> args = new HashMap<String, String>();
            for (int n = 1; n <= argCount; n++) {
                String argKey = "arg" + n;
                args.put(argKey, (String) transformJson.get(argKey));
            }

            Transform built = TransformInfo.buildTransform(transType, transId, args);
            this.transformsAvailable.put(transId, built);
        }
    }

    /**
     * Fill textsAvailable from the import spec's "texts" array, mapping each "textId" to its "text".
     * Does nothing when the spec has no "texts" entry.
     * @throws Exception 
     */
    private void setupTexts() throws Exception {
        JSONArray specTexts = (JSONArray) this.importspec.get("texts");
        if (specTexts == null) {
            return;
        }

        for (Object entry : specTexts) {
            JSONObject textJson = (JSONObject) entry;
            String textId = (String) textJson.get("textId");
            String text = (String) textJson.get("text");
            this.textsAvailable.put(textId, text);
        }
    }

    /**
     * Fill colsAvailable from the import spec's "columns" array, mapping each "colId" to its
     * lower-cased "colName".
     * Does nothing when the spec has no "columns" entry.
     * @throws Exception 
     */
    private void setupColumns() throws Exception {
        JSONArray specColumns = (JSONArray) this.importspec.get("columns");
        if (specColumns == null) {
            return;
        }

        for (Object entry : specColumns) {
            JSONObject columnJson = (JSONObject) entry;
            String colId = (String) columnJson.get("colId");
            String rawName = (String) columnJson.get("colName");
            // column names are stored lower-cased; header lookups use this lower-cased form as the key
            this.colsAvailable.put(colId, rawName.toLowerCase());
        }
    }

    /**
     * Populate nodegroup with a single record (row) of data.
     *
     * For every node listed in the import spec, the node's URI is built from its "mapping" and each
     * mapped property value is built from the property's "mapping", validated against the
     * property's value type and added to the matching PropertyItem. Afterwards
     * {@code pruneAllUnused(true)} is called on the nodegroup and every remaining node without an
     * instance value receives a generated URI.
     *
     * A node without a "mapping" entry is treated like one whose mapping produced a blank URI, and
     * a node without a "props" entry is treated as having no mapped properties (both keys are
     * optional in setupColsUsed as well).
     *
     * NOTE: this re-reads the import spec JSON for every record; caching a digested form of the
     * spec would avoid the repeated lookups.
     *
     * @param ng     nodegroup to fill; modified in place
     * @param record one row of data, positioned according to the headers given to setHeaders
     * @return the same nodegroup instance that was passed in
     * @throws Exception if ng or record is null, the headers were never set, the spec names a
     *                   sparqlID the nodegroup does not contain, a built URI is ill-formed, or a
     *                   value fails type validation
     */
    public NodeGroup importRecord(NodeGroup ng, ArrayList<String> record) throws Exception {
        if (ng == null) {
            throw new Exception("Null nodegroup passed to ImportSpecHandler.getValues");
        }
        if (record == null) {
            throw new Exception("incoming record cannot be null for ImportSpecHandler.getValues");
        }
        if (this.headerPositioningInfo.isEmpty()) {
            throw new Exception("the header positions were never set for the importspechandler");
        }

        JSONArray nodes = (JSONArray) this.importspec.get("nodes"); // the "nodes" part of the JSON

        for (Object nodeEntry : nodes) {
            JSONObject nodeJson = (JSONObject) nodeEntry;

            // get the related node from the NodeGroup
            String sparqlID = nodeJson.get("sparqlID").toString();
            Node node = ng.getNodeBySparqlID(sparqlID);
            if (node == null) {
                throw new Exception("import spec refers to a node that is not in the nodegroup: " + sparqlID);
            }

            // build the URI; a missing mapping behaves like a mapping that produced nothing
            JSONArray uriMapping = (JSONArray) nodeJson.get("mapping");
            String uri = (uriMapping == null) ? null : this.buildMappingString(uriMapping, record);

            if (StringUtils.isBlank(uri)) {
                // no usable URI: leave the node blank for now; it is either pruned or given a
                // generated URI at the end of this method
                node.setInstanceValue(null);
            } else {
                uri = this.uriResolver.getInstanceUriWithPrefix(node.getFullUriName(), uri);
                if (!SparqlToXUtils.isLegalURI(uri)) {
                    throw new Exception("Attempting to insert ill-formed URI: " + uri);
                }
                node.setInstanceValue(uri);
            }

            JSONArray props = (JSONArray) nodeJson.get("props");
            if (props == null) {
                continue;
            }

            ArrayList<PropertyItem> inScopeProperties = node.getPropertyItems();

            for (Object propEntry : props) {
                JSONObject propJson = (JSONObject) propEntry;
                String uriRelation = propJson.get("URIRelation").toString();

                JSONArray mapping = (JSONArray) propJson.get("mapping");
                String instanceValue = (mapping == null) ? null : this.buildMappingString(mapping, record);

                // find the property item with this relation and set its value; only the first match is used
                for (PropertyItem pi : inScopeProperties) {
                    if (pi.getUriRelationship().equals(uriRelation)) { // e.g. http://research.ge.com/print/testconfig#cellId

                        if (this.notEmpty(instanceValue)) {
                            if (pi.getValueType().equalsIgnoreCase("string")) {
                                instanceValue = SparqlToXUtils.safeSparqlString(instanceValue);
                            }

                            instanceValue = validateDataType(instanceValue, pi.getValueType());
                            pi.addInstanceValue(instanceValue);
                        }
                        break;
                    }
                }
            }
        }

        // prune nodes that no longer belong (no uri and no properties)
        ng.pruneAllUnused(true);

        // set URI for nulls
        this.setURIsForBlankNodes(ng);

        return ng;
    }

    /**
     * Return a pointer to every PropertyItem in ng that has a mapping in the import spec.
     *
     * A property counts as mapped when its "mapping" array exists and is non-empty. Nodes without
     * a "props" entry are skipped (the key is optional in setupColsUsed as well).
     *
     * @param ng nodegroup whose nodes are looked up by the spec's sparqlIDs
     * @return the result of Node.getPropertyByURIRelation for every mapped property, in spec order
     *         (NOTE(review): entries are added as returned; confirm whether that lookup can yield null)
     */
    public ArrayList<PropertyItem> getMappedPropItems(NodeGroup ng) {
        ArrayList<PropertyItem> mapped = new ArrayList<PropertyItem>();

        JSONArray nodes = (JSONArray) this.importspec.get("nodes");

        // loop through the json nodes in the import spec
        for (Object nodeEntry : nodes) {
            JSONObject nodeJson = (JSONObject) nodeEntry;

            // get the related node from the NodeGroup
            String sparqlID = nodeJson.get("sparqlID").toString();
            Node snode = ng.getNodeBySparqlID(sparqlID);

            JSONArray props = (JSONArray) nodeJson.get("props");
            if (props == null) {
                // node carries no property section at all, so it has nothing mapped
                continue;
            }

            for (Object propEntry : props) {
                JSONObject propJson = (JSONObject) propEntry;
                String uriRelation = propJson.get("URIRelation").toString();

                // if propertyJson has a mapping, return the PropertyItem
                JSONArray propMapping = (JSONArray) propJson.get("mapping");
                if (propMapping != null && !propMapping.isEmpty()) {
                    mapped.add(snode.getPropertyByURIRelation(uriRelation));
                }
            }
        }

        return mapped;
    }

    /**
     * Build a value from a mapping array and a data record.
     *
     * Each mapping item is either a text item ("textId"), whose registered text is appended, or a
     * column item ("colId"), whose column value is run through the item's transforms and appended.
     *
     * @param mappingArrayJson mapping items, processed in order
     * @param record           one row of data, positioned according to the registered headers
     * @return the concatenated result, or null as soon as any referenced column is blank or
     *         lies beyond the end of the record
     * @throws Exception if a textId was never registered, a column cannot be found in the header
     *                   list, or a mapping item is neither a text nor a column item
     */
    private String buildMappingString(JSONArray mappingArrayJson, ArrayList<String> record) throws Exception {
        StringBuilder built = new StringBuilder();

        for (Object entry : mappingArrayJson) {
            JSONObject mapItem = (JSONObject) entry;

            if (mapItem.containsKey("textId")) {
                String id = mapItem.get("textId").toString();

                // a map lookup never throws, so an unknown id has to be detected explicitly
                if (!this.textsAvailable.containsKey(id)) {
                    throw new Exception("Failed to look up textId: " + id);
                }

                // a registered but empty (or null) text contributes nothing
                String text = this.textsAvailable.get(id);
                if (!StringUtils.isEmpty(text)) {
                    built.append(text);
                }

            } else if (mapItem.containsKey("colId")) {
                String id = mapItem.get("colId").toString();

                // colId -> (lower-cased) column name -> position in the record
                String columnName = this.colsAvailable.get(id);
                Integer pos = this.headerPositioningInfo.get(columnName);
                if (pos == null) {
                    throw new Exception("Cannot find column in header list.");
                }

                // a record that is shorter than the header row counts as an empty column
                String colText = (pos >= 0 && pos < record.size()) ? record.get(pos) : "";

                if (StringUtils.isBlank(colText)) {
                    // found an empty column
                    return null;
                }
                built.append(this.applyTransforms(colText, mapItem));

            } else {
                throw new Exception("importSpec mapping item has no known type: " + mapItem.toString());
            }
        }
        return built.toString();
    }

    /**
     * Sets this.colsUsed to the number of times each column id is referenced by a node mapping
     * or a property mapping in the import spec. Columns that are never referenced get no entry.
     */
    private void setupColsUsed() {
        // start from a clean table
        colsUsed = new HashMap<String, Integer>();

        JSONArray nodes = (JSONArray) importspec.get("nodes");

        for (Object nodeEntry : nodes) {
            JSONObject node = (JSONObject) nodeEntry;

            // columns referenced by the node's own (URI) mapping
            if (node.containsKey("mapping")) {
                tallyColIds((JSONArray) node.get("mapping"));
            }

            // columns referenced by the mappings of the node's properties
            if (node.containsKey("props")) {
                for (Object propEntry : (JSONArray) node.get("props")) {
                    JSONObject prop = (JSONObject) propEntry;
                    if (prop.containsKey("mapping")) {
                        tallyColIds((JSONArray) prop.get("mapping"));
                    }
                }
            }
        }
    }

    /**
     * Adds one to the colsUsed count of every "colId" found in the given mapping items.
     */
    private void tallyColIds(JSONArray mapItems) {
        for (Object itemEntry : mapItems) {
            JSONObject item = (JSONObject) itemEntry;
            if (!item.containsKey("colId")) {
                continue;
            }
            String colId = (String) item.get("colId");
            Integer seen = colsUsed.get(colId);
            colsUsed.put(colId, seen == null ? 1 : seen + 1);
        }
    }

    /**
     * Get all column names that were actually used (lowercased).
     * @return one name per used column id, in the iteration order of colsUsed; an entry is null
     *         when a used column id has no entry in colsAvailable
     */
    public String[] getColNamesUsed() {
        ArrayList<String> names = new ArrayList<String>(colsUsed.size());
        for (String colId : colsUsed.keySet()) {
            names.add(colsAvailable.get(colId));
        }
        return names.toArray(new String[names.size()]);
    }

    /**
     * If a column has transforms in the mapping, apply them to the input raw text.
     * The transforms named in the item's "transformList" are applied in list order, each one
     * receiving the output of the previous one.
     * @param raw - untransformed string
     * @param mappingJson - single mapping entry
     * @return the transformed text, or raw itself when the entry has no "transformList"
     */
    private String applyTransforms(String raw, JSONObject mappingJson) {
        if (!mappingJson.containsKey("transformList")) {
            return raw;
        }

        String current = raw;
        for (Object transformId : (JSONArray) mappingJson.get("transformList")) {
            Transform transform = transformsAvailable.get((String) transformId);
            current = transform.applyTransform(current);
        }
        return current;
    }

    /**
     * Gives every node that has no instance value a URI built by the UriResolver from the node's
     * full URI name and a random UUID.
     *
     * @param ng nodegroup to patch; modified in place
     * @return the same nodegroup instance
     * @throws Exception if the UriResolver fails
     */
    private NodeGroup setURIsForBlankNodes(NodeGroup ng) throws Exception {
        for (Node node : ng.getNodeList()) {
            if (node.getInstanceValue() != null) {
                continue;
            }
            String typeUri = node.getFullUriName();
            String generatedId = UUID.randomUUID().toString();
            node.setInstanceValue(this.uriResolver.getInstanceUriWithPrefix(typeUri, generatedId));
        }
        return ng;
    }

    /**
     * Simple check for "emptiness".
     *
     * @param instVal value to test; may be null
     * @return false when instVal is null or has length zero, true otherwise
     *         (whitespace-only strings count as not empty)
     */
    private Boolean notEmpty(String instVal) {
        // a single null/isEmpty test covers everything the former checks did; the reference
        // comparison against "" was redundant and is not a valid way to compare strings
        return instVal != null && !instVal.isEmpty();
    }

    /**
     * Check that an input string is loadable as a certain SPARQL data type, and tweak it if necessary.
     * Throws exception if not.
     * Expects to only get the last part of the type, e.g. "float"
     *
     * Type names are matched case-insensitively. Unrecognized type names are accepted without
     * any check.
     *
     * @param input                   raw value to check
     * @param expectedSparqlGraphType last part of the expected type name
     * @return the input unchanged, except for "dateTime" and "date", which return the result of
     *         Utility.getSPARQLDateTimeString / Utility.getSPARQLDateString
     * @throws Exception if the input is not acceptable for the type; the original failure is
     *                   attached as the cause
     */
    public static String validateDataType(String input, String expectedSparqlGraphType) throws Exception {

        //   from the XSD data types:
        //   string | boolean | decimal | int | integer | negativeInteger | nonNegativeInteger | 
        //   positiveInteger | nonPositiveInteger | long | float | double | duration | 
        //   dateTime | time | date | unsignedByte | unsignedInt | anySimpleType |
        //   gYearMonth | gYear | gMonthDay;

        //      added for the runtimeConstraint:
        //     NODE_URI

        /**
         *  Please keep the wiki up to date
         *  https://github.com/ge-semtk/semtk/wiki/Ingestion-type-handling
         */
        if (expectedSparqlGraphType == null) {
            // fail the same way a null type always has, rather than wrapping it as a value error
            throw new NullPointerException("expectedSparqlGraphType must not be null");
        }

        try {
            return checkAgainstType(input, expectedSparqlGraphType);
        } catch (Exception e) {
            // the NODE_URI message has always had a space after "cause:"; the others have not
            String separator = expectedSparqlGraphType.equalsIgnoreCase("NODE_URI") ? " " : "";
            throw new Exception("attempt to use value \"" + input + "\" as type \"" + expectedSparqlGraphType
                    + "\" failed. assumed cause:" + separator + e.getMessage(), e);
        }
    }

    /**
     * Performs the per-type check for validateDataType; any exception thrown here is wrapped
     * by the caller into the common "attempt to use value ..." message.
     */
    private static String checkAgainstType(String input, String type) throws Exception {
        if (type.equalsIgnoreCase("string") || type.equalsIgnoreCase("anySimpleType")) {
            // nothing to check

        } else if (type.equalsIgnoreCase("boolean")) {
            // Boolean.parseBoolean never fails, so test the accepted spellings explicitly
            boolean recognized = input != null
                    && (input.equalsIgnoreCase("true") || input.equalsIgnoreCase("false")
                            || input.equals("1") || input.equals("0"));
            if (!recognized) {
                throw new Exception("value is not one of true, false, 1, 0");
            }

        } else if (type.equalsIgnoreCase("decimal") || type.equalsIgnoreCase("double")) {
            Double.parseDouble(input);

        } else if (type.equalsIgnoreCase("int")) {
            Integer.parseInt(input);

        } else if (type.equalsIgnoreCase("integer")) {
            // the integer family is not limited to 32 bits, hence BigInteger rather than int
            new BigInteger(input);

        } else if (type.equalsIgnoreCase("negativeInteger")) {
            if (new BigInteger(input).signum() >= 0) {
                throw new Exception("value in model is negative integer. non-negative integer given as input");
            }

        } else if (type.equalsIgnoreCase("nonNegativeInteger")) {
            if (new BigInteger(input).signum() < 0) {
                throw new Exception("value in model is nonnegative integer. negative integer given as input");
            }

        } else if (type.equalsIgnoreCase("positiveInteger")) {
            if (new BigInteger(input).signum() <= 0) {
                throw new Exception("value in model is positive integer. integer <= 0 given as input");
            }

        } else if (type.equalsIgnoreCase("nonPositiveInteger")) {
            if (new BigInteger(input).signum() > 0) {
                throw new Exception("value in model is nonpositive integer. integer > 0 given as input");
            }

        } else if (type.equalsIgnoreCase("long")) {
            Long.parseLong(input);

        } else if (type.equalsIgnoreCase("float")) {
            Float.parseFloat(input);

        } else if (type.equalsIgnoreCase("duration")) {
            // not sure how to check this one. this might not match the expectation from SADL
            Duration.parse(input);

        } else if (type.equalsIgnoreCase("dateTime")) {
            return Utility.getSPARQLDateTimeString(input);

        } else if (type.equalsIgnoreCase("time")) {
            // ISO-8601 time of day with optional offset, e.g. 10:15:30 or 10:15:30+01:00.
            // (The former Date.parse-based check rejected every value that lacked a date part.)
            DateTimeFormatter.ISO_TIME.parse(input);

        } else if (type.equalsIgnoreCase("date")) {
            return Utility.getSPARQLDateString(input);

        } else if (type.equalsIgnoreCase("unsignedByte")) {
            // unsigned byte range is 0..255; Byte.parseByte would check -128..127 instead
            int value = Integer.parseInt(input);
            if (value < 0 || value > 255) {
                throw new Exception("value in model is unsigned byte. integer outside 0..255 given as input");
            }

        } else if (type.equalsIgnoreCase("unsignedint")) {
            Integer.parseUnsignedInt(input);

        } else if (type.equalsIgnoreCase("gYearMonth")) {
            String[] parts = input.split("-");
            if (parts.length != 2) {
                throw new Exception("year-month did not have two parts.");
            }
            // reject when EITHER part has the wrong width
            if (parts[0].length() != 4 || parts[1].length() != 2) {
                throw new Exception("year-month format was wrong. " + input + " given was not YYYY-MM");
            }

        } else if (type.equalsIgnoreCase("gYear")) {
            if (input.length() != 4) {
                throw new Exception("year format was wrong. " + input + " given was not YYYY");
            }

        } else if (type.equalsIgnoreCase("gMonthDay")) {
            String[] parts = input.split("-");
            if (parts.length != 2) {
                throw new Exception("month-day did not have two parts.");
            }
            // reject when EITHER part has the wrong width
            if (parts[0].length() != 2 || parts[1].length() != 2) {
                throw new Exception("month-day format was wrong. " + input + " given was not MM-dd");
            }

        } else if (type.equalsIgnoreCase("NODE_URI")) {
            // check that this looks like a URI
            new URI(input);

        } else {
            // assume it is cool for now.
        }

        return input;
    }

}