com.ibm.research.rdf.store.sparql11.SparqlParserUtilities.java Source code

Java tutorial

Introduction

Here is the source code for com.ibm.research.rdf.store.sparql11.SparqlParserUtilities.java

Source

/******************************************************************************
 * Copyright (c) 2015 IBM Corporation.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *****************************************************************************/
package com.ibm.research.rdf.store.sparql11;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.antlr.runtime.ANTLRFileStream;
import org.antlr.runtime.ANTLRInputStream;
import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.CharStream;
import org.antlr.runtime.CommonToken;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.Token;
import org.antlr.runtime.TokenStream;
import org.antlr.runtime.tree.BaseTree;
import org.antlr.runtime.tree.BufferedTreeNodeStream;
import org.antlr.runtime.tree.CommonTree;
import org.antlr.runtime.tree.CommonTreeAdaptor;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.ibm.research.rdf.store.sparql11.model.AltPath;
import com.ibm.research.rdf.store.sparql11.model.Constant;
import com.ibm.research.rdf.store.sparql11.model.ConstantExpression;
import com.ibm.research.rdf.store.sparql11.model.Expression;
import com.ibm.research.rdf.store.sparql11.model.Expression.EExpressionType;
import com.ibm.research.rdf.store.sparql11.model.IExpressionTraversalListener;
import com.ibm.research.rdf.store.sparql11.model.IRI;
import com.ibm.research.rdf.store.sparql11.model.InvPath;
import com.ibm.research.rdf.store.sparql11.model.NegatedProperySetPath;
import com.ibm.research.rdf.store.sparql11.model.OneOrMorePath;
import com.ibm.research.rdf.store.sparql11.model.PathVisitor;
import com.ibm.research.rdf.store.sparql11.model.Pattern;
import com.ibm.research.rdf.store.sparql11.model.PropertyTerm;
import com.ibm.research.rdf.store.sparql11.model.Query;
import com.ibm.research.rdf.store.sparql11.model.QueryExt;
import com.ibm.research.rdf.store.sparql11.model.QueryTriple;
import com.ibm.research.rdf.store.sparql11.model.SeqPath;
import com.ibm.research.rdf.store.sparql11.model.SimplePath;
import com.ibm.research.rdf.store.sparql11.model.SimplePattern;
import com.ibm.research.rdf.store.sparql11.model.ZeroOrMorePath;
import com.ibm.research.rdf.store.sparql11.model.ZeroOrOnePath;

/**
 * Utilities for parsing SPARQL queries from files, URLs, character streams and strings.
 */
public class SparqlParserUtilities {

    /** Commons-logging logger used by the static helpers of this class. */
    public static final Log log = LogFactory.getLog(SparqlParserUtilities.class);
    /**
     * Grammar switch read by {@code parseSparql(CharStream, Map)}: when {@code true} the query is
     * parsed through {@code getQueryExt} (the {@code IbmSparqlExt*} lexer/parser/walker), otherwise
     * through {@code getQuery} (the {@code IbmSparql*} ones). This is a mutable, non-final static,
     * so changing it affects every subsequent parse.
     */
    public static boolean USE_EXTENSIONS = true;

    /**
     * Parses the SPARQL query stored in the named file.
     *
     * @param sparqlFile       path of the file to read; it is decoded as "UTF8"
     * @param rdfStorePrefixes prefix map handed on to {@code parseSparql(CharStream, Map)}
     * @return the parsed query
     * @throws RuntimeException wrapping the {@link IOException} if the file cannot be opened
     */
    public static Query parseSparqlFile(String sparqlFile, Map<String, String> rdfStorePrefixes) {
        final CharStream input;
        try {
            input = new ANTLRFileStream(sparqlFile, "UTF8");
        } catch (IOException ioFailure) {
            final String message = "Error opening file " + sparqlFile;
            log.error(message);
            throw new RuntimeException(message, ioFailure);
        }
        return parseSparql(input, rdfStorePrefixes);
    }

    /**
     * Parses the SPARQL query available at the given URL.
     *
     * @param sparqlFile       location to open and read; the content is decoded as "UTF8"
     * @param rdfStorePrefixes prefix map handed on to {@code parseSparql(CharStream, Map)}
     * @return the parsed query
     * @throws RuntimeException wrapping the {@link IOException} if the URL cannot be opened or read
     */
    public static Query parseSparqlFile(URL sparqlFile, Map<String, String> rdfStorePrefixes) {
        final CharStream input;
        try {
            input = new ANTLRInputStream(sparqlFile.openStream(), "UTF8");
        } catch (IOException ioFailure) {
            final String message = "Error opening file " + sparqlFile;
            log.error(message);
            throw new RuntimeException(message, ioFailure);
        }
        return parseSparql(input, rdfStorePrefixes);
    }

    /**
     * Parses the SPARQL query stored in the given file, addressed by its absolute path.
     *
     * @param sparqlFile       file to read; it is decoded as "UTF8"
     * @param rdfStorePrefixes prefix map handed on to {@code parseSparql(CharStream, Map)}
     * @return the parsed query
     * @throws RuntimeException wrapping the {@link IOException} if the file cannot be read
     */
    public static Query parseSparql(File sparqlFile, Map<String, String> rdfStorePrefixes) {
        final CharStream input;
        try {
            final String absolutePath = sparqlFile.getAbsolutePath();
            input = new ANTLRFileStream(absolutePath, "UTF8");
        } catch (IOException ioFailure) {
            log.error("Error opening file " + sparqlFile.getPath());
            throw new RuntimeException("Error reading file " + sparqlFile, ioFailure);
        }
        return parseSparql(input, rdfStorePrefixes);
    }

    /**
     * Parses a SPARQL query from a character stream and normalises the resulting model.
     *
     * <p>The grammar is chosen by {@link #USE_EXTENSIONS}. After parsing, prefixes are expanded
     * with {@code rdfStorePrefixes} and, when the query has a main pattern, that pattern is
     * rewritten in this order: {@code killUnscopedAccesses}, {@code pushFilters},
     * {@code pushGraphRestrictions}, {@code replaceFilterBindings}.
     *
     * @param sparqlFile       character stream holding the query text
     * @param rdfStorePrefixes prefix map passed to {@code Query.expandPrefixes}
     * @return the parsed and rewritten query
     * @throws SPARQLsyntaxError if one is raised during parsing, or wrapping any
     *                           {@link RecognitionException} reported by ANTLR
     */
    public static Query parseSparql(CharStream sparqlFile, Map<String, String> rdfStorePrefixes) {
        try {
            final Query parsed = USE_EXTENSIONS ? getQueryExt(sparqlFile) : getQuery(sparqlFile);
            parsed.expandPrefixes(rdfStorePrefixes);

            final Pattern root = parsed.getMainPattern();
            if (root == null) {
                // Nothing to rewrite when the query has no main pattern.
                return parsed;
            }
            root.killUnscopedAccesses();
            root.pushFilters();
            root.pushGraphRestrictions();
            root.replaceFilterBindings();
            return parsed;
        } catch (SPARQLsyntaxError syntaxError) {
            // Already the right exception type: pass it through untouched.
            throw syntaxError;
        } catch (RecognitionException recognitionFailure) {
            throw new SPARQLsyntaxError(recognitionFailure);
        }
    }

    /**
     * Parses a query with the extended grammar and walks the AST into a {@link QueryExt}.
     *
     * <p>The parser is configured to build {@code XTree} nodes. The resulting AST is written to
     * the class logger at debug level only; nothing is printed to standard output.
     *
     * @param sparqlFile character stream holding the query text
     * @return the query model produced by {@code IbmSparqlExtAstWalker}
     * @throws RecognitionException if the parser or the tree walker reports one
     */
    private static Query getQueryExt(CharStream sparqlFile) throws RecognitionException {
        IbmSparqlExtLexer lex = new IbmSparqlExtLexer(sparqlFile);
        CommonTokenStream tokens = new CommonTokenStream(lex);
        IbmSparqlExtParser parser = new IbmSparqlExtParser(tokens);

        parser.setTreeAdaptor(new CommonTreeAdaptor() {
            @Override
            public Object create(Token t) {
                return new XTree(t);
            }
        });

        IbmSparqlExtParser.queryUnit_return ret = parser.queryUnit();
        CommonTree ast = (CommonTree) ret.getTree();

        // Diagnostic dump of the AST; guarded so the string is only built when debug logging is on.
        if (log.isDebugEnabled()) {
            log.debug(ast.toStringTree());
        }

        BufferedTreeNodeStream nodes = new BufferedTreeNodeStream(ast);
        // The walker gets access to the original tokens through the node stream.
        nodes.setTokenStream(tokens);
        IbmSparqlExtAstWalker walker = new IbmSparqlExtAstWalker(nodes);
        QueryExt query = walker.queryUnit();
        return query;
    }

    /**
     * Parses a query with the standard (non-extended) grammar and walks the AST into a
     * {@link Query}.
     *
     * @param sparqlFile character stream holding the query text
     * @return the query model produced by {@code IbmSparqlAstWalker}
     * @throws RecognitionException if the parser or the tree walker reports one
     */
    static Query getQuery(CharStream sparqlFile) throws RecognitionException {
        final CommonTree syntaxTree = getParseTree(sparqlFile);
        // Unlike the extended variant, no token stream is attached to the node stream here.
        final BufferedTreeNodeStream nodeStream = new BufferedTreeNodeStream(syntaxTree);
        final IbmSparqlAstWalker astWalker = new IbmSparqlAstWalker(nodeStream);
        return astWalker.queryUnit();
    }

    /**
     * Runs the standard SPARQL lexer and parser over the input and returns the raw AST.
     *
     * @param sparqlFile character stream holding the query text
     * @return the tree returned by the {@code queryUnit} rule, cast to {@code XTree}
     * @throws RecognitionException if the parser reports one
     */
    static XTree getParseTree(CharStream sparqlFile) throws RecognitionException {
        // Adaptor that makes the parser build XTree nodes for token-backed tree nodes.
        final CommonTreeAdaptor xTreeAdaptor = new CommonTreeAdaptor() {
            @Override
            public Object create(Token payload) {
                return new XTree(payload);
            }
        };

        final IbmSparqlLexer lexer = new IbmSparqlLexer(sparqlFile);
        final CommonTokenStream tokenStream = new CommonTokenStream(lexer);
        final IbmSparqlParser parser = new IbmSparqlParser(tokenStream);
        parser.setTreeAdaptor(xTreeAdaptor);

        final IbmSparqlParser.queryUnit_return parseResult = parser.queryUnit();
        return (XTree) parseResult.getTree();
    }

    /**
     * Parses a SPARQL query given as a string.
     *
     * @param sparql           the query text
     * @param rdfStorePrefixes prefix map handed on to {@code parseSparql(CharStream, Map)}
     * @return the parsed query
     * @throws SPARQLsyntaxError if parsing fails; the query text is attached to the error via
     *                           {@code setSQL} before it is rethrown
     */
    public static Query parseSparqlString(String sparql, Map<String, String> rdfStorePrefixes) {
        final CharStream queryText = new ANTLRStringStream(sparql);
        try {
            return parseSparql(queryText, rdfStorePrefixes);
        } catch (SPARQLsyntaxError syntaxError) {
            syntaxError.setSQL(sparql);
            throw syntaxError;
        }
    }

    /**
     * Parses a SPARQL query given as a string, using an empty prefix map.
     *
     * @param sparql the query text
     * @return the parsed query
     */
    public static Query parseSparqlString(String sparql) {
        final Map<String, String> noPrefixes = Collections.emptyMap();
        return parseSparqlString(sparql, noPrefixes);
    }

    /**
     * Debugging aid: writes one line per AST node to the class logger at debug level, indented by
     * two spaces per nesting level.
     *
     * <p>A non-nil node is rendered as its {@code toString()} followed, when a source location
     * can be computed, by {@code [line,col] -> [line,col]}. A {@code null} or nil node is
     * rendered as {@code <no text>} and its children are not visited. Nothing is computed when
     * debug logging is disabled.
     *
     * @param tree  node to dump; may be {@code null}
     * @param s     token stream the tree was built from, used to resolve positions
     * @param depth nesting level of {@code tree}, which controls the indentation
     */
    private static void dump_tree(CommonTree tree, TokenStream s, int depth) {
        if (!log.isDebugEnabled()) {
            return;
        }

        StringBuilder line = new StringBuilder();
        for (int i = 0; i < depth; i++) {
            line.append("  ");
        }

        boolean realNode = (tree != null) && (!tree.isNil());
        if (realNode) {
            line.append(tree.toString());
            Location loc = get_tree_position(s, tree);
            if (loc != null) {
                // ParsePosition.toString() already renders as "[line,col]".
                line.append(' ').append(loc.start_position).append(" -> ").append(loc.end_position);
            }
        } else {
            line.append("<no text>");
        }
        log.debug(line.toString());

        if (realNode) {
            int n = tree.getChildCount();
            for (int i = 0; i < n; i++) {
                dump_tree((CommonTree) tree.getChild(i), s, depth + 1);
            }
        }
    }

    /**
     * Advances a position over a piece of text and returns it.
     *
     * <p>Scanning starts at index 1, so the first character of {@code text} never moves the
     * position. Every later {@code '\n'} increments the line and sets the column to 1; every
     * other character increments the column. The given {@code position} object is updated in
     * place and is also the return value.
     *
     * <p>NOTE(review): the column is reset to 1 after a line break, while callers seed the
     * column from ANTLR's {@code getCharPositionInLine()}; confirm that the two conventions
     * are meant to be mixed.
     *
     * @param position starting position; mutated by this call
     * @param text     characters to advance over
     * @return the same {@code position} instance, holding the advanced line and column
     */
    private static ParsePosition adjust_position(ParsePosition position, char[] text) {
        int row = position.getLine();
        int column = position.getCol();

        for (int index = 1; index < text.length; index++) {
            final boolean lineBreak = text[index] == '\n';
            if (!lineBreak) {
                column++;
                continue;
            }
            row++;
            column = 1;
        }

        position.setLine(row);
        position.setCol(column);
        return position;
    }

    /**
     * Returns the position at which the token with the given index starts.
     *
     * @param tokenStream stream to fetch the token from; the token must be a {@link CommonToken}
     * @param tokenIndex  index of the token in {@code tokenStream}
     * @return a new position built from the token's line and character position in line
     */
    private static ParsePosition get_token_start_position(TokenStream tokenStream, int tokenIndex) {
        final CommonToken startToken = (CommonToken) tokenStream.get(tokenIndex);
        return new ParsePosition(startToken.getLine(), startToken.getCharPositionInLine());
    }

    /**
     * Returns the position at which the token with the given index ends, computed by advancing
     * the token's start position over the token's text with {@code adjust_position}.
     *
     * @param tokenStream stream to fetch the token from; the token must be a {@link CommonToken}
     * @param tokenIndex  index of the token in {@code tokenStream}
     * @return a new position describing the end of the token
     */
    private static ParsePosition get_token_end_position(TokenStream tokenStream, int tokenIndex) {
        final ParsePosition cursor = get_token_start_position(tokenStream, tokenIndex);
        final CommonToken endToken = (CommonToken) tokenStream.get(tokenIndex);
        final char[] tokenChars = endToken.getText().toCharArray();
        return adjust_position(cursor, tokenChars);
    }

    /**
     * Computes the source span covered by a tree node.
     *
     * <p>The start is the start position of the node's first token. The end is normally the end
     * position of the node's last token; when that token reports a character position of
     * {@code -1}, the end is derived instead from the node's own line, column and text.
     *
     * @param tokenStream token stream the tree was built from
     * @param tree        node whose span is wanted
     * @return the span, or {@code null} when either boundary token cannot be fetched from
     *         {@code tokenStream} as a {@link CommonToken}
     */
    private static Location get_tree_position(TokenStream tokenStream, BaseTree tree) {
        int stopIndex = tree.getTokenStopIndex();
        ParsePosition start_position;
        CommonToken stopToken;
        try {
            start_position = get_token_start_position(tokenStream, tree.getTokenStartIndex());
            // Guard the stop token exactly like the start token: an out-of-range index or an
            // unexpected token class means the node has no usable position.
            stopToken = (CommonToken) tokenStream.get(stopIndex);
        } catch (RuntimeException ignored) {
            // Deliberately swallowed: callers treat null as "no position available".
            return null;
        }

        ParsePosition end_position;
        if (stopToken.getCharPositionInLine() == -1) {
            // Stop token carries no column: fall back to the node's own coordinates and text.
            String nodeText = tree.getText();
            char[] chars = (nodeText == null) ? new char[0] : nodeText.toCharArray();
            end_position = adjust_position(
                    new ParsePosition(tree.getLine(), tree.getCharPositionInLine()), chars);
        } else {
            end_position = get_token_end_position(tokenStream, stopIndex);
        }

        return new Location(start_position, end_position);
    }

    /**
     * A source span: a pair of positions marking where something starts and where it ends.
     */
    static class Location {

        /** Position at which the span starts. */
        public ParsePosition start_position;
        /** Position at which the span ends. */
        public ParsePosition end_position;

        /**
         * @param start position stored in {@code start_position}
         * @param end   position stored in {@code end_position}
         */
        public Location(ParsePosition start, ParsePosition end) {
            this.start_position = start;
            this.end_position = end;
        }
    }

    /**
     * A mutable (line, column) pair.
     */
    static class ParsePosition {

        int line;
        int col;

        /**
         * @param line initial line
         * @param col  initial column
         */
        public ParsePosition(int line, int col) {
            this.line = line;
            this.col = col;
        }

        /** Renders the position as {@code [line,col]}. */
        @Override
        public String toString() {
            return new StringBuilder()
                    .append("[")
                    .append(this.line)
                    .append(",")
                    .append(this.col)
                    .append("]")
                    .toString();
        }

        /** @return the current line */
        public int getLine() {
            return this.line;
        }

        /** @param line the new line */
        public void setLine(int line) {
            this.line = line;
        }

        /** @return the current column */
        public int getCol() {
            return this.col;
        }

        /** @param col the new column */
        public void setCol(int col) {
            this.col = col;
        }
    }

    /**
     * Collects the IRI values of the predicates used by a query.
     *
     * <p>Every triple of every {@link SimplePattern} reachable from the main pattern (via
     * {@code gatherSubPatterns(true)}) contributes according to its predicate:
     * <ul>
     * <li>an IRI predicate contributes its own value;</li>
     * <li>a property path contributes every IRI found in the path, including the forward and
     * backward properties of a negated property set;</li>
     * <li>otherwise the predicate is expected to be a variable, and every IRI constant of each
     * filter of the same pattern that mentions that variable is contributed.</li>
     * </ul>
     *
     * @param q the query to inspect
     * @return the collected IRI values; the immutable empty set when the query has no main
     *         pattern, otherwise a fresh mutable set
     */
    public static Set<String> gatherQueryPredicates(Query q) {
        Pattern mainPattern = q.getMainPattern();
        if (mainPattern == null) {
            return Collections.emptySet();
        }

        final Set<String> preds = new HashSet<String>();
        // Both collectors only ever add to preds, so one instance of each serves the whole query.
        PathVisitor pathCollector = newPathPredicateCollector(preds);
        IExpressionTraversalListener constantCollector = newConstantIriCollector(preds);

        for (Pattern p : mainPattern.gatherSubPatterns(true)) {
            if (!(p instanceof SimplePattern)) {
                continue;
            }
            for (QueryTriple t : ((SimplePattern) p).getQueryTriples()) {
                PropertyTerm pred = t.getPredicate();
                if (pred.isIRI()) {
                    preds.add(pred.getIRI().getValue());
                } else if (pred.isPath()) {
                    pred.getPath().visit(pathCollector);
                } else {
                    assert pred.isVariable();
                    // A variable predicate may be pinned to concrete IRIs by a filter.
                    for (Expression expr : p.getFilters()) {
                        if (expr.gatherVariables().contains(pred.getVariable())) {
                            expr.traverse(constantCollector);
                        }
                    }
                }
            }
        }
        return preds;
    }

    /** Builds a path visitor that adds every IRI occurring in a property path to {@code preds}. */
    private static PathVisitor newPathPredicateCollector(final Set<String> preds) {
        return new PathVisitor() {
            @Override
            public void visit(ZeroOrOnePath p) {
                p.getSubPath().visit(this);
            }

            @Override
            public void visit(ZeroOrMorePath p) {
                p.getSubPath().visit(this);
            }

            @Override
            public void visit(OneOrMorePath p) {
                p.getSubPath().visit(this);
            }

            @Override
            public void visit(InvPath p) {
                p.getSubPath().visit(this);
            }

            @Override
            public void visit(SimplePath p) {
                preds.add(p.getIRI().getValue());
            }

            @Override
            public void visit(NegatedProperySetPath prop) {
                for (IRI iri : prop.getFowardProperties()) {
                    preds.add(iri.getValue());
                }
                for (IRI iri : prop.getBackwardProperties()) {
                    preds.add(iri.getValue());
                }
            }

            @Override
            public void visit(SeqPath p) {
                p.getLeft().visit(this);
                p.getRight().visit(this);
            }

            @Override
            public void visit(AltPath p) {
                p.getLeft().visit(this);
                p.getRight().visit(this);
            }
        };
    }

    /** Builds an expression listener that adds the IRI of every IRI-valued constant to {@code preds}. */
    private static IExpressionTraversalListener newConstantIriCollector(final Set<String> preds) {
        return new IExpressionTraversalListener() {
            public void startExpression(Expression e) {
                if (e.getType() == EExpressionType.CONSTANT) {
                    Constant c = ((ConstantExpression) e).getConstant();
                    if (c.getIRI() != null) {
                        preds.add(c.getIRI().getValue());
                    }
                }
            }

            public void endExpression(Expression e) {
                // Nothing to do when leaving an expression.
            }
        };
    }
}