jflowmap.data.StaxGraphMLReader.java Source code

Java tutorial

Introduction

Here is the source code for jflowmap.data.StaxGraphMLReader.java

Source

/*
 * This file is part of JFlowMap.
 *
 * Copyright 2009 Ilya Boyandin
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package jflowmap.data;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;

import jflowmap.FlowMapAttrSpec;
import jflowmap.FlowMapGraph;

import org.apache.log4j.Logger;

import prefuse.data.Graph;
import prefuse.data.Schema;
import prefuse.data.Table;
import prefuse.data.parser.DataParseException;
import prefuse.data.parser.ParserFactory;
import prefuse.util.collections.IntIterator;
import prefuse.util.io.IOLib;

import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

/**
 * GraphML reader capable of loading several graphs from one file. It's based on StAX and
 * therefore requires much less memory than GraphMLReader2 and works faster.
 *
 * <p>The parsing state (schemas, id maps, the line-counting reader) is kept in instance
 * fields that are overwritten by every call to {@link #readFromStream(InputStream)}, so a
 * single instance must not be used for several reads at the same time.
 *
 * <p>Known limitation: the document is traversed with {@link XMLStreamReader#nextTag()},
 * which only tolerates whitespace, comments and processing instructions between tags.
 * Non-whitespace text outside of {@code <data>} elements therefore results in a parse error.
 *
 * @author Ilya Boyandin
 */
public class StaxGraphMLReader {

    public static Logger logger = Logger.getLogger(StaxGraphMLReader.class);

    private static final String DEFAULT_CHARSET = "utf-8";
    private static final String NAMESPACE = null; //  "http://graphml.graphdrawing.org/xmlns"

    /** Name of the charset used to decode the input stream. */
    private String charset = DEFAULT_CHARSET;

    private final ParserFactory dataParser;
    /** Wraps the input of the current read; only used to report a line number on parse errors. */
    private LineNumberReader lineNumberReader;

    /** Schemas built from the {@code <key>} declarations; locked when the first graph starts. */
    private Schema nodeSchema, edgeSchema;
    /** Maps the {@code id} of a {@code <key>} to its {@code attr.name} (the table column name). */
    private Map<String, String> attrIdToName;
    /** Maps node ids of the graph currently being read to their row index in the node table. */
    private Map<String, Integer> nodeIdToIndex;

    public StaxGraphMLReader() {
        dataParser = ParserFactory.getDefaultFactory();
    }

    /**
     * Sets the charset used to decode input streams in subsequent reads.
     *
     * @param charset charset name as accepted by {@link InputStreamReader}
     */
    public void setCharset(String charset) {
        this.charset = charset;
    }

    /**
     * Reads all graphs from the given location and closes the underlying stream afterwards.
     *
     * @param location location string passed to {@code IOLib.streamFromString}
     * @return the graphs found, in document order
     * @throws IOException if the location cannot be opened or the content cannot be parsed
     */
    public Iterable<Graph> readFromLocation(String location) throws IOException {
        logger.info("Loading file \"" + location + "\"");
        InputStream is = IOLib.streamFromString(location);
        if (is == null) {
            throw new IOException("Couldn't read from location: \"" + location + "\"");
        }
        Iterable<Graph> graphs;
        try {
            graphs = readFromStream(is);
        } finally {
            // The stream was opened here, so it is released here, whether or not parsing succeeded.
            closeQuietly(is, location);
        }
        logger.info("Finished loading file. Number of graphs loaded: " + Iterables.size(graphs));

        return graphs;
    }

    /**
     * Convenience shortcut: reads all graphs from {@code filename} with a fresh reader.
     *
     * @throws IOException if the file cannot be opened or parsed
     */
    public static Iterable<Graph> readGraphs(String filename) throws IOException {
        return new StaxGraphMLReader().readFromLocation(filename);
    }

    /**
     * Reads {@code filename} and returns the first graph it contains.
     *
     * @throws IOException if the file cannot be opened or parsed, or contains no graph
     */
    public static Graph readFirstGraph(String filename) throws IOException {
        Iterator<Graph> it = readGraphs(filename).iterator();
        if (!it.hasNext()) {
            throw new IOException("No graphs found in " + filename);
        }
        return it.next();
    }

    /**
     * Reads the first graph found at {@code location} and wraps it into a {@link FlowMapGraph}.
     *
     * @throws IOException if the location cannot be opened or parsed, or contains no graph
     */
    public static FlowMapGraph readFlowMapGraph(String location, FlowMapAttrSpec attrSpec) throws IOException {
        return new FlowMapGraph(readFirstGraph(location), attrSpec);
    }

    /**
     * Parses GraphML from the given stream and returns every {@code <graph>} found.
     *
     * <p>The stream itself is NOT closed by this method; that stays the caller's
     * responsibility. Only the StAX reader created here is released.
     *
     * @param is stream positioned at the start of a GraphML document
     * @return the graphs found, in document order
     * @throws IOException on I/O failure, on XML parse errors (the original
     *         {@link XMLStreamException} is kept as the cause) or on inconsistent graph data
     */
    public Iterable<Graph> readFromStream(InputStream is) throws IOException {
        XMLInputFactory inputFactory = XMLInputFactory.newInstance();
        // Hardening against XXE/SSRF: never process DTDs or resolve external entities.
        // nextTag() rejects DTD events anyway, so no document that parsed before is affected.
        inputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
        inputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);

        XMLStreamReader in = null;
        try {
            lineNumberReader = new LineNumberReader(new InputStreamReader(is, charset));
            in = inputFactory.createXMLStreamReader(lineNumberReader);

            List<Graph> graphs = Lists.newArrayList();
            initSchemas();

            attrIdToName = Maps.newHashMap();
            String graphId = null;
            boolean graphDirected = false;
            // Both tables are non-null only while the parser is inside a <graph> element.
            Table nodeTable = null, edgeTable = null;

            OUTER: while (in.hasNext()) {
                int eventType = in.nextTag();
                String tag = in.getLocalName();

                switch (eventType) {
                case XMLStreamReader.START_ELEMENT:
                    if (tag.equals("key")) {
                        readKey(in);

                    } else if (tag.equals("graph")) {
                        lockSchemas();

                        nodeIdToIndex = Maps.newHashMap();
                        nodeTable = nodeSchema.instantiate();
                        edgeTable = edgeSchema.instantiate();

                        graphId = in.getAttributeValue(NAMESPACE, "id");
                        graphDirected = ("directed".equals(in.getAttributeValue(NAMESPACE, "edgedefault")));

                    } else if (tag.equals("node")) {
                        if (nodeTable == null) {
                            throw new IOException("Element <node> found outside of a <graph> element");
                        }

                        int ri = nodeTable.addRow();

                        String nodeId = in.getAttributeValue(NAMESPACE, "id");
                        if (nodeIdToIndex.containsKey(nodeId)) {
                            throw new IOException("Duplicate node id: '" + nodeId + "'");
                        }
                        nodeIdToIndex.put(nodeId, ri);

                        nodeTable.set(ri, FlowMapGraph.GRAPH_NODE_ID_COLUMN, nodeId);

                        readData(in, nodeTable, ri, "node");

                    } else if (tag.equals("edge")) {
                        if (edgeTable == null) {
                            throw new IOException("Element <edge> found outside of a <graph> element");
                        }

                        int ri = edgeTable.addRow();

                        // Endpoints are stored as raw ids first; they are resolved to node row
                        // indices at </graph>, once all nodes of the graph are known.
                        edgeTable.setString(ri, FlowMapGraph.SRC_TEMP_ID,
                                in.getAttributeValue(NAMESPACE, "source"));
                        edgeTable.setString(ri, FlowMapGraph.TRG_TEMP_ID,
                                in.getAttributeValue(NAMESPACE, "target"));

                        readData(in, edgeTable, ri, "edge");
                    }
                    break;

                case XMLStreamReader.END_ELEMENT:

                    if (tag.equals("graph")) {
                        assert (nodeTable != null);
                        assert (edgeTable != null);

                        linkEdgesToNodes(edgeTable);

                        // Finally, create the graph
                        Graph graph = new Graph(nodeTable, edgeTable, graphDirected);
                        FlowMapGraph.setGraphId(graph, graphId);
                        graphs.add(graph);

                        logger.info("Loaded graph '" + graphId + "'," + " nodes: " + nodeTable.getRowCount()
                                + ", edges: " + edgeTable.getRowCount());

                        // Leave the "inside a graph" state so that stray <node>/<edge> elements
                        // cannot be written into the tables of the graph just completed.
                        nodeTable = null;
                        edgeTable = null;
                        nodeIdToIndex = null;

                    } else if (tag.equals("graphml")) {
                        break OUTER;
                    }
                    break;
                }
            }

            return graphs;
        } catch (XMLStreamException e) {
            throw new IOException("Parse error in line " + lineNumberReader.getLineNumber() + ": " + e.getMessage(),
                    e);
        } finally {
            if (in != null) {
                try {
                    // Frees the parser's resources; per the StAX contract this does not
                    // close the underlying input source.
                    in.close();
                } catch (XMLStreamException e) {
                    logger.warn("Couldn't close the XML stream reader", e);
                }
            }
        }
    }

    /** Closes a stream opened by this reader, logging (not propagating) a failure to close. */
    private static void closeQuietly(InputStream is, String location) {
        try {
            is.close();
        } catch (IOException e) {
            logger.warn("Couldn't close the stream for location: \"" + location + "\"", e);
        }
    }

    /**
     * Resolves the temporary source/target id columns of every edge to node row indices
     * and then removes the temporary columns from the table.
     */
    private void linkEdgesToNodes(Table edgeTable) throws IOException {
        IntIterator rows = edgeTable.rows();
        while (rows.hasNext()) {
            int ri = rows.nextInt();

            String src = edgeTable.getString(ri, FlowMapGraph.SRC_TEMP_ID);
            edgeTable.setInt(ri, FlowMapGraph.SRC, nodeIndexOf(src, "source"));

            String trg = edgeTable.getString(ri, FlowMapGraph.TRG_TEMP_ID);
            edgeTable.setInt(ri, FlowMapGraph.TRG, nodeIndexOf(trg, "target"));
        }
        edgeTable.removeColumn(FlowMapGraph.SRC_TEMP_ID);
        edgeTable.removeColumn(FlowMapGraph.TRG_TEMP_ID);
    }

    /** Returns the node table row of {@code nodeId}, or fails if the current graph has no such node. */
    private int nodeIndexOf(String nodeId, String role) throws IOException {
        Integer index = nodeIdToIndex.get(nodeId);
        if (index == null) {
            throw new IOException("Tried to create edge with " + role + " node id=" + nodeId
                    + " which does not exist.");
        }
        return index.intValue();
    }

    /**
     * Consumes the children of the current node/edge element and stores the value of each
     * {@code <data>} child in the given table row. Returns once the end tag named
     * {@code untilEndOf} has been reached.
     */
    private void readData(XMLStreamReader in, Table table, int tableRowIdx, String untilEndOf)
            throws IOException, XMLStreamException {

        OUTER: while (in.hasNext()) {
            int eventType = in.nextTag();
            String tag = in.getLocalName();

            switch (eventType) {
            case XMLStreamReader.START_ELEMENT:
                if (tag.equals("data")) {
                    String attrId = in.getAttributeValue(NAMESPACE, "key");
                    String attrName = attrIdToName.get(attrId);
                    // getElementText() leaves the parser on the matching </data> end tag
                    String valueStr = in.getElementText();
                    if (valueStr != null) {
                        Class<?> columnType = table.getColumnType(attrName);
                        if (columnType == null) {
                            throw new IOException("Type of column '" + attrId + "' not found");
                        }
                        table.set(tableRowIdx, attrName, parseData(valueStr, columnType));
                    }
                }
                break;

            case XMLStreamReader.END_ELEMENT:
                if (tag.equals(untilEndOf)) {
                    break OUTER;
                }
                break;
            }
        }
    }

    /** Locks both schemas; called when a graph starts, before the tables are instantiated. */
    private void lockSchemas() {
        nodeSchema.lockSchema();
        edgeSchema.lockSchema();
    }

    /** Creates fresh node/edge schemas containing only the columns the reader itself needs. */
    private void initSchemas() {
        nodeSchema = new Schema();
        nodeSchema.addColumn(FlowMapGraph.GRAPH_NODE_ID_COLUMN, String.class);
        edgeSchema = new Schema();
        edgeSchema.addColumn(FlowMapGraph.SRC, int.class);
        edgeSchema.addColumn(FlowMapGraph.TRG, int.class);
        edgeSchema.addColumn(FlowMapGraph.SRC_TEMP_ID, String.class);
        edgeSchema.addColumn(FlowMapGraph.TRG_TEMP_ID, String.class);
    }

    /**
     * Handles a {@code <key>} start tag: remembers the id-to-name mapping and adds a column
     * of the declared type to the node schema, the edge schema, or both.
     *
     * @throws IOException if the {@code for} attribute is neither absent, "all", "node" nor "edge"
     */
    private void readKey(XMLStreamReader in) throws IOException {
        String id = in.getAttributeValue(NAMESPACE, "id");
        String forWhat = in.getAttributeValue(NAMESPACE, "for");
        String name = in.getAttributeValue(NAMESPACE, "attr.name");
        AttrDataTypes type = AttrDataTypes.parse(in.getAttributeValue(NAMESPACE, "attr.type"));

        attrIdToName.put(id, name);

        // Defaults declared in the document are not read here; every column gets the
        // default value supplied by its attribute data type.
        Object defaultVal = type.getDefaultValue();

        if (forWhat == null || forWhat.equals("all")) {
            nodeSchema.addColumn(name, type.klass, defaultVal);
            edgeSchema.addColumn(name, type.klass, defaultVal);
        } else if (forWhat.equals("node")) {
            nodeSchema.addColumn(name, type.klass, defaultVal);
        } else if (forWhat.equals("edge")) {
            edgeSchema.addColumn(name, type.klass, defaultVal);
        } else {
            throw new IOException("Unrecognized 'for' value: " + forWhat);
        }
    }

    /**
     * Converts {@code text} to an instance of {@code klass} using the prefuse data parsers.
     *
     * @throws IOException wrapping the {@link DataParseException} if the text cannot be parsed
     */
    private Object parseData(String text, Class<?> klass) throws IOException {
        try {
            return dataParser.getParser(klass).parse(text);
        } catch (DataParseException e) {
            throw new IOException(e);
        }
    }

}