org.hbs.neo4j.importers.CsvImporter.java Source code

Java tutorial

Introduction

Here is the source code for org.hbs.neo4j.importers.CsvImporter.java

Source

/**
 * Copyright to the original author or authors.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at http://www.apache.org/licenses/LICENSE-2.0.
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package org.hbs.neo4j.importers;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import org.hbs.neo4j.model.NodeEntity;
import org.neo4j.graphdb.DynamicRelationshipType;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.Transaction;
import org.neo4j.graphdb.index.Index;
import org.neo4j.kernel.AbstractGraphDatabase;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;

/**
 * Imports nodes and relationships into a graph database from tab-separated
 * text files.
 * <p>
 * The first line of every file is a header. A nodes file header has to start
 * with the {@code NodeEntity.Keys.KEY} and {@code NodeEntity.Keys.DISPLAY_NAME}
 * columns; a rels file header has to start with
 * {@link #RELS_FILE_FROM_KEY_HEADER}, {@link #RELS_FILE_TO_KEY_HEADER} and
 * {@link #RELS_FILE_REL_TYPE_HEADER}. Every additional header column names a
 * property whose value is read from the same column of each data line.
 * <p>
 * Each import runs inside a single transaction. Data lines that are rejected
 * (too few values, duplicate node key, unknown start/end node) are logged as
 * warnings and skipped; any other failure rolls the transaction back and is
 * propagated to the caller.
 * 
 * @author Christophe Heubs
 */
public class CsvImporter {

    private static final char SEPARATOR = '\t';
    /** Splitter is immutable, so one shared instance serves every header and data line. */
    private static final Splitter LINE_SPLITTER = Splitter.on(SEPARATOR).trimResults();
    private static final int NODES_FILE_HEADER_MIN_SIZE = 2;
    private static final int NODES_FILE_LINE_KEY_IDX = 0;
    private static final int NODES_FILE_LINE_DISP_NAME_IDX = 1;
    private static final int RELS_FILE_HEADER_MIN_SIZE = 3;
    private static final int RELS_FILE_LINE_FROM_KEY_IDX = 0;
    private static final int RELS_FILE_LINE_TO_KEY_IDX = 1;
    private static final int RELS_FILE_LINE_REL_TYPE_KEY_IDX = 2;

    /**
     * Rels file header for start node key.
     */
    public static final String RELS_FILE_FROM_KEY_HEADER = "from_key";
    /**
     * Rels file header for end node key.
     */
    public static final String RELS_FILE_TO_KEY_HEADER = "to_key";
    /**
     * Rels file header for rel type.
     */
    public static final String RELS_FILE_REL_TYPE_HEADER = "rel_type";

    private static final Logger LOGGER = LoggerFactory.getLogger(CsvImporter.class);

    /** Kind of file being imported; selects the header validation and the line handler. */
    private enum FileKind {
        NODES, RELS
    }

    private final AbstractGraphDatabase db;
    private final Index<Node> index;

    /**
     * @param gds GraphDatabaseService to work with.
     * @param indexName Name of the node index used to look up and register
     *            imported nodes.
     */
    public CsvImporter(AbstractGraphDatabase gds, String indexName) {
        this.db = gds;
        this.index = this.db.index().forNodes(indexName);
    }

    /**
     * Import nodes from a CSV file to graph DB.
     * <p>
     * The stream is decoded with the platform default charset and is not
     * closed by this method.
     * 
     * @param nodesInputStream A stream to the CSV file.
     * @throws IOException If there is an issue reading the CSV stream.
     * @throws IllegalArgumentException If the file is empty or its header is
     *             invalid.
     */
    public void importNodes(InputStream nodesInputStream) throws IOException {
        importLines(nodesInputStream, FileKind.NODES);
    }

    /**
     * Import rels from a CSV file to graph DB.
     * <p>
     * The stream is decoded with the platform default charset and is not
     * closed by this method.
     * 
     * @param relsInputStream A stream to the CSV file.
     * @throws IOException If there is an issue reading the CSV stream.
     * @throws IllegalArgumentException If the file is empty or its header is
     *             invalid.
     */
    public void importRels(InputStream relsInputStream) throws IOException {
        importLines(relsInputStream, FileKind.RELS);
    }

    /**
     * Import nodes then rels from a CSV file to graph DB.
     * 
     * @param nodesInputStream A stream to the CSV file for nodes.
     * @param relsInputStream A stream to the CSV file for rels.
     * @throws IOException If there is an issue reading the CSV files.
     */
    public void importNodesAndRels(InputStream nodesInputStream, InputStream relsInputStream) throws IOException {
        this.importNodes(nodesInputStream);
        this.importRels(relsInputStream);
    }

    /**
     * Read and validate a Nodes file header.
     * 
     * @param headerString The file header as a String ({@code null} when the
     *            file is empty).
     * @return The file header as an {@code Iterable<String>}.
     * @throws IllegalArgumentException If the header is missing or does not
     *             start with the mandatory columns.
     */
    protected Iterable<String> getNodesFileHeader(String headerString) {
        return getAndValidateHeader(headerString, NODES_FILE_HEADER_MIN_SIZE, NodeEntity.Keys.KEY,
                NodeEntity.Keys.DISPLAY_NAME);
    }

    /**
     * Read and validate a Rels file header.
     * 
     * @param headerString The file header as a String ({@code null} when the
     *            file is empty).
     * @return The file header as an {@code Iterable<String>}.
     * @throws IllegalArgumentException If the header is missing or does not
     *             start with the mandatory columns.
     */
    protected Iterable<String> getRelsFileHeader(String headerString) {
        return getAndValidateHeader(headerString, RELS_FILE_HEADER_MIN_SIZE, RELS_FILE_FROM_KEY_HEADER,
                RELS_FILE_TO_KEY_HEADER, RELS_FILE_REL_TYPE_HEADER);
    }

    /**
     * Insert a node from a CSV line.
     * <p>
     * The node is registered in the index under both its key and its display
     * name; every header column after the two mandatory ones is stored as a
     * node property.
     * 
     * @param header CSV file Header.
     * @param line CSV line to insert.
     * @throws IllegalArgumentException If the line has fewer values than the
     *             header has columns, or if a node with the same key is
     *             already indexed.
     */
    protected void insertNode(Iterable<String> header, String line) {
        int expectedValues = Math.max(Iterables.size(header), NODES_FILE_HEADER_MIN_SIZE);
        // Validate before touching the graph so a short line is skipped instead of aborting the import.
        List<String> values = splitDataLine(line, expectedValues, "Node won't be inserted.");
        String nodeKey = values.get(NODES_FILE_LINE_KEY_IDX);
        String nodeDisplayName = values.get(NODES_FILE_LINE_DISP_NAME_IDX);
        Node alreadyExist = index.get(NodeEntity.Keys.KEY, nodeKey).getSingle();
        if (alreadyExist != null) {
            throw new IllegalArgumentException("Node with key " + nodeKey + " already exists. Won't be inserted.");
        }
        Node node = db.createNode();
        node.setProperty(NodeEntity.Keys.KEY, nodeKey);
        index.add(node, NodeEntity.Keys.KEY, nodeKey);
        node.setProperty(NodeEntity.Keys.DISPLAY_NAME, nodeDisplayName);
        index.add(node, NodeEntity.Keys.DISPLAY_NAME, nodeDisplayName);
        int idx = 0;
        for (String propertyName : header) {
            // The leading mandatory columns were handled above; the rest are free-form properties.
            if (idx >= NODES_FILE_HEADER_MIN_SIZE) {
                node.setProperty(propertyName, values.get(idx));
            }
            idx++;
        }
    }

    /**
     * Insert a relationship from a CSV line.
     * <p>
     * Start and end nodes are looked up in the index by key; every header
     * column after the three mandatory ones is stored as a relationship
     * property.
     * 
     * @param header CSV file Header.
     * @param line CSV line to insert.
     * @throws IllegalArgumentException If the line has fewer values than the
     *             header has columns, or if the start or end node is not
     *             found in the index.
     */
    protected void insertRel(Iterable<String> header, String line) {
        int expectedValues = Math.max(Iterables.size(header), RELS_FILE_HEADER_MIN_SIZE);
        // Validate before touching the graph so a short line is skipped instead of aborting the import.
        List<String> values = splitDataLine(line, expectedValues, "Rel won't be created.");
        String fromNodeKey = values.get(RELS_FILE_LINE_FROM_KEY_IDX);
        Node startNode = index.get(NodeEntity.Keys.KEY, fromNodeKey).getSingle();
        if (startNode == null) {
            throw new IllegalArgumentException(
                    "Start node with key " + fromNodeKey + " does not exist. Rel won't be created.");
        }
        String toNodeKey = values.get(RELS_FILE_LINE_TO_KEY_IDX);
        Node endNode = index.get(NodeEntity.Keys.KEY, toNodeKey).getSingle();
        if (endNode == null) {
            throw new IllegalArgumentException(
                    "End node with key " + toNodeKey + " does not exist. Rel won't be created.");
        }
        Relationship relationship = startNode.createRelationshipTo(endNode,
                DynamicRelationshipType.withName(values.get(RELS_FILE_LINE_REL_TYPE_KEY_IDX)));
        int idx = 0;
        for (String propertyName : header) {
            // The leading mandatory columns were handled above; the rest are free-form properties.
            if (idx >= RELS_FILE_HEADER_MIN_SIZE) {
                relationship.setProperty(propertyName, values.get(idx));
            }
            idx++;
        }
    }

    /**
     * Reads the header and every data line of a file and inserts them in a
     * single transaction.
     * 
     * @param inputStream Stream to read; decoded with the platform default
     *            charset and left open.
     * @param kind Whether lines describe nodes or rels.
     * @throws IOException If reading the stream fails; the transaction is
     *             marked as failed first.
     */
    private void importLines(InputStream inputStream, FileKind kind) throws IOException {
        // Not closed on purpose: closing the reader would also close the caller's stream.
        BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
        String headerLine = reader.readLine();
        Iterable<String> header = kind == FileKind.NODES ? getNodesFileHeader(headerLine)
                : getRelsFileHeader(headerLine);
        Transaction tx = db.beginTx();
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                try {
                    if (kind == FileKind.NODES) {
                        insertNode(header, line);
                    } else {
                        insertRel(header, line);
                    }
                } catch (IllegalArgumentException iae) {
                    // A rejected line is reported and skipped; the rest of the file is still imported.
                    LOGGER.warn(iae.getMessage());
                }
            }
            tx.success();
        } catch (IOException ioe) {
            tx.failure();
            throw ioe;
        } catch (RuntimeException re) {
            tx.failure();
            LOGGER.error("CSV import failed, the transaction is rolled back.", re);
            throw re;
        } finally {
            tx.finish();
        }
    }

    /**
     * Splits a header line and checks that it starts with the expected keys.
     * 
     * @param headerString Raw header line, {@code null} if the file is empty.
     * @param minSize Minimum number of columns the header must have.
     * @param keys Column names the header must start with, in order.
     * @return The header columns.
     * @throws IllegalArgumentException If the header is missing, too short or
     *             does not start with {@code keys}.
     */
    private Iterable<String> getAndValidateHeader(String headerString, int minSize, String... keys) {
        List<String> header = headerString == null ? null : split(headerString);
        boolean isHeaderValid = header != null && header.size() >= minSize && header.size() >= keys.length;
        for (int i = 0; isHeaderValid && i < keys.length; i++) {
            isHeaderValid = keys[i].equals(header.get(i));
        }
        if (!isHeaderValid) {
            StringBuilder exMsg = new StringBuilder("Invalid Header for CSV file.");
            exMsg.append(" Header has to start with \"");
            for (int i = 0; i < keys.length; i++) {
                if (i > 0) {
                    exMsg.append(SEPARATOR);
                }
                exMsg.append(keys[i]);
            }
            exMsg.append("\" followed by a list of property names separated by tabs.");
            throw new IllegalArgumentException(exMsg.toString());
        }
        return header;
    }

    /**
     * Splits a data line and rejects it when it holds too few values.
     * 
     * @param line Raw data line.
     * @param expectedValues Minimum number of values the line must contain.
     * @param consequence Sentence appended to the error message describing
     *            what is skipped.
     * @return The trimmed values of the line.
     * @throws IllegalArgumentException If the line has fewer than
     *             {@code expectedValues} values.
     */
    private static List<String> splitDataLine(String line, int expectedValues, String consequence) {
        List<String> values = split(line);
        if (values.size() < expectedValues) {
            throw new IllegalArgumentException("Line \"" + line + "\" has " + values.size()
                    + " value(s) but at least " + expectedValues + " are expected. " + consequence);
        }
        return values;
    }

    /**
     * Splits a line on the separator once and materializes the trimmed
     * values, so later size and index lookups do not re-split the string.
     */
    private static List<String> split(String line) {
        List<String> values = new ArrayList<String>();
        for (String value : LINE_SPLITTER.split(line)) {
            values.add(value);
        }
        return values;
    }

}