org.gradoop.benchmark.fsm.TransactionalFSMBenchmark.java Source code

Java tutorial

Introduction

Here is the source code for org.gradoop.benchmark.fsm.TransactionalFSMBenchmark.java

Source

/*
 * This file is part of Gradoop.
 *
 * Gradoop is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Gradoop is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Gradoop. If not, see <http://www.gnu.org/licenses/>.
 */

package org.gradoop.benchmark.fsm;

import com.google.common.collect.Lists;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.ProgramDescription;
import org.apache.flink.api.java.DataSet;
import org.gradoop.examples.AbstractRunner;
import org.gradoop.io.impl.tlf.TLFDataSource;
import org.gradoop.io.impl.tlf.tuples.TLFGraph;
import org.gradoop.model.impl.algorithms.fsm.config.FSMConfig;
import org.gradoop.model.impl.algorithms.fsm.gspan.api.GSpanEncoder;
import org.gradoop.model.impl.algorithms.fsm.gspan.api.GSpanMiner;
import org.gradoop.model.impl.algorithms.fsm.gspan.encoders.GSpanTLFGraphEncoder;
import org.gradoop.model.impl.algorithms.fsm.gspan.miners.bulkiteration.GSpanBulkIteration;
import org.gradoop.model.impl.algorithms.fsm.gspan.miners.filterrefine.GSpanFilterRefine;
import org.gradoop.model.impl.algorithms.fsm.gspan.pojos.CompressedDFSCode;
import org.gradoop.model.impl.algorithms.fsm.gspan.pojos.GSpanGraph;
import org.gradoop.model.impl.pojo.EdgePojo;
import org.gradoop.model.impl.pojo.GraphHeadPojo;
import org.gradoop.model.impl.pojo.VertexPojo;
import org.gradoop.model.impl.tuples.WithCount;
import org.gradoop.util.GradoopFlinkConfig;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Collection;
import java.util.concurrent.TimeUnit;

/**
 * A dedicated program for parametrized transactional FSM benchmark.
 *
 * <p>The program reads a collection of TLF graphs from the given input path,
 * encodes it for gSpan, mines frequent subgraphs with the selected gSpan
 * implementation and appends one line of statistics to a pipe-separated log
 * file.</p>
 */
public class TransactionalFSMBenchmark extends AbstractRunner implements ProgramDescription {

    /**
     * Option to declare path to input graph
     */
    private static final String OPTION_INPUT_PATH = "i";

    /**
     * Path to CSV log file
     */
    private static final String OPTION_LOG_PATH = "log";

    /**
     * directed flag
     */
    private static final String OPTION_DIRECTED = "d";

    /**
     * implementation parameter
     */
    private static final String OPTION_IMPLEMENTATION = "impl";

    /**
     * implementation parameter value selecting the iterative (bulk iteration) miner
     */
    private static final String IMPLEMENTATION_ITERATIVE = "it";

    /**
     * implementation parameter value selecting the filter refinement miner;
     * also used when no implementation is specified
     */
    private static final String IMPLEMENTATION_FILTER_REFINE = "fr";

    /**
     * implementation parameter values
     */
    private static final Collection<String> VALUES_IMPLEMENTATION = Lists.newArrayList(
            IMPLEMENTATION_ITERATIVE, IMPLEMENTATION_FILTER_REFINE);

    /**
     * Minimum support threshold
     */
    private static final String OPTION_THRESHOLD = "t";

    /**
     * Character encoding used for the log file
     */
    private static final String LOG_ENCODING = "UTF-8";

    static {
        OPTIONS.addOption(OPTION_INPUT_PATH, "input-path", true, "path of graph files (hdfs)");
        // long option names must not contain whitespace, otherwise they cannot be passed as "--log-path"
        OPTIONS.addOption(OPTION_LOG_PATH, "log-path", true, "path of the generated log file");
        OPTIONS.addOption(OPTION_DIRECTED, "directed", false, "Flag for directed graphs");
        OPTIONS.addOption(OPTION_IMPLEMENTATION, "implementation", true, "gSpan implementation");
        OPTIONS.addOption(OPTION_THRESHOLD, "minimum-support", true, "minimum support threshold");
    }

    /**
     * Main program to run the benchmark. Arguments are the available options.
     *
     * <p>Input path, log path and minimum support are mandatory. The
     * implementation is optional; if it is omitted, the filter refinement
     * miner is used.</p>
     *
     * @param args program arguments
     * @throws Exception if argument validation, the mining job or writing the log file fails
     */
    @SuppressWarnings("unchecked")
    public static void main(String[] args) throws Exception {
        CommandLine cmd = parseArguments(args, TransactionalFSMBenchmark.class.getName());
        if (cmd == null) {
            return;
        }
        performSanityCheck(cmd);

        // read cmd arguments
        String inputPath = cmd.getOptionValue(OPTION_INPUT_PATH);
        String logPath = cmd.getOptionValue(OPTION_LOG_PATH);
        float threshold = Float.parseFloat(cmd.getOptionValue(OPTION_THRESHOLD));
        boolean directed = cmd.hasOption(OPTION_DIRECTED);
        // the implementation option is not mandatory, so fall back to a default
        // instead of dereferencing a null value below
        String implementation = cmd.getOptionValue(OPTION_IMPLEMENTATION, IMPLEMENTATION_FILTER_REFINE);

        // create gradoop conf
        GradoopFlinkConfig gradoopConfig = GradoopFlinkConfig.createDefaultConfig(getExecutionEnvironment());

        // read tlf graph
        TLFDataSource<GraphHeadPojo, VertexPojo, EdgePojo> tlfSource = new TLFDataSource<>(inputPath,
                gradoopConfig);

        // create input dataset
        DataSet<TLFGraph> graphs = tlfSource.getTLFGraphs();

        // set encoder
        GSpanEncoder encoder = new GSpanTLFGraphEncoder<>();

        // set miner; every value other than "it" selects filter refinement
        GSpanMiner miner = IMPLEMENTATION_ITERATIVE.equals(implementation)
                ? new GSpanBulkIteration()
                : new GSpanFilterRefine();

        miner.setExecutionEnvironment(getExecutionEnvironment());

        // create mining config from threshold and directed flag
        FSMConfig fsmConfig = new FSMConfig(threshold, directed);

        // encode
        DataSet<GSpanGraph> gsGraph = encoder.encode(graphs, fsmConfig);

        // mine
        DataSet<WithCount<CompressedDFSCode>> frequentSubgraphs = miner.mine(gsGraph, encoder.getMinFrequency(),
                fsmConfig);

        // count() is evaluated before writeCSV is entered, so the job result read
        // there is available at that point
        long subgraphCount = frequentSubgraphs.count();

        // write statistics
        writeCSV(inputPath, directed, implementation, threshold, logPath, subgraphCount);
    }

    /**
     * Checks if the minimum of arguments is provided and, if an implementation
     * is given, that it is one of the supported values.
     *
     * @param cmd command line
     * @throws IllegalArgumentException if a mandatory option is missing or the
     *                                  implementation value is not supported
     */
    private static void performSanityCheck(final CommandLine cmd) {
        if (!cmd.hasOption(OPTION_INPUT_PATH)) {
            throw new IllegalArgumentException("Input path must not be empty.");
        }
        if (!cmd.hasOption(OPTION_LOG_PATH)) {
            throw new IllegalArgumentException("Log file path must not be empty.");
        }
        if (!cmd.hasOption(OPTION_THRESHOLD)) {
            throw new IllegalArgumentException("Minimum support must be specified.");
        }
        if (cmd.hasOption(OPTION_IMPLEMENTATION)
                && !VALUES_IMPLEMENTATION.contains(cmd.getOptionValue(OPTION_IMPLEMENTATION))) {
            throw new IllegalArgumentException(
                    "Invalid Implementation specified. Must be one of " + VALUES_IMPLEMENTATION);
        }
    }

    /**
     * Method to create and add lines to a csv-file.
     *
     * <p>If the log file already exists, only the statistics line is appended;
     * otherwise the file is created with a header line followed by the
     * statistics line. Fields are separated by '|' and the file is written
     * as UTF-8 in both cases.</p>
     *
     * @param inputPath input file path
     * @param directed true, if directed graph
     * @param implementation gSpan implementation
     * @param threshold minimum support
     * @param logPath log path
     * @param subgraphCount subgraph count
     * @throws IOException if the log file cannot be written
     */
    private static void writeCSV(String inputPath, boolean directed, String implementation, float threshold,
            String logPath, long subgraphCount) throws IOException {

        String head = String.format("%s|%s|%s|%s|%s|%s|%s%n", "Parallelism", "Implementation", "Dataset",
                "Directed", "Threshold", "Subgraphs", "Runtime");

        // use the last path segment as dataset name; a path without any '/' is
        // used as is, because substringAfterLast would yield an empty string
        String dataset = inputPath.contains("/") ? StringUtils.substringAfterLast(inputPath, "/") : inputPath;

        String tail = String.format("%s|%s|%s|%s|%s|%s|%s%n", getExecutionEnvironment().getParallelism(),
                implementation, dataset, directed, threshold, subgraphCount,
                getExecutionEnvironment().getLastJobExecutionResult().getNetRuntime(TimeUnit.SECONDS));

        File f = new File(logPath);
        if (f.exists() && !f.isDirectory()) {
            // append with the same encoding that is used when the file is created
            FileUtils.writeStringToFile(f, tail, LOG_ENCODING, true);
        } else {
            // try-with-resources closes the writer even if writing fails
            try (PrintWriter writer = new PrintWriter(logPath, LOG_ENCODING)) {
                writer.print(head);
                writer.print(tail);
                // PrintWriter suppresses I/O errors; checkError() flushes and reports them
                if (writer.checkError()) {
                    throw new IOException("Failed to write log file " + logPath);
                }
            }
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public String getDescription() {
        return TransactionalFSMBenchmark.class.getName();
    }
}