com.ironiacorp.statistics.r.AbstractRClient.java Source code

Java tutorial

Introduction

Here is the source code for com.ironiacorp.statistics.r.AbstractRClient.java

Source

/*
 * The baseCode project
 * 
 * Copyright (c) 2008 University of British Columbia
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package com.ironiacorp.statistics.r;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.commons.collections.Transformer;
import org.apache.commons.collections.functors.StringValueTransformer;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.commons.lang.StringUtils;

import org.rosuda.REngine.REXP;
import org.rosuda.REngine.REXPDouble;
import org.rosuda.REngine.REXPFactor;
import org.rosuda.REngine.REXPGenericVector;
import org.rosuda.REngine.REXPInteger;
import org.rosuda.REngine.REXPList;
import org.rosuda.REngine.REXPLogical;
import org.rosuda.REngine.REXPMismatchException;
import org.rosuda.REngine.REXPString;
import org.rosuda.REngine.RList;

import com.ironiacorp.statistics.r.type.DoubleMatrix;
import com.ironiacorp.statistics.r.type.HTest;
import com.ironiacorp.statistics.r.type.LinearModelSummary;
import com.ironiacorp.statistics.r.type.ObjectMatrix;
import com.ironiacorp.statistics.r.type.ObjectMatrixImpl;
import com.ironiacorp.statistics.r.type.OneWayAnovaResult;
import com.ironiacorp.statistics.r.type.TwoWayAnovaResult;

/**
 * Base class for RClients
 * 
 * @author Paul
 * @version $Id: AbstractRClient.java,v 1.2 2010/05/01 02:45:59 paul Exp $
 */
public abstract class AbstractRClient implements RClient {
    /**
     * Builds a fragment used by this class to name temporary R variables: the absolute value of the object's hash
     * code plus one, followed by six random alphabetic characters.
     * <p>
     * The addition is carried out in {@code long} arithmetic. In {@code int} arithmetic a hash code of
     * {@code Integer.MAX_VALUE} wraps around to {@code Integer.MIN_VALUE}, and {@code Math.abs} of that value is
     * still negative, so the fragment would start with a minus sign.
     * 
     * @param ob object whose hash code seeds the fragment; must not be null
     * @return non-negative decimal digits followed by six random letters
     */
    public static String variableIdentityNumber(Object ob) {
        long shiftedHash = (long) ob.hashCode() + 1L;
        return Long.toString(Math.abs(shiftedHash)) + RandomStringUtils.randomAlphabetic(6);
    }

    /**
     * Flattens a two-dimensional array into a newly allocated one-dimensional array in row-major order, i.e. the
     * values of each row are stored consecutively. (RServe has no interface for passing a 2-d array.)
     * <p>
     * The column count is taken from the first row; every row is read up to that many entries.
     * 
     * @param matrix rows of values; must contain at least one row
     * @return the values of {@code matrix}, row after row
     */
    private static double[] unrollMatrix(double[][] matrix) {
        final int rowCount = matrix.length;
        final int colCount = matrix[0].length;
        final double[] flat = new double[rowCount * colCount];

        for (int r = 0; r < rowCount; r++) {
            final int rowStart = r * colCount;
            for (int c = 0; c < colCount; c++) {
                flat[rowStart + c] = matrix[r][c];
            }
        }
        return flat;
    }

    /**
     * Flattens a {@link DoubleMatrix} into a newly allocated one-dimensional array in row-major order, i.e. the
     * values of each row are stored consecutively. (RServe has no interface for passing a 2-d array.)
     * 
     * @param matrix the matrix to flatten
     * @return array representation of the matrix, {@code rows() * columns()} entries long
     */
    private static double[] unrollMatrix(DoubleMatrix<?, ?> matrix) {
        // This copies the data, and R will probably copy it once more. Direct access to the matrix's raw element
        // array would avoid the first copy.
        final int rowCount = matrix.rows();
        final int colCount = matrix.columns();
        final double[] flat = new double[rowCount * colCount];

        for (int r = 0; r < rowCount; r++) {
            final int rowStart = r * colCount;
            for (int c = 0; c < colCount; c++) {
                flat[rowStart + c] = matrix.get(r, c);
            }
        }
        return flat;
    }

    /**
     * Assigns the strings as an R factor under a generated variable name starting with {@code "f."}.
     * 
     * @param strings the factor values
     * @return the name of the R variable holding the factor
     */
    @Override
    public String assignFactor(List<String> strings) {
        return assignFactor("f." + variableIdentityNumber(strings), strings);
    }

    /**
     * Assigns the values as a string vector in R and wraps that vector in {@code factor(...)} under the given name.
     * 
     * @param factorName name of the R variable to create
     * @param list the factor values
     * @return {@code factorName}
     */
    @Override
    public String assignFactor(String factorName, List<String> list) {
        String valuesVar = assignStringList(list);
        String rCommand = factorName + "<-factor(" + valuesVar + ")";
        this.voidEval(rCommand);
        return factorName;
    }

    /**
     * Copies a two-dimensional array into R as a matrix under a generated variable name.
     * <p>
     * The values are first assigned as a flat temporary vector, reshaped with {@code matrix(..., byrow=TRUE)}, and
     * the temporary vector is then removed.
     * 
     * @param matrix rows of values; must have at least one row and one column
     * @return the name of the R variable holding the matrix
     * @throws IllegalArgumentException if the matrix has no rows or its first row has no columns
     */
    @Override
    public String assignMatrix(double[][] matrix) {
        // Emptiness must be checked before matrix[0] is touched; otherwise an empty matrix fails with
        // ArrayIndexOutOfBoundsException instead of the intended IllegalArgumentException.
        if (matrix.length == 0 || matrix[0].length == 0) {
            throw new IllegalArgumentException("Empty matrix?");
        }
        int rows = matrix.length;
        int cols = matrix[0].length;

        String matrixVarName = "Matrix_" + variableIdentityNumber(matrix);
        double[] unrolledMatrix = unrollMatrix(matrix);
        assign("U" + matrixVarName, unrolledMatrix); // temporary
        voidEval(matrixVarName + "<-matrix(" + "U" + matrixVarName + ", nrow=" + rows + " , ncol=" + cols
                + ", byrow=TRUE)");
        voidEval("rm(U" + matrixVarName + ")"); // maybe this saves memory...

        return matrixVarName;
    }

    /**
     * Copies the matrix into R, using {@link StringValueTransformer} to turn row names into strings.
     * 
     * @param matrix the matrix to copy
     * @return the name of the R variable holding the matrix
     */
    @Override
    public String assignMatrix(DoubleMatrix<?, ?> matrix) {
        Transformer toStringNamer = StringValueTransformer.getInstance();
        return assignMatrix(matrix, toStringNamer);
    }

    /**
     * Copies the matrix into R under a generated variable name and, when the matrix has both row and column names,
     * sets them as the R matrix's dimnames.
     * 
     * @param matrix the matrix to copy; must have at least one row and one column
     * @param rowNameExtractor converts each row name before it is sent to R
     * @return the name of the R variable holding the matrix
     * @throws IllegalArgumentException if the matrix has no rows or no columns
     */
    @Override
    public String assignMatrix(DoubleMatrix<?, ?> matrix, Transformer rowNameExtractor) {
        final String matrixVarName = "Matrix_" + variableIdentityNumber(matrix);
        final int rowCount = matrix.rows();
        final int colCount = matrix.columns();
        if (rowCount == 0 || colCount == 0) {
            throw new IllegalArgumentException("Empty matrix?");
        }

        final double[] flat = unrollMatrix(matrix);
        assert (flat != null);

        // Send the data as a flat temporary vector, reshape it in R, then drop the temporary.
        final String tempVarName = "U" + matrixVarName;
        this.assign(tempVarName, flat);
        String reshape = matrixVarName + "<-matrix(" + tempVarName + ", nrow=" + rowCount + ", ncol=" + colCount
                + ", byrow=TRUE)";
        this.voidEval(reshape);
        this.voidEval("rm(" + tempVarName + ")"); // maybe this saves memory...

        boolean fullyNamed = matrix.hasColNames() && matrix.hasRowNames();
        if (fullyNamed) {
            assignRowAndColumnNames(matrix, matrixVarName, rowNameExtractor);
        }
        return matrixVarName;
    }

    /**
     * Assigns the string form of each list element to an R variable with a generated name starting with
     * {@code "stringList."}.
     * <p>
     * Null elements are passed on as null rather than raising a {@code NullPointerException}; {@code dataFrame} in
     * this class stores null for missing values in the columns it hands to this method.
     * NOTE(review): this assumes the concrete {@code assign(String, String[])} maps null strings to R's NA -
     * confirm for each client implementation.
     * 
     * @param strings the values; each non-null element is converted with {@code toString()}
     * @return the name of the R variable holding the values
     */
    @Override
    public String assignStringList(List<?> strings) {
        String variableName = "stringList." + variableIdentityNumber(strings);

        String[] sa = new String[strings.size()];
        int i = 0;
        for (Object element : strings) {
            sa[i++] = (element == null) ? null : element.toString();
        }

        assign(variableName, sa);
        return variableName;
    }

    /**
     * Assigns {@code arg} to the R variable {@code argName}, evaluates {@code command} and interprets the result as
     * a single logical value.
     * <p>
     * The first element of the evaluated logical vector is read directly. The earlier implementation built a new
     * {@code REXPLogical} from {@code new boolean[1]}, whose only value is always false, so it could never report
     * true.
     * 
     * @param command the R command to evaluate
     * @param argName name of the R variable that receives {@code arg}
     * @param arg values to assign before evaluating
     * @return true only if the command returned a logical vector whose first element is TRUE; false for FALSE, NA,
     *         an empty vector, a non-logical result or a null result
     */
    @Override
    public boolean booleanDoubleArrayEval(String command, String argName, double[] arg) {
        this.assign(argName, arg);
        REXP x = this.eval(command);
        if (x == null || !x.isLogical()) {
            return false;
        }
        try {
            // REngine logical vectors convert to integers as TRUE -> 1, FALSE -> 0, NA -> the integer NA value.
            int[] flags = x.asIntegers();
            return flags.length > 0 && flags[0] == 1;
        } catch (REXPMismatchException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Evaluates a command that returns an R data frame and copies it into an {@link ObjectMatrix}. Numeric columns
     * are stored as doubles, all other columns as strings; row and column names are taken from the data frame's
     * {@code row.names} and {@code names} attributes.
     * <p>
     * The result counts as a data frame when {@code "data.frame"} appears anywhere in its class attribute, so
     * objects with additional classes are accepted too. The class is checked before the result is read as a list,
     * so a null result, or one without a class attribute, is reported as not being a data frame.
     * 
     * @param command the R command to evaluate
     * @return the data frame contents
     * @throws IllegalArgumentException if the command did not return a data frame
     * @throws RuntimeException wrapping a {@code REXPMismatchException} if the result cannot be converted
     */
    public ObjectMatrix<String, String, Object> dataFrameEval(String command) {

        REXP df = eval(command);

        try {

            if (df == null || !df.hasAttribute("class")
                    || !Arrays.asList(df.getAttribute("class").asStrings()).contains("data.frame")) {
                throw new IllegalArgumentException("Command did not return a dataframe");
            }

            RList dfl = df.asList();

            String[] rowNames = df.getAttribute("row.names").asStrings();
            String[] colNames = df.getAttribute("names").asStrings();

            assert dfl.size() == colNames.length;

            ObjectMatrix<String, String, Object> result = new ObjectMatrixImpl<String, String, Object>(
                    rowNames.length, colNames.length);

            result.setRowNames(Arrays.asList(rowNames));
            result.setColumnNames(Arrays.asList(colNames));

            // Each list element is one column of the data frame.
            for (int i = 0; i < dfl.size(); i++) {
                REXP column = (REXP) dfl.get(i);

                if (column.isNumeric()) {
                    double[] asDoubles = column.asDoubles();
                    for (int j = 0; j < rowNames.length; j++) {
                        result.set(j, i, asDoubles[j]);
                    }
                } else {
                    String[] asStrings = column.asStrings();
                    for (int j = 0; j < rowNames.length; j++) {
                        result.set(j, i, asStrings[j]);
                    }
                }
            }

            return result;
        } catch (REXPMismatchException e) {
            throw new RuntimeException(e);
        }

    }

    /**
     * Builds an R data frame from the matrix and returns the name of the R variable holding it.
     * <p>
     * Each column is assigned to an R variable named after the column. The value in the first row decides how the
     * whole column is sent: numbers as a numeric vector, booleans (as "T"/"F") and strings as a factor. Entries
     * reported as missing by the matrix are stored as null; in numeric columns they are sent as {@code NaN}, in
     * factor columns the null is passed on to {@code assignFactor} unchanged.
     * <p>
     * Sending {@code NaN} replaces a plain {@code ArrayUtils.toPrimitive} call, which raised a
     * {@code NullPointerException} as soon as a numeric column had one missing entry.
     * 
     * @param matrix the data; row and column names become the data frame's row names and names
     * @return the name of the R variable holding the data frame
     * @throws IllegalArgumentException if the first entry of a column is null or not a Number, Boolean or String
     */
    @Override
    public String dataFrame(ObjectMatrix<String, String, Object> matrix) {

        List<String> colNames = matrix.getColNames();
        List<String> rowNames = matrix.getRowNames();

        assert colNames.size() == matrix.columns();

        String colV = assignStringList(colNames);
        String rowV = assignStringList(rowNames);

        List<String> terms = new ArrayList<String>();
        for (int j = 0; j < colNames.size(); j++) {
            String colName = colNames.get(j);

            // The first row decides the representation of the whole column.
            Object v = matrix.getEntry(0, j);
            boolean numeric = v instanceof Number;

            Object[] column;
            if (numeric) {
                column = new Double[matrix.rows()];
            } else if (v instanceof Boolean || v instanceof String) {
                column = new String[matrix.rows()];
            } else {
                // Guard against a null first entry so the caller gets this message instead of an NPE.
                throw new IllegalArgumentException("Sorry, can't make data frame from values of type "
                        + (v == null ? "null" : v.getClass().getName()));
            }

            for (int i = 0; i < matrix.rows(); i++) {

                Object value = matrix.getEntry(i, j);

                if (matrix.isMissing(i, j)) {
                    column[i] = null;
                } else if (value instanceof Number) {
                    column[i] = ((Number) value).doubleValue();
                } else if (value instanceof Boolean) {
                    column[i] = ((Boolean) value) ? "T" : "F";
                } else if (value instanceof String) {
                    column[i] = value;
                }
            }

            if (numeric) {
                // Null (missing) entries become NaN, which R's is.na() also reports as missing.
                assign(colName, ArrayUtils.toPrimitive((Double[]) column, Double.NaN));
            } else {
                assignFactor(colName, Arrays.asList((String[]) column));
            }
            terms.add(colName);

        }

        String varName = "df." + variableIdentityNumber(matrix);

        String command = varName + "<-data.frame(" + StringUtils.join(terms, ",") + ", row.names=" + rowV + " )";
        eval(command);
        eval("names(" + varName + ")<-" + colV);

        return varName;

    }

    /**
     * Assigns {@code arg} to the R variable {@code argName}, evaluates {@code command}, reads the result as a list
     * and returns the element stored under the key {@code argName} as doubles.
     * 
     * @param command the R command to evaluate
     * @param argName name of the assigned R variable, also used as the key into the result list
     * @param arg values to assign before evaluating
     * @return the selected list element as doubles
     * @throws RuntimeException wrapping a {@code REXPMismatchException} if a conversion fails
     */
    @Override
    public double[] doubleArrayDoubleArrayEval(String command, String argName, double[] arg) {
        this.assign(argName, arg);
        REXP evaluated = this.eval(command);
        try {
            RList components = evaluated.asList();
            REXP selected = components.at(argName);
            return selected.asDoubles();
        } catch (REXPMismatchException mismatch) {
            throw new RuntimeException(mismatch);
        }
    }

    /**
     * Evaluates a command and returns its numeric result as doubles.
     * 
     * @param command the R command to evaluate
     * @return the result as doubles, or null if the evaluation returned null
     * @throws RuntimeException if the result is not numeric or cannot be converted
     */
    @Override
    public double[] doubleArrayEval(String command) {
        REXP outcome = this.eval(command);
        if (outcome == null) {
            return null;
        }

        if (outcome.isNumeric()) {
            try {
                return outcome.asDoubles();
            } catch (REXPMismatchException mismatch) {
                throw new RuntimeException(mismatch);
            }
        }

        throw new RuntimeException("Command did not return numbers: " + command + ", result was: " + outcome);
    }

    /**
     * Assigns two double arrays to R variables, evaluates {@code command} and returns the result as doubles.
     * 
     * @param command the R command to evaluate
     * @param argName name of the R variable that receives {@code arg}
     * @param arg first array to assign
     * @param argName2 name of the R variable that receives {@code arg2}
     * @param arg2 second array to assign
     * @return the result as doubles
     * @throws RuntimeException wrapping a {@code REXPMismatchException} if the conversion fails
     */
    @Override
    public double[] doubleArrayTwoDoubleArrayEval(String command, String argName, double[] arg, String argName2,
            double[] arg2) {
        this.assign(argName, arg);
        this.assign(argName2, arg2);
        REXP evaluated = this.eval(command);
        try {
            return evaluated.asDoubles();
        } catch (REXPMismatchException mismatch) {
            throw new RuntimeException(mismatch);
        }
    }

    /**
     * Assigns two double arrays to R variables, evaluates {@code command} and returns the result as a single
     * double.
     * 
     * @param command the R command to evaluate
     * @param argName name of the R variable that receives {@code arg}
     * @param arg first array to assign
     * @param argName2 name of the R variable that receives {@code arg2}
     * @param arg2 second array to assign
     * @return the result as a double
     * @throws RuntimeException wrapping a {@code REXPMismatchException} if the conversion fails
     */
    @Override
    public double doubleTwoDoubleArrayEval(String command, String argName, double[] arg, String argName2,
            double[] arg2) {
        this.assign(argName, arg);
        this.assign(argName2, arg2);
        REXP evaluated = this.eval(command);
        double value;
        try {
            value = evaluated.asDouble();
        } catch (REXPMismatchException mismatch) {
            throw new RuntimeException(mismatch);
        }
        return value;
    }

    /**
     * Evaluates a command and returns its result as integers.
     * 
     * @param command the R command to evaluate
     * @return the result as integers
     * @throws RuntimeException wrapping a {@code REXPMismatchException} if the conversion fails
     */
    @Override
    public int[] intArrayEval(String command) {
        REXP evaluated = eval(command);
        try {
            return evaluated.asIntegers();
        } catch (REXPMismatchException mismatch) {
            throw new RuntimeException(mismatch);
        }
    }

    /**
     * Fits {@code lm(data ~ col1 + col2 + ..., na.action=na.exclude)} in R, where the predictors are the columns
     * of {@code d} sent as a data frame, and wraps {@code summary()} and {@code anova()} of the fit.
     * 
     * @param data the response values
     * @param d the design; its column names are the model terms
     * @return the summary of the fitted model
     */
    @Override
    public LinearModelSummary linearModel(double[] data, ObjectMatrix<String, String, Object> d) {

        final String responseVar = RandomStringUtils.randomAlphabetic(10);
        assign(responseVar, data);

        final String designVar = dataFrame(d);
        final String terms = StringUtils.join(d.getColNames(), "+");

        final String modelVar = RandomStringUtils.randomAlphabetic(10);
        voidEval(modelVar + "<-lm(" + responseVar + " ~ " + terms + ", data=" + designVar
                + ", na.action=na.exclude)");

        final REXP summary = eval("summary(" + modelVar + ")");
        final REXP anovaTable = eval("anova(" + modelVar + ")");

        String[] termNames = d.getColNames().toArray(new String[0]);
        return new LinearModelSummary(summary, anovaTable, termNames);

    }

    /**
     * Fits {@code lm(data ~ f1 + f2 + ..., na.action=na.exclude)} in R, where each map entry is assigned to an R
     * variable named after its key, and wraps {@code summary()} and {@code anova()} of the fit.
     * <p>
     * A list whose first element is a String is assigned as a factor. Any other list is treated as a numeric
     * covariate: every element must be a {@link Number} (not only {@code Double}, which previously caused a
     * {@code ClassCastException} for e.g. Integer values), and null elements are sent as {@code NaN}.
     * 
     * @param data the response values
     * @param factors model terms keyed by name; each list must be non-empty
     * @return the summary of the fitted model
     * @throws IllegalArgumentException if a term has a null or empty value list
     * @throws ClassCastException if a numeric covariate contains a non-null element that is not a Number
     */
    @Override
    @SuppressWarnings("unchecked")
    public LinearModelSummary linearModel(double[] data, Map<String, List<?>> factors) {

        String datName = RandomStringUtils.randomAlphabetic(10);
        assign(datName, data);

        for (Map.Entry<String, List<?>> entry : factors.entrySet()) {
            String factorName = entry.getKey();
            List<?> list = entry.getValue();
            if (list == null || list.isEmpty()) {
                throw new IllegalArgumentException("No values for model term " + factorName);
            }

            if (list.get(0) instanceof String) {
                assignFactor(factorName, (List<String>) list);
            } else {
                // treat it as a numeric covariate
                double[] covariate = new double[list.size()];
                int k = 0;
                for (Object object : list) {
                    covariate[k++] = (object == null) ? Double.NaN : ((Number) object).doubleValue();
                }
                assign(factorName, covariate);
            }
        }

        String modelDeclaration = datName + " ~ " + StringUtils.join(factors.keySet(), "+");

        String lmName = RandomStringUtils.randomAlphabetic(10);
        String command = lmName + "<-lm(" + modelDeclaration + ", na.action=na.exclude)";
        voidEval(command);

        REXP lmsum = eval("summary(" + lmName + ")");
        REXP anova = eval("anova(" + lmName + ")");

        return new LinearModelSummary(lmsum, anova, factors.keySet().toArray(new String[0]));

    }

    /**
     * Loads the {@code linearModels.R} script, applies its {@code rowlm} function with the given formula to the
     * named data matrix, and collects {@code summary()} and {@code anova()} of every resulting model.
     * <p>
     * An element whose summary is not a list of class {@code summary.lm} (for example because {@code try} caught an
     * error) is represented by an empty {@link LinearModelSummary}. Elements without a class attribute are handled
     * the same way instead of causing a {@code NullPointerException}, and a null anova result yields null just
     * like a null summaries result.
     * 
     * @param dataMatrixVarName name of the R variable holding the data matrix
     * @param modelFormula the model formula passed to {@code rowlm}
     * @param factorNames names of the model terms, passed to each {@link LinearModelSummary}
     * @return summaries keyed by the names of the result list, or null if R returned no usable result
     * @throws RuntimeException wrapping a {@code REXPMismatchException} if a conversion fails
     */
    @Override
    public Map<String, LinearModelSummary> rowApplyLinearModel(String dataMatrixVarName, String modelFormula,
            String[] factorNames) {

        String lmres = "lmlist." + RandomStringUtils.randomAlphanumeric(10);

        loadScript(this.getClass().getResourceAsStream("/com/ironiacorp/statistics/r/linearModels.R"));
        String command = lmres + "<-rowlm(" + modelFormula + ", data.frame(" + dataMatrixVarName + ") )";
        this.voidEval(command);

        REXP rawLmSummaries = this.eval("lapply(" + lmres + ", function(x){ try(summary(x), silent=T)})");
        if (rawLmSummaries == null) {
            return null;
        }

        REXP rawAnova = this.eval("lapply(" + lmres + ", function(x){ try(anova(x), silent=T)})");
        if (rawAnova == null) {
            return null;
        }

        Map<String, LinearModelSummary> result = new HashMap<String, LinearModelSummary>();
        try {
            RList rawLmList = rawLmSummaries.asList();
            if (rawLmList == null) {
                return null;
            }

            RList rawAnovaList = rawAnova.asList();
            if (rawAnovaList == null) {
                return null;
            }

            for (int i = 0; i < rawLmList.size(); i++) {
                REXP lmSummary = rawLmList.at(i);
                REXP anova = rawAnovaList.at(i);
                String elementIdentifier = rawLmList.keyAt(i);

                boolean isLmSummary = lmSummary.isList() && lmSummary.hasAttribute("class")
                        && "summary.lm".equals(lmSummary.getAttribute("class").asString());
                if (isLmSummary) {
                    result.put(elementIdentifier, new LinearModelSummary(lmSummary, anova, factorNames));
                } else {
                    result.put(elementIdentifier, new LinearModelSummary());
                }
            }
        } catch (REXPMismatchException e) {
            throw new RuntimeException(e);
        }

        return result;
    }

    /**
     * Reads an R script from the stream and evaluates it as a single command. Blank lines and lines starting with
     * {@code #} are dropped; every other line is trimmed.
     * <p>
     * There is a pretty annoying limitation of this. The file must contain only one statement. You can get around
     * this by using c(x<-1,x<-2). See testScript.R
     * <p>
     * The stream is always closed, including when reading fails, and the script is evaluated only after it has been
     * read completely.
     * 
     * @param is the script source; closed by this method
     * @throws IllegalArgumentException if {@code is} is null, e.g. because a classpath resource was not found
     * @throws RuntimeException wrapping an {@code IOException} if the script cannot be read
     */
    protected void loadScript(InputStream is) {
        if (is == null) {
            throw new IllegalArgumentException("Script input stream is null (missing resource?)");
        }

        StringBuilder buf = new StringBuilder();
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(is));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    if (line.startsWith("#") || StringUtils.isBlank(line)) {
                        continue;
                    }
                    buf.append(StringUtils.trim(line)).append('\n');
                }
            } finally {
                // Closing the reader also closes the underlying stream.
                reader.close();
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }

        this.voidEval(buf.toString());
    }

    /**
     * Evaluates a command that returns some kind of R vector and converts it to a Java list.
     * <p>
     * FIXME only partly implemented, possibly not going to stay.
     * <ul>
     * <li>Integer and double vectors are read as a double matrix; each row becomes one {@code double[]} entry.</li>
     * <li>Generic vectors yield one {@link HTest} per element: a populated one for elements of class
     * {@code htest}, an empty one for anything else, including elements without a class attribute and
     * {@code htest} elements that fail to convert.</li>
     * <li>Non-language pair lists yield their values as they are.</li>
     * </ul>
     * todo: support lm objects, anova objects others? pair.htest?
     * 
     * @param listEntryType currently unused
     * @param command the R command to evaluate
     * @return the converted elements
     * @throws IllegalArgumentException if the command returned null or something that is not a vector
     * @throws UnsupportedOperationException for {@code lm} elements, language objects and other vector types
     * @throws RuntimeException wrapping a {@code REXPMismatchException} if a conversion fails
     */
    @SuppressWarnings("unchecked")
    public List<?> listEval(Class<?> listEntryType, String command) {

        REXP rexp = this.eval(command);

        List<Object> result = new ArrayList<Object>();
        try {
            if (rexp == null || !rexp.isVector()) {
                throw new IllegalArgumentException("Command did not return some kind of vector");
            }

            if (rexp instanceof REXPInteger || rexp instanceof REXPDouble) {
                // Factors are integer vectors as well; they get no special treatment yet.
                for (double[] ds : rexp.asDoubleMatrix()) {
                    result.add(ds);
                }
            } else if (rexp instanceof REXPGenericVector) {
                for (Object t : rexp.asList().values()) {
                    REXP element = (REXP) t;
                    // Elements without a class attribute fall through to the "unknown" case below.
                    String clazz = element.hasAttribute("class") ? element.getAttribute("class").asString() : null;

                    if ("htest".equals(clazz)) {
                        try {
                            result.add(new HTest(element.asList()));
                        } catch (REXPMismatchException e) {
                            result.add(new HTest());
                        }
                    } else if ("lm".equals(clazz)) {
                        throw new UnsupportedOperationException();
                    } else {
                        result.add(new HTest()); // e.g. failed result or something we don't know about yet
                    }
                }
            } else if (rexp instanceof REXPList) {
                if (rexp.isLanguage()) {
                    throw new UnsupportedOperationException(
                            "Don't know how to deal with vector type of " + rexp.getClass().getName());
                }
                result = new ArrayList<Object>(rexp.asList().values());
            } else {
                throw new UnsupportedOperationException(
                        "Don't know how to deal with vector type of " + rexp.getClass().getName());
            }
            return result;
        } catch (REXPMismatchException e) {
            throw new RuntimeException(e);
        }

    }

    /**
     * Loads an R package with {@code library(...)} if it is listed by {@code installed.packages()}.
     * 
     * @param libraryName name of the package
     * @return true if the package is installed and loading it raised no exception, false otherwise
     */
    @Override
    public boolean loadLibrary(String libraryName) {
        List<String> installed = stringListEval("installed.packages()[,1]");
        if (installed.contains(libraryName)) {
            try {
                voidEval("library(" + libraryName + ")");
                return true;
            } catch (Exception e) {
                // Any failure while loading is reported as "not loaded".
                return false;
            }
        }
        return false;
    }

    /**
     * Runs {@code anova(aov(foo ~ factor))} in R, where {@code foo} is the R variable the data is assigned to.
     * 
     * @param data the response values
     * @param factor the factor values
     * @return the result wrapping the evaluated anova table
     */
    @Override
    public OneWayAnovaResult oneWayAnova(double[] data, List<String> factor) {
        final String factorVar = assignFactor(factor);
        assign("foo", data);

        final String formula = "foo  ~ " + factorVar;
        final REXP anovaTable = eval("anova(aov(" + formula + "))");

        return new OneWayAnovaResult(anovaTable);
    }

    /**
     * Evaluates a command that returns a list of anova tables and wraps each element in a
     * {@link OneWayAnovaResult}, keyed by the element's name in the list. Elements that are not lists or have no
     * {@code row.names} attribute are represented by an empty result.
     * 
     * @param command the R command to evaluate
     * @return results keyed by element name, or null if the evaluation or its list form is null
     * @throws RuntimeException wrapping a {@code REXPMismatchException} if the result is not a list
     */
    @Override
    public Map<String, OneWayAnovaResult> oneWayAnovaEval(String command) {
        final REXP evaluated = this.eval(command);
        if (evaluated == null) {
            return null;
        }

        final Map<String, OneWayAnovaResult> byName = new HashMap<String, OneWayAnovaResult>();
        try {
            final RList tables = evaluated.asList();
            if (tables == null) {
                return null;
            }

            final int count = tables.size();
            for (int idx = 0; idx < count; idx++) {
                REXP table = tables.at(idx);
                String key = tables.keyAt(idx);
                boolean usable = table.isList() && table.hasAttribute("row.names");
                if (usable) {
                    byName.put(key, new OneWayAnovaResult(table));
                } else {
                    byName.put(key, new OneWayAnovaResult());
                }
            }
        } catch (REXPMismatchException mismatch) {
            throw new RuntimeException(mismatch);
        }

        return byName;
    }

    /**
     * Removes a variable from R by evaluating {@code rm(variableName)}.
     * 
     * @param variableName name of the R variable to remove
     */
    @Override
    public void remove(String variableName) {
        String rmCommand = "rm(" + variableName + ")";
        this.voidEval(rmCommand);
    }

    /**
     * Evaluates a command and returns its result as a string.
     * 
     * @param command the R command to evaluate
     * @return the result as a string
     * @throws RuntimeException wrapping a {@code REXPMismatchException} if the conversion fails
     */
    @Override
    public String stringEval(String command) {
        REXP evaluated = this.eval(command);
        try {
            return evaluated.asString();
        } catch (REXPMismatchException mismatch) {
            throw new RuntimeException(mismatch);
        }
    }

    /**
     * Evaluates a command and returns its result as a list of strings. A string vector is copied element by
     * element; any other result is read as a list whose elements are each expected to be an {@code REXPString}.
     * 
     * @param command the R command to evaluate
     * @return the strings, in result order
     * @throws RuntimeException wrapping a {@code REXPMismatchException} if the result cannot be converted
     */
    @Override
    public List<String> stringListEval(String command) {
        REXP evaluated = this.eval(command);
        List<String> collected = new ArrayList<String>();
        try {
            if (evaluated instanceof REXPString) {
                String[] values = ((REXPString) evaluated).asStrings();
                collected.addAll(Arrays.asList(values));
            } else {
                for (Object element : evaluated.asList()) {
                    collected.add(((REXPString) element).asString());
                }
            }
        } catch (REXPMismatchException mismatch) {
            throw new RuntimeException(mismatch);
        }
        return collected;
    }

    /**
     * Runs a two-way anova in R: {@code anova(aov(foo ~ A*B))} with interaction or {@code anova(aov(foo ~ A+B))}
     * without, where {@code foo} is the R variable the data is assigned to.
     * 
     * @param data the response values
     * @param factor1 values of the first factor
     * @param factor2 values of the second factor
     * @param includeInteraction whether the model includes the interaction term
     * @return the result wrapping the evaluated anova table
     */
    @Override
    public TwoWayAnovaResult twoWayAnova(double[] data, List<String> factor1, List<String> factor2,
            boolean includeInteraction) {

        final String firstVar = assignFactor(factor1);
        final String secondVar = assignFactor(factor2);
        assign("foo", data);

        // "*" adds the interaction term, "+" keeps the model additive.
        final String joiner = includeInteraction ? "*" : "+";
        final String formula = "foo  ~ " + firstVar + joiner + secondVar;

        final REXP anovaTable = eval("anova(aov(" + formula + "))");
        return new TwoWayAnovaResult(anovaTable);
    }

    /**
     * Evaluates a command that returns a list of anova tables and wraps each element in a
     * {@link TwoWayAnovaResult}, keyed by the element's name in the list. Elements that are not lists or have no
     * {@code row.names} attribute are represented by an empty result.
     * 
     * @param command the R command to evaluate
     * @param withInteractions not used by this implementation
     * @return results keyed by element name, or null if the evaluation or its list form is null
     * @throws RuntimeException wrapping a {@code REXPMismatchException} if the result is not a list
     */
    @Override
    public Map<String, TwoWayAnovaResult> twoWayAnovaEval(String command, boolean withInteractions) {
        final REXP evaluated = this.eval(command);
        if (evaluated == null) {
            return null;
        }

        final Map<String, TwoWayAnovaResult> byName = new HashMap<String, TwoWayAnovaResult>();
        try {
            final RList tables = evaluated.asList();
            if (tables == null) {
                return null;
            }

            final int count = tables.size();
            for (int idx = 0; idx < count; idx++) {
                REXP table = tables.at(idx);
                String key = tables.keyAt(idx);
                boolean usable = table.isList() && table.hasAttribute("row.names");
                if (usable) {
                    byName.put(key, new TwoWayAnovaResult(table));
                } else {
                    byName.put(key, new TwoWayAnovaResult());
                }
            }
        } catch (REXPMismatchException mismatch) {
            throw new RuntimeException(mismatch);
        }

        return byName;
    }

    /**
     * Sets the dimnames of an R matrix from the row and column names of the given matrix. Row names are passed
     * through {@code rowNameExtractor} first; column names are sent as they are.
     * 
     * @param matrix source of the row and column names
     * @param matrixVarName name of the R variable holding the matrix
     * @param rowNameExtractor e.g. you could use StringValueTransformer.getInstance()
     */
    private void assignRowAndColumnNames(DoubleMatrix<?, ?> matrix, String matrixVarName,
            Transformer rowNameExtractor) {

        List<Object> rowLabels = new ArrayList<Object>();
        for (Object rowName : matrix.getRowNames()) {
            Object label = rowNameExtractor.transform(rowName);
            rowLabels.add(label);
        }

        final String rowsVar = assignStringList(rowLabels);
        final String colsVar = assignStringList(matrix.getColNames());

        this.voidEval("dimnames(" + matrixVarName + ")<-list(" + rowsVar + ", " + colsVar + ")");
    }

    /**
     * Get the dimnames (row and column names), if any, associated with a matrix variable in R and assign them to
     * the given result matrix. Implemented by the concrete client.
     * 
     * @param variableName name of a matrix variable in R
     * @param resultObject the corresponding matrix that is being filled in
     */
    // TODO: Consider moving all common code to here
    protected abstract void retrieveRowAndColumnNames(String variableName,
            DoubleMatrix<String, String> resultObject);

    /**
     * Retrieve a matrix by variable name. Implemented by the concrete client.
     * NOTE(review): presumably {@code variableName} names a matrix variable in R and the result carries its
     * dimnames as row and column names - confirm against the implementations.
     * 
     * @param variableName name of the variable to retrieve
     * @return the matrix, with String row and column names
     */
    // TODO: Consider moving all common code to here
    public abstract DoubleMatrix<String, String> retrieveMatrix(String variableName);

}