edu.mit.ll.graphulo.pig.backend.AbstractGraphuloStorage.java Source code

Introduction

Here is the source code for edu.mit.ll.graphulo.pig.backend.AbstractGraphuloStorage.java
Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package edu.mit.ll.graphulo.pig.backend;

import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.concurrent.TimeUnit;

import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.BatchWriterConfig;
import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.accumulo.core.util.Pair;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.pig.LoadCaster;
import org.apache.pig.LoadFunc;
import org.apache.pig.LoadStoreCaster;
import org.apache.pig.ResourceSchema;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.StoreFuncInterface;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.accumulo.AbstractAccumuloStorage;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
import org.apache.pig.backend.hadoop.hbase.HBaseBinaryConverter;
import org.apache.pig.builtin.Utf8StorageConverter;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.util.ObjectSerializer;
import org.apache.pig.impl.util.UDFContext;
import org.joda.time.DateTime;

/**
 * A LoadStoreFunc for retrieving data from and storing data to Graphulo
 * 
 * A Key/Val pair will be returned as tuples: (key, colfam, colqual, colvis,
 * timestamp, value). All fields except timestamp are DataByteArray, timestamp
 * is a long.
 * 
 * Tuples can be written in 2 forms: (key, colfam, colqual, colvis, value) OR
 * (key, colfam, colqual, value)
 * 
 */
public abstract class AbstractGraphuloStorage extends AbstractAccumuloStorage implements StoreFuncInterface {
    private static final Log log = LogFactory.getLog(AbstractGraphuloStorage.class);

    protected static final char COLON = ':', COMMA = ',';
    protected static final String ASTERISK = "*";

    private static final String INPUT_PREFIX = AccumuloInputFormat.class.getSimpleName();
    private static final String OUTPUT_PREFIX = AccumuloOutputFormat.class.getSimpleName();

    private final static String STRING_CASTER = "UTF8StorageConverter";
    private final static String BYTE_CASTER = "GraphuloBinaryConverter";
    private final static String CASTER_PROPERTY = "pig.graphulo.caster";

    protected final GraphuloStorageOptions storageOptions;
    protected final CommandLine commandLine;

    private RecordReader<Key, Value> reader;
    private RecordWriter<Text, Mutation> writer;

    protected String inst;
    protected String zookeepers;
    protected String user;
    protected String password;
    protected String table;
    protected Text tableName;
    protected Authorizations authorizations;

    protected List<Column> columns;

    protected String start = null;
    protected String end = null;

    // Defaults from BatchWriterConfig
    protected int maxWriteThreads = 3;
    protected long maxMutationBufferSize = 50 * 1024 * 1024l;
    protected long maxLatency = Long.MAX_VALUE;

    protected String columnSeparator = ",";
    protected boolean ignoreWhitespace = true;

    protected LoadStoreCaster caster;
    protected ResourceSchema schema;
    protected String contextSignature = null;

    public AbstractGraphuloStorage(String columns, String args) throws ParseException, IOException {
        super(columns, args);
        storageOptions = new GraphuloStorageOptions();
        commandLine = storageOptions.getCommandLine(args);

        // Extract any command line args
        extractArgs(commandLine, storageOptions);

        // Split out the user provided columns
        parseColumns(columns);
    }

    /**
     * Initializes {@link #columnDefs} and splits columns on {@link #COMMA}
     * 
     * @param columns
     *            CSV of columns
     */
    private void parseColumns(String columnStr) {
        columns = new LinkedList<Column>();
        if (ignoreWhitespace) {
            columnStr = StringUtils.strip(columnStr);
        }

        if (!columnStr.isEmpty()) {
            for (String column : StringUtils.split(columnStr, columnSeparator)) {
                columns.add(new Column(ignoreWhitespace ? StringUtils.strip(column) : column));
            }
        } else {
            // Preserve original functionality for empty columns to fetch all
            // data in a map
            columns.add(new Column("*"));
        }
    }

    /**
     * Extract arguments passed into the constructor to avoid the URI
     * 
     * @param cli
     * @param opts
     */
    protected void extractArgs(CommandLine cli, GraphuloStorageOptions opts) throws IOException {
        if (opts.hasAuthorizations(cli)) {
            authorizations = opts.getAuthorizations(cli);
        }

        this.start = cli.getOptionValue(GraphuloStorageOptions.START_ROW_OPTION.getOpt(), null);
        this.end = cli.getOptionValue(GraphuloStorageOptions.END_ROW_OPTION.getOpt(), null);

        if (cli.hasOption(GraphuloStorageOptions.MAX_LATENCY_OPTION.getOpt())) {
            this.maxLatency = opts.getInt(cli, GraphuloStorageOptions.MAX_LATENCY_OPTION);
        }

        if (cli.hasOption(GraphuloStorageOptions.WRITE_THREADS_OPTION.getOpt())) {
            this.maxWriteThreads = opts.getInt(cli, GraphuloStorageOptions.WRITE_THREADS_OPTION);
        }

        if (cli.hasOption(GraphuloStorageOptions.MUTATION_BUFFER_SIZE_OPTION.getOpt())) {
            this.maxMutationBufferSize = opts.getLong(cli, GraphuloStorageOptions.MUTATION_BUFFER_SIZE_OPTION);
        }

        Properties clientSystemProps = UDFContext.getUDFContext().getClientSystemProps();
        String defaultCaster = STRING_CASTER;
        if (null != clientSystemProps) {
            defaultCaster = clientSystemProps.getProperty(CASTER_PROPERTY, defaultCaster);
        }

        String casterOption = cli.getOptionValue("caster", defaultCaster);
        if (STRING_CASTER.equalsIgnoreCase(casterOption)) {
            caster = new Utf8StorageConverter();
        } else if (BYTE_CASTER.equalsIgnoreCase(casterOption)) {
            caster = new HBaseBinaryConverter();
        } else {
            try {
                caster = (LoadStoreCaster) PigContext.instantiateFuncFromSpec(casterOption);
            } catch (ClassCastException e) {
                log.error("Configured caster does not implement LoadCaster interface.");
                throw new IOException(e);
            } catch (RuntimeException e) {
                log.error("Configured caster class not found.", e);
                throw new IOException(e);
            }
        }
        log.debug("Using caster " + caster.getClass());

        if (cli.hasOption(GraphuloStorageOptions.COLUMN_SEPARATOR_OPTION.getOpt())) {
            columnSeparator = cli.getOptionValue(GraphuloStorageOptions.COLUMN_SEPARATOR_OPTION.getOpt());
        }

        if (cli.hasOption(GraphuloStorageOptions.COLUMN_IGNORE_WHITESPACE_OPTION.getOpt())) {
            String value = cli.getOptionValue(GraphuloStorageOptions.COLUMN_IGNORE_WHITESPACE_OPTION.getOpt());
            if ("false".equalsIgnoreCase(value)) {
                ignoreWhitespace = false;
            } else if ("true".equalsIgnoreCase(value)) {
                ignoreWhitespace = true;
            } else {
                log.warn("Ignoring unknown value for "
                        + GraphuloStorageOptions.COLUMN_IGNORE_WHITESPACE_OPTION.getOpt() + ": " + value);
            }
        }
    }

    protected CommandLine getCommandLine() {
        return commandLine;
    }

    protected Map<String, String> getInputFormatEntries(Configuration conf) {
        return getEntries(conf, INPUT_PREFIX);
    }

    protected Map<String, String> getOutputFormatEntries(Configuration conf) {
        return getEntries(conf, OUTPUT_PREFIX);
    }

    /**
     * Removes the given values from the configuration, accounting for changes
     * in the Configuration API given the version of Hadoop being used.
     * 
     * @param conf
     * @param entriesToUnset
     */
    protected void unsetEntriesFromConfiguration(Configuration conf, Map<String, String> entriesToUnset) {
        boolean configurationHasUnset = true;
        try {
            conf.getClass().getMethod("unset", String.class);
        } catch (NoSuchMethodException e) {
            configurationHasUnset = false;
        } catch (SecurityException e) {
            configurationHasUnset = false;
        }

        // Only Hadoop >=1.2.0 and >=0.23 actually contains the method
        // Configuration#unset
        if (configurationHasUnset) {
            simpleUnset(conf, entriesToUnset);
        } else {
            // If we're running on something else, we have to remove everything
            // and re-add it
            clearUnset(conf, entriesToUnset);
        }
    }

    /**
     * Unsets elements in the Configuration using the unset method
     * 
     * @param conf
     * @param entriesToUnset
     */
    protected void simpleUnset(Configuration conf, Map<String, String> entriesToUnset) {
        try {
            Method unset = conf.getClass().getMethod("unset", String.class);

            for (String key : entriesToUnset.keySet()) {
                unset.invoke(conf, key);
            }
        } catch (NoSuchMethodException e) {
            log.error("Could not invoke Configuration.unset method", e);
            throw new RuntimeException(e);
        } catch (IllegalAccessException e) {
            log.error("Could not invoke Configuration.unset method", e);
            throw new RuntimeException(e);
        } catch (IllegalArgumentException e) {
            log.error("Could not invoke Configuration.unset method", e);
            throw new RuntimeException(e);
        } catch (InvocationTargetException e) {
            log.error("Could not invoke Configuration.unset method", e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Replaces the given entries in the configuration by clearing the
     * Configuration and re-adding the elements that aren't in the Map of
     * entries to unset
     * 
     * @param conf
     * @param entriesToUnset
     */
    protected void clearUnset(Configuration conf, Map<String, String> entriesToUnset) {
        // Gets a copy of the entries
        Iterator<Entry<String, String>> originalEntries = conf.iterator();
        conf.clear();

        while (originalEntries.hasNext()) {
            Entry<String, String> originalEntry = originalEntries.next();

            // Only re-set() the pairs that aren't in our collection of keys to
            // unset
            if (!entriesToUnset.containsKey(originalEntry.getKey())) {
                conf.set(originalEntry.getKey(), originalEntry.getValue());
            }
        }
    }

    @Override
    public Tuple getNext() throws IOException {
        try {
            // load the next pair
            if (!reader.nextKeyValue())
                return null;

            Key key = (Key) reader.getCurrentKey();
            Value value = (Value) reader.getCurrentValue();
            assert key != null && value != null;
            return getTuple(key, value);
        } catch (InterruptedException e) {
            throw new IOException(e.getMessage());
        }
    }

    protected abstract Tuple getTuple(Key key, Value value) throws IOException;

    @Override
    @SuppressWarnings("rawtypes")
    public InputFormat getInputFormat() {
        return new AccumuloInputFormat();
    }

    @Override
    @SuppressWarnings({ "unchecked", "rawtypes" })
    public void prepareToRead(RecordReader reader, PigSplit split) {
        this.reader = reader;
    }

    private void setLocationFromUri(String location) throws IOException {
        // ex:
        // accumulo://table1?instance=myinstance&user=root&password=secret&zookeepers=127.0.0.1:2181&auths=PRIVATE,PUBLIC&fetch_columns=col1:cq1,col2:cq2&start=abc&end=z
        String columns = "", auths = "";
        try {
            if (!location.startsWith("accumulo://"))
                throw new Exception("Bad scheme.");
            String[] urlParts = location.split("\\?");
            if (urlParts.length > 1) {
                for (String param : urlParts[1].split("&")) {
                    String[] pair = param.split("=");
                    if (pair[0].equals("instance"))
                        inst = pair[1];
                    else if (pair[0].equals("user"))
                        user = pair[1];
                    else if (pair[0].equals("password"))
                        password = pair[1];
                    else if (pair[0].equals("zookeepers"))
                        zookeepers = pair[1];
                    else if (pair[0].equals("auths"))
                        auths = pair[1];
                    else if (pair[0].equals("fetch_columns"))
                        columns = pair[1];
                    else if (pair[0].equals("start"))
                        start = pair[1];
                    else if (pair[0].equals("end"))
                        end = pair[1];
                    else if (pair[0].equals("write_buffer_size_bytes"))
                        maxMutationBufferSize = Long.parseLong(pair[1]);
                    else if (pair[0].equals("write_threads"))
                        maxWriteThreads = Integer.parseInt(pair[1]);
                    else if (pair[0].equals("write_latency_ms"))
                        maxLatency = Long.parseLong(pair[1]);
                }
            }
            String[] parts = urlParts[0].split("/+");
            table = parts[1];
            tableName = new Text(table);

            if (null == authorizations && auths == null) {
                authorizations = new Authorizations();
            } else {
                authorizations = new Authorizations(StringUtils.split(auths, COMMA));
            }

            if (!StringUtils.isEmpty(columns)) {
                parseColumns(columns);
            }

        } catch (Exception e) {
            throw new IOException(
                    "Expected 'accumulo://<table>[?instance=<instanceName>&user=<user>&password=<password>&zookeepers=<zookeepers>&auths=<authorizations>&"
                            + "[start=startRow,end=endRow,fetch_columns=[cf1:cq1,cf2:cq2,...],write_buffer_size_bytes=10000000,write_threads=10,write_latency_ms=30000]]': "
                            + e.getMessage());
        }
    }

    protected RecordWriter<Text, Mutation> getWriter() {
        return writer;
    }

    /**
     * Extract elements from the Configuration whose keys match the given prefix
     * 
     * @param conf
     * @param prefix
     * @return
     */
    protected Map<String, String> getEntries(Configuration conf, String prefix) {
        Map<String, String> entries = new HashMap<String, String>();

        for (Entry<String, String> entry : conf) {
            String key = entry.getKey();
            if (key.startsWith(prefix)) {
                entries.put(key, entry.getValue());
            }
        }

        return entries;
    }

    @Override
    public void setLocation(String location, Job job) throws IOException {
        setLocationFromUri(location);

        loadDependentJars(job.getConfiguration());

        Map<String, String> entries = getInputFormatEntries(job.getConfiguration());
        unsetEntriesFromConfiguration(job.getConfiguration(), entries);

        try {
            AccumuloInputFormat.setConnectorInfo(job, user, new PasswordToken(password));
        } catch (AccumuloSecurityException e) {
            throw new IOException(e);
        }

        AccumuloInputFormat.setInputTableName(job, table);
        AccumuloInputFormat.setScanAuthorizations(job, authorizations);
        AccumuloInputFormat.setZooKeeperInstance(job, inst, zookeepers);

        List<Pair<Text, Text>> inputFormatColumns = new LinkedList<Pair<Text, Text>>();
        int colfamPrefix = 0;

        for (Column c : columns) {
            switch (c.getType()) {
            case LITERAL:
                // Pull the colf[:colq] individually
                inputFormatColumns.add(makePair(c.getColumnFamily(), c.getColumnQualifier()));
                break;
            case COLFAM_PREFIX:
                // Some colfams
                colfamPrefix++;
                break;
            case COLQUAL_PREFIX:
                // Some colquals in a given colfam
                inputFormatColumns.add(makePair(c.getColumnFamily(), null));
                break;
            default:
                log.info("Ignoring unhandled column type");
                break;
            }
        }

        // If we have colfam prefixes, we have to pull all columns and filter on
        // client-side
        // TODO Create an iterator that lets us push-down *all* of the filter
        // logic
        if (0 == colfamPrefix && !inputFormatColumns.isEmpty()) {
            AccumuloInputFormat.fetchColumns(job, inputFormatColumns);
        }

        Collection<Range> ranges = Collections.singleton(new Range(start, end));

        log.info("Scanning Graphulo for " + ranges + " for table " + table);

        AccumuloInputFormat.setRanges(job, ranges);

        configureInputFormat(job);
    }

    protected Pair<Text, Text> makePair(String first, String second) {
        return new Pair<Text, Text>((null == first) ? null : new Text(first),
                (null == second) ? null : new Text(second));
    }

    /**
     * Ensure that Graphulo's dependent jars are added to the Configuration to
     * alleviate the need for clients to REGISTER dependency jars.
     * 
     * @param job
     *            The Mapreduce Job object
     * @throws IOException
         
    protected void loadDependentJars(Configuration conf) throws IOException {
    // Thank you, HBase.
    Utils.addDependencyJars(conf,
            org.apache.accumulo.trace.instrument.Tracer.class,
            org.apache.accumulo.core.client.Instance.class,
            org.apache.accumulo.fate.Fate.class,
            org.apache.zookeeper.ZooKeeper.class,
            org.apache.thrift.TServiceClient.class);
    }
        
    /**
     * Method to allow specific implementations to add more elements to the Job
     * for reading data from Graphulo.
     * 
     * @param job
     */
    protected void configureInputFormat(Job job) {
    }

    /**
     * Method to allow specific implementations to add more elements to the Job
     * for writing data to Graphulo.
     * 
     * @param job
     */
    protected void configureOutputFormat(Job job) {
    }

    @Override
    public String relativeToAbsolutePath(String location, Path curDir) throws IOException {
        return location;
    }

    @Override
    public void setUDFContextSignature(String signature) {
        this.contextSignature = signature;
    }

    /* StoreFunc methods */
    public void setStoreFuncUDFContextSignature(String signature) {
        this.contextSignature = signature;

    }

    /**
     * Returns UDFProperties based on <code>contextSignature</code>.
     */
    protected Properties getUDFProperties() {
        return UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[] { contextSignature });
    }

    public String relToAbsPathForStoreLocation(String location, Path curDir) throws IOException {
        return relativeToAbsolutePath(location, curDir);
    }

    public void setStoreLocation(String location, Job job) throws IOException {
        setLocationFromUri(location);

        loadDependentJars(job.getConfiguration());

        Map<String, String> entries = getOutputFormatEntries(job.getConfiguration());
        unsetEntriesFromConfiguration(job.getConfiguration(), entries);

        try {
            AccumuloOutputFormat.setConnectorInfo(job, user, new PasswordToken(password));
        } catch (AccumuloSecurityException e) {
            throw new IOException(e);
        }

        AccumuloOutputFormat.setCreateTables(job, true);
        AccumuloOutputFormat.setZooKeeperInstance(job, inst, zookeepers);

        BatchWriterConfig bwConfig = new BatchWriterConfig();
        bwConfig.setMaxLatency(maxLatency, TimeUnit.MILLISECONDS);
        bwConfig.setMaxMemory(maxMutationBufferSize);
        bwConfig.setMaxWriteThreads(maxWriteThreads);
        AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);

        log.info("Writing data to " + table);

        configureOutputFormat(job);
    }

    @SuppressWarnings("rawtypes")
    public OutputFormat getOutputFormat() {
        return new AccumuloOutputFormat();
    }

    @SuppressWarnings({ "rawtypes", "unchecked" })
    public void prepareToWrite(RecordWriter writer) {
        this.writer = writer;
    }

    protected abstract Collection<Mutation> getMutations(Tuple tuple) throws ExecException, IOException;

    public void putNext(Tuple tuple) throws ExecException, IOException {
        Collection<Mutation> muts = getMutations(tuple);
        for (Mutation mut : muts) {
            try {
                getWriter().write(tableName, mut);
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
        }
    }

    public void cleanupOnFailure(String failure, Job job) {
    }

    public void cleanupOnSuccess(String location, Job job) {
    }

    @Override
    public void checkSchema(ResourceSchema s) throws IOException {
        if (!(caster instanceof LoadStoreCaster)) {
            log.error("Caster must implement LoadStoreCaster for writing to Graphulo.");
            throw new IOException("Bad Caster " + caster.getClass());
        }
        schema = s;
        getUDFProperties().setProperty(contextSignature + "_schema", ObjectSerializer.serialize(schema));
    }

    protected Text tupleToText(Tuple tuple, int i, ResourceFieldSchema[] fieldSchemas) throws IOException {
        Object o = tuple.get(i);
        byte type = schemaToType(o, i, fieldSchemas);

        return objToText(o, type);
    }

    protected Text objectToText(Object o, ResourceFieldSchema fieldSchema) throws IOException {
        byte type = schemaToType(o, fieldSchema);

        return objToText(o, type);
    }

    protected byte schemaToType(Object o, ResourceFieldSchema fieldSchema) {
        return (fieldSchema == null) ? DataType.findType(o) : fieldSchema.getType();
    }

    protected byte schemaToType(Object o, int i, ResourceFieldSchema[] fieldSchemas) {
        return (fieldSchemas == null) ? DataType.findType(o) : fieldSchemas[i].getType();
    }

    protected byte[] tupleToBytes(Tuple tuple, int i, ResourceFieldSchema[] fieldSchemas) throws IOException {
        Object o = tuple.get(i);
        byte type = schemaToType(o, i, fieldSchemas);

        return objToBytes(o, type);

    }

    protected Text objToText(Object o, byte type) throws IOException {
        byte[] bytes = objToBytes(o, type);

        if (null == bytes) {
            log.warn("Creating empty text from null value");
            return new Text();
        }

        return new Text(bytes);
    }

    @SuppressWarnings("unchecked")
    protected byte[] objToBytes(Object o, byte type) throws IOException {
        if (o == null)
            return null;
        switch (type) {
        case DataType.BYTEARRAY:
            return ((DataByteArray) o).get();
        case DataType.BAG:
            return caster.toBytes((DataBag) o);
        case DataType.CHARARRAY:
            return caster.toBytes((String) o);
        case DataType.DOUBLE:
            return caster.toBytes((Double) o);
        case DataType.FLOAT:
            return caster.toBytes((Float) o);
        case DataType.INTEGER:
            return caster.toBytes((Integer) o);
        case DataType.LONG:
            return caster.toBytes((Long) o);
        case DataType.BIGINTEGER:
            return caster.toBytes((BigInteger) o);
        case DataType.BIGDECIMAL:
            return caster.toBytes((BigDecimal) o);
        case DataType.BOOLEAN:
            return caster.toBytes((Boolean) o);
        case DataType.DATETIME:
            return caster.toBytes((DateTime) o);

        // The type conversion here is unchecked.
        // Relying on DataType.findType to do the right thing.
        case DataType.MAP:
            return caster.toBytes((Map<String, Object>) o);

        case DataType.NULL:
            return null;
        case DataType.TUPLE:
            return caster.toBytes((Tuple) o);
        case DataType.ERROR:
            throw new IOException("Unable to determine type of " + o.getClass());
        default:
            throw new IOException("Unable to find a converter for tuple field " + o);
        }
    }

    @Override
    public LoadCaster getLoadCaster() throws IOException {
        return caster;
    }
}