ubic.gemma.analysis.preprocess.batcheffects.AffyScanDateExtractor.java Source code

Java tutorial

Introduction

Here is the source code for ubic.gemma.analysis.preprocess.batcheffects.AffyScanDateExtractor.java

Source

/*
 * The Gemma project
 * 
 * Copyright (c) 2011 University of British Columbia
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 */
package ubic.gemma.analysis.preprocess.batcheffects;

import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Date;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import ubic.basecode.util.FileTools;

/**
 * Extract the scan date from Affymetrix CEL files. Handles version 3 (ASCII) and version 4 (binary) CEL files, as
 * well as the Command Console ("generic") binary format.
 * <p>
 * See <a href="http://www.affymetrix.com/support/developer/powertools/changelog/gcos-agcc/cel.html">the CEL format
 * description</a> and <a
 * href="http://www.affymetrix.com/support/developer/powertools/changelog/gcos-agcc/generic.html">the generic
 * (Command Console) format description</a>.
 * <p>
 * Note that the Affymetrix documentation does not mention a date, explicitly, but it's in the "DatHeader"
 * <p>
 * The format is detected from the first byte of the stream: 64 selects version 4, 59 selects Command Console, and
 * anything else is treated as version 3 plain text.
 * 
 * @author paul
 * @version $Id: AffyScanDateExtractor.java,v 1.9 2012/05/27 02:58:42 paul Exp $
 */
public class AffyScanDateExtractor extends BaseScanDateExtractor {

    private static final Log log = LogFactory.getLog(AffyScanDateExtractor.class);

    /** First byte of a version 4 (binary) CEL file. */
    private static final int CEL_V4_MAGIC = 64;

    /** First byte of a Command Console format file. */
    private static final int COMMAND_CONSOLE_MAGIC = 59;

    /** Number of header bytes to scan when a version 4 file reports a non-positive header length. */
    private static final int FALLBACK_HEADER_LENGTH = 800;

    /** Line-based scans give up once the line counter exceeds this value. */
    private static final int MAX_LINES_TO_SCAN = 100;

    /**
     * Reads the scan date from a CEL file supplied as a stream. The stream is always closed before this method
     * returns.
     * 
     * @param is stream positioned at the first byte of the CEL file
     * @return the scan date; never null
     * @throws IllegalStateException if a binary format is detected but its version is not supported, or if no date
     *         could be found
     * @throws RuntimeException wrapping any IOException raised while reading
     * @see ubic.gemma.analysis.preprocess.batcheffects.ScanDateExtractor#extract(java.io.InputStream)
     */
    @Override
    public Date extract(InputStream is) {

        DataInputStream str = new DataInputStream(is);

        try {
            Date date;

            int magic = str.readByte();
            if (magic == CEL_V4_MAGIC) {
                date = extractFromVersion4(str);
            } else if (magic == COMMAND_CONSOLE_MAGIC) {
                date = extractFromCommandConsole(str);
            } else {
                // assume version 3 plain text.
                date = extractFromText(str);
            }

            if (date == null) {
                throw new IllegalStateException("Failed to find date");
            }
            log.debug(date);

            return date;

        } catch (IOException e) {
            throw new RuntimeException(e);
        } finally {
            // Every reader created by the helpers wraps this stream, so closing it releases everything.
            try {
                str.close();
            } catch (IOException e) {
                log.error("Failed to close open file handle: " + e.getMessage(), e);
            }
        }

    }

    /**
     * Opens the named file (plain or compressed, as supported by FileTools) and reads the scan date from it.
     * 
     * @param fileName path of the CEL file
     * @return the scan date; never null
     * @throws RuntimeException wrapping any IOException (including FileNotFoundException) raised while opening the
     *         file
     * @see ubic.gemma.analysis.preprocess.batcheffects.ScanDateExtractor#extract(java.lang.String)
     */
    @Override
    public Date extract(String fileName) {
        try {
            return extract(FileTools.getInputStreamFromPlainOrCompressedFile(fileName));
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so a single handler covers both.
            throw new RuntimeException(e);
        }
    }

    /**
     * Parses the remainder of a version 4 (binary) CEL file whose first byte has already been consumed. The leading
     * fields are 32-bit little-endian integers: magic number, version, two array dimensions, cell count and header
     * length, followed by the text header.
     * 
     * @param str stream positioned just after the first byte
     * @return the date parsed from the "DatHeader" line, or null if the header has no such line
     * @throws IOException if the fixed-size fields cannot be read
     */
    private Date extractFromVersion4(DataInputStream str) throws IOException {

        // The byte already read was the low-order byte of the magic number; the other three must be zero.
        int magicHighBytes = str.readUnsignedByte() | (str.readUnsignedByte() << 8)
                | (str.readUnsignedByte() << 16);
        if (magicHighBytes != 0) {
            throw new IllegalStateException(
                    "Affymetrix CEL format not recognized: " + (CEL_V4_MAGIC | (magicHighBytes << 8)));
        }

        int version = readIntLittleEndian(str);
        if (version != 4) {
            // it's always supposed to be.
            throw new IllegalStateException("Affymetrix CEL format not recognized: " + version);
        }

        // These fields are not needed for the date but must be consumed to reach the header.
        log.debug(readIntLittleEndian(str)); // first array dimension
        log.debug(readIntLittleEndian(str)); // second array dimension
        log.debug(readIntLittleEndian(str)); // numcells

        int headerLen = readIntLittleEndian(str);
        if (headerLen <= 0) {
            // Best effort for files that report no usable length: scan a fixed-size window instead.
            headerLen = FALLBACK_HEADER_LENGTH;
        }
        log.debug(headerLen);

        // Lenient read: a short file can still yield a date if the DatHeader line is present.
        String header = readAscii(str, headerLen, false);

        String[] headerLines = StringUtils.split(header, "\n");
        for (String headerLine : headerLines) {
            if (headerLine.startsWith("DatHeader")) {
                return parseStandardFormat(headerLine);
            }
        }
        return null;
    }

    /**
     * Parses the remainder of a Command Console format file whose first (magic) byte has already been consumed.
     * After the fixed fields the rest of the stream is decoded as UTF-16BE text and scanned line by line for the
     * "affymetrix-scan-date" entry.
     * 
     * @param str stream positioned just after the magic byte
     * @return the parsed date, or null if it was not found within the scanned lines
     * @throws IOException if the stream cannot be read
     */
    private Date extractFromCommandConsole(DataInputStream str) throws IOException {

        int version = str.readUnsignedByte();
        if (version != 1) {
            throw new IllegalStateException("Affymetrix CEL format not recognized: " + version);
        }

        // DataInputStream reads big-endian; these two values are only logged.
        log.debug(str.readInt()); // number of data groups
        log.debug(str.readInt()); // file position of first group

        String datatypeIdentifier = readGCOSString(str);
        log.debug(datatypeIdentifier);

        String guid = readGCOSString(str);
        log.debug(guid);

        BufferedReader reader = new BufferedReader(new InputStreamReader(str, "UTF-16BE"));
        Date date = null;
        String line = null;
        int count = 0;
        while ((line = reader.readLine()) != null) {
            log.debug(line);
            if (line.contains("affymetrix-scan-date")) {
                date = parseISO8601(line);
            }
            if (date != null || ++count > MAX_LINES_TO_SCAN) {
                break;
            }
        }
        return date;
    }

    /**
     * Scans a version 3 (plain text) CEL file line by line for the "DatHeader" entry. The first byte of the file
     * has already been consumed by format detection, which does not matter because only lines starting with
     * "DatHeader" are examined.
     * 
     * @param str stream positioned just after the first byte
     * @return the parsed date, or null if it was not found within the scanned lines
     * @throws IOException if the stream cannot be read
     */
    private Date extractFromText(DataInputStream str) throws IOException {

        // Explicit single-byte charset so the result does not depend on the platform default encoding.
        BufferedReader reader = new BufferedReader(new InputStreamReader(str, "ISO-8859-1"));
        Date date = null;
        String line = null;
        int count = 0;
        while ((line = reader.readLine()) != null) {
            if (line.startsWith("DatHeader")) {
                date = parseStandardFormat(line);
            }
            if (date != null || ++count > MAX_LINES_TO_SCAN) {
                break;
            }
        }
        return date;
    }

    /**
     * Reads a length-prefixed 8-bit string: a 32-bit big-endian length followed by that many bytes, decoded as
     * US-ASCII. A non-positive length yields an empty string.
     * 
     * @param str stream positioned at the length field
     * @return the decoded string
     * @throws IOException if the stream ends before the announced number of bytes has been read
     */
    private String readGCOSString(DataInputStream str) throws IOException {
        int fieldLength = str.readInt();
        return readAscii(str, fieldLength, true);
    }

    /**
     * Reads up to the given number of bytes and decodes them as US-ASCII. Data is read in small chunks so that a
     * corrupt, very large length cannot trigger a huge allocation up front.
     * 
     * @param in stream to read from
     * @param length number of bytes wanted; values of zero or less produce an empty string
     * @param failOnEof if true, reaching end of stream early raises an IOException; if false, whatever was read is
     *        returned
     * @return the decoded text
     * @throws IOException on read failure, or on premature end of stream when failOnEof is true
     */
    private static String readAscii(InputStream in, int length, boolean failOnEof) throws IOException {
        StringBuilder text = new StringBuilder();
        byte[] chunk = new byte[1024];
        int remaining = length;
        while (remaining > 0) {
            int numRead = in.read(chunk, 0, Math.min(chunk.length, remaining));
            if (numRead < 0) {
                if (failOnEof) {
                    throw new IOException("Reached end of file without string end");
                }
                break;
            }
            // US-ASCII is a single-byte encoding, so decoding chunk by chunk cannot split a character.
            text.append(new String(chunk, 0, numRead, "US-ASCII"));
            remaining -= numRead;
        }
        return text.toString();
    }

    /**
     * Reads a 32-bit little-endian integer. DataInputStream itself reads big-endian, so the bytes are reversed.
     * 
     * @param dis stream to read from
     * @return the decoded value
     * @throws IOException if four bytes cannot be read
     */
    private static int readIntLittleEndian(DataInputStream dis) throws IOException {
        return Integer.reverseBytes(dis.readInt());
    }

}