// Java tutorial
/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package edu.msViz.mzTree;

import com.opencsv.CSVReader;
import edu.msViz.mzTree.ImportState.ImportStatus;
import edu.msViz.mzTree.storage.StorageFacade;
import edu.msViz.mzTree.storage.StorageFacadeFactory;
import edu.msViz.mzTree.summarization.SummarizationStrategy;
import edu.msViz.mzTree.summarization.SummarizationStrategyFactory;
import edu.msViz.mzTree.summarization.SummarizationStrategyFactory.Strategy;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.DataFormatException;
import javax.xml.stream.XMLStreamException;
import com.opencsv.CSVWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.Collections;
import java.util.List;
import org.apache.commons.lang.StringUtils;

/**
 * R-Tree implementation for storing and accessing MS data.
 *
 * <p>A tree is either built from an mzML or csv source (and persisted through a
 * {@link StorageFacade}) or reconnected to an existing mzTree file. Once loaded it can be
 * queried by m/z and RT bounds, exported to csv, or copied to a new location.
 *
 * @author rob
 */
public final class MzTree {

    private static final Logger LOGGER = Logger.getLogger(MzTree.class.getName());

    // there is really no good reason for this...
    public static final int DEFAULT_TREE_HEIGHT = 4;

    // number of points per node should assume a whole number of disk blocks
    // some disks ship with blocks of size 4096B, some with blocks of 512B. luckily 4096 is a multiple of 512
    // an MsDataPoint requires 22B, so we need the LCM of 4096 and 22, which is 45056
    // 45056B is 11 4096B disk blocks, 88 512B disk blocks, and 2048 points.
    // the value used here (8192) is 4 x 2048 points, i.e. 180224B = 44 4096B disk blocks,
    // so it still fills a whole number of blocks under the 22B-per-point assumption above
    public static final int NUM_POINTS_PER_NODE = 8192;

    // minimum branching factor for a partitioned load
    // ensures that we don't have a tiny branching factor that produces
    // a very tall tree (such as 2...)
    private static final int MINIMUM_BRANCHING_FACTOR = 4;

    // fraction of heap alloted for MsDataPoints
    private static final float HEAP_FRACTION = .5f;

    // timestamp pattern used to name converted .mzTree files (DateTimeFormatter is thread-safe)
    private static final DateTimeFormatter FILE_NAME_TIMESTAMP_FORMAT =
            DateTimeFormatter.ofPattern("MM-dd-yyyy_HH-mm-ss");

    // the branching factor of the tree (number of children per root/hidden node)
    public int branchingFactor;

    // the height of the tree
    public short treeHeight;

    // the tree's head node
    public MzTreeNode head;

    // the summarizer used to form the summarized collections of data
    // at the root/intermediate nodes
    public SummarizationStrategy summarizer;

    // point map, keyed by pointID for unified point storage
    public PointCache pointCache;

    // disk storage implementation
    public StorageFacade dataStorage;

    // static storage interface choice
    private static final StorageFacadeFactory.Facades STORAGE_INTERFACE_CHOICE = StorageFacadeFactory.Facades.Hybrid;

    // import progress monitor
    private ImportState importState;

    /**
     * No argument constructor for basic initialization.
     * Creates a fresh {@link ImportState} used to report import progress.
     */
    public MzTree() {
        this.importState = new ImportState();
    }

    // A custom ConvertDestinationProvider can be set on the MzTree
    // which will be used to determine where to save a converted MzTree file on import of csv or mzML
    public interface ConvertDestinationProvider {
        Path getDestinationPath(Path suggestedFilePath) throws Exception;
    }

    private ConvertDestinationProvider convertDestinationProvider = null;

    public void setConvertDestinationProvider(ConvertDestinationProvider convertDestinationProvider) {
        this.convertDestinationProvider = convertDestinationProvider;
    }

    //***********************************************//
    //                     LOAD                      //
    //***********************************************//

    /**
     * Determines where a converted mzTree file should be written.
     * The suggestion is a timestamp-named ".mzTree" sibling of the source file; if a
     * {@link ConvertDestinationProvider} has been set it gets the final say.
     * @param sourceFilePath path of the file being converted
     * @return destination path for the converted mzTree file
     * @throws Exception if the destination provider fails
     */
    private Path getConvertDestinationPath(Path sourceFilePath) throws Exception {
        // date time file name
        String suggestedFileName = LocalDateTime.now().format(FILE_NAME_TIMESTAMP_FORMAT) + ".mzTree";
        Path suggestedFilePath = sourceFilePath.resolveSibling(suggestedFileName);

        // if a ConvertDestinationProvider has been given, use it to determine the output path
        if (convertDestinationProvider == null) {
            return suggestedFilePath;
        } else {
            return convertDestinationProvider.getDestinationPath(suggestedFilePath);
        }
    }

    /**
     * Loads an mzTree either by building from mzML/csv or by reconnecting to an mzTree.
     * Paths ending in ".csv" are imported as csv; everything else is first tried as mzML and,
     * if parsing fails with a DataFormatException or XMLStreamException, opened as an
     * existing mzTree file.
     * @param filePath The path to the mzML, csv or mzTree file
     * @param summarizationStrategy strategy to be used for summarization of points
     * @throws Exception if the file can be neither imported nor opened as an mzTree
     */
    public void load(String filePath, Strategy summarizationStrategy) throws Exception {
        // construct the summarizer corresponding to user's choice
        this.summarizer = SummarizationStrategyFactory.create(summarizationStrategy);

        // reset ImportState and set source file location
        this.importState.reset();
        importState.setSourceFilePath(filePath);

        try {
            long start = System.currentTimeMillis();

            // csv load
            if (filePath.endsWith(".csv")) {
                this.csvLoad(filePath);
            }
            // mzml load
            else {
                // initialize mzmlParser
                MzmlParser mzmlParser = new MzmlParser(filePath);

                // if the partitioned load is necessary then perform a partitioned load
                if (this.partitionedLoadConfiguration(mzmlParser, Paths.get(filePath))) {
                    this.partitionedLoad(mzmlParser);
                }
                // else perform standard, memory-apathetic load
                else {
                    this.standardLoad(mzmlParser, filePath);
                }
            }

            importState.setImportStatus(ImportStatus.READY);
            LOGGER.log(Level.INFO, "Tree Build Real Time: " + (System.currentTimeMillis() - start));
        } catch (DataFormatException | XMLStreamException ex) {
            // try as an mzTree file instead of XML+build in the
            // occurence of DataFormatException or XMLStreamException

            // initialize data storage on mzTree file
            this.initDataStorage(STORAGE_INTERFACE_CHOICE, filePath, null);

            // inform the importState of an .mzTree load
            this.importState.setImportStatus(ImportStatus.LOADING_MZTREE);

            // create mzTree from existing file
            // recursively build tree from root node
            this.head = dataStorage.loadRootNode();

            // recursiveTreeBuilder only ever raises treeHeight, so clear any value left
            // behind by a previous load on this instance
            this.treeHeight = 0;
            this.recursiveTreeBuilder(this.head, 0);

            // inform importState that mzTree load has finished
            this.importState.setImportStatus(ImportStatus.READY);
        }
    }

    /**
     * Recursively builds the in-memory tree structure below the given node from storage.
     * Child nodes are loaded from the data storage, linked to their parent, and the
     * tree height is raised to the deepest leaf encountered.
     * @param node Node from which to start recursive MzTree construction
     * @param curDepth depth of {@code node} in the tree (root is 0)
     * @throws Exception if child nodes cannot be loaded from storage
     */
    private void recursiveTreeBuilder(MzTreeNode node, int curDepth) throws Exception {
        // get all child nodes
        List<MzTreeNode> childNodes = dataStorage.loadChildNodes(node);

        // leaf nodes update tree height (results in largest height)
        if (childNodes.isEmpty()) {
            this.treeHeight = (curDepth > this.treeHeight) ? (short) curDepth : this.treeHeight;
        }

        // recurse on each child node if there are any
        for (MzTreeNode childNode : childNodes) {
            // recursive call at +1 depth
            this.recursiveTreeBuilder(childNode, curDepth + 1);

            // add reference to child and keep min/max mz/rt/int
            node.addChildGetBounds(childNode);
        }
    }

    /**
     * Performs a partitioned load of the data set, resulting in conservative memory consumption.
     * Each level 1 subtree is read from the parser and built on its own, then the root is
     * summarized and all node information is saved.
     * @param mzmlParser input mzml file parser, already prepared for a partitioned read
     * @throws XMLStreamException
     * @throws DataFormatException
     * @throws IOException
     */
    private void partitionedLoad(MzmlParser mzmlParser) throws XMLStreamException, DataFormatException, IOException {
        LOGGER.log(Level.INFO, "Partitioned load w/ " + this.branchingFactor + " partitions");

        // signal to the import monitor that tree building has begun
        this.importState.setImportStatus(ImportStatus.CONVERTING);

        // init head node
        this.head = new MzTreeNode(this.branchingFactor);

        // iterate through each level 1 node, loading partition and
        // constructing separately
        for (int i = 0; i < this.branchingFactor; i++) {
            // current level 1 node
            MzTreeNode curL1Node = new MzTreeNode(this.branchingFactor);

            // load level 1 node's partition
            List<MsDataPoint> curPartition = mzmlParser.readPartition();

            // recursively construct level 1 node
            this.divide(true, curPartition, curL1Node, 1);

            // add the level 1 node to the root node
            this.head.addChildGetBounds(curL1Node);

            LOGGER.log(Level.INFO, "Completed partition " + i);

            // drop this partition's points from the cache before reading the next partition
            this.pointCache.clear();
        }

        // root node summarization!!!!!!
        this.head.summarizeFromChildren(MzTree.NUM_POINTS_PER_NODE, this.summarizer, this.pointCache);

        // recursively save node information (only points are saved during construction)
        this.recursiveNodeSave(this.head, 0);

        try {
            // commit all entries
            this.dataStorage.flush();
        } catch (Exception ex) {
            LOGGER.log(Level.WARNING, "Could not flush entries to storage", ex);
        }
    }

    /**
     * Performs a standard, memory-apathetic load: reads the whole data set and builds from the root.
     * @param mzmlParser input file parser initialized w/ target file
     * @param filePath path to the source file (used to derive the output location)
     * @throws Exception if parsing or tree construction fails
     */
    private void standardLoad(MzmlParser mzmlParser, String filePath) throws Exception {
        importState.setImportStatus(ImportStatus.PARSING);
        List<MsDataPoint> dataset = mzmlParser.readAllData();
        this.buildTreeFromRoot(dataset, Paths.get(filePath));
    }

    /**
     * Loads MS data in csv format (mz, rt, intensity, meta1).
     * The first row is treated as a header, and skipped, unless its first column parses as a number.
     * @param filePath path to csv file
     * @throws FileNotFoundException if the csv file does not exist
     * @throws IOException if the csv file cannot be read
     * @throws Exception if a row cannot be converted or tree construction fails
     */
    private void csvLoad(String filePath) throws FileNotFoundException, IOException, Exception {
        this.importState.setImportStatus(ImportStatus.PARSING);

        ArrayList<MsDataPoint> points = new ArrayList<>();

        // open csv reader on targetted csv file; closed automatically once parsing is done
        try (CSVReader reader = new CSVReader(new FileReader(filePath))) {
            // first line might be a header
            String[] line = reader.readNext();

            // if the first line is not a header, convert to msdatapoint and collect.
            // A parse check is used because StringUtils.isNumeric only accepts digit-only
            // strings and would misclassify a decimal m/z such as "445.12" as a header.
            if (line != null && line.length > 0 && isParsableDouble(line[0])) {
                points.add(this.csvRowToMsDataPoint(line));
            }

            // read the remaining lines (now guaranteed no header)
            while ((line = reader.readNext()) != null) {
                // convert to msdatapoint, collect
                points.add(this.csvRowToMsDataPoint(line));
            }
        }

        // build that tree!
        this.buildTreeFromRoot(points, Paths.get(filePath));
    }

    /**
     * Constructs an MzTree from the dataset, starting at the root node (so no partitioned load).
     * @param dataset all points of the data set
     * @param sourceFilePath path of the source file, used to derive the output location
     * @throws Exception if the data storage cannot be initialized
     */
    private void buildTreeFromRoot(List<MsDataPoint> dataset, Path sourceFilePath) throws Exception {
        LOGGER.log(Level.INFO, "Building MzTree from " + dataset.size() + " points");

        this.initDataStorage(STORAGE_INTERFACE_CHOICE, getConvertDestinationPath(sourceFilePath).toString(), dataset.size());

        // **************** STEP 1: CONFIGURE TREE ****************

        // number of leafNodes = globalNumPoints / hdBlockTupleCapacity
        int numLeafNodes = (int) Math.ceil((float) dataset.size() / (float) MzTree.NUM_POINTS_PER_NODE);

        this.treeHeight = MzTree.DEFAULT_TREE_HEIGHT;

        // branching factor = leafnodes ^ (1/treeDepth)
        this.branchingFactor = (int) Math.ceil(Math.pow(numLeafNodes, 1.0 / (double) this.treeHeight));

        // init head node
        this.head = new MzTreeNode(this.branchingFactor);

        // inform importState of anticipated amount of work
        int numPointsToSave = dataset.size(); // save points: dataset.length
        this.importState.setTotalWork(numPointsToSave);

        // **************** STEP 2: BUILD ****************

        this.importState.setImportStatus(ImportStatus.CONVERTING);

        // divide the head node, do not sort at start (null), mzML data already sorted by RT
        this.divide(null, dataset, this.head, 0);

        // recursively save node information (only points are saved during construction)
        this.recursiveNodeSave(this.head, 0);

        try {
            // commit all entries
            this.dataStorage.flush();
        } catch (Exception ex) {
            LOGGER.log(Level.WARNING, "Could not persist data to storage", ex);
        }
    }

    /**
     * Recursively divides the dataset into the mzTree, a depth first construction
     * starting with the head node.
     * @param sort_by_rt sorting flag: true sorts by rt, false by mz, null skips sorting
     * @param dataset The recursive call's data partition
     * @param head The recursive call's top level node
     * @param curHeight current height in tree (root is 0)
     */
    private void divide(Boolean sort_by_rt, List<MsDataPoint> dataset, MzTreeNode head, int curHeight) {
        // leaf flag
        boolean isLeaf = dataset.size() <= MzTree.NUM_POINTS_PER_NODE;

        // LEAF: save points, get mins/maxes
        if (isLeaf) {
            // leaf node submits its dataset to be written to data store
            try {
                this.dataStorage.savePoints(new StorageFacade.SavePointsTask(head, dataset), this.importState);
                this.pointCache.putAll(dataset);
            } catch (Exception e) {
                LOGGER.log(Level.WARNING, "Could not save points to datastorage for leaf node: " + head.toString(), e);
            }

            // collect point IDs, mz/rt/intensity min/max
            head.initLeaf(dataset);
            dataset = null; // garbage collect away
        }
        // ROOT/INTERMEDIATE: summarize, partition and recurse
        else {
            // if sort_by_rt is null then don't sort, implies initial partition
            // on mzml sourced data which is already sorted by RT
            if (sort_by_rt != null) {
                if (sort_by_rt) {
                    Collections.sort(dataset, Comparator.comparingDouble((MsDataPoint dataPoint) -> dataPoint.rt));
                } else {
                    Collections.sort(dataset, Comparator.comparingDouble((MsDataPoint dataPoint) -> dataPoint.mz));
                }
            }

            // the partition size is the subset length divided by the numChildrenPerNode
            int partitionSize = (int) Math.ceil((double) dataset.size() / (double) this.branchingFactor);

            // split the dataset into partitions (views onto the dataset, not copies)
            List<List<MsDataPoint>> partitions = new ArrayList<>();
            int i = 0;
            while (i < dataset.size()) {
                // populate partition
                final List<MsDataPoint> partition = dataset.subList(i, Math.min(i + partitionSize, dataset.size()));
                i += partition.size();

                // collect partition
                partitions.add(partition);
            }

            // free dataset for GC
            dataset = null;

            // distribute the partitions to child nodes
            for (List<MsDataPoint> partition : partitions) {
                // instantiate child node
                MzTreeNode child = new MzTreeNode(this.branchingFactor);

                // resolve sort_by_rt
                Boolean my_sort_by_rt = sort_by_rt;

                // if null (initial call mzML) set to true
                if (my_sort_by_rt == null) {
                    my_sort_by_rt = true;
                }

                // recursively divide child node (depth first), alternating the sort dimension
                this.divide(!my_sort_by_rt, partition, child, curHeight + 1);

                // collect child node
                head.addChildGetBounds(child);
            }

            // collect summary of points from child nodes (additionally saves pointIDs)
            head.summarizeFromChildren(MzTree.NUM_POINTS_PER_NODE, this.summarizer, this.pointCache);
        } // END ROOT/INTERMEDIATE NODE
    }

    /**
     * Configures the tree for a partitioned load according to available memory.
     *  - A minimum branching factor must be reached by the partitioned configuration,
     *    a branching factor smaller than the minimum creates a very tall tree.
     *    Increasing the branching factor decreases partition size,
     *    thus not endangering memory consumption.
     *  - If the partitioned configuration returns a branching factor of 1
     *    then the entire dataset will fit into memory. Revert to default configuration.
     * @param mzmlParser input mzml file parser
     * @param sourceFilePath path of the source file, used to derive the output location
     * @return false if partitioned load is unnecessary (entire dataset will fit in RAM), otherwise true
     * @throws Exception if the point count or storage initialization fails
     */
    private boolean partitionedLoadConfiguration(MzmlParser mzmlParser, Path sourceFilePath) throws Exception {
        this.importState.setImportStatus(ImportStatus.PARSING);

        // count the number of points in the mzML file
        int numPoints = mzmlParser.countPoints();

        // number of available bytes in java heap
        long numBytesInHeap = Runtime.getRuntime().maxMemory();

        // max allowed points to hold in memory at a time
        // = (heap size * FRACTION) / bytes per point
        int maxPointsInRam = (int) Math
                .floor((numBytesInHeap * MzTree.HEAP_FRACTION) / (float) MsDataPoint.MEM_NUM_BYTES_PER_POINT);

        // number of leafNodes = globalNumPoints / hdBlockTupleCapacity
        int numLeafNodes = (int) Math.ceil((float) numPoints / (float) MzTree.NUM_POINTS_PER_NODE);

        // branching factor determined by the max number of points allowed in RAM
        // if branchingFactor != 1 each level 1 subtree will be processed one at a time
        this.branchingFactor = (short) Math.ceil((float) numPoints / (float) maxPointsInRam);

        // branchingFactor != 1 implies the entire dataset won't fit into the heap,
        // branchingFactor determines treeHeight
        if (this.branchingFactor != 1) {
            // ensure we use at least the minimum branching factor
            this.branchingFactor = Math.max(this.branchingFactor, MzTree.MINIMUM_BRANCHING_FACTOR);

            // treeHeight = log_branchingFactor(numLeafNodes)
            // cool logarithmic identity: logb(n) = log(n) / log(b)
            this.treeHeight = (short) (Math.ceil(Math.log(numLeafNodes) / Math.log(this.branchingFactor)));

            // if branchingFactor unchanged by max call partitionSize == maxPointsInRam
            // else partitionSize < maxPointsInRam -> SAFE
            // NOTE(review): floor means branchingFactor * partitionSize can be less than
            // numPoints; presumably readPartition folds the remainder into the last
            // partition -- confirm against MzmlParser
            int partitionSize = (int) Math.floor((float) numPoints / (float) this.branchingFactor);

            // prepare parser for partitioned read
            mzmlParser.initPartitionedRead(partitionSize);

            this.initDataStorage(STORAGE_INTERFACE_CHOICE, getConvertDestinationPath(sourceFilePath).toString(), numPoints);

            // inform importState of the amount of work to do
            this.importState.setTotalWork(numPoints);

            return true;
        }
        // else branching factor is 1, implying the entire dataset can fit into heap
        // return false to signal a regular load should ensue
        else {
            return false;
        }
    }

    /**
     * Recursively saves an mzTree starting at curNode.
     * A failure is logged and stops the save of that node's subtree; it is not rethrown.
     * @param curNode node to recursively save
     * @param parentNodeID ID of the node's parent (0 if no parent)
     */
    private void recursiveNodeSave(MzTreeNode curNode, int parentNodeID) {
        try {
            // save node to db
            curNode.nodeID = this.dataStorage.saveNode(curNode, parentNodeID);

            // save node points to db
            this.dataStorage.saveNodePoints(curNode, this.importState);

            // recurse on children
            for (MzTreeNode childNode : curNode.children) {
                this.recursiveNodeSave(childNode, curNode.nodeID);
            }
        } catch (Exception e) {
            LOGGER.log(Level.WARNING, "Could not save node", e);
        }
    }

    public ImportState getImportState() {
        return importState;
    }

    public ImportState.ImportStatus getLoadStatus() {
        return importState.getImportStatus();
    }

    public String getLoadStatusString() {
        return importState.getStatusString();
    }

    /**
     * Inits the data storage module and the point cache that sits on top of it.
     * @param storageChoice Storage interface selection
     * @param filePath (optional) location to create storage file
     * @param numPoints (Hybrid only) number of points that will be saved in file
     * @throws Exception if the storage cannot be initialized
     */
    private void initDataStorage(StorageFacadeFactory.Facades storageChoice, String filePath, Integer numPoints) throws Exception {
        // init data storage module
        this.dataStorage = StorageFacadeFactory.create(storageChoice);
        this.dataStorage.init(filePath, numPoints);
        this.pointCache = new PointCache(this.dataStorage);
        this.importState.setMzTreeFilePath(this.dataStorage.getFilePath());
    }

    //***********************************************//
    //                    QUERY                      //
    //***********************************************//

    /**
     * Queries the MzTree for points contained with the mz, rt bounds.
     *
     * @param mzMin query mz lower bound (0 to use the tree's minimum)
     * @param mzMax query mz upper bound (0 to use the tree's maximum)
     * @param rtMin query rt lower bound (0 to use the tree's minimum)
     * @param rtMax query rt upper bound (0 to use the tree's maximum)
     * @param numPoints number of points to be returned; 0 to return all points possible from the leaf depth and not use the cache
     * @return the points within the bounds (summarized down to numPoints when numPoints > 0),
     *         or null if numPoints is 0 and the leaf-level points could not be loaded from storage
     */
    public List<MsDataPoint> query(double mzMin, double mzMax, float rtMin, float rtMax, int numPoints) {
        boolean useSummary = (numPoints > 0);

        // if zero passed for any query bound use global min/max
        mzMin = (mzMin == 0) ? this.head.mzMin : mzMin;
        mzMax = (mzMax == 0) ? this.head.mzMax : mzMax;
        rtMin = (rtMin == 0) ? this.head.rtMin : rtMin;
        rtMax = (rtMax == 0) ? this.head.rtMax : rtMax;

        // current level in tree
        int curLevel = 0;

        // all nodes in current level of tree within the query bounds
        ArrayList<MzTreeNode> curLevelNodesInBounds = new ArrayList<>();

        // the points in the current level that are within the query bounds
        ArrayList<MsDataPoint> curLevelPointsInBounds = new ArrayList<>();

        // follow down the tree all nodes within the query bounds
        // base case: curLevel is the leaf level
        while (curLevel != this.treeHeight + 1) {
            // populates curLevelNodesInBounds with the children of the current curLevelNodesInBounds
            // that are within the query's bounds
            curLevelNodesInBounds = this.collectNextLevelNodesInBounds(curLevelNodesInBounds, mzMin, mzMax, rtMin, rtMax);
            curLevel++;

            if (useSummary) {
                // find candidate points at current level
                curLevelPointsInBounds = this.collectPointsWithinBounds(curLevelNodesInBounds, mzMin, mzMax, rtMin, rtMax);

                // stop going down the tree early if enough points are found
                if (curLevelPointsInBounds.size() >= numPoints) {
                    break;
                }
            }
        }

        if (useSummary) {
            // when using summary, the points have been collected and need to be summarized
            if (curLevelPointsInBounds.size() <= numPoints) {
                // return all points if there are not enough to summarize
                return curLevelPointsInBounds;
            } else {
                // return points sampled down using a summary
                return this.summarizer.summarize(curLevelPointsInBounds, numPoints);
            }
        } else {
            // when not using summary, the points must be loaded from the leaf level
            try {
                // populate each node's pointID array
                for (MzTreeNode node : curLevelNodesInBounds) {
                    ensurePointIDs(node);
                }

                // use the leaf-node optimized query
                return this.dataStorage.loadLeavesPointsInBounds(curLevelNodesInBounds, mzMin, mzMax, rtMin, rtMax);
            } catch (Exception e) {
                LOGGER.log(Level.WARNING, "Failed to load points from the leaf level", e);
                return null;
            }
        }
    }

    /**
     * Collects all child nodes of all nodes in curLevelNodesInBounds that overlap
     * with the bounds of the query.
     * @param curLevelNodesInBounds nodes in the current level that overlap with the query bounds
     * @param mzMin mz lower bound
     * @param mzMax mz upper bound
     * @param rtMin rt lower bound
     * @param rtMax rt upper bound
     * @return ArrayList containing nodes of the next level that overlap with the query bounds
     */
    private ArrayList<MzTreeNode> collectNextLevelNodesInBounds(ArrayList<MzTreeNode> curLevelNodesInBounds, double mzMin, double mzMax, float rtMin, float rtMax) {
        // nodes in the next level that overlap the bounds of the query
        ArrayList<MzTreeNode> nextLevelNodesInBounds = new ArrayList<>();

        // base case: curLevelNodesInBounds is empty
        // return ArrayList with the head node of the tree (guaranteed to overlap bounds)
        if (curLevelNodesInBounds.isEmpty()) {
            nextLevelNodesInBounds.add(this.head);
            return nextLevelNodesInBounds;
        }

        // iterate through all current level nodes
        for (MzTreeNode curLevelNode : curLevelNodesInBounds) {
            // if there are children check each child for overlapping bounds
            if (!curLevelNode.children.isEmpty()) {
                // iterate through current level node's children
                for (MzTreeNode nextLevelNode : curLevelNode.children) {
                    // collecting if overlaps with the bounds of the query
                    if (this.doesOverlap(nextLevelNode, mzMin, mzMax, rtMin, rtMax)) {
                        nextLevelNodesInBounds.add(nextLevelNode);
                    }
                }
            }
            // else this is a leaf node not at the expected leaf level,
            // include in next level
            else {
                nextLevelNodesInBounds.add(curLevelNode);
            }
        }
        return nextLevelNodesInBounds;
    }

    /**
     * Collects the points within a collection of MzTreeNodes that fall within
     * the given mz/rt bounds.
     * Nodes whose point IDs could not be retrieved from storage are skipped, so the
     * result may be incomplete in that case (a warning is logged by ensurePointIDs).
     * @param nodes nodes whose points are to be checked against bounds of query
     * @param mzMin mz lower bound
     * @param mzMax mz upper bound
     * @param rtMin rt lower bound
     * @param rtMax rt upper bound
     * @return List of MsDataPoints belonging to the given nodes that are within the given bounds
     */
    private ArrayList<MsDataPoint> collectPointsWithinBounds(ArrayList<MzTreeNode> nodes, double mzMin, double mzMax, float rtMin, float rtMax) {
        // collect all point IDs from all nodes
        ArrayList<Integer> allNodesPointIDs = new ArrayList<>();
        for (MzTreeNode node : nodes) {
            ensurePointIDs(node);

            // pointIDs stays null when the lazy load failed; addAll(null) would throw
            if (node.pointIDs != null) {
                allNodesPointIDs.addAll(node.pointIDs);
            }
        }

        // retrieve all points from pointCache
        ArrayList<MsDataPoint> allNodesPoints = this.pointCache.retrievePoints(allNodesPointIDs);

        // arraylist for collecting points that fall within bounds
        ArrayList<MsDataPoint> pointsWithinBounds = new ArrayList<>();

        // iterate through all points, checking to see if they fall within the bounds
        for (MsDataPoint pointToCheck : allNodesPoints) {
            // if in bounds then collect
            if (pointToCheck.isInBounds(mzMin, mzMax, rtMin, rtMax)) {
                pointsWithinBounds.add(pointToCheck);
            }
        }

        return pointsWithinBounds;
    }

    /**
     * Makes sure the node's point IDs are loaded; on failure logs a warning and leaves them null.
     * @param node node whose point IDs are needed
     */
    private void ensurePointIDs(MzTreeNode node) {
        if (node.pointIDs == null) {
            // node.pointIDs is lazy loaded on first access, not on file open
            try {
                node.pointIDs = this.dataStorage.getNodePointIDs(node.nodeID);
            } catch (Exception e) {
                LOGGER.log(Level.WARNING, "Failed to retrieve points for node. Future query results may be incomplete.", e);
            }
        }
    }

    /**
     * Checks if the search bounds overlap with a node's data bounds.
     * @param node node to check
     * @param mzMin mz lower bound
     * @param mzMax mz upper bound
     * @param rtMin rt lower bound
     * @param rtMax rt upper bound
     * @return True if node's bounds overlap search bounds, false otherwise
     */
    private boolean doesOverlap(MzTreeNode node, double mzMin, double mzMax, float rtMin, float rtMax) {
        return
                // bounds overlap in mz
                (node.mzMin <= mzMax && node.mzMax >= mzMin)
                // bounds overlap in rt
                && (node.rtMin <= rtMax && node.rtMax >= rtMin);
    }

    //***********************************************//
    //                  CSV EXPORT                   //
    //***********************************************//

    /**
     * Copies the current mzTree file to a new location and reconnects to the copy.
     * If the copy fails, attempts to reconnect to the original file and rethrows the failure.
     * @param targetFilepath location of the copy
     * @throws Exception if the copy could not be created
     */
    public void saveAs(Path targetFilepath) throws Exception {
        // source file path
        Path sourceFilepath = Paths.get(this.dataStorage.getFilePath());

        try {
            // close current storage connection
            this.dataStorage.close();

            // copy current output location to new output location
            this.dataStorage.copy(targetFilepath);

            // init connection to new database
            this.dataStorage.init(targetFilepath.toString(), null);
        } catch (Exception e) {
            LOGGER.log(Level.WARNING, "Could not create copy at " + targetFilepath.toString(), e);

            try {
                // revert back to previous connection
                this.dataStorage.init(sourceFilepath.toString(), null);
            } catch (Exception ex) {
                LOGGER.log(Level.WARNING, "After failed copy, could not revert back to " + sourceFilepath.toString(), ex);
            }

            throw e;
        }
    }

    /**
     * Exports the given data range into a csv at filepath.
     * @param filepath out location (".csv" is appended if missing)
     * @param minMZ lower mz bound
     * @param maxMZ upper mz bound
     * @param minRT lower rt bound
     * @param maxRT upper rt bound
     * @return number of points written
     * @throws java.io.IOException if the file cannot be written or the points cannot be loaded
     */
    public int export(String filepath, double minMZ, double maxMZ, float minRT, float maxRT) throws IOException {
        // append csv extension if not already there
        if (!filepath.endsWith(".csv")) {
            filepath = filepath + ".csv";
        }

        try (CSVWriter writer = new CSVWriter(new FileWriter(filepath))) {
            writer.writeNext(new String[] { "m/z", "RT", "intensity", "meta1" });

            // get the points of the data range
            // THIS IS WHERE THE OPTIMIZATION PROBLEM STARTS
            // currently loads all pertinent points into memory (could be the whole file)
            List<MsDataPoint> points = this.query(minMZ, maxMZ, minRT, maxRT, 0);

            // query signals a storage failure with null; report it instead of dereferencing it
            if (points == null) {
                throw new IOException("Could not load points for export to " + filepath);
            }

            // write away!
            for (MsDataPoint p : points) {
                writer.writeNext(new String[] { Double.toString(p.mz), Float.toString(p.rt), Double.toString(p.intensity), Integer.toString(p.meta1) });
            }

            return points.size();
        }
    }

    /**
     * If import status is ready, returns mz x rt bounds in order: mzmin, mzmax, rtmin, rtmax
     * Else returns null
     * @return mz x rt bounds (mzmin, mzmax, rtmin, rtmax) or null
     */
    public double[] getDataBounds() {
        // if not ready, then cannot access data bounds
        if (importState.getImportStatus() != ImportStatus.READY) {
            return null;
        } else {
            return new double[] { this.head.mzMin, this.head.mzMax, this.head.rtMin, this.head.rtMax };
        }
    }

    //***********************************************//
    //                   HELPERS                     //
    //***********************************************//

    /**
     * Converts a csv row (mz, rt, intensity, meta1) to an MsDataPoint object.
     * @param line the row's column values; the first four columns are read
     * @return the point described by the row
     */
    private MsDataPoint csvRowToMsDataPoint(String[] line) {
        double mz = Double.parseDouble(line[0]);
        float rt = Float.parseFloat(line[1]);
        double intensity = Double.parseDouble(line[2]);
        int meta1 = Integer.parseInt(line[3]);

        MsDataPoint point = new MsDataPoint(0, mz, rt, intensity);
        point.meta1 = meta1;
        return point;
    }

    /**
     * Tells whether a csv cell holds a value that Double.parseDouble accepts.
     * @param value cell text, may be null
     * @return true if the text parses as a double, false otherwise
     */
    private static boolean isParsableDouble(String value) {
        if (value == null) {
            return false;
        }
        try {
            Double.parseDouble(value);
            return true;
        } catch (NumberFormatException ignored) {
            // not a number: the caller treats the row as a header
            return false;
        }
    }

    /**
     * Closes the underlying data storage, if one is open.
     */
    public void close() {
        if (this.dataStorage != null) {
            this.dataStorage.close();
            dataStorage = null;
        }
    }
}