// Java tutorial
/** * Copyright 2014 CMBI (contact: <Coos.Baakman@radboudumc.nl>) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package nl.ru.cmbi.vase.parse; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.UnsupportedEncodingException; import java.net.URL; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import nl.ru.cmbi.vase.data.TableData; import nl.ru.cmbi.vase.data.VASEDataObject; import nl.ru.cmbi.vase.data.TableData.ColumnInfo; import nl.ru.cmbi.vase.data.VASEDataObject.PlotDescription; import nl.ru.cmbi.vase.data.stockholm.Alignment; import org.apache.commons.io.IOUtils; import org.dom4j.CDATA; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.DocumentFactory; import org.dom4j.Element; import org.dom4j.io.SAXReader; import org.dom4j.io.XMLWriter; import org.mortbay.log.Log; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class VASEXMLParser { static Logger log = LoggerFactory.getLogger(VASEXMLParser.class); private static void table2xml(TableData tableData, Element root) { Element table = root.addElement("data_table"); for (ColumnInfo ci : tableData.getColumnInfos()) { Element column = table.addElement("column"); column.addAttribute("id", ci.getId()); if (!ci.getTitle().isEmpty()) column.addAttribute("title", ci.getTitle()); column.addAttribute("hidden", new 
Boolean(ci.isHidden()).toString()); column.addAttribute("mouseover", new Boolean(ci.isMouseOver()).toString()); } for (int i = 0; i < tableData.getNumberOfRows(); i++) { Element row = table.addElement("row"); Map<String, Object> rowData = tableData.getRowValues(i); for (ColumnInfo ci : tableData.getColumnInfos()) { Element value = row.addElement("value"); value.addText(rowData.get(ci.getId()).toString()); } } } private static TableData parseTable(Element root, int alignmentLength) throws Exception { Element table = root.element("data_table"); if (table == null) { throw new Exception("missting table element"); } List<ColumnInfo> columns = new ArrayList<ColumnInfo>(); for (Element column : (List<Element>) table.elements("column")) { if (column.attribute("id") == null) { throw new Exception("there\'s a column with no id"); } ColumnInfo ci = new ColumnInfo(); ci.setId(column.attribute("id").getValue()); if (column.attribute("title") != null) { ci.setTitle(column.attribute("title").getValue()); } if (column.attribute("hidden") != null) { ci.setHidden(Boolean.parseBoolean(column.attribute("hidden").getValue())); } if (column.attribute("mouseover") != null) { ci.setMouseOver(Boolean.parseBoolean(column.attribute("mouseover").getValue())); } columns.add(ci); } TableData tableData = new TableData(columns); int rowIndex = 0; for (Element row : (List<Element>) table.elements("row")) { List<Element> values = row.elements("value"); if (columns.size() != values.size()) { throw new Exception("the number of value tags must always match the number of columns"); } for (int i = 0; i < values.size(); i++) { tableData.setValue(columns.get(i).getId(), rowIndex, values.get(i).getText()); } rowIndex++; } // Do some checks on the contents: ColumnInfo columnResidueNumber = tableData.getColumnByID(TableData.residueNumberID), columnPDBResidue = tableData.getColumnByID(TableData.pdbResidueID); if (columnResidueNumber == null) { throw new Exception("missing column: " + 
TableData.residueNumberID); } else if (!tableData.columnIsNumber(TableData.residueNumberID)) { throw new Exception("not numerical: " + TableData.residueNumberID); } List<Object> residueNumberDuplicates = tableData.listDuplicateValues(TableData.residueNumberID); if (residueNumberDuplicates.size() > 0) { throw new Exception("column " + TableData.residueNumberID + " contains duplicates: " + residueNumberDuplicates.toString()); } // The column must contain all the residue positions in the alignment: List<Integer> missingPositions = new ArrayList<Integer>(); for (int i = 1; i <= alignmentLength; i++) { if (!tableData.columnHasValue(TableData.residueNumberID, new Integer(i))) missingPositions.add(i); } if (missingPositions.size() > 0) { throw new Exception( "column " + TableData.pdbResidueID + " has missing values: " + missingPositions.toString()); } if (columnPDBResidue == null) { throw new Exception("missing column: " + TableData.pdbResidueID); } return tableData; } public static void write(VASEDataObject data, OutputStream xmlOut) throws IOException { DocumentFactory df = DocumentFactory.getInstance(); Document doc = df.createDocument(); Element root = doc.addElement("xml"); if (data.getTitle() != null) { Element title = root.addElement("title"); title.setText(data.getTitle()); } Element fasta = root.addElement("fasta"); ByteArrayOutputStream fastaStream = new ByteArrayOutputStream(); FastaParser.toFasta(data.getAlignment().getMap(), fastaStream); fasta.add(df.createCDATA(new String(fastaStream.toByteArray(), StandardCharsets.UTF_8))); Element pdb = root.addElement("pdb"); pdb.addAttribute("pdbid", data.getPdbID()); table2xml(data.getTable(), root); if (data.getPlots().size() > 0) { Element plots = root.addElement("plots"); for (PlotDescription pd : data.getPlots()) { Element plot = plots.addElement("plot"); plot.addElement("x").setText(pd.getXAxisColumnID()); plot.addElement("y").setText(pd.getYAxisColumnID()); plot.addAttribute("title", pd.getPlotTitle()); } } 
XMLWriter writer = new XMLWriter(xmlOut); writer.write(doc); writer.close(); } public static VASEDataObject parse(InputStream xmlIn) throws Exception { Document document = (new SAXReader()).read(xmlIn); Element root = document.getRootElement(); Element fasta = root.element("fasta"); if (fasta == null) { throw new Exception("no fasta tag"); } Map<String, String> fastaMap = FastaParser.parseFasta(IOUtils.toInputStream(fasta.getText(), "UTF-8")); Alignment alignment = new Alignment(fastaMap); Element pdb = root.element("pdb"); if (pdb == null) { throw new Exception("no pdb tag"); } TableData tableData = parseTable(root, alignment.countColumns()); VASEDataObject data = new VASEDataObject(alignment, tableData, pdb.attributeValue("pdbid")); Element title = root.element("title"); if (title != null) { data.setTitle(title.getText()); } for (Element sequenceUrl : (List<Element>) root.elements("sequence-url")) { URL url = new URL(sequenceUrl.getText()); if (sequenceUrl.attribute("id") == null) { throw new Exception("no id given for " + url.toString()); } String id = sequenceUrl.attribute("id").getValue(); if (!fastaMap.containsKey(id)) { throw new Exception("no sequence with id " + id + " in fasta"); } data.getSequenceReferenceURLs().put(id, url); } Element plots = root.element("plots"); if (plots != null) { for (Element plot : (List<Element>) plots.elements("plot")) { PlotDescription d = new PlotDescription(); if (plot.attribute("title") == null) { throw new Exception("evry plot must have a title"); } d.setPlotTitle(plot.attribute("title").getValue()); Element x = plot.element("x"), y = plot.element("y"); if (x == null || y == null) { throw new Exception("every plot must have a column specified for x and y"); } for (Element axis : (new Element[] { x, y })) { ColumnInfo column = tableData.getColumnByID(axis.getText()); if (column == null) { throw new Exception( "There\'s no column with id " + axis.getText() + " (specified in plot)"); } else if 
(!tableData.columnIsNumber(column.getId())) { throw new Exception("Column with id " + axis.getText() + " cannot be used in plot, since it\'s not numerical"); } } d.setXAxisColumnID(x.getText()); d.setYAxisColumnID(y.getText()); data.getPlots().add(d); } } return data; } }