Java tutorial
/* * Copyright (C) 2012 SeqWare * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package com.github.seqware.queryengine.system.exporters; import com.github.seqware.queryengine.Constants; import com.github.seqware.queryengine.factory.SWQEFactory; import com.github.seqware.queryengine.model.Feature; import com.github.seqware.queryengine.model.FeatureSet; import com.github.seqware.queryengine.model.Tag; import com.github.seqware.queryengine.system.Utility; import com.github.seqware.queryengine.system.importers.workers.ImportConstants; import com.github.seqware.queryengine.system.importers.workers.VCFVariantImportWorker; import com.github.seqware.queryengine.util.SGID; import com.google.gson.Gson; import com.google.gson.GsonBuilder; import java.io.BufferedWriter; import java.io.FileWriter; import java.io.IOException; import java.io.OutputStreamWriter; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.commons.io.IOUtils; import org.apache.log4j.Logger; /** * This will dump JSON files compatible with ElasticSearch given a FeatureSet that was originally imported from * a VCF file. * * @author dyuen * @version $Id: $Id */ public class JSONDumper { /** Constant <code>VCF="VCFVariantImportWorker.VCF"</code> */ public static final String VCF = Constants.TRACK_TAGSET ? VCFVariantImportWorker.VCF : null; private String[] args; /** * <p>main.</p> * * @param args an array of {@link java.lang.String} objects. */ public static void main(String[] args) { JSONDumper dumper = new JSONDumper(args); dumper.export(); } /** * <p>export.</p> */ public void export() { if (args.length < 1 || args.length > 2) { System.err.println(args.length + " arguments found"); System.out.println("JSONDumper <featureSetID> [outputFile]"); System.exit(-1); } // parse a SGID from a String representation, we need a more elegant solution here String featureSetID = args[0]; SGID sgid = Utility.parseSGID(featureSetID); FeatureSet fSet = SWQEFactory.getQueryInterface().getLatestAtomBySGID(sgid, FeatureSet.class); // if this featureSet does not exist if (fSet == null) { System.out.println("featureSet ID not found"); System.exit(-2); } dumpVCFFromFeatureSetID(fSet, (args.length == 2 ? args[1] : null)); } /** * <p>Constructor for VCFDumper.</p> * * @param args an array of {@link java.lang.String} objects. */ public JSONDumper(String[] args) { this.args = args; } /** * <p>outputFeatureInVCF.</p> * * @param buffer a {@link java.lang.StringBuffer} object. * @param feature a {@link com.github.seqware.queryengine.model.Feature} object. * @return a boolean. */ public static boolean outputFeatureInVCF(StringBuilder buffer, Feature feature, FeatureSet set) { boolean caughtNonVCF = false; Gson gson = new GsonBuilder().create(); Map<String, Map<String, Object>> map = new HashMap<String, Map<String, Object>>(); Map<String, Object> innerMap = new HashMap<String, Object>(); innerMap.put("_index", "queryengine"); innerMap.put("_type", "features"); innerMap.put("_id", feature.getSGID().getRowKey()); map.put("index", innerMap); buffer.append(gson.toJson(map)); buffer.append("\n"); Gson gson2 = new GsonBuilder().create(); innerMap.clear(); innerMap.put("id", feature.getSGID().getRowKey()); String title = "chr" + feature.getSeqid() + ":" + feature.getStart() + "-" + feature.getStop() + ":" + feature.getTagByKey(VCF, ImportConstants.VCF_REFERENCE_BASE).getValue().toString() + "->" + feature.getTagByKey(VCF, ImportConstants.VCF_CALLED_BASE).getValue().toString(); innerMap.put("title", title); String[] interestingTags = { "isCompleteGenomics", "isDbSNP", "isDGV", "isEvoFold", "isGenomicsSegmentalDups", "isGERP", "isGWASCatalog", "isLRT", "isMutationTaster", "isNHLBI", "isPhastConsElements", "isPhyloPConservationScore", "isPolyPhen", "isTargetScanS", "isTfbsConsSite" }; // create clean tags Map<String, String> cleanTags = new HashMap<String, String>(); for (String tag : interestingTags) { // take off the "is" String cleanName = tag.substring(2); cleanTags.put(tag, cleanName); } List<String> databases = new ArrayList<String>(); for (String tag : interestingTags) { Tag tagByKey = feature.getTagByKey(VCF, tag); if (tagByKey != null) { databases.add(cleanTags.get(tag)); } } innerMap.put("databases", databases); // we'll take consequence type from SNPEFF_EFFECT List<String> consequences = new ArrayList<String>(); if (feature.getTagByKey(VCF, "SNPEFF_EFFECT") != null) { String value = feature.getTagByKey(VCF, "SNPEFF_EFFECT").getValue().toString(); // clean up value value = value.toLowerCase(); consequences.add(value); } if (consequences.isEmpty()) { consequences.add("none"); } innerMap.put("consequences", consequences); innerMap.put("feature_set", set.getSGID().getRowKey().replaceAll("-", "")); innerMap.put("variant_type", feature.getTagByKey(VCF, "IndelType") != null ? "INDEL" : "SNV"); buffer.append(gson2.toJson(innerMap)); return caughtNonVCF; } /** * <p>dumpVCFFromFeatureSetID.</p> * * @param fSet a {@link com.github.seqware.queryengine.model.FeatureSet} object. * @param file a {@link java.lang.String} object. */ public static void dumpVCFFromFeatureSetID(FeatureSet fSet, String file) { BufferedWriter outputStream = null; try { if (file != null) { outputStream = new BufferedWriter(new FileWriter(file)); } else { outputStream = new BufferedWriter(new OutputStreamWriter(System.out)); } } catch (IOException e) { Logger.getLogger(JSONDumper.class.getName()).fatal("Exception thrown starting export to file:", e); System.exit(-1); } // fall-through if plugin-fails try { for (Feature feature : fSet) { StringBuilder buffer = new StringBuilder(); boolean caught = outputFeatureInVCF(buffer, feature, fSet); outputStream.append(buffer); outputStream.newLine(); } outputStream.flush(); } catch (IOException e) { Logger.getLogger(JSONDumper.class.getName()).fatal("Exception thrown exporting to file:", e); System.exit(-1); } finally { IOUtils.closeQuietly(outputStream); } } }