// Java tutorial
/* * Copyright 2013 Bazaarvoice, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.bazaarvoice.jsonpps; import com.fasterxml.jackson.core.JsonEncoding; import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonToken; import com.fasterxml.jackson.core.util.DefaultPrettyPrinter; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.SerializationFeature; import net.sourceforge.argparse4j.ArgumentParsers; import net.sourceforge.argparse4j.impl.Arguments; import net.sourceforge.argparse4j.inf.ArgumentParser; import net.sourceforge.argparse4j.inf.ArgumentParserException; import net.sourceforge.argparse4j.inf.Namespace; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.UUID; import static com.fasterxml.jackson.core.JsonTokenId.ID_FIELD_NAME; import static com.fasterxml.jackson.core.JsonTokenId.ID_START_ARRAY; import static com.fasterxml.jackson.core.JsonTokenId.ID_START_OBJECT; public class PrettyPrintJson { private static final File STDINOUT = new File("-"); private int flatten; private boolean sortKeys; private boolean strict; private boolean wrap; private InputStream stdin = System.in; private OutputStream stdout = System.out; public 
static void main(String[] args) throws Exception { try { ArgumentParser parser = ArgumentParsers.newArgumentParser("jsonpps") .description("A streaming JSON pretty printer that can format multi-GB input files.") .defaultHelp(true); parser.addArgument("-o", "--out").type(Arguments.fileType()).setDefault(STDINOUT).help("output file"); parser.addArgument("--flatten").metavar("N").type(Integer.class).setDefault(0) .help("flatten the top-N levels of object/array structure"); parser.addArgument("-i", "--in-place").action(Arguments.storeTrue()) .help("modify the original file(s)"); parser.addArgument("-S", "--sort-keys").action(Arguments.storeTrue()).help( "emit objects with keys in sorted order. this increases memory requirements since objects must be buffered in memory."); parser.addArgument("--strict").action(Arguments.storeTrue()).help("reject non-conforming json"); parser.addArgument("--wrap").action(Arguments.storeTrue()).help("wrap all output in a json array"); parser.addArgument("--unwrap").action(Arguments.storeTrue()) .help("flatten the top level of object/array structure"); parser.addArgument("in").nargs("*") .type(Arguments.fileType().acceptSystemIn().verifyExists().verifyIsFile().verifyCanRead()) .setDefault(new File[] { STDINOUT }).help("input file(s)"); Namespace ns; try { ns = parser.parseArgs(args); } catch (ArgumentParserException e) { parser.handleError(e); System.exit(2); return; } PrettyPrintJson jsonpp = new PrettyPrintJson(); File outputFile = ns.get("out"); List<File> inputFiles = ns.getList("in"); boolean inPlace = ns.getBoolean("in_place"); jsonpp.setFlatten(ns.getInt("flatten")); jsonpp.setSortKeys(ns.getBoolean("sort_keys")); jsonpp.setStrict(ns.getBoolean("strict")); jsonpp.setWrap(ns.getBoolean("wrap")); if (ns.getBoolean("unwrap")) { jsonpp.setFlatten(1); } if (!inPlace) { // Pretty print all input files to a single output jsonpp.prettyPrint(inputFiles, outputFile); } else { // Pretty print all input files back to themselves. 
if (outputFile != STDINOUT) { // use "!=" not "!.equals()" since default is ok but "-o -" is not. System.err.println("error: -o and --in-place are mutually exclusive"); System.exit(2); } if (inputFiles.isEmpty()) { System.err.println("error: --in-place requires at least one input file"); System.exit(2); } if (inputFiles.contains(STDINOUT)) { System.err.println("error: --in-place cannot operate on stdin"); System.exit(2); } for (File inputFile : inputFiles) { jsonpp.prettyPrint(inputFile, inputFile); } } } catch (Throwable t) { t.printStackTrace(); System.err.println(t.toString()); System.exit(1); } } public void setFlatten(int flatten) { this.flatten = flatten; } public void setSortKeys(boolean sortKeys) { this.sortKeys = sortKeys; } public void setStrict(boolean strict) { this.strict = strict; } public void setWrap(boolean wrap) { this.wrap = wrap; } public void setStdin(InputStream stdin) { this.stdin = stdin; } public void setStdout(OutputStream stdout) { this.stdout = stdout; } public void prettyPrint(File inputFile, File outputFile) throws IOException { prettyPrint(Collections.singletonList(inputFile), outputFile); } public void prettyPrint(List<File> inputFiles, File outputFile) throws IOException { JsonFactory factory = new JsonFactory(); factory.disable(JsonFactory.Feature.INTERN_FIELD_NAMES); if (!strict) { factory.enable(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER); factory.enable(JsonParser.Feature.ALLOW_COMMENTS); factory.enable(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS); factory.enable(JsonParser.Feature.ALLOW_NUMERIC_LEADING_ZEROS); factory.enable(JsonParser.Feature.ALLOW_SINGLE_QUOTES); factory.enable(JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS); factory.enable(JsonParser.Feature.ALLOW_UNQUOTED_FIELD_NAMES); } ObjectMapper mapper = null; if (sortKeys) { mapper = new ObjectMapper(factory); mapper.enable(SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS); mapper.disable(SerializationFeature.FLUSH_AFTER_WRITE_VALUE); } // Open the 
output stream and create the Json emitter. JsonGenerator generator; File tempOutputFile = null; if (STDINOUT.equals(outputFile)) { generator = factory.createGenerator(stdout, JsonEncoding.UTF8); } else if (!caseInsensitiveContains(inputFiles, outputFile)) { generator = factory.createGenerator(outputFile, JsonEncoding.UTF8); } else { // Writing to an input file.. use a temp file to stage the output until we're done. tempOutputFile = getTemporaryFileFor(outputFile); generator = factory.createGenerator(tempOutputFile, JsonEncoding.UTF8); } try { // Separate top-level objects by a newline in the output. String newline = System.getProperty("line.separator"); generator.setPrettyPrinter(new DefaultPrettyPrinter(newline)); if (wrap) { generator.writeStartArray(); } for (File inputFile : inputFiles) { JsonParser parser; if (STDINOUT.equals(inputFile)) { parser = factory.createParser(stdin); } else { parser = factory.createParser(inputFile); } try { while (parser.nextToken() != null) { copyCurrentStructure(parser, mapper, 0, generator); } } finally { parser.close(); } } if (wrap) { generator.writeEndArray(); } generator.writeRaw(newline); } finally { generator.close(); } if (tempOutputFile != null && !tempOutputFile.renameTo(outputFile)) { System.err.println("error: unable to rename temporary file to output: " + outputFile); System.exit(1); } } private void copyCurrentStructure(JsonParser parser, ObjectMapper mapper, int depth, JsonGenerator generator) throws IOException { // Avoid using the mapper to parse the entire input until we absolutely must. This allows pretty // printing huge top-level arrays (that wouldn't fit in memory) containing smaller objects (that // individually do fit in memory) where the objects are printed with sorted keys. JsonToken t = parser.getCurrentToken(); if (t == null) { generator.copyCurrentStructure(parser); // Will report the error of a null token. 
return; } int id = t.id(); if (id == ID_FIELD_NAME) { if (depth > flatten) { generator.writeFieldName(parser.getCurrentName()); } t = parser.nextToken(); id = t.id(); } switch (id) { case ID_START_OBJECT: if (sortKeys && depth >= flatten) { // Load the entire object in memory so we can sort its keys and serialize it back out. mapper.writeValue(generator, parser.readValueAs(Map.class)); } else { // Don't load the whole object into memory. Copy it in a memory-efficient streaming fashion. if (depth >= flatten) { generator.writeStartObject(); } while (parser.nextToken() != JsonToken.END_OBJECT) { copyCurrentStructure(parser, mapper, depth + 1, generator); } if (depth >= flatten) { generator.writeEndObject(); } } break; case ID_START_ARRAY: // Don't load the whole array into memory. Copy it in a memory-efficient streaming fashion. if (depth >= flatten) { generator.writeStartArray(); } while (parser.nextToken() != JsonToken.END_ARRAY) { copyCurrentStructure(parser, mapper, depth + 1, generator); } if (depth >= flatten) { generator.writeEndArray(); } break; default: generator.copyCurrentEvent(parser); break; } } private boolean caseInsensitiveContains(Collection<File> srcs, File dest) throws IOException { for (File src : srcs) { if (!STDINOUT.equals(src) && src.getCanonicalPath().equalsIgnoreCase(dest.getCanonicalPath())) { return true; } } return false; } private File getTemporaryFileFor(File file) { // The temporary file must exist in the same directory as the destination file so we can // reliably rename it at the end w/o copying across volumes. Use a secure random UUID to // name the file since there is mathematically no realistic chance of collisions. String randomSuffix = UUID.randomUUID().toString().replace("-", ""); File tempFile = new File(file.getParentFile(), "_" + file.getName() + "." + randomSuffix); tempFile.deleteOnExit(); return tempFile; } }