Java tutorial
// // Copyright 2009 Robin Komiwes, Bruno Verachten, Christophe Cordenier // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // /* * $Id: DocumentToXHTML.java,v 1.6 2002/08/09 14:28:44 bveracht Exp $ * $Log: DocumentToXHTML.java,v $ * Revision 1.6 2002/08/09 14:28:44 bveracht * Added a XML header for the clean.xhtml file. * This way, the bug disappears. * * Revision 1.5 2002/04/11 15:07:41 bveracht * Clean up * */ package com.wooki.services.parsers; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.PrintWriter; import org.apache.log4j.Logger; import org.springframework.core.io.Resource; import org.w3c.tidy.Configuration; import org.w3c.tidy.Tidy; /** * This class cleans an HTML file to produce XHTML */ public class DocumentToXHTML implements Convertor { private boolean xmlOut; private Logger logger = Logger.getLogger(DocumentToXHTML.class); public InputStream performTransformation(Resource xmlDocument) { BufferedInputStream in; BufferedOutputStream out; ByteArrayOutputStream result; Tidy tidy = new Tidy(); java.util.Properties props = new java.util.Properties(); props.setProperty("new-inline-tags", "page-break,page-number,page-numbers,wooki,xsl:value-of,xsl:for-each,quote"); props.setProperty("new-blocklevel-tags", "for,page-header,page-footer,xsl:value-of,xsl:for-each"); props.setProperty("new-empty-tags", "page-break,page-number,page-numbers,xsl:value-of"); // props.setProperty("new-pre-tags", "for,header,footer"); props.setProperty("new-pre-tags", "wooki"); tidy.setConfigurationFromProps(props); // tidy.setDocType("omit"); tidy.setXmlOut(xmlOut); tidy.setXHTML(true); tidy.setEmacs(true); tidy.setErrfile("tidyErrors.txt"); tidy.setFixBackslash(true); tidy.setNumEntities(true); tidy.setQuoteNbsp(false); tidy.setCharEncoding(Configuration.LATIN1); // tidy.setInputEncoding("ISO-8859-2"); tidy.setFixComments(true); tidy.setQuoteAmpersand(false); tidy.setEncloseText(true); tidy.setEncloseBlockText(true); // tidy.setWord2000(true); try { tidy.setErrout(new PrintWriter(new FileWriter("tidyErrors.txt"), true)); in = new BufferedInputStream(xmlDocument.getInputStream()); out = new BufferedOutputStream(result = new ByteArrayOutputStream()); byte[] XMLHeader = "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n".getBytes(); out.write(XMLHeader, 0, XMLHeader.length); tidy.parse(in, out); out.flush(); return new ByteArrayInputStream(result.toByteArray()); } catch (IOException ioe) { ioe.printStackTrace(); logger.error(ioe.getLocalizedMessage()); return null; } } }