Main.java Source code

Java tutorial

Introduction

Here is the source code for Main.java

Source

//package com.java2s;
import java.io.ByteArrayInputStream;
import java.io.CharArrayWriter;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;

import org.xml.sax.SAXException;

/**
 * Reduces an XML fragment to its text content while keeping a small set of inline tags.
 *
 * <p>The processing pipeline (see {@link #trimTag(String, int)}) is:
 * <ol>
 * <li>strip namespace prefixes from element names (and from attributes of declared prefixes),</li>
 * <li>rename selected tags (for example {@code inf} to {@code sub}),</li>
 * <li>unwrap every element that is not {@code sub} or {@code sup}, leaving only its content.</li>
 * </ol>
 *
 * <p>The returned value is the serialized <em>inner</em> content of the document's root element, so
 * special characters in text are XML-escaped (for example {@code &amp;}).
 */
public class Main {
    private final static Charset CHARSET_UTF8 = Charset.forName("UTF-8");
    /** Default handling: {@code inf} is renamed to {@code sub}. */
    public final static int TYPE_COMMON = 0;
    /** Like {@link #TYPE_COMMON}, with the {@code mml} prefix kept on attributes. */
    public final static int TYPE_CAPCAS = 3;
    /** Handled exactly like {@link #TYPE_COMMON}. */
    public final static int TYPE_WILEY = 11;
    /** Handled exactly like {@link #TYPE_COMMON}. */
    public final static int TYPE_TAYLORFRANCIS = 13;

    /**
     * Trims tags from {@code xmlContent} using the {@link #TYPE_COMMON} rules.
     *
     * @param xmlContent a well-formed XML document with a single root element; may be {@code null} or empty
     * @return the trimmed inner content of the root element, or {@code xmlContent} itself when it is
     *         {@code null} or empty
     * @throws SAXException if {@code xmlContent} is not well-formed XML
     * @throws IOException if reading the content fails
     * @throws ParserConfigurationException if no XML parser can be created
     * @throws TransformerException if serializing an intermediate document fails
     * @throws XPathExpressionException if the internal XPath lookup fails
     */
    public static String trimTag(String xmlContent) throws XPathExpressionException, TransformerException,
            SAXException, IOException, ParserConfigurationException {

        return trimTag(xmlContent, TYPE_COMMON);
    }

    /**
     * Trims tags from {@code xmlContent} using the rules selected by {@code metaType}.
     *
     * @param xmlContent a well-formed XML document with a single root element; may be {@code null} or empty
     * @param metaType one of the {@code TYPE_*} constants; any other value returns the input unchanged
     * @return the trimmed inner content of the root element; {@code xmlContent} itself when it is
     *         {@code null}, empty, or {@code metaType} is not recognised
     * @throws SAXException if {@code xmlContent} is not well-formed XML
     * @throws IOException if reading the content fails
     * @throws ParserConfigurationException if no XML parser can be created
     * @throws TransformerException if serializing an intermediate document fails
     * @throws XPathExpressionException if the internal XPath lookup fails
     */
    public static String trimTag(String xmlContent, int metaType) throws XPathExpressionException,
            TransformerException, SAXException, IOException, ParserConfigurationException {

        if (xmlContent == null || xmlContent.isEmpty()) {
            return xmlContent;
        }

        // Tags that survive the final "unwrap everything else" step.
        String[] valueTags = new String[] { "sub", "sup" };

        switch (metaType) {
        case TYPE_CAPCAS:
            // Flat list of (source, target) pairs. Element prefixes are already stripped by
            // trimNamespace() when the rename runs, so the plain "inf" pair is required for
            // <ce:inf> to end up as <sub>; the prefixed pairs are kept for completeness.
            return trimTagCommon(xmlContent, new String[] { "mml" },
                    new String[] { "ce:sup", "sup", "ce:sub", "sub", "ce:inf", "sub", "inf", "sub" }, valueTags);
        case TYPE_COMMON:
        case TYPE_WILEY:
        case TYPE_TAYLORFRANCIS:
            return trimTagCommon(xmlContent, new String[] {}, new String[] { "inf", "sub" }, valueTags);
        default:
            // Unknown type: leave the content untouched.
            return xmlContent;
        }
    }

    /**
     * Runs the three pipeline steps in order: namespace trimming, tag renaming, tag unwrapping.
     *
     * @param xmlContent well-formed XML
     * @param exceptNamespace prefixes whose attributes keep their prefix
     * @param repaceTags flat list of (source, target) tag-name pairs
     * @param valueTags tag names that are kept in the result
     * @return the trimmed inner content of the root element
     */
    private static String trimTagCommon(String xmlContent, String[] exceptNamespace, String[] repaceTags,
            String[] valueTags) throws ParserConfigurationException, TransformerException, SAXException,
            IOException, XPathExpressionException {

        String withoutNamespaces = trimNamespace(xmlContent, exceptNamespace);
        String renamed = replaceTags(withoutNamespaces, repaceTags);
        return getValueExceptTags(renamed, valueTags);
    }

    /**
     * Rebuilds the document without namespace prefixes and returns it serialized.
     *
     * @param xmlContent well-formed XML
     * @param except prefixes that must be kept on attributes; may be {@code null}
     * @return the serialized, prefix-free document (including the XML declaration)
     */
    private static String trimNamespace(String xmlContent, String... except)
            throws ParserConfigurationException, TransformerException, SAXException, IOException {

        Document srcDoc = parse(xmlContent, CHARSET_UTF8);
        Document document = generate();

        List<String> exceptNamespaces = new ArrayList<String>();
        if (except != null) {
            exceptNamespaces = Arrays.asList(except);
        }

        Element rootElement = (Element) copyNodeExceptNamespace(document, srcDoc.getDocumentElement(),
                new HashSet<String>(), exceptNamespaces);
        document.appendChild(rootElement);

        return write(document);
    }

    /**
     * Renames descendant elements of the root according to {@code tags} and returns the serialized document.
     * The root element itself is never renamed.
     *
     * @param xmlContent well-formed XML
     * @param tags flat list of (source, target) tag-name pairs
     * @return the serialized document after renaming
     */
    private static String replaceTags(String xmlContent, String... tags) throws TransformerException, SAXException,
            IOException, ParserConfigurationException, XPathExpressionException {

        Document document = parse(xmlContent, CHARSET_UTF8);
        // "." evaluated against the document element yields that element itself.
        NodeList nodeList = getNodeList(document, ".");
        for (int i = 0; i < nodeList.getLength(); i++) {
            replaceTagName(document, nodeList.item(i), tags);
        }

        return write(document);
    }

    /**
     * Unwraps every element below the root that is not listed in {@code tags} and returns the
     * serialized inner content of the root element.
     *
     * @param xmlContent well-formed XML
     * @param tags tag names to keep
     * @return the inner content of the root, or an empty string when the root ends up empty
     */
    private static String getValueExceptTags(String xmlContent, String... tags)
            throws TransformerException, SAXException, IOException, ParserConfigurationException {

        Document document = parse(xmlContent, CHARSET_UTF8);
        Element rootElement = document.getDocumentElement();
        replace2TextExceptTags(document, rootElement, tags);

        // Copy under a root with a known name so its start and end tags can be located in the output.
        String rootTag = "xml_root_tag";
        Document newDocument = generate();
        Element newRootElement = newDocument.createElement(rootTag);
        newDocument.appendChild(newRootElement);
        copyNode(newDocument, newRootElement, rootElement);

        String text = write(newDocument);
        int startIndex = text.indexOf(">", text.indexOf("<" + rootTag));
        int lastIndex = text.lastIndexOf("</" + rootTag + ">");

        // An empty root is serialized as a self-closing tag, so no end tag is found and "" is returned.
        String result = "";
        if (startIndex > 0 && lastIndex > 0) {
            result = text.substring(startIndex + 1, lastIndex);
        }

        return result;
    }

    /**
     * Creates a non-validating, namespace-unaware {@link DocumentBuilder}.
     *
     * <p>NOTE(review): the factory keeps its default DTD / external-entity settings. If the XML can
     * come from an untrusted source, harden it against XXE (for example by disabling external
     * entities) after confirming that no caller relies on DTD processing.
     *
     * @return a new document builder
     * @throws ParserConfigurationException if the builder cannot be created
     */
    private static DocumentBuilder newDocumentBuilder() throws ParserConfigurationException {

        DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
        documentBuilderFactory.setNamespaceAware(false);
        documentBuilderFactory.setValidating(false);
        return documentBuilderFactory.newDocumentBuilder();
    }

    /**
     * Parses {@code xmlContent} into a DOM document.
     *
     * @param xmlContent the XML text
     * @param charset the charset used to turn the text into bytes for the parser
     * @return the parsed document
     * @throws SAXException if the content is not well-formed
     * @throws IOException if reading the content fails
     * @throws ParserConfigurationException if the parser cannot be created
     */
    private static Document parse(String xmlContent, Charset charset)
            throws SAXException, IOException, ParserConfigurationException {

        return newDocumentBuilder().parse(new ByteArrayInputStream(xmlContent.getBytes(charset)));
    }

    /**
     * Creates a new, empty DOM document.
     *
     * @return the empty document
     * @throws ParserConfigurationException if the parser cannot be created
     */
    private static Document generate() throws ParserConfigurationException {

        return newDocumentBuilder().newDocument();
    }

    /**
     * Recursively copies {@code srcNode} into {@code document}, dropping namespace information.
     *
     * <p>Element names always lose their prefix. {@code xmlns:*} attributes are dropped. Other
     * prefixed attributes lose their prefix only when that prefix has been declared (on this element
     * or on one visited earlier) and is not listed in {@code exceptNamespaces}. Only element and text
     * children are copied; other node types are omitted.
     *
     * @param document the document that owns the created nodes
     * @param srcNode the node to copy
     * @param namespace accumulator of declared prefixes that should be stripped; updated in place
     * @param exceptNamespaces prefixes that must not be stripped from attributes
     * @return the copy, or {@code null} when {@code srcNode} is neither an element nor a text node
     */
    private static Node copyNodeExceptNamespace(Document document, Node srcNode, Set<String> namespace,
            List<String> exceptNamespaces) {

        if (srcNode.getNodeType() == Node.TEXT_NODE) {
            return document.createTextNode(srcNode.getTextContent());
        }
        if (srcNode.getNodeType() != Node.ELEMENT_NODE) {
            return null;
        }

        String nodeName = srcNode.getNodeName();
        Element element = document.createElement(nodeName.substring(nodeName.indexOf(":") + 1));

        NamedNodeMap attributes = srcNode.getAttributes();

        // Pass 1: collect the prefixes declared here. This must finish before pass 2 because the
        // attribute map has no guaranteed order.
        for (int i = 0; i < attributes.getLength(); i++) {
            String name = ((Attr) attributes.item(i)).getName();
            if (name.startsWith("xmlns:")) {
                String prefix = name.substring(6);
                if (!exceptNamespaces.contains(prefix)) {
                    namespace.add(prefix);
                }
            }
        }

        // Pass 2: copy the remaining attributes, stripping known prefixes.
        for (int i = 0; i < attributes.getLength(); i++) {
            Attr attr = (Attr) attributes.item(i);
            String name = attr.getName();
            if (name.startsWith("xmlns:")) {
                continue;
            }
            int colon = name.indexOf(":");
            if (colon > 0 && namespace.contains(name.substring(0, colon))) {
                name = name.substring(colon + 1);
            }
            element.setAttribute(name, attr.getValue());
        }

        NodeList children = srcNode.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            if (child.getNodeType() == Node.TEXT_NODE) {
                element.appendChild(document.createTextNode(child.getTextContent()));
            } else if (child.getNodeType() == Node.ELEMENT_NODE) {
                element.appendChild(copyNodeExceptNamespace(document, child, namespace, exceptNamespaces));
            }
        }

        return element;
    }

    /**
     * Serializes {@code document} to a string using a default {@link Transformer}.
     *
     * @param document the document to serialize
     * @return the serialized XML
     * @throws TransformerException if serialization fails
     * @throws UnsupportedEncodingException declared for compatibility with existing callers
     */
    private static String write(Document document) throws TransformerException, UnsupportedEncodingException {

        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        Source source = new DOMSource(document);
        CharArrayWriter out = new CharArrayWriter();
        Result result = new StreamResult(out);
        transformer.transform(source, result);

        return out.toString();
    }

    /**
     * Evaluates an XPath expression against the document element.
     *
     * @param document the document to query
     * @param expression the XPath expression, evaluated with the document element as context
     * @return the matching nodes
     * @throws XPathExpressionException if the expression cannot be evaluated
     */
    private static NodeList getNodeList(Document document, String expression) throws XPathExpressionException {

        XPath xpath = XPathFactory.newInstance().newXPath();
        return (NodeList) xpath.evaluate(expression, document.getDocumentElement(), XPathConstants.NODESET);
    }

    /**
     * Parses {@code xmlContent} and evaluates an XPath expression against its document element.
     *
     * @param xmlContent well-formed XML
     * @param expression the XPath expression
     * @return the matching nodes
     */
    private static NodeList getNodeList(String xmlContent, String expression)
            throws XPathExpressionException, SAXException, IOException, ParserConfigurationException {

        return getNodeList(parse(xmlContent, CHARSET_UTF8), expression);
    }

    /**
     * Copies the current children of a node into a list that is unaffected by later DOM changes.
     *
     * @param nodeList the live child list
     * @return a stable copy of the list
     */
    private static List<Node> snapshot(NodeList nodeList) {

        List<Node> nodes = new ArrayList<Node>(nodeList.getLength());
        for (int i = 0; i < nodeList.getLength(); i++) {
            nodes.add(nodeList.item(i));
        }
        return nodes;
    }

    /**
     * Looks up the replacement name for {@code nodeName} in a flat (source, target) pair list.
     * The first matching pair wins; a trailing unpaired entry is ignored.
     *
     * @param nodeName the element name to look up
     * @param tags flat list of (source, target) pairs
     * @return the target name, or {@code null} when no pair matches
     */
    private static String findReplacement(String nodeName, String... tags) {

        for (int j = 0; j + 1 < tags.length; j += 2) {
            if (nodeName.equals(tags[j])) {
                return tags[j + 1];
            }
        }
        return null;
    }

    /**
     * Recursively renames the descendant elements of {@code node} according to {@code tags}.
     *
     * <p>A renamed element keeps all of its children but not its attributes.
     *
     * @param document the document that owns {@code node}
     * @param node the node whose descendants are processed; the node itself is not renamed
     * @param tags flat list of (source, target) tag-name pairs
     * @return {@code node}
     */
    private static Node replaceTagName(Document document, Node node, String... tags) {

        for (Node child : snapshot(node.getChildNodes())) {
            if (child.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }

            Node current = child;
            String replacement = findReplacement(child.getNodeName(), tags);
            if (replacement != null) {
                Element renamed = document.createElement(replacement);
                // appendChild() moves the node out of its old parent, so drain from the front
                // instead of indexing into the shrinking live list.
                while (child.hasChildNodes()) {
                    renamed.appendChild(child.getFirstChild());
                }
                node.replaceChild(renamed, child);
                current = renamed;
            }

            replaceTagName(document, current, tags);
        }

        return node;
    }

    /**
     * Recursively unwraps every descendant element of {@code node} whose name is not in {@code tags},
     * replacing it with its own children, and normalises whitespace in text nodes (tabs and newlines
     * are removed, runs of spaces are collapsed to one space).
     *
     * @param document the document that owns {@code node}
     * @param node the node whose descendants are processed
     * @param tags tag names to keep
     * @return {@code node}
     */
    private static Node replace2TextExceptTags(Document document, Node node, String... tags) {

        // Iterate over a snapshot: unwrapping changes the live child list, and indexing into it
        // would skip the sibling that follows an element with no children.
        for (Node child : snapshot(node.getChildNodes())) {
            if (child.getNodeType() == Node.TEXT_NODE) {
                String text = ((Text) child).getTextContent();
                text = text.replaceAll("[\t\n]+", "").replaceAll(" +", " ");
                child.setTextContent(text);
                continue;
            }
            if (child.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }

            // Clean the subtree first so that whatever gets hoisted below is already flat.
            replace2TextExceptTags(document, child, tags);

            if (Arrays.asList(tags).contains(child.getNodeName())) {
                continue;
            }

            // Unwrap: move the children up in place of the element, then drop the element.
            while (child.hasChildNodes()) {
                node.insertBefore(child.getFirstChild(), child);
            }
            node.removeChild(child);
        }

        return node;
    }

    /**
     * Recursively copies the attributes and the element/text children of {@code src} onto {@code dest}.
     *
     * @param destDocument the document that owns {@code dest} and the created nodes
     * @param dest the element receiving the copy
     * @param src the element to copy from
     * @return {@code dest}
     */
    private static Element copyNode(Document destDocument, Element dest, Element src) {

        NamedNodeMap attributes = src.getAttributes();
        for (int i = 0; i < attributes.getLength(); i++) {
            Attr attr = (Attr) attributes.item(i);
            dest.setAttribute(attr.getName(), attr.getValue());
        }

        NodeList children = src.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            if (child.getNodeType() == Node.TEXT_NODE) {
                dest.appendChild(destDocument.createTextNode(child.getTextContent()));
            } else if (child.getNodeType() == Node.ELEMENT_NODE) {
                Element copy = destDocument.createElement(((Element) child).getTagName());
                dest.appendChild(copyNode(destDocument, copy, (Element) child));
            }
        }

        return dest;
    }
}