/**
 * Creates a new instance of a SAXParser using the currently configured factory parameters.
 *
 * @return a new SAXParser instance
 * @throws ParserConfigurationException declared by this signature; presumably raised when no
 *         parser satisfying the requested configuration can be created
 * @throws SAXException declared by this signature for SAX-level failures
 */
public abstract SAXParser newSAXParser() throws ParserConfigurationException, SAXException;

Creates a new instance of a SAXParser using the currently configured factory parameters.


From source file:com.jkoolcloud.jesl.simulator.TNT4JSimulator.java

/**
 * Command-line entry point of the simulator (excerpt).
 *
 * Creates a SAX parser and a TNT4JSimulatorParserHandler, hands the arguments to
 * processArgs, sets up the tracking logger, and then either runs a simulation
 * (SimulatorRunType.RUN_SIM) or replays the lines of the file named by jkFileName
 * (SimulatorRunType.REPLAY_SIM).
 *
 * NOTE(review): this listing is truncated. Several statements and closing braces are
 * missing, so the nesting shown below does not reflect the real control flow.
 *
 * @param args command-line arguments, forwarded to processArgs
 */
public static void main(String[] args) {
    // A console is only available when the process is attached to an interactive terminal.
    boolean isTTY = (System.console() != null);
    long startTime = System.currentTimeMillis();

    try {
        SAXParserFactory parserFactory = SAXParserFactory.newInstance();
        SAXParser theParser = parserFactory.newSAXParser();
        TNT4JSimulatorParserHandler xmlHandler = new TNT4JSimulatorParserHandler();

        processArgs(xmlHandler, args);

        // The logger is built from the configuration registered under this class name.
        TrackerConfig simConfig = DefaultConfigFactory.getInstance().getConfig(TNT4JSimulator.class.getName());
        logger = TrackingLogger.getInstance(simConfig.build());
        // Prefer TRACE over DEBUG when both are enabled on the logger.
        if (logger.isSet(OpLevel.TRACE))
            traceLevel = OpLevel.TRACE;
        else if (logger.isSet(OpLevel.DEBUG))
            traceLevel = OpLevel.DEBUG;

        if (runType == SimulatorRunType.RUN_SIM) {
            // No simulation file configured: prompt for one, offering tnt4j-sim.xml as the default.
            if (StringUtils.isEmpty(simFileName)) {
                simFileName = "tnt4j-sim.xml";
                String fileName = readFromConsole("Simulation file [" + simFileName + "]: ");

                if (!StringUtils.isEmpty(fileName))
                    simFileName = fileName;

            // Reads the simulation file line by line; presumably each line is appended to simDef
            // (the loop body is missing from this excerpt).
            StringBuffer simDef = new StringBuffer();
            BufferedReader simLoader = new BufferedReader(new FileReader(simFileName));
            String line;
            while ((line = simLoader.readLine()) != null)

            info("jKool Activity Simulator Run starting: file=" + simFileName + ", iterations=" + numIterations
                    + ", ttl.sec=" + ttl);
            // Restart the clock so the reported elapsed time excludes the setup above.
            startTime = System.currentTimeMillis();

            if (isTTY && numIterations > 1)
                System.out.print("Iteration: ");
            int itTrcWidth = 0;
            for (iteration = 1; iteration <= numIterations; iteration++) {
                itTrcWidth = printProgress("Executing Iteration", iteration, itTrcWidth);

                // Every iteration re-parses the buffered definition with the same handler.
                theParser.parse(new InputSource(new StringReader(simDef.toString())), xmlHandler);

                if (!Utils.isEmpty(jkFileName)) {
                    // Opened in append mode; the code that uses this writer is missing from the excerpt.
                    PrintWriter gwFile = new PrintWriter(new FileOutputStream(jkFileName, true));
            if (numIterations > 1)

            info("jKool Activity Simulator Run finished, elapsed time = "
                    + DurationFormatUtils.formatDurationHMS(System.currentTimeMillis() - startTime));
            printMetrics(xmlHandler.getSinkStats(), "Total Sink Statistics");
        } else if (runType == SimulatorRunType.REPLAY_SIM) {
            info("jKool Activity Simulator Replay starting: file=" + jkFileName + ", iterations="
                    + numIterations);
            startTime = System.currentTimeMillis();

            // Determine number of lines in file
            // NOTE(review): the loop terminator/body is not visible in this excerpt.
            BufferedReader gwFile = new BufferedReader(new java.io.FileReader(jkFileName));
            for (numIterations = 0; gwFile.readLine() != null; numIterations++)

            // Reopen the file and
            gwFile = new BufferedReader(new java.io.FileReader(jkFileName));
            if (isTTY && numIterations > 1)
                System.out.print("Processing Line: ");
            int itTrcWidth = 0;
            String gwMsg;
            iteration = 0;
            // Walks the file again line by line; the per-line processing is missing from this excerpt.
            while ((gwMsg = gwFile.readLine()) != null) {
                if (isTTY)
                    itTrcWidth = printProgress("Processing Line", iteration, itTrcWidth);
            if (isTTY && numIterations > 1)
            long endTime = System.currentTimeMillis();

            info("jKool Activity Simulator Replay finished, elasped.time = "
                    + DurationFormatUtils.formatDurationHMS(endTime - startTime));
    } catch (Exception e) {
        // SAX parse errors carry a position, so report line and column for them.
        if (e instanceof SAXParseException) {
            SAXParseException spe = (SAXParseException) e;
            error("Error at line: " + spe.getLineNumber() + ", column: " + spe.getColumnNumber(), e);
        } else {
            error("Error running simulator", e);
    } finally {
        // NOTE(review): the cleanup performed here is not visible in this excerpt.
        try {
        } catch (Exception e) {


From source file:efen.parsewiki.WikipediaDocumentSequence.java

/**
 * Command-line entry point (excerpt): extracts the redirects of a Wikipedia dump and stores a
 * virtual document resolver that integrates them with an existing one.
 *
 * The positional arguments are the dump file, the base URL, the URI list, the name of the
 * existing resolver and the name of the resolver to write; see the JSAP parameter list below.
 *
 * NOTE(review): this listing is truncated. Closing braces, the tail of the parse call and
 * at least one branch keyword are missing, so the nesting shown is not the real structure.
 *
 * @param arg command-line arguments, parsed with JSAP
 */
public static void main(final String arg[])
        throws ParserConfigurationException, SAXException, IOException, JSAPException, ClassNotFoundException {
    SimpleJSAP jsap = new SimpleJSAP(WikipediaDocumentSequence.class.getName(),
            "Computes the redirects of a Wikipedia dump and integrate them into an existing virtual document resolver for the dump.",
            new Parameter[] { new Switch("bzip2", 'b', "bzip2", "The file is compressed with bzip2"),
                    new Switch("iso", 'i', "iso",
                            "Use ISO-8859-1 coding internally (i.e., just use the lower eight bits of each character)."),
                    new FlaggedOption("width", JSAP.INTEGER_PARSER, Integer.toString(Long.SIZE),
                            JSAP.NOT_REQUIRED, 'w', "width",
                            "The width, in bits, of the signatures used to sign the function from URIs to their rank."),
                    new UnflaggedOption("file", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "The file containing the Wikipedia dump."),
                    new UnflaggedOption("baseURL", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "The base URL for the collection (e.g., http://en.wikipedia.org/wiki/)."),
                    new UnflaggedOption("uris", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "The URIs of the documents in the collection (generated by ScanMetadata)."),
                    new UnflaggedOption("vdr", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "The name of a precomputed virtual document resolver for the collection."),
                    new UnflaggedOption("redvdr", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "The name of the resulting virtual document resolver.") });

    JSAPResult jsapResult = jsap.parse(arg);
    // Stop here if JSAP already printed a message for these arguments.
    if (jsap.messagePrinted())
        return;

    final SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
    // Maps a page title to the title its <redirect> element points to.
    final Object2ObjectOpenHashMap<MutableString, String> redirects = new Object2ObjectOpenHashMap<MutableString, String>();
    final String baseURL = jsapResult.getString("baseURL");
    final ProgressLogger progressLogger = new ProgressLogger(LOGGER);
    progressLogger.itemsName = "redirects";
    progressLogger.start("Extracting redirects...");

    final SAXParser parser = saxParserFactory.newSAXParser();
    // Handler that accumulates the text of the title element and records a redirect entry
    // whenever a redirect element with a title attribute is seen.
    final DefaultHandler handler = new DefaultHandler() {
        private boolean inTitle;
        private MutableString title = new MutableString();

        public void startElement(String uri, String localName, String qName, Attributes attributes)
                throws SAXException {
            // NOTE(review): no reset of title is visible in this excerpt, although the check
            // below relies on it being empty; presumably a line was lost here.
            if ("page".equals(localName)) {
                inTitle = false;
            } else if ("title".equals(localName) && title.length() == 0)
                inTitle = true; // We catch only the first title element.
            else if ("redirect".equals(localName) && attributes.getValue("title") != null) {
                // Store a copy because the title buffer is mutable and reused.
                redirects.put(title.copy(), attributes.getValue("title"));

        public void endElement(String uri, String localName, String qName) throws SAXException {
            if ("title".equals(localName))
                inTitle = false;

        public void characters(char[] ch, int start, int length) throws SAXException {
            if (inTitle)
                title.append(ch, start, length);

        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
            if (inTitle)
                title.append(ch, start, length);

    // Read the dump, decompressing it first when the bzip2 switch was given, and decode it as UTF-8.
    InputStream in = new FileInputStream(jsapResult.getString("file"));
    if (jsapResult.userSpecified("bzip2"))
        in = new BZip2CompressorInputStream(in);
    parser.parse(new InputSource(new InputStreamReader(new FastBufferedInputStream(in), Charsets.UTF_8)),

    // Maps the URL of a redirecting page to the value vdr returns for the redirect target.
    final Object2LongLinkedOpenHashMap<MutableString> resolved = new Object2LongLinkedOpenHashMap<MutableString>();
    final VirtualDocumentResolver vdr = (VirtualDocumentResolver) BinIO.loadObject(jsapResult.getString("vdr"));

    progressLogger.expectedUpdates = redirects.size();
    progressLogger.start("Examining redirects...");

    for (Map.Entry<MutableString, String> e : redirects.entrySet()) {
        final MutableString start = new MutableString().append(baseURL)
                .append(Encoder.encodeTitleToUrl(e.getKey().toString(), true));
        final MutableString end = new MutableString().append(baseURL)
                .append(Encoder.encodeTitleToUrl(e.getValue(), true));
        final long s = vdr.resolve(start);
        // -1 is treated as "not known to the resolver": only then is the redirect worth adding.
        if (s == -1) {
            final long t = vdr.resolve(end);
            if (t != -1)
                resolved.put(start.copy(), t);
                // NOTE(review): as shown this warning is unconditional; an else was presumably lost.
                LOGGER.warn("Failed redirect: " + start + " -> " + end);
        } else
            LOGGER.warn("URL " + start + " is already known to the virtual document resolver");




    // The new resolver covers the URIs listed in the "uris" file followed by the resolved redirect URLs.
    final Iterable<MutableString> allURIs = Iterables
            .concat(new FileLinesCollection(jsapResult.getString("uris"), "UTF-8"), resolved.keySet());
    final long numberOfDocuments = vdr.numberOfDocuments();

    // The iso switch selects the ISO transformation strategy; UTF-16 is used otherwise.
    final TransformationStrategy<CharSequence> transformationStrategy = jsapResult.userSpecified("iso")
            ? TransformationStrategies.iso()
            : TransformationStrategies.utf16();

    // Store the combined resolver under the name given by the "redvdr" argument.
    BinIO.storeObject(new URLMPHVirtualDocumentResolver(new SignedRedirectedStringMap(numberOfDocuments,
            new ShiftAddXorSignedStringMap(allURIs.iterator(),
                    new MWHCFunction.Builder<CharSequence>().keys(allURIs).transform(transformationStrategy)
            resolved.values().toLongArray())), jsapResult.getString("redvdr"));

From source file:it.unimi.di.wikipedia.parsing.NamespacedWikipediaDocumentSequence.java

/**
 * Command-line entry point (excerpt): extracts the redirects of a Wikipedia dump and stores a
 * virtual document resolver that integrates them with an existing one.
 *
 * The positional arguments are the dump file, the base URL, the URI list, the name of the
 * existing resolver and the name of the resolver to write; see the JSAP parameter list below.
 *
 * NOTE(review): this listing is truncated. Closing braces, the tail of the parse call and
 * at least one branch keyword are missing, so the nesting shown is not the real structure.
 *
 * @param arg command-line arguments, parsed with JSAP
 */
public static void main(final String arg[])
        throws ParserConfigurationException, SAXException, IOException, JSAPException, ClassNotFoundException {
    SimpleJSAP jsap = new SimpleJSAP(NamespacedWikipediaDocumentSequence.class.getName(),
            "Computes the redirects of a Wikipedia dump and integrate them into an existing virtual document resolver for the dump.",
            new Parameter[] { new Switch("bzip2", 'b', "bzip2", "The file is compressed with bzip2"),
                    new Switch("iso", 'i', "iso",
                            "Use ISO-8859-1 coding internally (i.e., just use the lower eight bits of each character)."),
                    new FlaggedOption("width", JSAP.INTEGER_PARSER, Integer.toString(Long.SIZE),
                            JSAP.NOT_REQUIRED, 'w', "width",
                            "The width, in bits, of the signatures used to sign the function from URIs to their rank."),
                    new UnflaggedOption("file", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "The file containing the Wikipedia dump."),
                    new UnflaggedOption("baseURL", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "The base URL for the collection (e.g., http://en.wikipedia.org/wiki/)."),
                    new UnflaggedOption("uris", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "The URIs of the documents in the collection (generated by ScanMetadata)."),
                    new UnflaggedOption("vdr", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "The name of a precomputed virtual document resolver for the collection."),
                    new UnflaggedOption("redvdr", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "The name of the resulting virtual document resolver.") });

    JSAPResult jsapResult = jsap.parse(arg);
    // Stop here if JSAP already printed a message for these arguments.
    if (jsap.messagePrinted())
        return;

    final SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
    // Maps a page title to the title its <redirect> element points to.
    final Object2ObjectOpenHashMap<MutableString, String> redirects = new Object2ObjectOpenHashMap<MutableString, String>();
    final String baseURL = jsapResult.getString("baseURL");
    final ProgressLogger progressLogger = new ProgressLogger(LOGGER);
    progressLogger.itemsName = "redirects";
    progressLogger.start("Extracting redirects...");

    final SAXParser parser = saxParserFactory.newSAXParser();
    // Handler that accumulates the text of the title element and records a redirect entry
    // whenever a redirect element with a title attribute is seen.
    final DefaultHandler handler = new DefaultHandler() {
        private boolean inTitle;
        private MutableString title = new MutableString();

        public void startElement(String uri, String localName, String qName, Attributes attributes)
                throws SAXException {
            // NOTE(review): no reset of title is visible in this excerpt, although the check
            // below relies on it being empty; presumably a line was lost here.
            if ("page".equals(localName)) {
                inTitle = false;
            } else if ("title".equals(localName) && title.length() == 0)
                inTitle = true; // We catch only the first title element.
            else if ("redirect".equals(localName) && attributes.getValue("title") != null) {
                // Store a copy because the title buffer is mutable and reused.
                redirects.put(title.copy(), attributes.getValue("title"));

        public void endElement(String uri, String localName, String qName) throws SAXException {
            if ("title".equals(localName))
                inTitle = false;

        public void characters(char[] ch, int start, int length) throws SAXException {
            if (inTitle)
                title.append(ch, start, length);

        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
            if (inTitle)
                title.append(ch, start, length);

    // Read the dump, decompressing it first when the bzip2 switch was given, and decode it as UTF-8.
    InputStream in = new FileInputStream(jsapResult.getString("file"));
    if (jsapResult.userSpecified("bzip2"))
        in = new BZip2CompressorInputStream(in);
    parser.parse(new InputSource(new InputStreamReader(new FastBufferedInputStream(in), Charsets.UTF_8)),

    // Maps the URL of a redirecting page to the value vdr returns for the redirect target.
    final Object2LongLinkedOpenHashMap<MutableString> resolved = new Object2LongLinkedOpenHashMap<MutableString>();
    final VirtualDocumentResolver vdr = (VirtualDocumentResolver) BinIO.loadObject(jsapResult.getString("vdr"));

    progressLogger.expectedUpdates = redirects.size();
    progressLogger.start("Examining redirects...");

    for (Map.Entry<MutableString, String> e : redirects.entrySet()) {
        final MutableString start = new MutableString().append(baseURL)
                .append(Encoder.encodeTitleToUrl(e.getKey().toString(), true));
        final MutableString end = new MutableString().append(baseURL)
                .append(Encoder.encodeTitleToUrl(e.getValue(), true));
        final long s = vdr.resolve(start);
        // -1 is treated as "not known to the resolver": only then is the redirect worth adding.
        if (s == -1) {
            final long t = vdr.resolve(end);
            if (t != -1)
                resolved.put(start.copy(), t);
                // NOTE(review): as shown this warning is unconditional; an else was presumably lost.
                LOGGER.warn("Failed redirect: " + start + " -> " + end);
        } else
            LOGGER.warn("URL " + start + " is already known to the virtual document resolver");




    // The new resolver covers the URIs listed in the "uris" file followed by the resolved redirect URLs.
    final Iterable<MutableString> allURIs = Iterables
            .concat(new FileLinesCollection(jsapResult.getString("uris"), "UTF-8"), resolved.keySet());
    final long numberOfDocuments = vdr.numberOfDocuments();

    // The iso switch selects the ISO transformation strategy; UTF-16 is used otherwise.
    final TransformationStrategy<CharSequence> transformationStrategy = jsapResult.userSpecified("iso")
            ? TransformationStrategies.iso()
            : TransformationStrategies.utf16();

    // Store the combined resolver under the name given by the "redvdr" argument.
    BinIO.storeObject(new URLMPHVirtualDocumentResolver(new SignedRedirectedStringMap(numberOfDocuments,
            new ShiftAddXorSignedStringMap(allURIs.iterator(),
                    new MWHCFunction.Builder<CharSequence>().keys(allURIs).transform(transformationStrategy)
            resolved.values().toLongArray())), jsapResult.getString("redvdr"));

From source file:Main.java

public static SAXParser createSAXParser() throws SAXException, ParserConfigurationException {
    SAXParserFactory factory = SAXParserFactory.newInstance();
    return factory.newSAXParser();

From source file:Main.java

public static void parseDocumentByString(String result, DefaultHandler defaultHandler) {
    try {
        SAXParserFactory sf = SAXParserFactory.newInstance();
        SAXParser sp = sf.newSAXParser();
        sp.parse(new ByteArrayInputStream(result.getBytes("UTF-8")), defaultHandler);
    } catch (Exception ex) {

From source file:Main.java

public static XMLReader createXMLReader() {
    SAXParserFactory factory = createParserFactory();
    try {
        SAXParser parser = factory.newSAXParser();
        return parser.getXMLReader();
    } catch (ParserConfigurationException ex) {
        throw new IllegalStateException(ex);
    } catch (SAXException ex) {
        throw new IllegalStateException(ex);

From source file:net.eledge.android.europeana.tools.RssReader.java

public static List<BlogArticle> readFeed(String url, DateTime lastViewed) {
    InputStream is = null;
    try {
        HttpGet request = new HttpGet(url);
        HttpResponse response = new DefaultHttpClient().execute(request);
        is = AndroidHttpClient.getUngzippedContent(response.getEntity());

        SAXParserFactory spf = SAXParserFactory.newInstance();
        SAXParser sp = spf.newSAXParser();
        XMLReader xr = sp.getXMLReader();

        RssFeedHandler rh = new RssFeedHandler(lastViewed);

        xr.parse(new InputSource(is));

        return rh.articles;
    } catch (IOException | SAXException | ParserConfigurationException e) {
        Log.e("RssReader", e.getMessage(), e);
    } finally {

    return null;

From source file:Main.java

public static XMLReader createXmlReader() throws SAXException {
    try {
        // use Xerces to ensure XML 1.1 is handled correctly
        Class<?> clazz = Class.forName("org.apache.xerces.parsers.SAXParser"); //$NON-NLS-1$
        return (XMLReader) clazz.newInstance();
    } catch (Throwable e) {
        SAXParser saxParser;
        try {
            SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
            saxParser = saxParserFactory.newSAXParser();
        } catch (ParserConfigurationException e2) {
            throw new SAXException(e2);
        return saxParser.getXMLReader();

From source file:Main.java

public static boolean validateWithDTDUsingSAX(String xml) throws Exception {
    SAXParserFactory factory = SAXParserFactory.newInstance();
    factory.setValidating(true);

    SAXParser parser = factory.newSAXParser();
    XMLReader reader = parser.getXMLReader();
    reader.setErrorHandler(new ErrorHandler() {
        public void warning(SAXParseException e) throws SAXException {
            System.out.println("WARNING : " + e.getMessage()); // do nothing

        public void error(SAXParseException e) throws SAXException {
            System.out.println("ERROR : " + e.getMessage());
            throw e;

        public void fatalError(SAXParseException e) throws SAXException {
            System.out.println("FATAL : " + e.getMessage());
            throw e;
    reader.parse(new InputSource(xml));
    return true;

From source file:Main.java

 * Create a new SAXParser which processes XML securely.
 * @return a SAXParser/*from www .  j a va 2s .  c  o  m*/
public static SAXParser createSaxParser() {
    SAXParserFactory spf = SAXParserFactory.newInstance();
    try {
        spf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
        return spf.newSAXParser();
    } catch (ParserConfigurationException | SAXException e) {
        throw new IllegalStateException(e);