List of usage examples for javax.xml.parsers SAXParserFactory newInstance
public static SAXParserFactory newInstance()
From source file:de.uzk.hki.da.cli.Cli.java
/**
 * Copies the files listed in a SIP list to a single directory.
 *
 * <p>The SIP list is read as XML: every {@code sip} child of the root element must carry a
 * {@code name} attribute and contains {@code file} children whose text is the path of a file or
 * directory. Each SIP gets its own sub-folder (named after the SIP) below the temporary folder,
 * and the listed files and directories are copied into it. On any failure a message is printed
 * to standard output and an empty string is returned.
 *
 * @param sipListFile The SIP list file
 * @return The absolute path to the directory containing the files, or an empty string on failure
 */
private String copySipListContentToFolder(File sipListFile) {
    CliProgressManager progressManager = new CliProgressManager();
    String tempFolderName = getTempFolderName();

    XMLReader xmlReader = null;
    SAXParserFactory spf = SAXParserFactory.newInstance();
    try {
        xmlReader = spf.newSAXParser().getXMLReader();
    } catch (Exception e) {
        logger.error("Failed to create SAX parser", e);
        System.out.println("Fehler beim Einlesen der SIP-Liste: SAX-Parser konnte nicht erstellt werden.");
        return "";
    }

    // Errors abort the parse (they surface in the catch block at the end of this method);
    // warnings are only reported.
    xmlReader.setErrorHandler(new ErrorHandler() {
        @Override
        public void error(SAXParseException e) throws SAXException {
            throw new SAXException("Beim Einlesen der SIP-Liste ist ein Fehler aufgetreten.", e);
        }

        @Override
        public void fatalError(SAXParseException e) throws SAXException {
            throw new SAXException("Beim Einlesen der SIP-Liste ist ein schwerer Fehler aufgetreten.", e);
        }

        @Override
        public void warning(SAXParseException e) throws SAXException {
            logger.warn("Warning while parsing siplist", e);
            System.out.println("\nWarnung:\n" + e.getMessage());
        }
    });

    try {
        Document doc;
        InputStream inputStream = new FileInputStream(sipListFile);
        try {
            Reader reader = new InputStreamReader(inputStream, "UTF-8");
            Builder parser = new Builder(xmlReader);
            doc = parser.build(reader);
        } finally {
            // Close the file even when parsing fails; previously the stream leaked on error.
            inputStream.close();
        }

        Element root = doc.getRootElement();
        Elements sipElements = root.getChildElements("sip");

        // First pass: count all file entries so the progress display knows the total.
        long files = 0;
        for (int i = 0; i < sipElements.size(); i++) {
            Elements fileElements = sipElements.get(i).getChildElements("file");
            if (fileElements != null) {
                files += fileElements.size();
            }
        }
        progressManager.setTotalSize(files);

        // Second pass: create one folder per SIP and copy its files into it.
        for (int i = 0; i < sipElements.size(); i++) {
            Element sipElement = sipElements.get(i);
            String sipName = sipElement.getAttributeValue("name");

            File tempDirectory = new File(tempFolderName + File.separator + sipName);
            if (tempDirectory.exists()) {
                // A folder of that name already exists, i.e. the SIP name was used before.
                FolderUtils.deleteQuietlySafe(new File(tempFolderName));
                System.out.println("\nDie SIP-Liste enth\u00e4lt mehrere SIPs mit dem Namen " + sipName + ". "
                        + "Bitte vergeben Sie f\u00fcr jedes SIP einen eigenen Namen.");
                return "";
            }
            tempDirectory.mkdirs();

            Elements fileElements = sipElement.getChildElements("file");
            for (int j = 0; j < fileElements.size(); j++) {
                Element fileElement = fileElements.get(j);
                String filepath = fileElement.getValue();
                File file = new File(filepath);

                if (!file.exists()) {
                    logger.error("File " + file.getAbsolutePath() + " is referenced in siplist, "
                            + "but does not exist");
                    System.out.println("\nDie in der SIP-Liste angegebene Datei " + file.getAbsolutePath()
                            + " existiert nicht.");
                    FolderUtils.deleteQuietlySafe(new File(tempFolderName));
                    return "";
                }

                try {
                    if (file.isDirectory()) {
                        FileUtils.copyDirectoryToDirectory(file, tempDirectory);
                    } else {
                        FileUtils.copyFileToDirectory(file, tempDirectory);
                    }
                    progressManager.copyFilesFromListProgress();
                } catch (IOException e) {
                    logger.error("Failed to copy file " + file.getAbsolutePath() + " to folder "
                            + tempDirectory.getAbsolutePath(), e);
                    System.out.println("\nDie in der SIP-Liste angegebene Datei " + file.getAbsolutePath()
                            + " konnte nicht kopiert werden.");
                    FolderUtils.deleteQuietlySafe(new File(tempFolderName));
                    return "";
                }
            }
        }
    } catch (Exception e) {
        logger.error("Failed to read siplist " + sipListFile.getAbsolutePath(), e);
        System.out.println("\nBeim Lesen der SIP-Liste ist ein Fehler aufgetreten. ");
        return "";
    }

    return (new File(tempFolderName).getAbsolutePath());
}
From source file:com.taobao.android.tpatch.utils.SmaliUtils.java
/** * dex?smali//w w w. jav a 2s . co m * @param dex * @param outputDir * @param includeClasses ?? */ public static boolean disassembleDexFile(File dex, File outputDir, final Set<String> includeClasses) throws IOException { final baksmaliOptions options = createBaksmaliOptions(); if (!outputDir.exists()) { outputDir.mkdirs(); } DexFile dexFile = DexFileFactory.loadDexFile(dex, DEFAULT_API_LEVEL, true); options.outputDirectory = outputDir.getAbsolutePath(); //1. options.jobs = 3; if (options.registerInfo != 0 || options.deodex) { try { Iterable<String> extraClassPathEntries; if (options.extraClassPathEntries != null) { extraClassPathEntries = options.extraClassPathEntries; } else { extraClassPathEntries = ImmutableList.of(); } options.classPath = ClassPath.fromClassPath(options.bootClassPathDirs, Iterables.concat(options.bootClassPathEntries, extraClassPathEntries), dexFile, options.apiLevel, options.checkPackagePrivateAccess, options.experimental); if (options.customInlineDefinitions != null) { options.inlineResolver = new CustomInlineMethodResolver(options.classPath, options.customInlineDefinitions); } } catch (Exception ex) { System.err.println("\n\nError occurred while loading boot class path files. 
Aborting."); ex.printStackTrace(System.err); return false; } } if (options.resourceIdFileEntries != null) { class PublicHandler extends DefaultHandler { String prefix = null; public PublicHandler(String prefix) { super(); this.prefix = prefix; } public void startElement(String uri, String localName, String qName, Attributes attr) throws SAXException { if (qName.equals("public")) { String type = attr.getValue("type"); String name = attr.getValue("name").replace('.', '_'); Integer public_key = Integer.decode(attr.getValue("id")); String public_val = new StringBuffer().append(prefix).append(".").append(type).append(".") .append(name).toString(); options.resourceIds.put(public_key, public_val); } } } ; for (Map.Entry<String, String> entry : options.resourceIdFileEntries.entrySet()) { try { SAXParser saxp = SAXParserFactory.newInstance().newSAXParser(); String prefix = entry.getValue(); saxp.parse(entry.getKey(), new PublicHandler(prefix)); } catch (ParserConfigurationException e) { continue; } catch (SAXException e) { continue; } catch (IOException e) { continue; } } } File outputDirectoryFile = new File(options.outputDirectory); if (!outputDirectoryFile.exists()) { if (!outputDirectoryFile.mkdirs()) { System.err.println("Can't create the output directory " + options.outputDirectory); return false; } } // sort the classes, so that if we're on a case-insensitive file system and need to handle classes with file // name collisions, then we'll use the same name for each class, if the dex file goes through multiple // baksmali/smali cycles for some reason. If a class with a colliding name is added or removed, the filenames // may still change of course List<? 
extends ClassDef> classDefs = Ordering.natural().sortedCopy(dexFile.getClasses()); if (!options.noAccessorComments) { options.syntheticAccessorResolver = new SyntheticAccessorResolver(classDefs); } final ClassFileNameHandler fileNameHandler = new ClassFileNameHandler(outputDirectoryFile, ".smali"); ExecutorService executor = Executors.newFixedThreadPool(options.jobs); List<Future<Boolean>> tasks = Lists.newArrayList(); for (final ClassDef classDef : classDefs) { tasks.add(executor.submit(new Callable<Boolean>() { @Override public Boolean call() throws Exception { String className = getDalvikClassName(classDef.getType()); if (null != includeClasses) { if (includeClasses.contains(className)) { BakSmali.disassembleClass(classDef, fileNameHandler, options); } return true; } else { return BakSmali.disassembleClass(classDef, fileNameHandler, options); } } })); } boolean errorOccurred = false; try { for (Future<Boolean> task : tasks) { while (true) { try { if (!task.get()) { errorOccurred = true; } } catch (InterruptedException ex) { continue; } catch (ExecutionException ex) { throw new RuntimeException(ex); } break; } } } finally { executor.shutdown(); } return !errorOccurred; }
From source file:com.flipzu.flipzu.FlipInterface.java
private FlipUser setFollowUnfollow(String username, String token, boolean follow) throws IOException { String data = "username=" + username + "&access_token=" + token; String url;//w w w. ja va 2 s .co m if (follow) { url = WSServer + "/api/set_follow.xml"; } else { url = WSServer + "/api/set_unfollow.xml"; } debug.logV(TAG, "setFollow for username " + username); DefaultHttpClient hc = new DefaultHttpClient(); ResponseHandler<String> res = new ResponseHandler<String>() { public String handleResponse(final HttpResponse response) throws HttpResponseException, IOException { StatusLine statusLine = response.getStatusLine(); if (statusLine.getStatusCode() >= 300) { throw new HttpResponseException(statusLine.getStatusCode(), statusLine.getReasonPhrase()); } HttpEntity entity = response.getEntity(); return entity == null ? null : EntityUtils.toString(entity, "UTF-8"); } }; HttpPost postMethod = new HttpPost(url); postMethod.getParams().setParameter(CoreProtocolPNames.USE_EXPECT_CONTINUE, Boolean.FALSE); if (data != null) { StringEntity tmp = null; try { tmp = new StringEntity(data, "UTF-8"); } catch (UnsupportedEncodingException e) { debug.logE(TAG, "getUser ERROR", e.getCause()); return null; } postMethod.setEntity(tmp); } String response = hc.execute(postMethod, res); SAXParserFactory spf = SAXParserFactory.newInstance(); try { SAXParser sp = spf.newSAXParser(); XMLReader xr = sp.getXMLReader(); UserHandler myUserHandler = new UserHandler(); xr.setContentHandler(myUserHandler); InputSource inputSource = new InputSource(); inputSource.setEncoding("UTF-8"); inputSource.setCharacterStream(new StringReader(response)); xr.parse(inputSource); FlipUser parsedData = myUserHandler.getParsedData(); return parsedData; } catch (ParserConfigurationException e) { return null; } catch (SAXException e) { return null; } }
From source file:edwardawebb.queueman.classes.NetFlix.java
public boolean getNewETag(int queueType, int discPosition) { URL QueueUrl = null;//from w w w .j av a2 s. c o m DefaultHandler myQueueHandler = null; boolean result = false; //start index is 0 based, so step the true position down one String expanders = "?max_results=" + 1 + "&start_index=" + (discPosition - 1); InputStream xml = null; try { switch (queueType) { case NetFlixQueue.QUEUE_TYPE_INSTANT: QueueUrl = new URL( "http://api.netflix.com/users/" + user.getUserId() + "/queues/instant" + expanders); myQueueHandler = new InstantETagHandler(); break; case NetFlixQueue.QUEUE_TYPE_DISC: QueueUrl = new URL( "http://api.netflix.com/users/" + user.getUserId() + "/queues/disc/available" + expanders); myQueueHandler = new DiscETagHandler(); break; } setSignPost(user.getAccessToken(), user.getAccessTokenSecret()); HttpURLConnection request = (HttpURLConnection) QueueUrl.openConnection(); NetFlix.oaconsumer.sign(request); request.connect(); lastResponseMessage = request.getResponseCode() + ": " + request.getResponseMessage(); if (request.getResponseCode() == 200) { xml = request.getInputStream(); SAXParserFactory spf = SAXParserFactory.newInstance(); SAXParser sp; sp = spf.newSAXParser(); XMLReader xr = sp.getXMLReader(); xr.setContentHandler(myQueueHandler); //our custom handler will throw an exception when he gets what he want, interupting the full parse ErrorProcessor errors = new ErrorProcessor(); xr.setErrorHandler(errors); xr.parse(new InputSource(xml)); result = true; } } catch (ParserConfigurationException e) { reportError(e, lastResponseMessage); } catch (SAXException e) { reportError(e, lastResponseMessage); } catch (IOException e) { reportError(e, lastResponseMessage); // Log.i("NetFlix", "IO Error connecting to NetFlix queue") } catch (OAuthMessageSignerException e) { reportError(e, lastResponseMessage); // Log.i("NetFlix", "Unable to Sign request - token invalid") } catch (OAuthExpectationFailedException e) { reportError(e, lastResponseMessage); // 
Log.i("NetFlix", "Expectation failed") } return result; }
From source file:org.castor.jaxb.CastorUnmarshallerTest.java
/**
 * Verifies {@link CastorUnmarshaller#getUnmarshallerHandler()}: the handler obtained from the
 * unmarshaller is used as SAX content handler while parsing the sample document, and the
 * object it produces is then checked.
 *
 * @throws Exception if any error occurs during test
 */
@Test
public void testGetUnmarshallHandler() throws Exception {
    final UnmarshallerHandler handler = unmarshaller.getUnmarshallerHandler();

    // Namespace-aware SAX reader that pushes its events into the handler.
    final SAXParserFactory parserFactory = SAXParserFactory.newInstance();
    parserFactory.setNamespaceAware(true);
    final XMLReader reader = parserFactory.newSAXParser().getXMLReader();
    reader.setContentHandler(handler);

    final InputSource source = new InputSource(new StringReader(INPUT_XML));
    reader.parse(source);

    final Object unmarshalled = handler.getResult();
    testEntity((Entity) unmarshalled);
}
From source file:edwardawebb.queueman.classes.NetFlix.java
public NetFlixQueue getSearchResults(String searchTerm) { searchQueue = new NetFlixQueue(NetFlixQueue.QUEUE_TYPE_SEARCH); setSignPost(user.getAccessToken(), user.getAccessTokenSecret()); InputStream xml = null;/*from ww w . j a va 2s .co m*/ try { String encSearchTerm = URLEncoder.encode(searchTerm); setSignPost(user.getAccessToken(), user.getAccessTokenSecret()); String expanders = "&expand=synopsis,formats"; URL QueueUrl = null; QueueUrl = new URL("http://api.netflix.com/catalog/titles?term=" + encSearchTerm + expanders); // Log.d("NetFlix",""+QueueUrl.toString()) HttpURLConnection request = (HttpURLConnection) QueueUrl.openConnection(); NetFlix.oaconsumer.sign(request); request.connect(); lastResponseMessage = request.getResponseCode() + ": " + request.getResponseMessage(); if (request.getResponseCode() == 200) { // Log.d("NetFlix", request.getContentType()) // //Log.d("NetFlix",request.getInputStream().toString()) // return xml xmldoc xml = request.getInputStream(); /*BufferedReader in = new BufferedReader(new InputStreamReader( xml)); String linein = null; while ((linein = in.readLine()) != null) { Log.d("NetFlix", "SearchMovie: " + linein); }*/ SAXParserFactory spf = SAXParserFactory.newInstance(); SAXParser sp; sp = spf.newSAXParser(); XMLReader xr = sp.getXMLReader(); // SearchResultsHandler myHandler = new // SearchResultsHandler(this); SearchQueueHandler myHandler = new SearchQueueHandler(); xr.setContentHandler(myHandler); xr.parse(new InputSource(xml)); } } catch (ParserConfigurationException e) { reportError(e, lastResponseMessage); } catch (SAXException e) { reportError(e, lastResponseMessage); } catch (IOException e) { reportError(e, lastResponseMessage); // Log.i("NetFlix", "IO Error connecting to NetFlix queue") } catch (OAuthMessageSignerException e) { reportError(e, lastResponseMessage); } catch (OAuthExpectationFailedException e) { reportError(e, lastResponseMessage); } return searchQueue; }
From source file:org.hil.webservice.mobile.impl.ChildrenWebServiceImpl.java
private void parseXml(String xml) { list = new ArrayList<Children>(); tmpAuth = ""; tmpAuthor = ""; SAXParserFactory factory = SAXParserFactory.newInstance(); try {//from w w w. jav a2 s . c om SAXParser saxParser = factory.newSAXParser(); DefaultHandler handler = new DefaultHandler() { Children tempChild; String tempVal = ""; public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { if (qName.equalsIgnoreCase("Children")) { tmpAuth = attributes.getValue("sessionAuth"); if (!tmpAuth.equalsIgnoreCase("sessionAuth")) return; tmpAuthor = attributes.getValue("author"); force = Boolean.parseBoolean(attributes.getValue("force")); } tempVal = ""; if (qName.equalsIgnoreCase("Child")) { tempChild = new Children(); } } public void endElement(String uri, String localName, String qName) throws SAXException { if (qName.equalsIgnoreCase("Child")) { //add it to the list list.add(tempChild); } else if (qName.equalsIgnoreCase("id")) { if (tempVal.length() > 0) tempChild.setId(Long.parseLong(tempVal)); } else if (qName.equalsIgnoreCase("fullName")) { if (tempVal.length() > 0) tempChild.setFullName(tempVal); } else if (qName.equalsIgnoreCase("dateOfBirth")) { if (tempVal.length() > 0) { SimpleDateFormat format = new SimpleDateFormat("dd/MM/yyyy"); try { Date bdate = format.parse(tempVal); tempChild.setDateOfBirth(bdate); } catch (ParseException e) { e.printStackTrace(); } } } else if (qName.equalsIgnoreCase("gender")) { if (tempVal.length() > 0) tempChild.setGender(Boolean.parseBoolean(tempVal)); } else if (qName.equalsIgnoreCase("childCode")) { if (tempVal.length() > 0) tempChild.setChildCode(tempVal); } else if (qName.equalsIgnoreCase("fatherName")) { if (tempVal.length() > 0) tempChild.setFatherName(tempVal); } else if (qName.equalsIgnoreCase("fatherBirthYear")) { if (tempVal.length() > 0) tempChild.setFatherBirthYear(Integer.parseInt(tempVal)); } else if (qName.equalsIgnoreCase("fatherID")) { if (tempVal.length() > 0) 
tempChild.setFatherID(tempVal); } else if (qName.equalsIgnoreCase("fatherMobile")) { if (tempVal.length() > 0) tempChild.setFatherMobile(tempVal); } else if (qName.equalsIgnoreCase("motherName")) { if (tempVal.length() > 0) tempChild.setMotherName(tempVal); } else if (qName.equalsIgnoreCase("motherBirthYear")) { if (tempVal.length() > 0) tempChild.setMotherBirthYear(Integer.parseInt(tempVal)); } else if (qName.equalsIgnoreCase("motherID")) { if (tempVal.length() > 0) tempChild.setMotherID(tempVal); } else if (qName.equalsIgnoreCase("motherMobile")) { if (tempVal.length() > 0) tempChild.setMotherMobile(tempVal); } else if (qName.equalsIgnoreCase("caretakerName")) { if (tempVal.length() > 0) tempChild.setCaretakerName(tempVal); } else if (qName.equalsIgnoreCase("caretakerBirthYear")) { if (tempVal.length() > 0) tempChild.setCaretakerBirthYear(Integer.parseInt(tempVal)); } else if (qName.equalsIgnoreCase("caretakerID")) { if (tempVal.length() > 0) tempChild.setCaretakerID(tempVal); } else if (qName.equalsIgnoreCase("caretakerMobile")) { if (tempVal.length() > 0) tempChild.setCaretakerMobile(tempVal); } else if (qName.equalsIgnoreCase("currentCaretaker")) { if (tempVal.length() > 0) tempChild.setCurrentCaretaker(Short.parseShort(tempVal)); } else if (qName.equalsIgnoreCase("villageId")) { if (tempVal.length() > 0) { tempChild.setVillage(villageDao.get(Long.parseLong(tempVal))); } } } public void characters(char ch[], int start, int length) throws SAXException { tempVal = new String(ch, start, length); } }; saxParser.parse(new InputSource(new StringReader(xml)), handler); } catch (ParserConfigurationException e) { e.printStackTrace(); } catch (SAXException e) { e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:efen.parsewiki.WikipediaDocumentSequence.java
public static void main(final String arg[]) throws ParserConfigurationException, SAXException, IOException, JSAPException, ClassNotFoundException { SimpleJSAP jsap = new SimpleJSAP(WikipediaDocumentSequence.class.getName(), "Computes the redirects of a Wikipedia dump and integrate them into an existing virtual document resolver for the dump.", new Parameter[] { new Switch("bzip2", 'b', "bzip2", "The file is compressed with bzip2"), new Switch("iso", 'i', "iso", "Use ISO-8859-1 coding internally (i.e., just use the lower eight bits of each character)."), new FlaggedOption("width", JSAP.INTEGER_PARSER, Integer.toString(Long.SIZE), JSAP.NOT_REQUIRED, 'w', "width", "The width, in bits, of the signatures used to sign the function from URIs to their rank."), new UnflaggedOption("file", JSAP.STRING_PARSER, JSAP.REQUIRED, "The file containing the Wikipedia dump."), new UnflaggedOption("baseURL", JSAP.STRING_PARSER, JSAP.REQUIRED, "The base URL for the collection (e.g., http://en.wikipedia.org/wiki/)."), new UnflaggedOption("uris", JSAP.STRING_PARSER, JSAP.REQUIRED, "The URIs of the documents in the collection (generated by ScanMetadata)."), new UnflaggedOption("vdr", JSAP.STRING_PARSER, JSAP.REQUIRED, "The name of a precomputed virtual document resolver for the collection."), new UnflaggedOption("redvdr", JSAP.STRING_PARSER, JSAP.REQUIRED, "The name of the resulting virtual document resolver.") }); JSAPResult jsapResult = jsap.parse(arg); if (jsap.messagePrinted()) return;// w w w.j ava2 s .c om final SAXParserFactory saxParserFactory = SAXParserFactory.newInstance(); saxParserFactory.setNamespaceAware(true); final Object2ObjectOpenHashMap<MutableString, String> redirects = new Object2ObjectOpenHashMap<MutableString, String>(); final String baseURL = jsapResult.getString("baseURL"); final ProgressLogger progressLogger = new ProgressLogger(LOGGER); progressLogger.itemsName = "redirects"; progressLogger.start("Extracting redirects..."); final SAXParser parser = 
saxParserFactory.newSAXParser(); final DefaultHandler handler = new DefaultHandler() { private boolean inTitle; private MutableString title = new MutableString(); @Override public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { if ("page".equals(localName)) { inTitle = false; title.length(0); } else if ("title".equals(localName) && title.length() == 0) inTitle = true; // We catch only the first title element. else if ("redirect".equals(localName) && attributes.getValue("title") != null) { progressLogger.update(); redirects.put(title.copy(), attributes.getValue("title")); } } @Override public void endElement(String uri, String localName, String qName) throws SAXException { if ("title".equals(localName)) inTitle = false; } @Override public void characters(char[] ch, int start, int length) throws SAXException { if (inTitle) title.append(ch, start, length); } @Override public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { if (inTitle) title.append(ch, start, length); } }; InputStream in = new FileInputStream(jsapResult.getString("file")); if (jsapResult.userSpecified("bzip2")) in = new BZip2CompressorInputStream(in); parser.parse(new InputSource(new InputStreamReader(new FastBufferedInputStream(in), Charsets.UTF_8)), handler); progressLogger.done(); final Object2LongLinkedOpenHashMap<MutableString> resolved = new Object2LongLinkedOpenHashMap<MutableString>(); final VirtualDocumentResolver vdr = (VirtualDocumentResolver) BinIO.loadObject(jsapResult.getString("vdr")); progressLogger.expectedUpdates = redirects.size(); progressLogger.start("Examining redirects..."); for (Map.Entry<MutableString, String> e : redirects.entrySet()) { final MutableString start = new MutableString().append(baseURL) .append(Encoder.encodeTitleToUrl(e.getKey().toString(), true)); final MutableString end = new MutableString().append(baseURL) .append(Encoder.encodeTitleToUrl(e.getValue(), true)); final 
long s = vdr.resolve(start); if (s == -1) { final long t = vdr.resolve(end); if (t != -1) resolved.put(start.copy(), t); else LOGGER.warn("Failed redirect: " + start + " -> " + end); } else LOGGER.warn("URL " + start + " is already known to the virtual document resolver"); progressLogger.lightUpdate(); } progressLogger.done(); //System.err.println(resolved); final Iterable<MutableString> allURIs = Iterables .concat(new FileLinesCollection(jsapResult.getString("uris"), "UTF-8"), resolved.keySet()); final long numberOfDocuments = vdr.numberOfDocuments(); final TransformationStrategy<CharSequence> transformationStrategy = jsapResult.userSpecified("iso") ? TransformationStrategies.iso() : TransformationStrategies.utf16(); BinIO.storeObject(new URLMPHVirtualDocumentResolver(new SignedRedirectedStringMap(numberOfDocuments, new ShiftAddXorSignedStringMap(allURIs.iterator(), new MWHCFunction.Builder<CharSequence>().keys(allURIs).transform(transformationStrategy) .build(), jsapResult.getInt("width")), resolved.values().toLongArray())), jsapResult.getString("redvdr")); }
From source file:it.unimi.di.wikipedia.parsing.NamespacedWikipediaDocumentSequence.java
public static void main(final String arg[]) throws ParserConfigurationException, SAXException, IOException, JSAPException, ClassNotFoundException { SimpleJSAP jsap = new SimpleJSAP(NamespacedWikipediaDocumentSequence.class.getName(), "Computes the redirects of a Wikipedia dump and integrate them into an existing virtual document resolver for the dump.", new Parameter[] { new Switch("bzip2", 'b', "bzip2", "The file is compressed with bzip2"), new Switch("iso", 'i', "iso", "Use ISO-8859-1 coding internally (i.e., just use the lower eight bits of each character)."), new FlaggedOption("width", JSAP.INTEGER_PARSER, Integer.toString(Long.SIZE), JSAP.NOT_REQUIRED, 'w', "width", "The width, in bits, of the signatures used to sign the function from URIs to their rank."), new UnflaggedOption("file", JSAP.STRING_PARSER, JSAP.REQUIRED, "The file containing the Wikipedia dump."), new UnflaggedOption("baseURL", JSAP.STRING_PARSER, JSAP.REQUIRED, "The base URL for the collection (e.g., http://en.wikipedia.org/wiki/)."), new UnflaggedOption("uris", JSAP.STRING_PARSER, JSAP.REQUIRED, "The URIs of the documents in the collection (generated by ScanMetadata)."), new UnflaggedOption("vdr", JSAP.STRING_PARSER, JSAP.REQUIRED, "The name of a precomputed virtual document resolver for the collection."), new UnflaggedOption("redvdr", JSAP.STRING_PARSER, JSAP.REQUIRED, "The name of the resulting virtual document resolver.") }); JSAPResult jsapResult = jsap.parse(arg); if (jsap.messagePrinted()) return;//from w w w.j a va2 s. 
c o m final SAXParserFactory saxParserFactory = SAXParserFactory.newInstance(); saxParserFactory.setNamespaceAware(true); final Object2ObjectOpenHashMap<MutableString, String> redirects = new Object2ObjectOpenHashMap<MutableString, String>(); final String baseURL = jsapResult.getString("baseURL"); final ProgressLogger progressLogger = new ProgressLogger(LOGGER); progressLogger.itemsName = "redirects"; progressLogger.start("Extracting redirects..."); final SAXParser parser = saxParserFactory.newSAXParser(); final DefaultHandler handler = new DefaultHandler() { private boolean inTitle; private MutableString title = new MutableString(); @Override public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { if ("page".equals(localName)) { inTitle = false; title.length(0); } else if ("title".equals(localName) && title.length() == 0) inTitle = true; // We catch only the first title element. else if ("redirect".equals(localName) && attributes.getValue("title") != null) { progressLogger.update(); redirects.put(title.copy(), attributes.getValue("title")); } } @Override public void endElement(String uri, String localName, String qName) throws SAXException { if ("title".equals(localName)) inTitle = false; } @Override public void characters(char[] ch, int start, int length) throws SAXException { if (inTitle) title.append(ch, start, length); } @Override public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { if (inTitle) title.append(ch, start, length); } }; InputStream in = new FileInputStream(jsapResult.getString("file")); if (jsapResult.userSpecified("bzip2")) in = new BZip2CompressorInputStream(in); parser.parse(new InputSource(new InputStreamReader(new FastBufferedInputStream(in), Charsets.UTF_8)), handler); progressLogger.done(); final Object2LongLinkedOpenHashMap<MutableString> resolved = new Object2LongLinkedOpenHashMap<MutableString>(); final VirtualDocumentResolver vdr = 
(VirtualDocumentResolver) BinIO.loadObject(jsapResult.getString("vdr")); progressLogger.expectedUpdates = redirects.size(); progressLogger.start("Examining redirects..."); for (Map.Entry<MutableString, String> e : redirects.entrySet()) { final MutableString start = new MutableString().append(baseURL) .append(Encoder.encodeTitleToUrl(e.getKey().toString(), true)); final MutableString end = new MutableString().append(baseURL) .append(Encoder.encodeTitleToUrl(e.getValue(), true)); final long s = vdr.resolve(start); if (s == -1) { final long t = vdr.resolve(end); if (t != -1) resolved.put(start.copy(), t); else LOGGER.warn("Failed redirect: " + start + " -> " + end); } else LOGGER.warn("URL " + start + " is already known to the virtual document resolver"); progressLogger.lightUpdate(); } progressLogger.done(); //System.err.println(resolved); final Iterable<MutableString> allURIs = Iterables .concat(new FileLinesCollection(jsapResult.getString("uris"), "UTF-8"), resolved.keySet()); final long numberOfDocuments = vdr.numberOfDocuments(); final TransformationStrategy<CharSequence> transformationStrategy = jsapResult.userSpecified("iso") ? TransformationStrategies.iso() : TransformationStrategies.utf16(); BinIO.storeObject(new URLMPHVirtualDocumentResolver(new SignedRedirectedStringMap(numberOfDocuments, new ShiftAddXorSignedStringMap(allURIs.iterator(), new MWHCFunction.Builder<CharSequence>().keys(allURIs).transform(transformationStrategy) .build(), jsapResult.getInt("width")), resolved.values().toLongArray())), jsapResult.getString("redvdr")); }
From source file:com.xmobileapp.rockplayer.LastFmAlbumArtImporter.java
/**
 * Looks up album art for an album through the Last.FM album search.
 *
 * <p>The search is done by (filtered) album name; the first result whose filtered artist name
 * is similar enough to the filtered {@code artistName} and that has any album art wins. The
 * largest available size is preferred (xlarge, then large, then medium).
 *
 * @param albumName  name of the album to search for
 * @param artistName name of the artist, used to pick the right search result
 * @return the album art value taken from the matching search result, or {@code null} if
 *         nothing matched or the request or parsing failed
 */
private String getAlbumArtByAlbumName(String albumName, String artistName) {
    java.io.InputStream response = null;
    try {
        SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
        SAXParser saxParser = saxParserFactory.newSAXParser();
        XMLReader xmlReader = saxParser.getXMLReader();
        XMLAlbumSearchHandler xmlHandler = new XMLAlbumSearchHandler();
        xmlReader.setContentHandler(xmlHandler);

        /*
         * Get album art from Last.FM
         */
        String artistNameFiltered = filterString(artistName);
        String albumNameFiltered = filterString(albumName);

        // Explicit charset: the one-argument encode() is deprecated and platform dependent.
        URL lastFmApiRequest = new URL(
                this.LAST_FM_ALBUM_SEARCH_URL + "&album=" + URLEncoder.encode(albumNameFiltered, "UTF-8"));

        // Hand the raw bytes to the parser so that it applies the encoding declared by the
        // document; wrapping the stream in a platform-charset Reader could garble non-ASCII names.
        response = lastFmApiRequest.openStream();
        xmlReader.parse(new InputSource(response));

        for (int i = 0; i < xmlHandler.albumSearchList.size(); i++) {
            AlbumSearch albumSearch = xmlHandler.albumSearchList.get(i);
            if (artistNameIsSimilarEnough(filterString(albumSearch.artistName), artistNameFiltered)) {
                if (albumSearch.xlargeAlbumArt != null) {
                    return albumSearch.xlargeAlbumArt;
                } else if (albumSearch.largeAlbumArt != null) {
                    return albumSearch.largeAlbumArt;
                } else if (albumSearch.mediumAlbumArt != null) {
                    return albumSearch.mediumAlbumArt;
                }
            }
        }
        return null;
    } catch (ParserConfigurationException e) {
        e.printStackTrace();
        return null;
    } catch (SAXException e) {
        e.printStackTrace();
        return null;
    } catch (MalformedURLException e) {
        e.printStackTrace();
        return null;
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    } finally {
        // Release the HTTP response stream on every path.
        if (response != null) {
            try {
                response.close();
            } catch (IOException ignored) {
                // nothing useful can be done if closing fails
            }
        }
    }
}