List of usage examples for java.io FileInputStream read
public int read(byte b[]) throws IOException
Reads up to b.length bytes of data from this input stream into an array of bytes. This method blocks until some input is available. Returns the total number of bytes read into the buffer, or -1 if there is no more data because the end of the file has been reached.
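Because read(byte[]) reports how many bytes it actually read, callers normally check the return value rather than assume the buffer was filled. A minimal sketch of that pattern (the data.bin filename is just a placeholder):

import java.io.FileInputStream;
import java.io.IOException;

public class ReadExample {
    public static void main(String[] args) throws IOException {
        byte[] b = new byte[4096];
        try (FileInputStream in = new FileInputStream("data.bin")) { // hypothetical input file
            int n;
            // read(byte[]) fills at most b.length bytes and returns the count,
            // or -1 once the end of the file has been reached
            while ((n = in.read(b)) != -1) {
                System.out.println("read " + n + " bytes");
            }
        }
    }
}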
From source file:importer.handler.post.stages.SAXSplitter.java
public static void main(String[] args) {
    if (args.length == 1) {
        File f = new File(args[0]);
        // the buffer is sized from the file length; note that a single read(byte[])
        // is not guaranteed to fill it (see the sketch below)
        byte[] data = new byte[(int) f.length()];
        try {
            FileInputStream fis = new FileInputStream(f);
            fis.read(data);
            SAXSplitter ss = new SAXSplitter();
            JSONArray jArr = ss.scan(new String(data, "UTF-8"));
            String jStr = jArr.toJSONString();
            System.out.println(jStr.replaceAll("\\\\/", "/"));
        } catch (Exception e) {
            System.out.println(e.getMessage());
        }
    }
}
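A single fis.read(data) call may return fewer bytes than the array holds, even when the array was sized from f.length(). A hedged sketch, independent of the SAXSplitter code above, of a loop that keeps reading until the buffer is full or the file ends (on Java 7+, java.nio.file.Files.readAllBytes(file.toPath()) does the same in one call):

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

final class ReadFully {
    // Reads the whole file into a byte array sized from file.length(),
    // looping because read(byte[], off, len) may return short counts.
    static byte[] readFully(File file) throws IOException {
        byte[] data = new byte[(int) file.length()];
        try (FileInputStream fis = new FileInputStream(file)) {
            int off = 0;
            while (off < data.length) {
                int n = fis.read(data, off, data.length - off);
                if (n == -1) {
                    throw new IOException("Unexpected end of file: " + file);
                }
                off += n;
            }
        }
        return data;
    }
}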
From source file:io.bfscan.clueweb12.BuildWarcTrecIdMapping.java
@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("bz2 Wikipedia XML dump file")
            .create(INPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg()
            .withDescription("maximum number of documents to index").create(MAX_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of indexing threads")
            .create(THREADS_OPTION));
    options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment"));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(BuildWarcTrecIdMapping.class.getCanonicalName(), options);
        System.exit(-1);
    }

    String indexPath = cmdline.getOptionValue(INDEX_OPTION);
    int maxdocs = cmdline.hasOption(MAX_OPTION) ? Integer.parseInt(cmdline.getOptionValue(MAX_OPTION))
            : Integer.MAX_VALUE;
    int threads = cmdline.hasOption(THREADS_OPTION) ? Integer.parseInt(cmdline.getOptionValue(THREADS_OPTION))
            : DEFAULT_NUM_THREADS;

    long startTime = System.currentTimeMillis();

    String path = cmdline.getOptionValue(INPUT_OPTION);
    PrintStream out = new PrintStream(System.out, true, "UTF-8");

    Directory dir = FSDirectory.open(new File(indexPath));
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, ANALYZER);
    config.setOpenMode(OpenMode.CREATE);

    IndexWriter writer = new IndexWriter(dir, config);
    LOG.info("Creating index at " + indexPath);
    LOG.info("Indexing with " + threads + " threads");

    FileInputStream fis = null;
    BufferedReader br = null;

    try {
        fis = new FileInputStream(new File(path));
        byte[] ignoreBytes = new byte[2];
        fis.read(ignoreBytes); // "B", "Z" bytes from commandline tools
        br = new BufferedReader(new InputStreamReader(new CBZip2InputStream(fis), "UTF8"));

        ExecutorService executor = Executors.newFixedThreadPool(threads);
        int cnt = 0;
        String s;
        while ((s = br.readLine()) != null) {
            Runnable worker = new AddDocumentRunnable(writer, s);
            executor.execute(worker);

            cnt++;
            if (cnt % 1000000 == 0) {
                LOG.info(cnt + " articles added");
            }
            if (cnt >= maxdocs) {
                break;
            }
        }

        executor.shutdown();
        // Wait until all threads are finished
        while (!executor.isTerminated()) {
        }

        LOG.info("Total of " + cnt + " articles indexed.");

        if (cmdline.hasOption(OPTIMIZE_OPTION)) {
            LOG.info("Merging segments...");
            writer.forceMerge(1);
            LOG.info("Done!");
        }

        LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms");
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        writer.close();
        dir.close();
        out.close();
        br.close();
        fis.close();
    }
}
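The fis.read(ignoreBytes) call above consumes the two-byte "BZ" magic written by command-line bzip2 tools, because the CBZip2InputStream used here expects the stream to begin after that header. A small sketch of the same pattern as a reusable helper, assuming the Ant-style org.apache.tools.bzip2.CBZip2InputStream (the class and its header expectation are assumptions taken from this example, not verified against the project's actual dependency):

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import org.apache.tools.bzip2.CBZip2InputStream; // assumed location of the decompressor

final class Bzip2Readers {
    // Opens a .bz2 file as a UTF-8 reader, consuming the "B", "Z" magic bytes
    // that the decompressor does not expect to see.
    static BufferedReader open(File bz2File) throws IOException {
        FileInputStream fis = new FileInputStream(bz2File);
        byte[] magic = new byte[2];
        int n = fis.read(magic);
        if (n != 2 || magic[0] != 'B' || magic[1] != 'Z') {
            fis.close();
            throw new IOException("Not a bzip2 file: " + bz2File);
        }
        return new BufferedReader(new InputStreamReader(new CBZip2InputStream(fis), "UTF-8"));
    }
}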
From source file:cc.twittertools.index.IndexStatuses.java
@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(new Option(HELP_OPTION, "show help"));
    options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment"));
    options.addOption(new Option(STORE_TERM_VECTORS_OPTION, "store term vectors"));

    options.addOption(OptionBuilder.withArgName("dir").hasArg().withDescription("source collection directory")
            .create(COLLECTION_OPTION));
    options.addOption(
            OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("file with deleted tweetids")
            .create(DELETES_OPTION));
    options.addOption(OptionBuilder.withArgName("id").hasArg().withDescription("max id").create(MAX_ID_OPTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (cmdline.hasOption(HELP_OPTION) || !cmdline.hasOption(COLLECTION_OPTION)
            || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(IndexStatuses.class.getName(), options);
        System.exit(-1);
    }

    String collectionPath = cmdline.getOptionValue(COLLECTION_OPTION);
    String indexPath = cmdline.getOptionValue(INDEX_OPTION);

    final FieldType textOptions = new FieldType();
    textOptions.setIndexed(true);
    textOptions.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    textOptions.setStored(true);
    textOptions.setTokenized(true);
    if (cmdline.hasOption(STORE_TERM_VECTORS_OPTION)) {
        textOptions.setStoreTermVectors(true);
    }

    LOG.info("collection: " + collectionPath);
    LOG.info("index: " + indexPath);

    LongOpenHashSet deletes = null;
    if (cmdline.hasOption(DELETES_OPTION)) {
        deletes = new LongOpenHashSet();
        File deletesFile = new File(cmdline.getOptionValue(DELETES_OPTION));
        if (!deletesFile.exists()) {
            System.err.println("Error: " + deletesFile + " does not exist!");
            System.exit(-1);
        }
        LOG.info("Reading deletes from " + deletesFile);

        FileInputStream fin = new FileInputStream(deletesFile);
        byte[] ignoreBytes = new byte[2];
        fin.read(ignoreBytes); // "B", "Z" bytes from commandline tools
        BufferedReader br = new BufferedReader(new InputStreamReader(new CBZip2InputStream(fin)));

        String s;
        while ((s = br.readLine()) != null) {
            if (s.contains("\t")) {
                deletes.add(Long.parseLong(s.split("\t")[0]));
            } else {
                deletes.add(Long.parseLong(s));
            }
        }
        br.close();
        fin.close();
        LOG.info("Read " + deletes.size() + " tweetids from deletes file.");
    }

    long maxId = Long.MAX_VALUE;
    if (cmdline.hasOption(MAX_ID_OPTION)) {
        maxId = Long.parseLong(cmdline.getOptionValue(MAX_ID_OPTION));
        LOG.info("index: " + maxId);
    }

    long startTime = System.currentTimeMillis();
    File file = new File(collectionPath);
    if (!file.exists()) {
        System.err.println("Error: " + file + " does not exist!");
        System.exit(-1);
    }

    StatusStream stream = new JsonStatusCorpusReader(file);

    Directory dir = FSDirectory.open(new File(indexPath));
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, IndexStatuses.ANALYZER);
    config.setOpenMode(OpenMode.CREATE);

    IndexWriter writer = new IndexWriter(dir, config);
    int cnt = 0;
    Status status;
    try {
        while ((status = stream.next()) != null) {
            if (status.getText() == null) {
                continue;
            }

            // Skip deleted tweetids.
            if (deletes != null && deletes.contains(status.getId())) {
                continue;
            }

            if (status.getId() > maxId) {
                continue;
            }

            cnt++;
            Document doc = new Document();
            doc.add(new LongField(StatusField.ID.name, status.getId(), Field.Store.YES));
            doc.add(new LongField(StatusField.EPOCH.name, status.getEpoch(), Field.Store.YES));
            doc.add(new TextField(StatusField.SCREEN_NAME.name, status.getScreenname(), Store.YES));

            doc.add(new Field(StatusField.TEXT.name, status.getText(), textOptions));

            doc.add(new IntField(StatusField.FRIENDS_COUNT.name, status.getFollowersCount(), Store.YES));
            doc.add(new IntField(StatusField.FOLLOWERS_COUNT.name, status.getFriendsCount(), Store.YES));
            doc.add(new IntField(StatusField.STATUSES_COUNT.name, status.getStatusesCount(), Store.YES));

            long inReplyToStatusId = status.getInReplyToStatusId();
            if (inReplyToStatusId > 0) {
                doc.add(new LongField(StatusField.IN_REPLY_TO_STATUS_ID.name, inReplyToStatusId,
                        Field.Store.YES));
                doc.add(new LongField(StatusField.IN_REPLY_TO_USER_ID.name, status.getInReplyToUserId(),
                        Field.Store.YES));
            }

            String lang = status.getLang();
            if (!lang.equals("unknown")) {
                doc.add(new TextField(StatusField.LANG.name, status.getLang(), Store.YES));
            }

            long retweetStatusId = status.getRetweetedStatusId();
            if (retweetStatusId > 0) {
                doc.add(new LongField(StatusField.RETWEETED_STATUS_ID.name, retweetStatusId, Field.Store.YES));
                doc.add(new LongField(StatusField.RETWEETED_USER_ID.name, status.getRetweetedUserId(),
                        Field.Store.YES));
                doc.add(new IntField(StatusField.RETWEET_COUNT.name, status.getRetweetCount(), Store.YES));
                if (status.getRetweetCount() < 0 || status.getRetweetedStatusId() < 0) {
                    LOG.warn("Error parsing retweet fields of " + status.getId());
                }
            }

            writer.addDocument(doc);
            if (cnt % 100000 == 0) {
                LOG.info(cnt + " statuses indexed");
            }
        }

        LOG.info(String.format("Total of %s statuses added", cnt));

        if (cmdline.hasOption(OPTIMIZE_OPTION)) {
            LOG.info("Merging segments...");
            writer.forceMerge(1);
            LOG.info("Done!");
        }

        LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms");
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        writer.close();
        dir.close();
        stream.close();
    }
}
From source file:io.anserini.index.IndexTweets.java
@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(new Option(HELP_OPTION, "show help"));
    options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment"));
    options.addOption(new Option(STORE_TERM_VECTORS_OPTION, "store term vectors"));

    options.addOption(OptionBuilder.withArgName("dir").hasArg().withDescription("source collection directory")
            .create(COLLECTION_OPTION));
    options.addOption(
            OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("file with deleted tweetids")
            .create(DELETES_OPTION));
    options.addOption(OptionBuilder.withArgName("id").hasArg().withDescription("max id").create(MAX_ID_OPTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (cmdline.hasOption(HELP_OPTION) || !cmdline.hasOption(COLLECTION_OPTION)
            || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(IndexTweets.class.getName(), options);
        System.exit(-1);
    }

    String collectionPath = cmdline.getOptionValue(COLLECTION_OPTION);
    String indexPath = cmdline.getOptionValue(INDEX_OPTION);

    final FieldType textOptions = new FieldType();
    textOptions.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    textOptions.setStored(true);
    textOptions.setTokenized(true);
    if (cmdline.hasOption(STORE_TERM_VECTORS_OPTION)) {
        textOptions.setStoreTermVectors(true);
    }

    LOG.info("collection: " + collectionPath);
    LOG.info("index: " + indexPath);

    LongOpenHashSet deletes = null;
    if (cmdline.hasOption(DELETES_OPTION)) {
        deletes = new LongOpenHashSet();
        File deletesFile = new File(cmdline.getOptionValue(DELETES_OPTION));
        if (!deletesFile.exists()) {
            System.err.println("Error: " + deletesFile + " does not exist!");
            System.exit(-1);
        }
        LOG.info("Reading deletes from " + deletesFile);

        FileInputStream fin = new FileInputStream(deletesFile);
        byte[] ignoreBytes = new byte[2];
        fin.read(ignoreBytes); // "B", "Z" bytes from commandline tools
        BufferedReader br = new BufferedReader(new InputStreamReader(new CBZip2InputStream(fin)));

        String s;
        while ((s = br.readLine()) != null) {
            if (s.contains("\t")) {
                deletes.add(Long.parseLong(s.split("\t")[0]));
            } else {
                deletes.add(Long.parseLong(s));
            }
        }
        br.close();
        fin.close();
        LOG.info("Read " + deletes.size() + " tweetids from deletes file.");
    }

    long maxId = Long.MAX_VALUE;
    if (cmdline.hasOption(MAX_ID_OPTION)) {
        maxId = Long.parseLong(cmdline.getOptionValue(MAX_ID_OPTION));
        LOG.info("index: " + maxId);
    }

    long startTime = System.currentTimeMillis();
    File file = new File(collectionPath);
    if (!file.exists()) {
        System.err.println("Error: " + file + " does not exist!");
        System.exit(-1);
    }

    StatusStream stream = new JsonStatusCorpusReader(file);

    Directory dir = FSDirectory.open(Paths.get(indexPath));
    final IndexWriterConfig config = new IndexWriterConfig(ANALYZER);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    IndexWriter writer = new IndexWriter(dir, config);
    int cnt = 0;
    Status status;
    try {
        while ((status = stream.next()) != null) {
            if (status.getText() == null) {
                continue;
            }

            // Skip deleted tweetids.
            if (deletes != null && deletes.contains(status.getId())) {
                continue;
            }

            if (status.getId() > maxId) {
                continue;
            }

            cnt++;
            Document doc = new Document();
            doc.add(new LongPoint(StatusField.ID.name, status.getId()));
            doc.add(new StoredField(StatusField.ID.name, status.getId()));
            doc.add(new LongPoint(StatusField.EPOCH.name, status.getEpoch()));
            doc.add(new StoredField(StatusField.EPOCH.name, status.getEpoch()));
            doc.add(new TextField(StatusField.SCREEN_NAME.name, status.getScreenname(), Store.YES));

            doc.add(new Field(StatusField.TEXT.name, status.getText(), textOptions));

            doc.add(new IntPoint(StatusField.FRIENDS_COUNT.name, status.getFollowersCount()));
            doc.add(new StoredField(StatusField.FRIENDS_COUNT.name, status.getFollowersCount()));
            doc.add(new IntPoint(StatusField.FOLLOWERS_COUNT.name, status.getFriendsCount()));
            doc.add(new StoredField(StatusField.FOLLOWERS_COUNT.name, status.getFriendsCount()));
            doc.add(new IntPoint(StatusField.STATUSES_COUNT.name, status.getStatusesCount()));
            doc.add(new StoredField(StatusField.STATUSES_COUNT.name, status.getStatusesCount()));

            long inReplyToStatusId = status.getInReplyToStatusId();
            if (inReplyToStatusId > 0) {
                doc.add(new LongPoint(StatusField.IN_REPLY_TO_STATUS_ID.name, inReplyToStatusId));
                doc.add(new StoredField(StatusField.IN_REPLY_TO_STATUS_ID.name, inReplyToStatusId));
                doc.add(new LongPoint(StatusField.IN_REPLY_TO_USER_ID.name, status.getInReplyToUserId()));
                doc.add(new StoredField(StatusField.IN_REPLY_TO_USER_ID.name, status.getInReplyToUserId()));
            }

            String lang = status.getLang();
            if (!lang.equals("unknown")) {
                doc.add(new TextField(StatusField.LANG.name, status.getLang(), Store.YES));
            }

            long retweetStatusId = status.getRetweetedStatusId();
            if (retweetStatusId > 0) {
                doc.add(new LongPoint(StatusField.RETWEETED_STATUS_ID.name, retweetStatusId));
                doc.add(new StoredField(StatusField.RETWEETED_STATUS_ID.name, retweetStatusId));
                doc.add(new LongPoint(StatusField.RETWEETED_USER_ID.name, status.getRetweetedUserId()));
                doc.add(new StoredField(StatusField.RETWEETED_USER_ID.name, status.getRetweetedUserId()));
                doc.add(new IntPoint(StatusField.RETWEET_COUNT.name, status.getRetweetCount()));
                doc.add(new StoredField(StatusField.RETWEET_COUNT.name, status.getRetweetCount()));
                if (status.getRetweetCount() < 0 || status.getRetweetedStatusId() < 0) {
                    LOG.warn("Error parsing retweet fields of " + status.getId());
                }
            }

            writer.addDocument(doc);
            if (cnt % 100000 == 0) {
                LOG.info(cnt + " statuses indexed");
            }
        }

        LOG.info(String.format("Total of %s statuses added", cnt));

        if (cmdline.hasOption(OPTIMIZE_OPTION)) {
            LOG.info("Merging segments...");
            writer.forceMerge(1);
            LOG.info("Done!");
        }

        LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms");
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        writer.close();
        dir.close();
        stream.close();
    }
}
From source file:com.almende.eve.deploy.Boot.java
/**
 * The default agent booter. It takes an EVE yaml file and creates all
 * agents mentioned in the "agents" section.
 *
 * @param args
 *            Single argument: args[0] -> Eve yaml
 */
public static void main(final String[] args) {
    if (args.length == 0) {
        LOG.warning("Missing argument pointing to yaml file:");
        LOG.warning("Usage: java -jar <jarfile> eve.yaml");
        return;
    }
    final ClassLoader cl = new ClassLoader() {
        @Override
        protected Class<?> findClass(final String name) throws ClassNotFoundException {
            Class<?> result = null;
            try {
                result = super.findClass(name);
            } catch (ClassNotFoundException cne) {
            }
            if (result == null) {
                FileInputStream fi = null;
                try {
                    String path = name.replace('.', '/');
                    fi = new FileInputStream(System.getProperty("user.dir") + "/" + path + ".class");
                    // available() is used as the file size and a single read(byte[])
                    // is assumed to fill the buffer for a local class file
                    byte[] classBytes = new byte[fi.available()];
                    fi.read(classBytes);
                    fi.close();
                    return defineClass(name, classBytes, 0, classBytes.length);
                } catch (Exception e) {
                    LOG.log(Level.WARNING, "Failed to load class:", e);
                }
            }
            if (result == null) {
                throw new ClassNotFoundException(name);
            }
            return result;
        }
    };
    String configFileName = args[0];
    try {
        InputStream is = new FileInputStream(new File(configFileName));
        boot(is, cl);
    } catch (FileNotFoundException e) {
        LOG.log(Level.WARNING, "Couldn't find configfile:" + configFileName, e);
        return;
    }
}
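The classloader above sizes its buffer with fi.available() and issues a single read(classBytes). That works for small local files, but available() is only an estimate and read() may return fewer bytes than requested. A hedged alternative sketch that accumulates the bytes without relying on either assumption:

import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.IOException;

final class ClassBytes {
    // Loads a file fully without relying on available(): read fixed-size
    // chunks until read() reports end of stream.
    static byte[] load(String path) throws IOException {
        try (FileInputStream fi = new FileInputStream(path);
             ByteArrayOutputStream out = new ByteArrayOutputStream()) {
            byte[] chunk = new byte[8192];
            int n;
            while ((n = fi.read(chunk)) != -1) {
                out.write(chunk, 0, n);
            }
            return out.toByteArray();
        }
    }
}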
From source file:com.alexoree.jenkins.Main.java
public static void main(String[] args) throws Exception {
    // create Options object
    Options options = new Options();
    options.addOption("t", false, "throttle the downloads, waits 5 seconds in between each d/l");

    // automatically generate the help statement
    HelpFormatter formatter = new HelpFormatter();
    formatter.printHelp("jenkins-sync", options);
    CommandLineParser parser = new DefaultParser();
    CommandLine cmd = parser.parse(options, args);
    boolean throttle = cmd.hasOption("t");

    String plugins = "https://updates.jenkins-ci.org/latest/";
    List<String> ps = new ArrayList<String>();
    Document doc = Jsoup.connect(plugins).get();
    for (Element file : doc.select("td a")) {
        //System.out.println(file.attr("href"));
        if (file.attr("href").endsWith(".hpi") || file.attr("href").endsWith(".war")) {
            ps.add(file.attr("href"));
        }
    }

    File root = new File(".");
    //https://updates.jenkins-ci.org/latest/AdaptivePlugin.hpi
    new File("./latest").mkdirs();

    //output zip file
    String zipFile = "jenkinsSync.zip";
    // create byte buffer
    byte[] buffer = new byte[1024];
    FileOutputStream fos = new FileOutputStream(zipFile);
    ZipOutputStream zos = new ZipOutputStream(fos);

    //download the plugins
    for (int i = 0; i < ps.size(); i++) {
        System.out.println("[" + i + "/" + ps.size() + "] downloading " + plugins + ps.get(i));
        String outputFile = download(root.getAbsolutePath() + "/latest/" + ps.get(i), plugins + ps.get(i));

        FileInputStream fis = new FileInputStream(outputFile);
        // begin writing a new ZIP entry, positions the stream to the start of the entry data
        zos.putNextEntry(new ZipEntry(outputFile.replace(root.getAbsolutePath(), "")
                .replace("updates.jenkins-ci.org/", "").replace("https:/", "")));
        int length;
        while ((length = fis.read(buffer)) > 0) {
            zos.write(buffer, 0, length);
        }
        zos.closeEntry();
        fis.close();
        if (throttle)
            Thread.sleep(WAIT);
        new File(root.getAbsolutePath() + "/latest/" + ps.get(i)).deleteOnExit();
    }

    //download the json metadata
    plugins = "https://updates.jenkins-ci.org/";
    ps = new ArrayList<String>();
    doc = Jsoup.connect(plugins).get();
    for (Element file : doc.select("td a")) {
        //System.out.println(file.attr("href"));
        if (file.attr("href").endsWith(".json")) {
            ps.add(file.attr("href"));
        }
    }
    for (int i = 0; i < ps.size(); i++) {
        download(root.getAbsolutePath() + "/" + ps.get(i), plugins + ps.get(i));

        FileInputStream fis = new FileInputStream(root.getAbsolutePath() + "/" + ps.get(i));
        // begin writing a new ZIP entry, positions the stream to the start of the entry data
        zos.putNextEntry(new ZipEntry(plugins + ps.get(i)));
        int length;
        while ((length = fis.read(buffer)) > 0) {
            zos.write(buffer, 0, length);
        }
        zos.closeEntry();
        fis.close();
        new File(root.getAbsolutePath() + "/" + ps.get(i)).deleteOnExit();
        if (throttle)
            Thread.sleep(WAIT);
    }

    // close the ZipOutputStream
    zos.close();
}
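Both download loops repeat the same read-into-buffer, write-to-zip copy. A sketch of that copy step factored into a helper, assuming an already-open ZipOutputStream (the entry-name handling is illustrative, not the exact names this tool builds):

import java.io.FileInputStream;
import java.io.IOException;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

final class ZipCopy {
    // Streams one file into an open ZipOutputStream; try-with-resources
    // closes the FileInputStream even if a write fails mid-copy.
    static void addFile(ZipOutputStream zos, String entryName, String path) throws IOException {
        byte[] buffer = new byte[1024];
        zos.putNextEntry(new ZipEntry(entryName));
        try (FileInputStream fis = new FileInputStream(path)) {
            int length;
            while ((length = fis.read(buffer)) > 0) {
                zos.write(buffer, 0, length);
            }
        }
        zos.closeEntry();
    }
}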
From source file:io.nuclei.box.db.DbContext.java
public static void main(String[] args) throws IOException {
    FileInputStream input = new FileInputStream(args[0]);
    byte[] buf = new byte[input.available()];
    input.read(buf);
    input.close();

    JSONObject model = new JSONObject(new String(buf, "UTF-8"));
    DbContext.newContext(model, new File(args[1]), null, null).render();
}
From source file:de.mpg.escidoc.services.common.util.Util.java
/**
 * Command line transformation.
 *
 * @param args see usage
 * @throws Exception Any exception
 */
public static void main(String[] args) throws Exception {
    if (args.length < 7) {
        System.out.println("Usage: ");
        System.out.println(
                "Util <srcFile> <srcFormatName> <srcFormatType> <srcFormatEncoding> <trgFormatName> <trgFormatType> <trgFormatEncoding> [<trgFile> [<service>]]");
        System.out.println("Example:");
        System.out.println(
                "Util /tmp/source.xml eDoc application/xml UTF-8 eSciDoc application/xml UTF-8 /tmp/target.xml");
    } else {
        // Init
        Transformation transformation = new TransformationBean(true);
        Format srcFormat = new Format(args[1], args[2], args[3]);
        Format trgFormat = new Format(args[4], args[5], args[6]);
        File srcFile = new File(args[0]);
        OutputStream trgStream;
        if (args.length > 7) {
            trgStream = new FileOutputStream(new File(args[7]));
        } else {
            trgStream = System.out;
        }
        String service = "eSciDoc";
        if (args.length > 8) {
            service = args[8];
        }

        // Get file content
        FileInputStream inputStream = new FileInputStream(srcFile);
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
        byte[] buffer = new byte[2048];
        int read;
        while ((read = inputStream.read(buffer)) != -1) {
            outputStream.write(buffer, 0, read);
        }
        byte[] content = outputStream.toByteArray();

        // Transform
        byte[] result = transformation.transform(content, srcFormat, trgFormat, service);

        // Stream back
        trgStream.write(result);
        trgStream.close();
    }
}
From source file:Main.java
public static byte[] LoadImage(String filePath) throws Exception {
    File file = new File(filePath);
    int size = (int) file.length();
    byte[] buffer = new byte[size];

    FileInputStream in = new FileInputStream(file);
    in.read(buffer);
    in.close();

    return buffer;
}
From source file:Main.java
public static String loadString(String filename) throws Exception {
    File file = new File(sAppContext.getCacheDir().getPath() + "/" + filename);
    byte[] buff = new byte[(int) file.length()];

    FileInputStream fis = new FileInputStream(file);
    fis.read(buff);

    String string = new String(buff);
    return string;
}
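This last example never closes the stream, assumes one read(buff) fills the buffer, and decodes with the platform default charset. A hedged sketch of the same idea with those concerns handled, using Files.readAllBytes to do the full read and close for us (cacheDir stands in for the sAppContext.getCacheDir() call above):

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;

final class CacheFiles {
    // Reads a cache file completely and decodes it as UTF-8; the underlying
    // stream is opened, fully read, and closed by Files.readAllBytes.
    static String loadString(File cacheDir, String filename) throws IOException {
        File file = new File(cacheDir, filename);
        byte[] buff = Files.readAllBytes(file.toPath());
        return new String(buff, StandardCharsets.UTF_8);
    }
}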