List of usage examples for java.lang Math ceil
public static double ceil(double a)
From source file:com.linkedin.pinot.perf.MultiValueReaderWriterBenchmark.java
public static void main(String[] args) throws Exception { List<String> lines = IOUtils.readLines(new FileReader(new File(args[0]))); int totalDocs = lines.size(); int max = Integer.MIN_VALUE; int maxNumberOfMultiValues = Integer.MIN_VALUE; int totalNumValues = 0; int data[][] = new int[totalDocs][]; for (int i = 0; i < lines.size(); i++) { String line = lines.get(i); String[] split = line.split(","); totalNumValues = totalNumValues + split.length; if (split.length > maxNumberOfMultiValues) { maxNumberOfMultiValues = split.length; }/*from w w w .ja v a 2s. com*/ data[i] = new int[split.length]; for (int j = 0; j < split.length; j++) { String token = split[j]; int val = Integer.parseInt(token); data[i][j] = val; if (val > max) { max = val; } } } int maxBitsNeeded = (int) Math.ceil(Math.log(max) / Math.log(2)); int size = 2048; int[] offsets = new int[size]; int bitMapSize = 0; File outputFile = new File("output.mv.fwd"); FixedBitSkipListSCMVWriter fixedBitSkipListSCMVWriter = new FixedBitSkipListSCMVWriter(outputFile, totalDocs, totalNumValues, maxBitsNeeded); for (int i = 0; i < totalDocs; i++) { fixedBitSkipListSCMVWriter.setIntArray(i, data[i]); if (i % size == size - 1) { MutableRoaringBitmap rr1 = MutableRoaringBitmap.bitmapOf(offsets); ByteArrayOutputStream bos = new ByteArrayOutputStream(); DataOutputStream dos = new DataOutputStream(bos); rr1.serialize(dos); dos.close(); //System.out.println("Chunk " + i / size + " bitmap size:" + bos.size()); bitMapSize += bos.size(); } else if (i == totalDocs - 1) { MutableRoaringBitmap rr1 = MutableRoaringBitmap.bitmapOf(Arrays.copyOf(offsets, i % size)); ByteArrayOutputStream bos = new ByteArrayOutputStream(); DataOutputStream dos = new DataOutputStream(bos); rr1.serialize(dos); dos.close(); //System.out.println("Chunk " + i / size + " bitmap size:" + bos.size()); bitMapSize += bos.size(); } } fixedBitSkipListSCMVWriter.close(); System.out.println("Output file size:" + outputFile.length()); System.out.println("totalNumberOfDoc\t\t\t:" + totalDocs); System.out.println("totalNumberOfValues\t\t\t:" + totalNumValues); System.out.println("chunk size\t\t\t\t:" + size); System.out.println("Num chunks\t\t\t\t:" + totalDocs / size); int numChunks = totalDocs / size + 1; int totalBits = (totalNumValues * maxBitsNeeded); int dataSizeinBytes = (totalBits + 7) / 8; System.out.println("Raw data size with fixed bit encoding\t:" + dataSizeinBytes); System.out.println("\nPer encoding size"); System.out.println(); System.out.println("size (offset + length)\t\t\t:" + ((totalDocs * (4 + 4)) + dataSizeinBytes)); System.out.println(); System.out.println("size (offset only)\t\t\t:" + ((totalDocs * (4)) + dataSizeinBytes)); System.out.println(); System.out.println("bitMapSize\t\t\t\t:" + bitMapSize); System.out.println("size (with bitmap)\t\t\t:" + (bitMapSize + (numChunks * 4) + dataSizeinBytes)); System.out.println(); System.out.println("Custom Bitset\t\t\t\t:" + (totalNumValues + 7) / 8); System.out.println("size (with custom bitset)\t\t\t:" + (((totalNumValues + 7) / 8) + (numChunks * 4) + dataSizeinBytes)); }
From source file:loanbroker.normalizer.NormalizerOurJsonBank.java
public static void main(String[] args) { try {/*from w w w . j a v a2s . c o m*/ ConnectionFactory factory = new ConnectionFactory(); factory.setHost("datdb.cphbusiness.dk"); factory.setPort(5672); factory.setUsername("student"); factory.setPassword("cph"); Connection connection = factory.newConnection(); Channel channel = connection.createChannel(); channel.exchangeDeclare(EXCHANGE_NAME, "fanout"); String queueName = channel.queueDeclare().getQueue(); channel.queueBind(queueName, EXCHANGE_NAME, ""); //channel.queueDeclare(RPC_QUEUE_NAME,false, false, false, null); System.out.println(" [*] Waiting for messages. To exit press CTRL+C"); QueueingConsumer consumer = new QueueingConsumer(channel); channel.basicConsume(queueName, true, consumer); //producer Channel channelOutput = connection.createChannel(); channelOutput.exchangeDeclare(ExchangeName.GLOBAL, "direct"); String queueNameProducer = channelOutput.queueDeclare().getQueue(); channelOutput.queueBind(queueNameProducer, ExchangeName.GLOBAL, RoutingKeys.NormalizerToAggregator); LoanResponse loanResponse; while (true) { System.out.println("Reading"); QueueingConsumer.Delivery delivery = consumer.nextDelivery(); System.out.println("CorrelationId: " + delivery.getProperties().getCorrelationId()); String message = new String(delivery.getBody()); JSONObject jsonObj = new JSONObject(message); loanResponse = new LoanResponse(jsonObj.getInt("ssn"), Math.ceil(jsonObj.getDouble("interestRate")), "Our Json Bank", delivery.getProperties().getCorrelationId()); System.out.println("renter: " + loanResponse.getInterestRate()); System.out.println("ssn: " + loanResponse.getSsn()); System.out.println("bank : " + loanResponse.getBank()); System.out.println("JSON:" + loanResponse); System.out.println("TOstring:" + jsonObj.toString()); Gson g = new Gson(); String fm = g.toJson(loanResponse); channelOutput.basicPublish(ExchangeName.GLOBAL, RoutingKeys.NormalizerToAggregator, null, fm.getBytes()); } } catch (IOException | TimeoutException | InterruptedException e) { e.printStackTrace(); } }
From source file:friendsandfollowers.DBFollowersIDs.java
public static void main(String[] args) throws ClassNotFoundException, SQLException, JSONException, FileNotFoundException, UnsupportedEncodingException { // Check arguments that passed in if ((args == null) || (args.length == 0)) { System.err.println("2 Parameters are required plus one optional " + "parameter to launch a Job."); System.err.println("First: String 'OUTPUT: /output/path/'"); System.err.println("Second: (int) Number of ids to fetch. " + "Provide number which increment by 5000 " + "(5000, 10000, 15000 etc) " + "or -1 to fetch all ids."); System.err.println("Third (optional): 'screen_name / user_id_str'"); System.err.println("If 3rd argument not provided then provide" + " Twitter users through database."); System.exit(-1);/* w ww . ja va 2 s . c om*/ } MysqlDB DB = new MysqlDB(); AppOAuth AppOAuths = new AppOAuth(); Misc helpers = new Misc(); String endpoint = "/followers/ids"; String OutputDirPath = null; try { OutputDirPath = StringEscapeUtils.escapeJava(args[0]); } catch (Exception e) { System.err.println("Argument" + args[0] + " must be an String."); System.exit(-1); } int IDS_TO_FETCH_INT = -1; try { IDS_TO_FETCH_INT = Integer.parseInt(args[1]); } catch (NumberFormatException e) { System.err.println("Argument" + args[1] + " must be an integer."); System.exit(-1); } int IDS_TO_FETCH = 0; if (IDS_TO_FETCH_INT > 5000) { float IDS_TO_FETCH_F = (float) IDS_TO_FETCH_INT / 5000; IDS_TO_FETCH = (int) Math.ceil(IDS_TO_FETCH_F); } else if ((IDS_TO_FETCH_INT <= 5000) && (IDS_TO_FETCH_INT > 0)) { IDS_TO_FETCH = 1; } String targetedUser = ""; if (args.length == 3) { try { targetedUser = StringEscapeUtils.escapeJava(args[2]); } catch (Exception e) { System.err.println("Argument" + args[2] + " must be an String."); System.exit(-1); } } try { TwitterFactory tf = AppOAuths.loadOAuthUser(endpoint); Twitter twitter = tf.getInstance(); int RemainingCalls = AppOAuths.RemainingCalls; int RemainingCallsCounter = 0; System.out.println("First Time Remianing Calls: " + RemainingCalls); String Screen_name = AppOAuths.screen_name; System.out.println("First Time Loaded OAuth Screen_name: " + Screen_name); IDs ids; System.out.println("Listing followers ids."); // if targetedUser not provided by argument, then look into database. if (StringUtils.isEmpty(targetedUser)) { String selectQuery = "SELECT * FROM `followers_parent` WHERE " + "`targeteduser` != '' AND " + "`nextcursor` != '0' AND " + "`nextcursor` != '2'"; ResultSet results = DB.selectQ(selectQuery); int numRows = DB.numRows(results); if (numRows < 1) { System.err.println("No User in database to get followersIDS"); System.exit(-1); } OUTERMOST: while (results.next()) { int followers_parent_id = results.getInt("id"); targetedUser = results.getString("targeteduser"); long cursor = results.getLong("nextcursor"); System.out.println("Targeted User: " + targetedUser); int idsLoopCounter = 0; int totalIDs = 0; // put idsJSON in a file PrintWriter writer = new PrintWriter(OutputDirPath + "/" + targetedUser, "UTF-8"); // call different functions for screen_name and id_str Boolean chckedNumaric = helpers.isNumeric(targetedUser); do { ids = null; try { if (chckedNumaric) { long LongValueTargetedUser = Long.valueOf(targetedUser).longValue(); ids = twitter.getFollowersIDs(LongValueTargetedUser, cursor); } else { ids = twitter.getFollowersIDs(targetedUser, cursor); } } catch (TwitterException te) { // do not throw if user has protected tweets, // or if they deleted their account if (te.getStatusCode() == HttpResponseCode.UNAUTHORIZED || te.getStatusCode() == HttpResponseCode.NOT_FOUND) { System.out.println(targetedUser + " is protected or account is deleted"); } else { System.out.println("Followers Get Exception: " + te.getMessage()); } // If rate limit reached then switch Auth user RemainingCallsCounter++; if (RemainingCallsCounter >= RemainingCalls) { // load auth user tf = AppOAuths.loadOAuthUser(endpoint); twitter = tf.getInstance(); System.out.println( "New User Loaded OAuth" + " Screen_name: " + AppOAuths.screen_name); RemainingCalls = AppOAuths.RemainingCalls; RemainingCallsCounter = 0; System.out.println("New Remianing Calls: " + RemainingCalls); } // update cursor in "followers_parent" String fieldValues = "`nextcursor` = 2"; String where = "id = " + followers_parent_id; DB.Update("`followers_parent`", fieldValues, where); // If error then switch to next user continue OUTERMOST; } if (ids.getIDs().length > 0) { idsLoopCounter++; totalIDs += ids.getIDs().length; System.out.println(idsLoopCounter + ": IDS length: " + ids.getIDs().length); JSONObject responseDetailsJson = new JSONObject(); JSONArray jsonArray = new JSONArray(); for (long id : ids.getIDs()) { jsonArray.put(id); } Object idsJSON = responseDetailsJson.put("ids", jsonArray); writer.println(idsJSON); } // If rate limit reached then switch Auth user RemainingCallsCounter++; if (RemainingCallsCounter >= RemainingCalls) { // load auth user tf = AppOAuths.loadOAuthUser(endpoint); twitter = tf.getInstance(); System.out.println("New User Loaded OAuth " + "Screen_name: " + AppOAuths.screen_name); RemainingCalls = AppOAuths.RemainingCalls; RemainingCallsCounter = 0; System.out.println("New Remianing Calls: " + RemainingCalls); } if (IDS_TO_FETCH_INT != -1) { if (idsLoopCounter == IDS_TO_FETCH) { break; } } } while ((cursor = ids.getNextCursor()) != 0); writer.close(); System.out.println("Total ids dumped of " + targetedUser + " are: " + totalIDs); System.out.println(); // update cursor in "followers_parent" String fieldValues = "`nextcursor` = " + cursor; String where = "id = " + followers_parent_id; DB.Update("`followers_parent`", fieldValues, where); } // loop through every result found in db } else { // Second Argument Set, so we are here. System.out.println("screen_name / user_id_str passed by argument"); int idsLoopCounter = 0; int totalIDs = 0; // put idsJSON in a file PrintWriter writer = new PrintWriter( OutputDirPath + "/" + targetedUser + "_ids_" + helpers.getUnixTimeStamp(), "UTF-8"); // call different functions for screen_name and id_str Boolean chckedNumaric = helpers.isNumeric(targetedUser); long cursor = -1; do { ids = null; try { if (chckedNumaric) { long LongValueTargetedUser = Long.valueOf(targetedUser).longValue(); ids = twitter.getFollowersIDs(LongValueTargetedUser, cursor); } else { ids = twitter.getFollowersIDs(targetedUser, cursor); } } catch (TwitterException te) { // do not throw if user has protected tweets, or if they deleted their account if (te.getStatusCode() == HttpResponseCode.UNAUTHORIZED || te.getStatusCode() == HttpResponseCode.NOT_FOUND) { System.out.println(targetedUser + " is protected or account is deleted"); } else { System.out.println("Followers Get Exception: " + te.getMessage()); } System.exit(-1); } if (ids.getIDs().length > 0) { idsLoopCounter++; totalIDs += ids.getIDs().length; System.out.println(idsLoopCounter + ": IDS length: " + ids.getIDs().length); JSONObject responseDetailsJson = new JSONObject(); JSONArray jsonArray = new JSONArray(); for (long id : ids.getIDs()) { jsonArray.put(id); } Object idsJSON = responseDetailsJson.put("ids", jsonArray); writer.println(idsJSON); } // If rate limit reach then switch Auth user RemainingCallsCounter++; if (RemainingCallsCounter >= RemainingCalls) { // load auth user tf = AppOAuths.loadOAuthUser(endpoint); twitter = tf.getInstance(); System.out.println("New User Loaded OAuth Screen_name: " + AppOAuths.screen_name); RemainingCalls = AppOAuths.RemainingCalls; RemainingCallsCounter = 0; System.out.println("New Remianing Calls: " + RemainingCalls); } if (IDS_TO_FETCH_INT != -1) { if (idsLoopCounter == IDS_TO_FETCH) { break; } } } while ((cursor = ids.getNextCursor()) != 0); writer.close(); System.out.println("Total ids dumped of " + targetedUser + " are: " + totalIDs); System.out.println(); } } catch (TwitterException te) { // te.printStackTrace(); System.err.println("Failed to get followers' ids: " + te.getMessage()); System.exit(-1); } System.out.println("!!!! DONE !!!!"); }
From source file:friendsandfollowers.DBFriendsIDs.java
public static void main(String[] args) throws ClassNotFoundException, SQLException, JSONException, FileNotFoundException, UnsupportedEncodingException { // Check arguments that passed in if ((args == null) || (args.length == 0)) { System.err.println("2 Parameters are required plus one optional " + "parameter to launch a Job."); System.err.println("First: String 'OUTPUT: /output/path/'"); System.err.println("Second: (int) Number of ids to fetch. " + "Provide number which increment by 5000 " + "(5000, 10000, 15000 etc) " + "or -1 to fetch all ids."); System.err.println("Third (optional): 'screen_name / user_id_str'"); System.err.println("If 3rd argument not provided then provide" + " Twitter users through database."); System.exit(-1);// w ww. j a v a2s . c o m } MysqlDB DB = new MysqlDB(); AppOAuth AppOAuths = new AppOAuth(); Misc helpers = new Misc(); String endpoint = "/friends/ids"; String OutputDirPath = null; try { OutputDirPath = StringEscapeUtils.escapeJava(args[0]); } catch (Exception e) { System.err.println("Argument" + args[0] + " must be an String."); System.exit(-1); } int IDS_TO_FETCH_INT = -1; try { IDS_TO_FETCH_INT = Integer.parseInt(args[1]); } catch (NumberFormatException e) { System.err.println("Argument" + args[1] + " must be an integer."); System.exit(-1); } int IDS_TO_FETCH = 0; if (IDS_TO_FETCH_INT > 5000) { float IDS_TO_FETCH_F = (float) IDS_TO_FETCH_INT / 5000; IDS_TO_FETCH = (int) Math.ceil(IDS_TO_FETCH_F); } else if ((IDS_TO_FETCH_INT <= 5000) && (IDS_TO_FETCH_INT > 0)) { IDS_TO_FETCH = 1; } String targetedUser = ""; if (args.length == 3) { try { targetedUser = StringEscapeUtils.escapeJava(args[2]); } catch (Exception e) { System.err.println("Argument" + args[2] + " must be an String."); System.exit(-1); } } try { TwitterFactory tf = AppOAuths.loadOAuthUser(endpoint); Twitter twitter = tf.getInstance(); int RemainingCalls = AppOAuths.RemainingCalls; int RemainingCallsCounter = 0; System.out.println("First Time Remianing Calls: " + RemainingCalls); String Screen_name = AppOAuths.screen_name; System.out.println("First Time Loaded OAuth Screen_name: " + Screen_name); IDs ids; System.out.println("Listing friends ids."); // if targetedUser not provided by argument, then look into database. if (StringUtils.isEmpty(targetedUser)) { String selectQuery = "SELECT * FROM `followings_parent` WHERE " + "`targeteduser` != '' AND " + "`nextcursor` != '0' AND " + "`nextcursor` != '2'"; ResultSet results = DB.selectQ(selectQuery); int numRows = DB.numRows(results); if (numRows < 1) { System.err.println("No User in database to get friendsIDS"); System.exit(-1); } OUTERMOST: while (results.next()) { int following_parent_id = results.getInt("id"); targetedUser = results.getString("targeteduser"); long cursor = results.getLong("nextcursor"); System.out.println("Targeted User: " + targetedUser); int idsLoopCounter = 0; int totalIDs = 0; // put idsJSON in a file PrintWriter writer = new PrintWriter(OutputDirPath + "/" + targetedUser, "UTF-8"); // call different functions for screen_name and id_str Boolean chckedNumaric = helpers.isNumeric(targetedUser); do { ids = null; try { if (chckedNumaric) { long LongValueTargetedUser = Long.valueOf(targetedUser).longValue(); ids = twitter.getFriendsIDs(LongValueTargetedUser, cursor); } else { ids = twitter.getFriendsIDs(targetedUser, cursor); } } catch (TwitterException te) { // do not throw if user has protected tweets, // or if they deleted their account if (te.getStatusCode() == HttpResponseCode.UNAUTHORIZED || te.getStatusCode() == HttpResponseCode.NOT_FOUND) { System.out.println(targetedUser + " is protected or account is deleted"); } else { System.out.println("Friends Get Exception: " + te.getMessage()); } // If rate limit reached then switch Auth user RemainingCallsCounter++; if (RemainingCallsCounter >= RemainingCalls) { // load auth user tf = AppOAuths.loadOAuthUser(endpoint); twitter = tf.getInstance(); System.out.println( "New User Loaded OAuth" + " Screen_name: " + AppOAuths.screen_name); RemainingCalls = AppOAuths.RemainingCalls; RemainingCallsCounter = 0; System.out.println("New Remianing Calls: " + RemainingCalls); } // update cursor in "followings_parent" String fieldValues = "`nextcursor` = 2"; String where = "id = " + following_parent_id; DB.Update("`followings_parent`", fieldValues, where); // If error then switch to next user continue OUTERMOST; } if (ids.getIDs().length > 0) { idsLoopCounter++; totalIDs += ids.getIDs().length; System.out.println(idsLoopCounter + ": IDS length: " + ids.getIDs().length); JSONObject responseDetailsJson = new JSONObject(); JSONArray jsonArray = new JSONArray(); for (long id : ids.getIDs()) { jsonArray.put(id); } Object idsJSON = responseDetailsJson.put("ids", jsonArray); writer.println(idsJSON); } // If rate limit reached then switch Auth user. RemainingCallsCounter++; if (RemainingCallsCounter >= RemainingCalls) { // load auth user tf = AppOAuths.loadOAuthUser(endpoint); twitter = tf.getInstance(); System.out.println("New User Loaded OAuth " + "Screen_name: " + AppOAuths.screen_name); RemainingCalls = AppOAuths.RemainingCalls; RemainingCallsCounter = 0; System.out.println("New Remianing Calls: " + RemainingCalls); } if (IDS_TO_FETCH_INT != -1) { if (idsLoopCounter == IDS_TO_FETCH) { break; } } } while ((cursor = ids.getNextCursor()) != 0); writer.close(); System.out.println("Total ids dumped of " + targetedUser + " are: " + totalIDs); System.out.println(); // update cursor in "followings_parent" String fieldValues = "`nextcursor` = " + cursor; String where = "id = " + following_parent_id; DB.Update("`followings_parent`", fieldValues, where); } // loop through every result found in db } else { // Second Argument Sets, so we are here. System.out.println("screen_name / user_id_str " + "passed by argument"); int idsLoopCounter = 0; int totalIDs = 0; // put idsJSON in a file PrintWriter writer = new PrintWriter( OutputDirPath + "/" + targetedUser + "_ids_" + helpers.getUnixTimeStamp(), "UTF-8"); // call different functions for screen_name and id_str Boolean chckedNumaric = helpers.isNumeric(targetedUser); long cursor = -1; do { ids = null; try { if (chckedNumaric) { long LongValueTargetedUser = Long.valueOf(targetedUser).longValue(); ids = twitter.getFriendsIDs(LongValueTargetedUser, cursor); } else { ids = twitter.getFriendsIDs(targetedUser, cursor); } } catch (TwitterException te) { // do not throw if user has protected tweets, // or if they deleted their account if (te.getStatusCode() == HttpResponseCode.UNAUTHORIZED || te.getStatusCode() == HttpResponseCode.NOT_FOUND) { System.out.println(targetedUser + " is protected or account is deleted"); } else { System.out.println("Friends Get Exception: " + te.getMessage()); } System.exit(-1); } if (ids.getIDs().length > 0) { idsLoopCounter++; totalIDs += ids.getIDs().length; System.out.println(idsLoopCounter + ": IDS length: " + ids.getIDs().length); JSONObject responseDetailsJson = new JSONObject(); JSONArray jsonArray = new JSONArray(); for (long id : ids.getIDs()) { jsonArray.put(id); } Object idsJSON = responseDetailsJson.put("ids", jsonArray); writer.println(idsJSON); } // If rate limit reach then switch Auth user RemainingCallsCounter++; if (RemainingCallsCounter >= RemainingCalls) { // load auth user tf = AppOAuths.loadOAuthUser(endpoint); twitter = tf.getInstance(); System.out.println("New User Loaded OAuth Screen_name: " + AppOAuths.screen_name); RemainingCalls = AppOAuths.RemainingCalls; RemainingCallsCounter = 0; System.out.println("New Remianing Calls: " + RemainingCalls); } if (IDS_TO_FETCH_INT != -1) { if (idsLoopCounter == IDS_TO_FETCH) { break; } } } while ((cursor = ids.getNextCursor()) != 0); writer.close(); System.out.println("Total ids dumped of " + targetedUser + " are: " + totalIDs); System.out.println(); } } catch (TwitterException te) { // te.printStackTrace(); System.err.println("Failed to get friends' ids: " + te.getMessage()); System.exit(-1); } System.out.println("!!!! DONE !!!!"); }
From source file:friendsandfollowers.FilesThreaderFriendsIDsParser.java
public static void main(String[] args) throws ClassNotFoundException, SQLException, JSONException, FileNotFoundException, UnsupportedEncodingException { // Check how many arguments were passed in if ((args == null) || (args.length < 5)) { System.err.println("5 Parameters are required plus one optional " + "parameter to launch a Job."); System.err.println("First: String 'INPUT: DB or /input/path/'"); System.err.println("Second: String 'OUTPUT: /output/path/'"); System.err.println("Third: (int) Total Number Of Jobs"); System.err.println("Fourth: (int) This Job Number"); System.err.println("Fifth: (int) Number of seconds to pause"); System.err.println("Sixth: (int) Number of ids to fetch" + "Provide number which increment by 5000 " + "(5000, 10000, 15000 etc) " + "or -1 to fetch all ids."); System.err.println("Example: fileToRun /input/path/ " + "/output/path/ 10 1 3 75000"); System.exit(-1);/* w w w. j a va 2s . c om*/ } // TODO documentation for command line AppOAuth AppOAuths = new AppOAuth(); Misc helpers = new Misc(); String endpoint = "/friends/ids"; String inputPath = null; try { inputPath = StringEscapeUtils.escapeJava(args[0]); } catch (Exception e) { System.err.println("Argument " + args[0] + " must be an String."); System.exit(-1); } String outputPath = null; try { outputPath = StringEscapeUtils.escapeJava(args[1]); } catch (Exception e) { System.err.println("Argument " + args[1] + " must be an String."); System.exit(-1); } int TOTAL_JOBS = 0; try { TOTAL_JOBS = Integer.parseInt(args[2]); } catch (NumberFormatException e) { System.err.println("Argument " + args[2] + " must be an integer."); System.exit(1); } int JOB_NO = 0; try { JOB_NO = Integer.parseInt(args[3]); } catch (NumberFormatException e) { System.err.println("Argument " + args[3] + " must be an integer."); System.exit(1); } int secondsToPause = 0; try { secondsToPause = Integer.parseInt(args[4]); } catch (NumberFormatException e) { System.err.println("Argument" + args[4] + " must be an integer."); System.exit(-1); } int IDS_TO_FETCH_INT = -1; if (args.length == 6) { try { IDS_TO_FETCH_INT = Integer.parseInt(args[5]); } catch (NumberFormatException e) { System.err.println("Argument" + args[5] + " must be an integer."); System.exit(-1); } } int IDS_TO_FETCH = 0; if (IDS_TO_FETCH_INT > 5000) { float IDS_TO_FETCH_F = (float) IDS_TO_FETCH_INT / 5000; IDS_TO_FETCH = (int) Math.ceil(IDS_TO_FETCH_F); } else if ((IDS_TO_FETCH_INT <= 5000) && (IDS_TO_FETCH_INT > 0)) { IDS_TO_FETCH = 1; } secondsToPause = (TOTAL_JOBS * secondsToPause) - (JOB_NO * secondsToPause); System.out.println("secondsToPause: " + secondsToPause); helpers.pause(secondsToPause); try { int TotalWorkLoad = 0; ArrayList<String> allFiles = null; try { final File folder = new File(inputPath); allFiles = helpers.listFilesForSingleFolder(folder); TotalWorkLoad = allFiles.size(); } catch (Exception e) { System.err.println("Input folder is not exists: " + e.getMessage()); System.exit(-1); } System.out.println("Total Workload is: " + TotalWorkLoad); if (TotalWorkLoad < 1) { System.err.println("No screen names file exists in: " + inputPath); System.exit(-1); } if (TOTAL_JOBS > TotalWorkLoad) { System.err.println("Number of jobs are more than total work" + " load. Please reduce 'Number of jobs' to launch."); System.exit(-1); } float TotalWorkLoadf = TotalWorkLoad; float TOTAL_JOBSf = TOTAL_JOBS; float res = (TotalWorkLoadf / TOTAL_JOBSf); int chunkSize = (int) Math.ceil(res); int offSet = JOB_NO * chunkSize; int chunkSizeToGet = (JOB_NO + 1) * chunkSize; System.out.println("My Share is " + chunkSize); System.out.println(); // Load OAuh User TwitterFactory tf = AppOAuths.loadOAuthUser(endpoint, TOTAL_JOBS, JOB_NO); Twitter twitter = tf.getInstance(); int RemainingCalls = AppOAuths.RemainingCalls; int RemainingCallsCounter = 0; System.out.println("First Time OAuth Remianing Calls: " + RemainingCalls); String Screen_name = AppOAuths.screen_name; System.out.println("First Time Loaded OAuth Screen_name: " + Screen_name); System.out.println(); IDs ids; System.out.println("Going to get friends ids."); // to write output in a file System.out.flush(); if (JOB_NO + 1 == TOTAL_JOBS) { chunkSizeToGet = TotalWorkLoad; } List<String> myFilesShare = allFiles.subList(offSet, chunkSizeToGet); for (String myFile : myFilesShare) { System.out.println("Going to parse file: " + myFile); try (BufferedReader br = new BufferedReader(new FileReader(inputPath + "/" + myFile))) { String line; OUTERMOST: while ((line = br.readLine()) != null) { // process the line. System.out.println("Going to get friends ids of Screen-name / user_id: " + line); System.out.println(); String targetedUser = line.trim(); // tmp long cursor = -1; int idsLoopCounter = 0; int totalIDs = 0; PrintWriter writer = new PrintWriter(outputPath + "/" + targetedUser, "UTF-8"); // call different functions for screen_name and id_str Boolean chckedNumaric = helpers.isNumeric(targetedUser); do { ids = null; try { if (chckedNumaric) { long LongValueTargetedUser = Long.valueOf(targetedUser).longValue(); ids = twitter.getFriendsIDs(LongValueTargetedUser, cursor); } else { ids = twitter.getFriendsIDs(targetedUser, cursor); } } catch (TwitterException te) { // do not throw if user has protected tweets, or // if they deleted their account if (te.getStatusCode() == HttpResponseCode.UNAUTHORIZED || te.getStatusCode() == HttpResponseCode.NOT_FOUND) { System.out.println(targetedUser + " is protected or account is deleted"); } else { System.out.println("Friends Get Exception: " + te.getMessage()); } // If rate limit reached then switch Auth user RemainingCallsCounter++; if (RemainingCallsCounter >= RemainingCalls) { // load auth user tf = AppOAuths.loadOAuthUser(endpoint, TOTAL_JOBS, JOB_NO); twitter = tf.getInstance(); System.out.println( "New Loaded OAuth User " + " Screen_name: " + AppOAuths.screen_name); RemainingCalls = AppOAuths.RemainingCalls; RemainingCallsCounter = 0; System.out.println("New OAuth Remianing Calls: " + RemainingCalls); } // Remove file if ids not found if (totalIDs == 0) { System.out.println("No ids fetched so removing " + "file " + targetedUser); File fileToDelete = new File(outputPath + "/" + targetedUser); fileToDelete.delete(); } System.out.println(); // If error then switch to next user continue OUTERMOST; } if (ids.getIDs().length > 0) { idsLoopCounter++; totalIDs += ids.getIDs().length; System.out.println(idsLoopCounter + ": IDS length: " + ids.getIDs().length); JSONObject responseDetailsJson = new JSONObject(); JSONArray jsonArray = new JSONArray(); for (long id : ids.getIDs()) { jsonArray.put(id); } Object idsJSON = responseDetailsJson.put("ids", jsonArray); writer.println(idsJSON); } // If rate limit reached then switch Auth user RemainingCallsCounter++; if (RemainingCallsCounter >= RemainingCalls) { // load auth user tf = AppOAuths.loadOAuthUser(endpoint, TOTAL_JOBS, JOB_NO); twitter = tf.getInstance(); System.out.println("New Loaded OAuth User Screen_name: " + AppOAuths.screen_name); RemainingCalls = AppOAuths.RemainingCalls; RemainingCallsCounter = 0; System.out.println("New OAuth Remianing Calls: " + RemainingCalls); } if (IDS_TO_FETCH_INT != -1) { if (idsLoopCounter == IDS_TO_FETCH) { break; } } } while ((cursor = ids.getNextCursor()) != 0); writer.close(); System.out.println("Total ids dumped of " + targetedUser + " are: " + totalIDs); // Remove file if ids not found if (totalIDs == 0) { System.out.println("No ids fetched so removing " + "file " + targetedUser); File fileToDelete = new File(outputPath + "/" + targetedUser); fileToDelete.delete(); } System.out.println(); } // while get records from single file } // read my single file catch (IOException e) { System.err.println("Failed to read lines from " + myFile); } // to write output in a file System.out.flush(); } // all my files share } catch (TwitterException te) { // te.printStackTrace(); System.err.println("Failed to get friends' ids: " + te.getMessage()); System.exit(-1); } System.out.println("!!!! DONE !!!!"); // Close System.out for this thread which will // flush and close this thread. System.out.close(); }
From source file:friendsandfollowers.FilesThreaderFollowersIDsParser.java
public static void main(String[] args) throws ClassNotFoundException, SQLException, JSONException, FileNotFoundException, UnsupportedEncodingException { // Check how many arguments were passed in if ((args == null) || (args.length < 5)) { System.err.println("5 Parameters are required plus one optional " + "parameter to launch a Job."); System.err.println("First: String 'INPUT: DB or /input/path/'"); System.err.println("Second: String 'OUTPUT: /output/path/'"); System.err.println("Third: (int) Total Number Of Jobs"); System.err.println("Fourth: (int) This Job Number"); System.err.println("Fifth: (int) Number of seconds to pause"); System.err.println("Sixth: (int) Number of ids to fetch" + "Provide number which increment by 5000 " + "(5000, 10000, 15000 etc) " + "or -1 to fetch all ids."); System.err.println("Example: fileToRun /input/path/ " + "/output/path/ 10 1 3 75000"); System.exit(-1);/*from w ww. j a va2 s. co m*/ } // TODO documentation for command line AppOAuth AppOAuths = new AppOAuth(); Misc helpers = new Misc(); String endpoint = "/followers/ids"; String inputPath = null; try { inputPath = StringEscapeUtils.escapeJava(args[0]); } catch (Exception e) { System.err.println("Argument " + args[0] + " must be an String."); System.exit(-1); } String outputPath = null; try { outputPath = StringEscapeUtils.escapeJava(args[1]); } catch (Exception e) { System.err.println("Argument " + args[1] + " must be an String."); System.exit(-1); } int TOTAL_JOBS = 0; try { TOTAL_JOBS = Integer.parseInt(args[2]); } catch (NumberFormatException e) { System.err.println("Argument " + args[2] + " must be an integer."); System.exit(1); } int JOB_NO = 0; try { JOB_NO = Integer.parseInt(args[3]); } catch (NumberFormatException e) { System.err.println("Argument " + args[3] + " must be an integer."); System.exit(1); } int secondsToPause = 0; try { secondsToPause = Integer.parseInt(args[4]); } catch (NumberFormatException e) { System.err.println("Argument" + args[4] + " must be an integer."); System.exit(-1); } int IDS_TO_FETCH_INT = -1; if (args.length == 6) { try { IDS_TO_FETCH_INT = Integer.parseInt(args[5]); } catch (NumberFormatException e) { System.err.println("Argument" + args[5] + " must be an integer."); System.exit(-1); } } int IDS_TO_FETCH = 0; if (IDS_TO_FETCH_INT > 5000) { float IDS_TO_FETCH_F = (float) IDS_TO_FETCH_INT / 5000; IDS_TO_FETCH = (int) Math.ceil(IDS_TO_FETCH_F); } else if ((IDS_TO_FETCH_INT <= 5000) && (IDS_TO_FETCH_INT > 0)) { IDS_TO_FETCH = 1; } secondsToPause = (TOTAL_JOBS * secondsToPause) - (JOB_NO * secondsToPause); System.out.println("secondsToPause: " + secondsToPause); helpers.pause(secondsToPause); try { int TotalWorkLoad = 0; ArrayList<String> allFiles = null; try { final File folder = new File(inputPath); allFiles = helpers.listFilesForSingleFolder(folder); TotalWorkLoad = allFiles.size(); } catch (Exception e) { System.err.println("Input folder is not exists: " + e.getMessage()); System.exit(-1); } System.out.println("Total Workload is: " + TotalWorkLoad); if (TotalWorkLoad < 1) { System.err.println("No screen names file exists in: " + inputPath); System.exit(-1); } if (TOTAL_JOBS > TotalWorkLoad) { System.err.println("Number of jobs are more than total work" + " load. Please reduce 'Number of jobs' to launch."); System.exit(-1); } float TotalWorkLoadf = TotalWorkLoad; float TOTAL_JOBSf = TOTAL_JOBS; float res = (TotalWorkLoadf / TOTAL_JOBSf); int chunkSize = (int) Math.ceil(res); int offSet = JOB_NO * chunkSize; int chunkSizeToGet = (JOB_NO + 1) * chunkSize; System.out.println("My Share is " + chunkSize); System.out.println(); // Load OAuh User TwitterFactory tf = AppOAuths.loadOAuthUser(endpoint, TOTAL_JOBS, JOB_NO); Twitter twitter = tf.getInstance(); int RemainingCalls = AppOAuths.RemainingCalls; int RemainingCallsCounter = 0; System.out.println("First Time OAuth Remianing Calls: " + RemainingCalls); String Screen_name = AppOAuths.screen_name; System.out.println("First Time Loaded OAuth Screen_name: " + Screen_name); System.out.println(); IDs ids; System.out.println("Going to get followers ids."); // to write output in a file System.out.flush(); if (JOB_NO + 1 == TOTAL_JOBS) { chunkSizeToGet = TotalWorkLoad; } List<String> myFilesShare = allFiles.subList(offSet, chunkSizeToGet); for (String myFile : myFilesShare) { System.out.println("Going to parse file: " + myFile); try (BufferedReader br = new BufferedReader(new FileReader(inputPath + "/" + myFile))) { String line; OUTERMOST: while ((line = br.readLine()) != null) { // process the line. System.out.println("Going to get followers ids of Screen-name / user_id: " + line); System.out.println(); String targetedUser = line.trim(); // tmp long cursor = -1; int idsLoopCounter = 0; int totalIDs = 0; PrintWriter writer = new PrintWriter(outputPath + "/" + targetedUser, "UTF-8"); // call different functions for screen_name and id_str Boolean chckedNumaric = helpers.isNumeric(targetedUser); do { ids = null; try { if (chckedNumaric) { long LongValueTargetedUser = Long.valueOf(targetedUser).longValue(); ids = twitter.getFollowersIDs(LongValueTargetedUser, cursor); } else { ids = twitter.getFollowersIDs(targetedUser, cursor); } } catch (TwitterException te) { // do not throw if user has protected tweets, or // if they deleted their account if (te.getStatusCode() == HttpResponseCode.UNAUTHORIZED || te.getStatusCode() == HttpResponseCode.NOT_FOUND) { System.out.println(targetedUser + " is protected or account is deleted"); } else { System.out.println("Followers Get Exception: " + te.getMessage()); } // If rate limit reached then switch Auth user RemainingCallsCounter++; if (RemainingCallsCounter >= RemainingCalls) { // load auth user tf = AppOAuths.loadOAuthUser(endpoint, TOTAL_JOBS, JOB_NO); twitter = tf.getInstance(); System.out.println( "New Loaded OAuth User " + " Screen_name: " + AppOAuths.screen_name); RemainingCalls = AppOAuths.RemainingCalls; RemainingCallsCounter = 0; System.out.println("New OAuth Remianing Calls: " + RemainingCalls); } // Remove file if ids not found if (totalIDs == 0) { System.out.println("No ids fetched so removing " + "file " + targetedUser); File fileToDelete = new File(outputPath + "/" + targetedUser); fileToDelete.delete(); } System.out.println(); // If error then switch to next user continue OUTERMOST; } if (ids.getIDs().length > 0) { idsLoopCounter++; totalIDs += ids.getIDs().length; System.out.println(idsLoopCounter + ": IDS length: " + ids.getIDs().length); JSONObject responseDetailsJson = new JSONObject(); JSONArray jsonArray = new JSONArray(); for (long id : ids.getIDs()) { jsonArray.put(id); } Object idsJSON = responseDetailsJson.put("ids", jsonArray); writer.println(idsJSON); } // If rate limit reached then switch Auth user RemainingCallsCounter++; if (RemainingCallsCounter >= RemainingCalls) { // load auth user tf = AppOAuths.loadOAuthUser(endpoint, TOTAL_JOBS, JOB_NO); twitter = tf.getInstance(); System.out.println("New Loaded OAuth User Screen_name: " + AppOAuths.screen_name); RemainingCalls = AppOAuths.RemainingCalls; RemainingCallsCounter = 0; System.out.println("New OAuth Remianing Calls: " + RemainingCalls); } if (IDS_TO_FETCH_INT != -1) { if (idsLoopCounter == IDS_TO_FETCH) { break; } } } while ((cursor = ids.getNextCursor()) != 0); writer.close(); System.out.println("Total ids dumped of " + targetedUser + " are: " + totalIDs); // Remove file if ids not found if (totalIDs == 0) { System.out.println("No ids fetched so removing " + "file " + targetedUser); File fileToDelete = new File(outputPath + "/" + targetedUser); fileToDelete.delete(); } System.out.println(); } // while get records from single file } // read my single file catch (IOException e) { System.err.println("Failed to read lines from " + myFile); } // to write output in a file System.out.flush(); } // all my files share } catch (TwitterException te) { // te.printStackTrace(); System.err.println("Failed to get followers' ids: " + te.getMessage()); System.exit(-1); } System.out.println("!!!! DONE !!!!"); // Close System.out for this thread which will // flush and close this thread. System.out.close(); }
From source file:cc.twittertools.search.api.RunQueriesBaselineThrift.java
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("string").hasArg().withDescription("host").create(HOST_OPTION)); options.addOption(OptionBuilder.withArgName("port").hasArg().withDescription("port").create(PORT_OPTION)); options.addOption(OptionBuilder.withArgName("file").hasArg() .withDescription("file containing topics in TREC format").create(QUERIES_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return") .create(NUM_RESULTS_OPTION)); options.addOption(/*from ww w .j a v a2 s . c o m*/ OptionBuilder.withArgName("string").hasArg().withDescription("group id").create(GROUP_OPTION)); options.addOption( OptionBuilder.withArgName("string").hasArg().withDescription("access token").create(TOKEN_OPTION)); options.addOption( OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION)); options.addOption(new Option(VERBOSE_OPTION, "print out complete document")); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(HOST_OPTION) || !cmdline.hasOption(PORT_OPTION) || !cmdline.hasOption(QUERIES_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(RunQueriesThrift.class.getName(), options); System.exit(-1); } String queryFile = cmdline.getOptionValue(QUERIES_OPTION); if (!new File(queryFile).exists()) { System.err.println("Error: " + queryFile + " doesn't exist!"); System.exit(-1); } String runtag = cmdline.hasOption(RUNTAG_OPTION) ? cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG; TrecTopicSet topicsFile = TrecTopicSet.fromFile(new File(queryFile)); int numResults = 1000; try { if (cmdline.hasOption(NUM_RESULTS_OPTION)) { numResults = Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION)); } } catch (NumberFormatException e) { System.err.println("Invalid " + NUM_RESULTS_OPTION + ": " + cmdline.getOptionValue(NUM_RESULTS_OPTION)); System.exit(-1); } String group = cmdline.hasOption(GROUP_OPTION) ? cmdline.getOptionValue(GROUP_OPTION) : null; String token = cmdline.hasOption(TOKEN_OPTION) ? cmdline.getOptionValue(TOKEN_OPTION) : null; boolean verbose = cmdline.hasOption(VERBOSE_OPTION); PrintStream out = new PrintStream(System.out, true, "UTF-8"); TrecSearchThriftClient client = new TrecSearchThriftClient(cmdline.getOptionValue(HOST_OPTION), Integer.parseInt(cmdline.getOptionValue(PORT_OPTION)), group, token); for (cc.twittertools.search.TrecTopic query : topicsFile) { List<TResult> results = client.search(query.getQuery(), query.getQueryTweetTime(), numResults); SortedSet<TResultComparable> sortedResults = new TreeSet<TResultComparable>(); for (TResult result : results) { // Throw away retweets. if (result.getRetweeted_status_id() == 0) { sortedResults.add(new TResultComparable(result)); } } int i = 1; int dupliCount = 0; double rsvPrev = 0; for (TResultComparable sortedResult : sortedResults) { TResult result = sortedResult.getTResult(); double rsvCurr = result.rsv; if (Math.abs(rsvCurr - rsvPrev) > 0.0000001) { dupliCount = 0; } else { dupliCount++; rsvCurr = rsvCurr - 0.000001 / numResults * dupliCount; } out.println(String.format("%s Q0 %d %d %." + (int) (6 + Math.ceil(Math.log10(numResults))) + "f %s", query.getId(), result.id, i, rsvCurr, runtag)); if (verbose) { out.println("# " + result.toString().replaceAll("[\\n\\r]+", " ")); } i++; rsvPrev = result.rsv; } } out.close(); }
From source file:de.tudarmstadt.ukp.csniper.resbuild.EvaluationItemFixer.java
public static void main(String[] args) { connect(HOST, DATABASE, USER, PASSWORD); Map<Integer, String> items = new HashMap<Integer, String>(); Map<Integer, String> failed = new HashMap<Integer, String>(); // fetch coveredTexts of dubious items and clean it PreparedStatement select = null; try {//from ww w . j av a 2 s . c o m StringBuilder selectQuery = new StringBuilder(); selectQuery.append("SELECT * FROM EvaluationItem "); selectQuery.append("WHERE LOCATE(coveredText, ' ') > 0 "); selectQuery.append("OR LOCATE('" + LRB + "', coveredText) > 0 "); selectQuery.append("OR LOCATE('" + RRB + "', coveredText) > 0 "); selectQuery.append("OR LEFT(coveredText, 1) = ' ' "); selectQuery.append("OR RIGHT(coveredText, 1) = ' ' "); select = connection.prepareStatement(selectQuery.toString()); log.info("Running query [" + selectQuery.toString() + "]."); ResultSet rs = select.executeQuery(); while (rs.next()) { int id = rs.getInt("id"); String coveredText = rs.getString("coveredText"); try { // special handling of double whitespace: in this case, re-fetch the text if (coveredText.contains(" ")) { coveredText = retrieveCoveredText(rs.getString("collectionId"), rs.getString("documentId"), rs.getInt("beginOffset"), rs.getInt("endOffset")); } // replace bracket placeholders and trim the text coveredText = StringUtils.replace(coveredText, LRB, "("); coveredText = StringUtils.replace(coveredText, RRB, ")"); coveredText = coveredText.trim(); items.put(id, coveredText); } catch (IllegalArgumentException e) { failed.put(id, e.getMessage()); } } } catch (SQLException e) { log.error("Exception while selecting: " + e.getMessage()); } finally { closeQuietly(select); } // write logs BufferedWriter bwf = null; BufferedWriter bws = null; try { bwf = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(LOG_FAILED)), "UTF-8")); for (Entry<Integer, String> e : failed.entrySet()) { bwf.write(e.getKey() + " - " + e.getValue() + "\n"); } bws = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(new File(LOG_SUCCESSFUL)), "UTF-8")); for (Entry<Integer, String> e : items.entrySet()) { bws.write(e.getKey() + " - " + e.getValue() + "\n"); } } catch (IOException e) { log.error("Got an IOException while writing the log files."); } finally { IOUtils.closeQuietly(bwf); IOUtils.closeQuietly(bws); } log.info("Texts for [" + items.size() + "] items need to be cleaned up."); // update the dubious items with the cleaned coveredText PreparedStatement update = null; try { String updateQuery = "UPDATE EvaluationItem SET coveredText = ? WHERE id = ?"; update = connection.prepareStatement(updateQuery); int i = 0; for (Entry<Integer, String> e : items.entrySet()) { int id = e.getKey(); String coveredText = e.getValue(); // update item in database update.setString(1, coveredText); update.setInt(2, id); update.executeUpdate(); log.debug("Updating " + id + " with [" + coveredText + "]"); // show percentage of updated items i++; int part = (int) Math.ceil((double) items.size() / 100); if (i % part == 0) { log.info(i / part + "% finished (" + i + "/" + items.size() + ")."); } } } catch (SQLException e) { log.error("Exception while updating: " + e.getMessage()); } finally { closeQuietly(update); } closeQuietly(connection); }
From source file:com.bah.applefox.main.Ingest.java
public static void main(String[] args) throws Exception { if (args.length == 1 && args[0].equals("--help")) { System.out.println("Not enough arguments"); System.out.println("Arguments should be in the format <properties file> <command>"); System.out.println("Valid commands:"); System.out.println("\tpr: Calculates Page Rank"); System.out.println("\timageload: Loads Images from URLs"); System.out.println("\tload: Loads Full Text Data"); System.out.println("\tingest: Ingests URLs from given seed"); System.out.println("\tftsample: Creates a Full Text Index Sample HashMap"); System.out.println("\timagesample: Creates an Image Hash and Image Tag Sample HashMap"); }/*from ww w. j a va2 s .c o m*/ if (args.length > 2) { System.out.println("2 Arguments expected, " + args.length + " given."); } if (args.length < 2) { System.out.println("Not enough arguments"); System.out.println("Arguments should be in the format <properties file> <command>"); System.out.println("Valid commands:"); System.out.println("\tpr: Calculates Page Rank"); System.out.println("\timageload: Loads Images from URLs"); System.out.println("\tload: Loads Full Text Data"); System.out.println("\tingest: Ingests URLs from given seed"); System.out.println("\tftsample: Creates a Full Text Index Sample HashMap"); System.out.println("\timagesample: Creates an Image Hash and Image Tag Sample HashMap"); } injector = Guice.createInjector(new IngesterModule()); // The properties object to read from the configuration file Properties properties = new Properties(); try { // Load configuration file from the command line properties.load(new FileInputStream(args[0])); } catch (Exception e) { log.error("ABORT: File not found or could not read from file ->" + e.getMessage()); log.error("Enter the location of the configuration file"); System.exit(1); } // Initialize variables from configuration file // Accumulo Variables INSTANCE_NAME = properties.getProperty("INSTANCE_NAME"); ZK_SERVERS = properties.getProperty("ZK_SERVERS"); USERNAME = properties.getProperty("USERNAME"); PASSWORD = properties.getProperty("PASSWORD"); SPLIT_SIZE = properties.getProperty("SPLIT_SIZE"); NUM_ITERATIONS = Integer.parseInt(properties.getProperty("NUM_ITERATIONS")); NUM_NODES = Integer.parseInt(properties.getProperty("NUM_NODES")); // General Search Variables MAX_NGRAMS = Integer.parseInt(properties.getProperty("MAX_NGRAMS")); GENERAL_STOP = properties.getProperty("GENERAL_STOP"); // Full Text Variables FT_DATA_TABLE = properties.getProperty("FT_DATA_TABLE"); FT_SAMPLE = properties.getProperty("FT_SAMPLE"); FT_CHECKED_TABLE = properties.getProperty("FT_CHECKED_TABLE"); FT_DIVS_FILE = properties.getProperty("FT_DIVS_FILE"); FT_SPLIT_SIZE = properties.getProperty("FT_SPLIT_SIZE"); // Web Crawler Variables URL_TABLE = properties.getProperty("URL_TABLE"); SEED = properties.getProperty("SEED"); USER_AGENT = properties.getProperty("USER_AGENT"); URL_SPLIT_SIZE = properties.getProperty("URL_SPLIT_SIZE"); // Page Rank Variables PR_TABLE_PREFIX = properties.getProperty("PR_TABLE_PREFIX"); PR_URL_MAP_TABLE_PREFIX = properties.getProperty("PR_URL_MAP_TABLE_PREFIX"); PR_OUT_LINKS_COUNT_TABLE = properties.getProperty("PR_OUT_LINKS_COUNT_TABLE"); PR_FILE = properties.getProperty("PR_FILE"); PR_DAMPENING_FACTOR = Double.parseDouble(properties.getProperty("PR_DAMPENING_FACTOR")); PR_ITERATIONS = Integer.parseInt(properties.getProperty("PR_ITERATIONS")); PR_SPLIT_SIZE = properties.getProperty("PR_SPLIT_SIZE"); // Image Variables IMG_HASH_TABLE = properties.getProperty("IMG_HASH_TABLE"); IMG_CHECKED_TABLE = properties.getProperty("IMG_CHECKED_TABLE"); IMG_TAG_TABLE = properties.getProperty("IMG_TAG_TABLE"); IMG_HASH_SAMPLE_TABLE = properties.getProperty("IMG_HASH_SAMPLE_TABLE"); IMG_TAG_SAMPLE_TABLE = properties.getProperty("IMG_TAG_SAMPLE_TABLE"); IMG_SPLIT_SIZE = properties.getProperty("IMG_SPLIT_SIZE"); // Future Use: // Work Directory in HDFS WORK_DIR = properties.getProperty("WORK_DIR"); // Initialize variable from command line RUN = args[1].toLowerCase(); // Set the instance information for AccumuloUtils AccumuloUtils.setInstanceName(INSTANCE_NAME); AccumuloUtils.setInstancePassword(PASSWORD); AccumuloUtils.setUser(USERNAME); AccumuloUtils.setZooserver(ZK_SERVERS); AccumuloUtils.setSplitSize(SPLIT_SIZE); String[] temp = new String[25]; // Accumulo Variables temp[0] = INSTANCE_NAME; temp[1] = ZK_SERVERS; temp[2] = USERNAME; temp[3] = PASSWORD; // Number of Map Tasks temp[4] = Integer.toString((int) Math.ceil(1.75 * NUM_NODES * 2)); // Web Crawler Variables temp[5] = URL_TABLE; temp[6] = USER_AGENT; // Future Use temp[7] = WORK_DIR; // General Search temp[8] = GENERAL_STOP; temp[9] = Integer.toString(MAX_NGRAMS); // Full Text Variables temp[10] = FT_DATA_TABLE; temp[11] = FT_CHECKED_TABLE; // Page Rank Variables temp[12] = PR_URL_MAP_TABLE_PREFIX; temp[13] = PR_TABLE_PREFIX; temp[14] = Double.toString(PR_DAMPENING_FACTOR); temp[15] = PR_OUT_LINKS_COUNT_TABLE; temp[16] = PR_FILE; // Image Variables temp[17] = IMG_HASH_TABLE; temp[18] = IMG_CHECKED_TABLE; temp[19] = IMG_TAG_TABLE; temp[20] = FT_DIVS_FILE; // Table Split Sizes temp[21] = FT_SPLIT_SIZE; temp[22] = IMG_SPLIT_SIZE; temp[23] = URL_SPLIT_SIZE; temp[24] = PR_SPLIT_SIZE; if (RUN.equals("pr")) { // Run PR_ITERATIONS number of iterations for page ranking PageRank.createPageRank(temp, PR_ITERATIONS, URL_SPLIT_SIZE); } else if (RUN.equals("imageload")) { // Load image index AccumuloUtils.setSplitSize(URL_SPLIT_SIZE); ToolRunner.run(new ImageLoader(), temp); } else if (RUN.equals("ingest")) { // Ingest System.out.println("Ingesting"); // Set table split size AccumuloUtils.setSplitSize(URL_SPLIT_SIZE); // Write the seed value to the table BatchWriter w; Value v = new Value(); v.set("0".getBytes()); Mutation m = new Mutation(SEED); m.put("0", "0", v); w = AccumuloUtils.connectBatchWrite(URL_TABLE); w.addMutation(m); for (int i = 0; i < NUM_ITERATIONS; i++) { // Run the ToolRunner for NUM_ITERATIONS iterations ToolRunner.run(CachedConfiguration.getInstance(), injector.getInstance(Ingester.class), temp); } } else if (RUN.equals("load")) { // Parse the URLs and add to the data table AccumuloUtils.setSplitSize(URL_SPLIT_SIZE); BatchWriter w = AccumuloUtils.connectBatchWrite(FT_CHECKED_TABLE); w.close(); AccumuloUtils.setSplitSize(FT_SPLIT_SIZE); w = AccumuloUtils.connectBatchWrite(FT_DATA_TABLE); w.close(); ToolRunner.run(CachedConfiguration.getInstance(), injector.getInstance(Loader.class), temp); } else if (RUN.equals("ftsample")) { // Create a sample table for full text index FTAccumuloSampler ftSampler = new FTAccumuloSampler(FT_SAMPLE, FT_DATA_TABLE, FT_CHECKED_TABLE); ftSampler.createSample(); } else if (RUN.equals("imagesample")) { // Create a sample table for images ImageAccumuloSampler imgHashSampler = new ImageAccumuloSampler(IMG_HASH_SAMPLE_TABLE, IMG_HASH_TABLE, IMG_CHECKED_TABLE); imgHashSampler.createSample(); ImageAccumuloSampler imgTagSampler = new ImageAccumuloSampler(IMG_TAG_SAMPLE_TABLE, IMG_TAG_TABLE, IMG_CHECKED_TABLE); imgTagSampler.createSample(); } else { System.out.println("Invalid argument " + RUN + "."); System.out.println("Valid Arguments:"); System.out.println("\tpr: Calculates Page Rank"); System.out.println("\timageload: Loads Images from URLs"); System.out.println("\tload: Loads Full Text Data"); System.out.println("\tingest: Ingests URLs from given seed"); System.out.println("\tftsample: Creates a Full Text Index Sample HashMap"); System.out.println("\timagesample: Creates an Image Hash and Image Tag Sample HashMap"); } }
From source file:Main.java
public static int randomOrient() { return (int) Math.ceil(Math.random() * 360); }