List of usage examples for java.io.PrintWriter.close()
public void close()
From source file:ch.epfl.lsir.xin.test.MostPopularTest.java
/** * @param args//w w w. j a v a2 s.co m */ public static void main(String[] args) throws Exception { // TODO Auto-generated method stub PrintWriter logger = new PrintWriter(".//results//MostPopular"); PropertiesConfiguration config = new PropertiesConfiguration(); config.setFile(new File(".//conf//MostPopular.properties")); try { config.load(); } catch (ConfigurationException e) { // TODO Auto-generated catch block e.printStackTrace(); } logger.println(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date()) + " Read rating data..."); DataLoaderFile loader = new DataLoaderFile(".//data//MoveLens100k.txt"); loader.readSimple(); DataSetNumeric dataset = loader.getDataset(); System.out.println("Number of ratings: " + dataset.getRatings().size() + " Number of users: " + dataset.getUserIDs().size() + " Number of items: " + dataset.getItemIDs().size()); logger.println("Number of ratings: " + dataset.getRatings().size() + ", Number of users: " + dataset.getUserIDs().size() + ", Number of items: " + dataset.getItemIDs().size()); logger.flush(); TrainTestSplitter splitter = new TrainTestSplitter(dataset); splitter.splitFraction(config.getDouble("TRAIN_FRACTION")); ArrayList<NumericRating> trainRatings = splitter.getTrain(); ArrayList<NumericRating> testRatings = splitter.getTest(); HashMap<String, Integer> userIDIndexMapping = new HashMap<String, Integer>(); HashMap<String, Integer> itemIDIndexMapping = new HashMap<String, Integer>(); //create rating matrix for (int i = 0; i < dataset.getUserIDs().size(); i++) { userIDIndexMapping.put(dataset.getUserIDs().get(i), i); } for (int i = 0; i < dataset.getItemIDs().size(); i++) { itemIDIndexMapping.put(dataset.getItemIDs().get(i), i); } RatingMatrix trainRatingMatrix = new RatingMatrix(dataset.getUserIDs().size(), dataset.getItemIDs().size()); for (int i = 0; i < trainRatings.size(); i++) { trainRatingMatrix.set(userIDIndexMapping.get(trainRatings.get(i).getUserID()), 
itemIDIndexMapping.get(trainRatings.get(i).getItemID()), trainRatings.get(i).getValue()); } RatingMatrix testRatingMatrix = new RatingMatrix(dataset.getUserIDs().size(), dataset.getItemIDs().size()); for (int i = 0; i < testRatings.size(); i++) { //only consider 5-star rating in the test set // if( testRatings.get(i).getValue() < 5 ) // continue; testRatingMatrix.set(userIDIndexMapping.get(testRatings.get(i).getUserID()), itemIDIndexMapping.get(testRatings.get(i).getItemID()), testRatings.get(i).getValue()); } System.out.println("Training: " + trainRatingMatrix.getTotalRatingNumber() + " vs Test: " + testRatingMatrix.getTotalRatingNumber()); logger.println("Initialize a most popular based recommendation model."); MostPopular algo = new MostPopular(trainRatingMatrix); algo.setLogger(logger); algo.build(); algo.saveModel(".//localModels//" + config.getString("NAME")); logger.println("Save the model."); logger.flush(); HashMap<Integer, ArrayList<ResultUnit>> results = new HashMap<Integer, ArrayList<ResultUnit>>(); for (int i = 0; i < testRatingMatrix.getRow(); i++) { ArrayList<ResultUnit> rec = algo.getRecommendationList(i); if (rec == null) continue; int total = testRatingMatrix.getUserRatingNumber(i); if (total == 0)//this user is ignored continue; results.put(i, rec); } RankResultGenerator generator = new RankResultGenerator(results, algo.getTopN(), testRatingMatrix, trainRatingMatrix); System.out.println("Precision@N: " + generator.getPrecisionN()); System.out.println("Recall@N: " + generator.getRecallN()); System.out.println("MAP@N: " + generator.getMAPN()); System.out.println("MRR@N: " + generator.getMRRN()); System.out.println("NDCG@N: " + generator.getNDCGN()); System.out.println("AUC@N: " + generator.getAUC()); logger.println(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date()) + "\n" + "Precision@N: " + generator.getPrecisionN() + "\n" + "Recall@N: " + generator.getRecallN() + "\n" + "MAP@N: " + generator.getMAPN() + "\n" + "MRR@N: " + 
generator.getMRRN() + "\n" + "NDCG@N: " + generator.getNDCGN() + "\n" + "AUC@N: " + generator.getAUC()); logger.flush(); logger.close(); }
From source file:friendsandfollowers.DBFollowersIDs.java
public static void main(String[] args) throws ClassNotFoundException, SQLException, JSONException, FileNotFoundException, UnsupportedEncodingException { // Check arguments that passed in if ((args == null) || (args.length == 0)) { System.err.println("2 Parameters are required plus one optional " + "parameter to launch a Job."); System.err.println("First: String 'OUTPUT: /output/path/'"); System.err.println("Second: (int) Number of ids to fetch. " + "Provide number which increment by 5000 " + "(5000, 10000, 15000 etc) " + "or -1 to fetch all ids."); System.err.println("Third (optional): 'screen_name / user_id_str'"); System.err.println("If 3rd argument not provided then provide" + " Twitter users through database."); System.exit(-1);//from w w w . jav a 2s . c o m } MysqlDB DB = new MysqlDB(); AppOAuth AppOAuths = new AppOAuth(); Misc helpers = new Misc(); String endpoint = "/followers/ids"; String OutputDirPath = null; try { OutputDirPath = StringEscapeUtils.escapeJava(args[0]); } catch (Exception e) { System.err.println("Argument" + args[0] + " must be an String."); System.exit(-1); } int IDS_TO_FETCH_INT = -1; try { IDS_TO_FETCH_INT = Integer.parseInt(args[1]); } catch (NumberFormatException e) { System.err.println("Argument" + args[1] + " must be an integer."); System.exit(-1); } int IDS_TO_FETCH = 0; if (IDS_TO_FETCH_INT > 5000) { float IDS_TO_FETCH_F = (float) IDS_TO_FETCH_INT / 5000; IDS_TO_FETCH = (int) Math.ceil(IDS_TO_FETCH_F); } else if ((IDS_TO_FETCH_INT <= 5000) && (IDS_TO_FETCH_INT > 0)) { IDS_TO_FETCH = 1; } String targetedUser = ""; if (args.length == 3) { try { targetedUser = StringEscapeUtils.escapeJava(args[2]); } catch (Exception e) { System.err.println("Argument" + args[2] + " must be an String."); System.exit(-1); } } try { TwitterFactory tf = AppOAuths.loadOAuthUser(endpoint); Twitter twitter = tf.getInstance(); int RemainingCalls = AppOAuths.RemainingCalls; int RemainingCallsCounter = 0; System.out.println("First Time Remianing Calls: " + 
RemainingCalls); String Screen_name = AppOAuths.screen_name; System.out.println("First Time Loaded OAuth Screen_name: " + Screen_name); IDs ids; System.out.println("Listing followers ids."); // if targetedUser not provided by argument, then look into database. if (StringUtils.isEmpty(targetedUser)) { String selectQuery = "SELECT * FROM `followers_parent` WHERE " + "`targeteduser` != '' AND " + "`nextcursor` != '0' AND " + "`nextcursor` != '2'"; ResultSet results = DB.selectQ(selectQuery); int numRows = DB.numRows(results); if (numRows < 1) { System.err.println("No User in database to get followersIDS"); System.exit(-1); } OUTERMOST: while (results.next()) { int followers_parent_id = results.getInt("id"); targetedUser = results.getString("targeteduser"); long cursor = results.getLong("nextcursor"); System.out.println("Targeted User: " + targetedUser); int idsLoopCounter = 0; int totalIDs = 0; // put idsJSON in a file PrintWriter writer = new PrintWriter(OutputDirPath + "/" + targetedUser, "UTF-8"); // call different functions for screen_name and id_str Boolean chckedNumaric = helpers.isNumeric(targetedUser); do { ids = null; try { if (chckedNumaric) { long LongValueTargetedUser = Long.valueOf(targetedUser).longValue(); ids = twitter.getFollowersIDs(LongValueTargetedUser, cursor); } else { ids = twitter.getFollowersIDs(targetedUser, cursor); } } catch (TwitterException te) { // do not throw if user has protected tweets, // or if they deleted their account if (te.getStatusCode() == HttpResponseCode.UNAUTHORIZED || te.getStatusCode() == HttpResponseCode.NOT_FOUND) { System.out.println(targetedUser + " is protected or account is deleted"); } else { System.out.println("Followers Get Exception: " + te.getMessage()); } // If rate limit reached then switch Auth user RemainingCallsCounter++; if (RemainingCallsCounter >= RemainingCalls) { // load auth user tf = AppOAuths.loadOAuthUser(endpoint); twitter = tf.getInstance(); System.out.println( "New User Loaded OAuth" + " 
Screen_name: " + AppOAuths.screen_name); RemainingCalls = AppOAuths.RemainingCalls; RemainingCallsCounter = 0; System.out.println("New Remianing Calls: " + RemainingCalls); } // update cursor in "followers_parent" String fieldValues = "`nextcursor` = 2"; String where = "id = " + followers_parent_id; DB.Update("`followers_parent`", fieldValues, where); // If error then switch to next user continue OUTERMOST; } if (ids.getIDs().length > 0) { idsLoopCounter++; totalIDs += ids.getIDs().length; System.out.println(idsLoopCounter + ": IDS length: " + ids.getIDs().length); JSONObject responseDetailsJson = new JSONObject(); JSONArray jsonArray = new JSONArray(); for (long id : ids.getIDs()) { jsonArray.put(id); } Object idsJSON = responseDetailsJson.put("ids", jsonArray); writer.println(idsJSON); } // If rate limit reached then switch Auth user RemainingCallsCounter++; if (RemainingCallsCounter >= RemainingCalls) { // load auth user tf = AppOAuths.loadOAuthUser(endpoint); twitter = tf.getInstance(); System.out.println("New User Loaded OAuth " + "Screen_name: " + AppOAuths.screen_name); RemainingCalls = AppOAuths.RemainingCalls; RemainingCallsCounter = 0; System.out.println("New Remianing Calls: " + RemainingCalls); } if (IDS_TO_FETCH_INT != -1) { if (idsLoopCounter == IDS_TO_FETCH) { break; } } } while ((cursor = ids.getNextCursor()) != 0); writer.close(); System.out.println("Total ids dumped of " + targetedUser + " are: " + totalIDs); System.out.println(); // update cursor in "followers_parent" String fieldValues = "`nextcursor` = " + cursor; String where = "id = " + followers_parent_id; DB.Update("`followers_parent`", fieldValues, where); } // loop through every result found in db } else { // Second Argument Set, so we are here. 
System.out.println("screen_name / user_id_str passed by argument"); int idsLoopCounter = 0; int totalIDs = 0; // put idsJSON in a file PrintWriter writer = new PrintWriter( OutputDirPath + "/" + targetedUser + "_ids_" + helpers.getUnixTimeStamp(), "UTF-8"); // call different functions for screen_name and id_str Boolean chckedNumaric = helpers.isNumeric(targetedUser); long cursor = -1; do { ids = null; try { if (chckedNumaric) { long LongValueTargetedUser = Long.valueOf(targetedUser).longValue(); ids = twitter.getFollowersIDs(LongValueTargetedUser, cursor); } else { ids = twitter.getFollowersIDs(targetedUser, cursor); } } catch (TwitterException te) { // do not throw if user has protected tweets, or if they deleted their account if (te.getStatusCode() == HttpResponseCode.UNAUTHORIZED || te.getStatusCode() == HttpResponseCode.NOT_FOUND) { System.out.println(targetedUser + " is protected or account is deleted"); } else { System.out.println("Followers Get Exception: " + te.getMessage()); } System.exit(-1); } if (ids.getIDs().length > 0) { idsLoopCounter++; totalIDs += ids.getIDs().length; System.out.println(idsLoopCounter + ": IDS length: " + ids.getIDs().length); JSONObject responseDetailsJson = new JSONObject(); JSONArray jsonArray = new JSONArray(); for (long id : ids.getIDs()) { jsonArray.put(id); } Object idsJSON = responseDetailsJson.put("ids", jsonArray); writer.println(idsJSON); } // If rate limit reach then switch Auth user RemainingCallsCounter++; if (RemainingCallsCounter >= RemainingCalls) { // load auth user tf = AppOAuths.loadOAuthUser(endpoint); twitter = tf.getInstance(); System.out.println("New User Loaded OAuth Screen_name: " + AppOAuths.screen_name); RemainingCalls = AppOAuths.RemainingCalls; RemainingCallsCounter = 0; System.out.println("New Remianing Calls: " + RemainingCalls); } if (IDS_TO_FETCH_INT != -1) { if (idsLoopCounter == IDS_TO_FETCH) { break; } } } while ((cursor = ids.getNextCursor()) != 0); writer.close(); System.out.println("Total ids 
dumped of " + targetedUser + " are: " + totalIDs); System.out.println(); } } catch (TwitterException te) { // te.printStackTrace(); System.err.println("Failed to get followers' ids: " + te.getMessage()); System.exit(-1); } System.out.println("!!!! DONE !!!!"); }
From source file:friendsandfollowers.DBFriendsIDs.java
public static void main(String[] args) throws ClassNotFoundException, SQLException, JSONException, FileNotFoundException, UnsupportedEncodingException { // Check arguments that passed in if ((args == null) || (args.length == 0)) { System.err.println("2 Parameters are required plus one optional " + "parameter to launch a Job."); System.err.println("First: String 'OUTPUT: /output/path/'"); System.err.println("Second: (int) Number of ids to fetch. " + "Provide number which increment by 5000 " + "(5000, 10000, 15000 etc) " + "or -1 to fetch all ids."); System.err.println("Third (optional): 'screen_name / user_id_str'"); System.err.println("If 3rd argument not provided then provide" + " Twitter users through database."); System.exit(-1);// w ww . ja va 2 s . c o m } MysqlDB DB = new MysqlDB(); AppOAuth AppOAuths = new AppOAuth(); Misc helpers = new Misc(); String endpoint = "/friends/ids"; String OutputDirPath = null; try { OutputDirPath = StringEscapeUtils.escapeJava(args[0]); } catch (Exception e) { System.err.println("Argument" + args[0] + " must be an String."); System.exit(-1); } int IDS_TO_FETCH_INT = -1; try { IDS_TO_FETCH_INT = Integer.parseInt(args[1]); } catch (NumberFormatException e) { System.err.println("Argument" + args[1] + " must be an integer."); System.exit(-1); } int IDS_TO_FETCH = 0; if (IDS_TO_FETCH_INT > 5000) { float IDS_TO_FETCH_F = (float) IDS_TO_FETCH_INT / 5000; IDS_TO_FETCH = (int) Math.ceil(IDS_TO_FETCH_F); } else if ((IDS_TO_FETCH_INT <= 5000) && (IDS_TO_FETCH_INT > 0)) { IDS_TO_FETCH = 1; } String targetedUser = ""; if (args.length == 3) { try { targetedUser = StringEscapeUtils.escapeJava(args[2]); } catch (Exception e) { System.err.println("Argument" + args[2] + " must be an String."); System.exit(-1); } } try { TwitterFactory tf = AppOAuths.loadOAuthUser(endpoint); Twitter twitter = tf.getInstance(); int RemainingCalls = AppOAuths.RemainingCalls; int RemainingCallsCounter = 0; System.out.println("First Time Remianing Calls: " + 
RemainingCalls); String Screen_name = AppOAuths.screen_name; System.out.println("First Time Loaded OAuth Screen_name: " + Screen_name); IDs ids; System.out.println("Listing friends ids."); // if targetedUser not provided by argument, then look into database. if (StringUtils.isEmpty(targetedUser)) { String selectQuery = "SELECT * FROM `followings_parent` WHERE " + "`targeteduser` != '' AND " + "`nextcursor` != '0' AND " + "`nextcursor` != '2'"; ResultSet results = DB.selectQ(selectQuery); int numRows = DB.numRows(results); if (numRows < 1) { System.err.println("No User in database to get friendsIDS"); System.exit(-1); } OUTERMOST: while (results.next()) { int following_parent_id = results.getInt("id"); targetedUser = results.getString("targeteduser"); long cursor = results.getLong("nextcursor"); System.out.println("Targeted User: " + targetedUser); int idsLoopCounter = 0; int totalIDs = 0; // put idsJSON in a file PrintWriter writer = new PrintWriter(OutputDirPath + "/" + targetedUser, "UTF-8"); // call different functions for screen_name and id_str Boolean chckedNumaric = helpers.isNumeric(targetedUser); do { ids = null; try { if (chckedNumaric) { long LongValueTargetedUser = Long.valueOf(targetedUser).longValue(); ids = twitter.getFriendsIDs(LongValueTargetedUser, cursor); } else { ids = twitter.getFriendsIDs(targetedUser, cursor); } } catch (TwitterException te) { // do not throw if user has protected tweets, // or if they deleted their account if (te.getStatusCode() == HttpResponseCode.UNAUTHORIZED || te.getStatusCode() == HttpResponseCode.NOT_FOUND) { System.out.println(targetedUser + " is protected or account is deleted"); } else { System.out.println("Friends Get Exception: " + te.getMessage()); } // If rate limit reached then switch Auth user RemainingCallsCounter++; if (RemainingCallsCounter >= RemainingCalls) { // load auth user tf = AppOAuths.loadOAuthUser(endpoint); twitter = tf.getInstance(); System.out.println( "New User Loaded OAuth" + " Screen_name: " 
+ AppOAuths.screen_name); RemainingCalls = AppOAuths.RemainingCalls; RemainingCallsCounter = 0; System.out.println("New Remianing Calls: " + RemainingCalls); } // update cursor in "followings_parent" String fieldValues = "`nextcursor` = 2"; String where = "id = " + following_parent_id; DB.Update("`followings_parent`", fieldValues, where); // If error then switch to next user continue OUTERMOST; } if (ids.getIDs().length > 0) { idsLoopCounter++; totalIDs += ids.getIDs().length; System.out.println(idsLoopCounter + ": IDS length: " + ids.getIDs().length); JSONObject responseDetailsJson = new JSONObject(); JSONArray jsonArray = new JSONArray(); for (long id : ids.getIDs()) { jsonArray.put(id); } Object idsJSON = responseDetailsJson.put("ids", jsonArray); writer.println(idsJSON); } // If rate limit reached then switch Auth user. RemainingCallsCounter++; if (RemainingCallsCounter >= RemainingCalls) { // load auth user tf = AppOAuths.loadOAuthUser(endpoint); twitter = tf.getInstance(); System.out.println("New User Loaded OAuth " + "Screen_name: " + AppOAuths.screen_name); RemainingCalls = AppOAuths.RemainingCalls; RemainingCallsCounter = 0; System.out.println("New Remianing Calls: " + RemainingCalls); } if (IDS_TO_FETCH_INT != -1) { if (idsLoopCounter == IDS_TO_FETCH) { break; } } } while ((cursor = ids.getNextCursor()) != 0); writer.close(); System.out.println("Total ids dumped of " + targetedUser + " are: " + totalIDs); System.out.println(); // update cursor in "followings_parent" String fieldValues = "`nextcursor` = " + cursor; String where = "id = " + following_parent_id; DB.Update("`followings_parent`", fieldValues, where); } // loop through every result found in db } else { // Second Argument Sets, so we are here. 
System.out.println("screen_name / user_id_str " + "passed by argument"); int idsLoopCounter = 0; int totalIDs = 0; // put idsJSON in a file PrintWriter writer = new PrintWriter( OutputDirPath + "/" + targetedUser + "_ids_" + helpers.getUnixTimeStamp(), "UTF-8"); // call different functions for screen_name and id_str Boolean chckedNumaric = helpers.isNumeric(targetedUser); long cursor = -1; do { ids = null; try { if (chckedNumaric) { long LongValueTargetedUser = Long.valueOf(targetedUser).longValue(); ids = twitter.getFriendsIDs(LongValueTargetedUser, cursor); } else { ids = twitter.getFriendsIDs(targetedUser, cursor); } } catch (TwitterException te) { // do not throw if user has protected tweets, // or if they deleted their account if (te.getStatusCode() == HttpResponseCode.UNAUTHORIZED || te.getStatusCode() == HttpResponseCode.NOT_FOUND) { System.out.println(targetedUser + " is protected or account is deleted"); } else { System.out.println("Friends Get Exception: " + te.getMessage()); } System.exit(-1); } if (ids.getIDs().length > 0) { idsLoopCounter++; totalIDs += ids.getIDs().length; System.out.println(idsLoopCounter + ": IDS length: " + ids.getIDs().length); JSONObject responseDetailsJson = new JSONObject(); JSONArray jsonArray = new JSONArray(); for (long id : ids.getIDs()) { jsonArray.put(id); } Object idsJSON = responseDetailsJson.put("ids", jsonArray); writer.println(idsJSON); } // If rate limit reach then switch Auth user RemainingCallsCounter++; if (RemainingCallsCounter >= RemainingCalls) { // load auth user tf = AppOAuths.loadOAuthUser(endpoint); twitter = tf.getInstance(); System.out.println("New User Loaded OAuth Screen_name: " + AppOAuths.screen_name); RemainingCalls = AppOAuths.RemainingCalls; RemainingCallsCounter = 0; System.out.println("New Remianing Calls: " + RemainingCalls); } if (IDS_TO_FETCH_INT != -1) { if (idsLoopCounter == IDS_TO_FETCH) { break; } } } while ((cursor = ids.getNextCursor()) != 0); writer.close(); System.out.println("Total 
ids dumped of " + targetedUser + " are: " + totalIDs); System.out.println(); } } catch (TwitterException te) { // te.printStackTrace(); System.err.println("Failed to get friends' ids: " + te.getMessage()); System.exit(-1); } System.out.println("!!!! DONE !!!!"); }
From source file:friendsandfollowers.FilesThreaderFriendsIDsParser.java
public static void main(String[] args) throws ClassNotFoundException, SQLException, JSONException, FileNotFoundException, UnsupportedEncodingException { // Check how many arguments were passed in if ((args == null) || (args.length < 5)) { System.err.println("5 Parameters are required plus one optional " + "parameter to launch a Job."); System.err.println("First: String 'INPUT: DB or /input/path/'"); System.err.println("Second: String 'OUTPUT: /output/path/'"); System.err.println("Third: (int) Total Number Of Jobs"); System.err.println("Fourth: (int) This Job Number"); System.err.println("Fifth: (int) Number of seconds to pause"); System.err.println("Sixth: (int) Number of ids to fetch" + "Provide number which increment by 5000 " + "(5000, 10000, 15000 etc) " + "or -1 to fetch all ids."); System.err.println("Example: fileToRun /input/path/ " + "/output/path/ 10 1 3 75000"); System.exit(-1);//from ww w . j a va2 s . com } // TODO documentation for command line AppOAuth AppOAuths = new AppOAuth(); Misc helpers = new Misc(); String endpoint = "/friends/ids"; String inputPath = null; try { inputPath = StringEscapeUtils.escapeJava(args[0]); } catch (Exception e) { System.err.println("Argument " + args[0] + " must be an String."); System.exit(-1); } String outputPath = null; try { outputPath = StringEscapeUtils.escapeJava(args[1]); } catch (Exception e) { System.err.println("Argument " + args[1] + " must be an String."); System.exit(-1); } int TOTAL_JOBS = 0; try { TOTAL_JOBS = Integer.parseInt(args[2]); } catch (NumberFormatException e) { System.err.println("Argument " + args[2] + " must be an integer."); System.exit(1); } int JOB_NO = 0; try { JOB_NO = Integer.parseInt(args[3]); } catch (NumberFormatException e) { System.err.println("Argument " + args[3] + " must be an integer."); System.exit(1); } int secondsToPause = 0; try { secondsToPause = Integer.parseInt(args[4]); } catch (NumberFormatException e) { System.err.println("Argument" + args[4] + " must be an 
integer."); System.exit(-1); } int IDS_TO_FETCH_INT = -1; if (args.length == 6) { try { IDS_TO_FETCH_INT = Integer.parseInt(args[5]); } catch (NumberFormatException e) { System.err.println("Argument" + args[5] + " must be an integer."); System.exit(-1); } } int IDS_TO_FETCH = 0; if (IDS_TO_FETCH_INT > 5000) { float IDS_TO_FETCH_F = (float) IDS_TO_FETCH_INT / 5000; IDS_TO_FETCH = (int) Math.ceil(IDS_TO_FETCH_F); } else if ((IDS_TO_FETCH_INT <= 5000) && (IDS_TO_FETCH_INT > 0)) { IDS_TO_FETCH = 1; } secondsToPause = (TOTAL_JOBS * secondsToPause) - (JOB_NO * secondsToPause); System.out.println("secondsToPause: " + secondsToPause); helpers.pause(secondsToPause); try { int TotalWorkLoad = 0; ArrayList<String> allFiles = null; try { final File folder = new File(inputPath); allFiles = helpers.listFilesForSingleFolder(folder); TotalWorkLoad = allFiles.size(); } catch (Exception e) { System.err.println("Input folder is not exists: " + e.getMessage()); System.exit(-1); } System.out.println("Total Workload is: " + TotalWorkLoad); if (TotalWorkLoad < 1) { System.err.println("No screen names file exists in: " + inputPath); System.exit(-1); } if (TOTAL_JOBS > TotalWorkLoad) { System.err.println("Number of jobs are more than total work" + " load. 
Please reduce 'Number of jobs' to launch."); System.exit(-1); } float TotalWorkLoadf = TotalWorkLoad; float TOTAL_JOBSf = TOTAL_JOBS; float res = (TotalWorkLoadf / TOTAL_JOBSf); int chunkSize = (int) Math.ceil(res); int offSet = JOB_NO * chunkSize; int chunkSizeToGet = (JOB_NO + 1) * chunkSize; System.out.println("My Share is " + chunkSize); System.out.println(); // Load OAuh User TwitterFactory tf = AppOAuths.loadOAuthUser(endpoint, TOTAL_JOBS, JOB_NO); Twitter twitter = tf.getInstance(); int RemainingCalls = AppOAuths.RemainingCalls; int RemainingCallsCounter = 0; System.out.println("First Time OAuth Remianing Calls: " + RemainingCalls); String Screen_name = AppOAuths.screen_name; System.out.println("First Time Loaded OAuth Screen_name: " + Screen_name); System.out.println(); IDs ids; System.out.println("Going to get friends ids."); // to write output in a file System.out.flush(); if (JOB_NO + 1 == TOTAL_JOBS) { chunkSizeToGet = TotalWorkLoad; } List<String> myFilesShare = allFiles.subList(offSet, chunkSizeToGet); for (String myFile : myFilesShare) { System.out.println("Going to parse file: " + myFile); try (BufferedReader br = new BufferedReader(new FileReader(inputPath + "/" + myFile))) { String line; OUTERMOST: while ((line = br.readLine()) != null) { // process the line. 
System.out.println("Going to get friends ids of Screen-name / user_id: " + line); System.out.println(); String targetedUser = line.trim(); // tmp long cursor = -1; int idsLoopCounter = 0; int totalIDs = 0; PrintWriter writer = new PrintWriter(outputPath + "/" + targetedUser, "UTF-8"); // call different functions for screen_name and id_str Boolean chckedNumaric = helpers.isNumeric(targetedUser); do { ids = null; try { if (chckedNumaric) { long LongValueTargetedUser = Long.valueOf(targetedUser).longValue(); ids = twitter.getFriendsIDs(LongValueTargetedUser, cursor); } else { ids = twitter.getFriendsIDs(targetedUser, cursor); } } catch (TwitterException te) { // do not throw if user has protected tweets, or // if they deleted their account if (te.getStatusCode() == HttpResponseCode.UNAUTHORIZED || te.getStatusCode() == HttpResponseCode.NOT_FOUND) { System.out.println(targetedUser + " is protected or account is deleted"); } else { System.out.println("Friends Get Exception: " + te.getMessage()); } // If rate limit reached then switch Auth user RemainingCallsCounter++; if (RemainingCallsCounter >= RemainingCalls) { // load auth user tf = AppOAuths.loadOAuthUser(endpoint, TOTAL_JOBS, JOB_NO); twitter = tf.getInstance(); System.out.println( "New Loaded OAuth User " + " Screen_name: " + AppOAuths.screen_name); RemainingCalls = AppOAuths.RemainingCalls; RemainingCallsCounter = 0; System.out.println("New OAuth Remianing Calls: " + RemainingCalls); } // Remove file if ids not found if (totalIDs == 0) { System.out.println("No ids fetched so removing " + "file " + targetedUser); File fileToDelete = new File(outputPath + "/" + targetedUser); fileToDelete.delete(); } System.out.println(); // If error then switch to next user continue OUTERMOST; } if (ids.getIDs().length > 0) { idsLoopCounter++; totalIDs += ids.getIDs().length; System.out.println(idsLoopCounter + ": IDS length: " + ids.getIDs().length); JSONObject responseDetailsJson = new JSONObject(); JSONArray jsonArray = new 
JSONArray(); for (long id : ids.getIDs()) { jsonArray.put(id); } Object idsJSON = responseDetailsJson.put("ids", jsonArray); writer.println(idsJSON); } // If rate limit reached then switch Auth user RemainingCallsCounter++; if (RemainingCallsCounter >= RemainingCalls) { // load auth user tf = AppOAuths.loadOAuthUser(endpoint, TOTAL_JOBS, JOB_NO); twitter = tf.getInstance(); System.out.println("New Loaded OAuth User Screen_name: " + AppOAuths.screen_name); RemainingCalls = AppOAuths.RemainingCalls; RemainingCallsCounter = 0; System.out.println("New OAuth Remianing Calls: " + RemainingCalls); } if (IDS_TO_FETCH_INT != -1) { if (idsLoopCounter == IDS_TO_FETCH) { break; } } } while ((cursor = ids.getNextCursor()) != 0); writer.close(); System.out.println("Total ids dumped of " + targetedUser + " are: " + totalIDs); // Remove file if ids not found if (totalIDs == 0) { System.out.println("No ids fetched so removing " + "file " + targetedUser); File fileToDelete = new File(outputPath + "/" + targetedUser); fileToDelete.delete(); } System.out.println(); } // while get records from single file } // read my single file catch (IOException e) { System.err.println("Failed to read lines from " + myFile); } // to write output in a file System.out.flush(); } // all my files share } catch (TwitterException te) { // te.printStackTrace(); System.err.println("Failed to get friends' ids: " + te.getMessage()); System.exit(-1); } System.out.println("!!!! DONE !!!!"); // Close System.out for this thread which will // flush and close this thread. System.out.close(); }
From source file:friendsandfollowers.FilesThreaderFollowersIDsParser.java
public static void main(String[] args) throws ClassNotFoundException, SQLException, JSONException, FileNotFoundException, UnsupportedEncodingException { // Check how many arguments were passed in if ((args == null) || (args.length < 5)) { System.err.println("5 Parameters are required plus one optional " + "parameter to launch a Job."); System.err.println("First: String 'INPUT: DB or /input/path/'"); System.err.println("Second: String 'OUTPUT: /output/path/'"); System.err.println("Third: (int) Total Number Of Jobs"); System.err.println("Fourth: (int) This Job Number"); System.err.println("Fifth: (int) Number of seconds to pause"); System.err.println("Sixth: (int) Number of ids to fetch" + "Provide number which increment by 5000 " + "(5000, 10000, 15000 etc) " + "or -1 to fetch all ids."); System.err.println("Example: fileToRun /input/path/ " + "/output/path/ 10 1 3 75000"); System.exit(-1);// w w w . java 2 s . c o m } // TODO documentation for command line AppOAuth AppOAuths = new AppOAuth(); Misc helpers = new Misc(); String endpoint = "/followers/ids"; String inputPath = null; try { inputPath = StringEscapeUtils.escapeJava(args[0]); } catch (Exception e) { System.err.println("Argument " + args[0] + " must be an String."); System.exit(-1); } String outputPath = null; try { outputPath = StringEscapeUtils.escapeJava(args[1]); } catch (Exception e) { System.err.println("Argument " + args[1] + " must be an String."); System.exit(-1); } int TOTAL_JOBS = 0; try { TOTAL_JOBS = Integer.parseInt(args[2]); } catch (NumberFormatException e) { System.err.println("Argument " + args[2] + " must be an integer."); System.exit(1); } int JOB_NO = 0; try { JOB_NO = Integer.parseInt(args[3]); } catch (NumberFormatException e) { System.err.println("Argument " + args[3] + " must be an integer."); System.exit(1); } int secondsToPause = 0; try { secondsToPause = Integer.parseInt(args[4]); } catch (NumberFormatException e) { System.err.println("Argument" + args[4] + " must be an 
integer."); System.exit(-1); } int IDS_TO_FETCH_INT = -1; if (args.length == 6) { try { IDS_TO_FETCH_INT = Integer.parseInt(args[5]); } catch (NumberFormatException e) { System.err.println("Argument" + args[5] + " must be an integer."); System.exit(-1); } } int IDS_TO_FETCH = 0; if (IDS_TO_FETCH_INT > 5000) { float IDS_TO_FETCH_F = (float) IDS_TO_FETCH_INT / 5000; IDS_TO_FETCH = (int) Math.ceil(IDS_TO_FETCH_F); } else if ((IDS_TO_FETCH_INT <= 5000) && (IDS_TO_FETCH_INT > 0)) { IDS_TO_FETCH = 1; } secondsToPause = (TOTAL_JOBS * secondsToPause) - (JOB_NO * secondsToPause); System.out.println("secondsToPause: " + secondsToPause); helpers.pause(secondsToPause); try { int TotalWorkLoad = 0; ArrayList<String> allFiles = null; try { final File folder = new File(inputPath); allFiles = helpers.listFilesForSingleFolder(folder); TotalWorkLoad = allFiles.size(); } catch (Exception e) { System.err.println("Input folder is not exists: " + e.getMessage()); System.exit(-1); } System.out.println("Total Workload is: " + TotalWorkLoad); if (TotalWorkLoad < 1) { System.err.println("No screen names file exists in: " + inputPath); System.exit(-1); } if (TOTAL_JOBS > TotalWorkLoad) { System.err.println("Number of jobs are more than total work" + " load. 
Please reduce 'Number of jobs' to launch."); System.exit(-1); } float TotalWorkLoadf = TotalWorkLoad; float TOTAL_JOBSf = TOTAL_JOBS; float res = (TotalWorkLoadf / TOTAL_JOBSf); int chunkSize = (int) Math.ceil(res); int offSet = JOB_NO * chunkSize; int chunkSizeToGet = (JOB_NO + 1) * chunkSize; System.out.println("My Share is " + chunkSize); System.out.println(); // Load OAuh User TwitterFactory tf = AppOAuths.loadOAuthUser(endpoint, TOTAL_JOBS, JOB_NO); Twitter twitter = tf.getInstance(); int RemainingCalls = AppOAuths.RemainingCalls; int RemainingCallsCounter = 0; System.out.println("First Time OAuth Remianing Calls: " + RemainingCalls); String Screen_name = AppOAuths.screen_name; System.out.println("First Time Loaded OAuth Screen_name: " + Screen_name); System.out.println(); IDs ids; System.out.println("Going to get followers ids."); // to write output in a file System.out.flush(); if (JOB_NO + 1 == TOTAL_JOBS) { chunkSizeToGet = TotalWorkLoad; } List<String> myFilesShare = allFiles.subList(offSet, chunkSizeToGet); for (String myFile : myFilesShare) { System.out.println("Going to parse file: " + myFile); try (BufferedReader br = new BufferedReader(new FileReader(inputPath + "/" + myFile))) { String line; OUTERMOST: while ((line = br.readLine()) != null) { // process the line. 
System.out.println("Going to get followers ids of Screen-name / user_id: " + line); System.out.println(); String targetedUser = line.trim(); // tmp long cursor = -1; int idsLoopCounter = 0; int totalIDs = 0; PrintWriter writer = new PrintWriter(outputPath + "/" + targetedUser, "UTF-8"); // call different functions for screen_name and id_str Boolean chckedNumaric = helpers.isNumeric(targetedUser); do { ids = null; try { if (chckedNumaric) { long LongValueTargetedUser = Long.valueOf(targetedUser).longValue(); ids = twitter.getFollowersIDs(LongValueTargetedUser, cursor); } else { ids = twitter.getFollowersIDs(targetedUser, cursor); } } catch (TwitterException te) { // do not throw if user has protected tweets, or // if they deleted their account if (te.getStatusCode() == HttpResponseCode.UNAUTHORIZED || te.getStatusCode() == HttpResponseCode.NOT_FOUND) { System.out.println(targetedUser + " is protected or account is deleted"); } else { System.out.println("Followers Get Exception: " + te.getMessage()); } // If rate limit reached then switch Auth user RemainingCallsCounter++; if (RemainingCallsCounter >= RemainingCalls) { // load auth user tf = AppOAuths.loadOAuthUser(endpoint, TOTAL_JOBS, JOB_NO); twitter = tf.getInstance(); System.out.println( "New Loaded OAuth User " + " Screen_name: " + AppOAuths.screen_name); RemainingCalls = AppOAuths.RemainingCalls; RemainingCallsCounter = 0; System.out.println("New OAuth Remianing Calls: " + RemainingCalls); } // Remove file if ids not found if (totalIDs == 0) { System.out.println("No ids fetched so removing " + "file " + targetedUser); File fileToDelete = new File(outputPath + "/" + targetedUser); fileToDelete.delete(); } System.out.println(); // If error then switch to next user continue OUTERMOST; } if (ids.getIDs().length > 0) { idsLoopCounter++; totalIDs += ids.getIDs().length; System.out.println(idsLoopCounter + ": IDS length: " + ids.getIDs().length); JSONObject responseDetailsJson = new JSONObject(); JSONArray jsonArray 
= new JSONArray(); for (long id : ids.getIDs()) { jsonArray.put(id); } Object idsJSON = responseDetailsJson.put("ids", jsonArray); writer.println(idsJSON); } // If rate limit reached then switch Auth user RemainingCallsCounter++; if (RemainingCallsCounter >= RemainingCalls) { // load auth user tf = AppOAuths.loadOAuthUser(endpoint, TOTAL_JOBS, JOB_NO); twitter = tf.getInstance(); System.out.println("New Loaded OAuth User Screen_name: " + AppOAuths.screen_name); RemainingCalls = AppOAuths.RemainingCalls; RemainingCallsCounter = 0; System.out.println("New OAuth Remianing Calls: " + RemainingCalls); } if (IDS_TO_FETCH_INT != -1) { if (idsLoopCounter == IDS_TO_FETCH) { break; } } } while ((cursor = ids.getNextCursor()) != 0); writer.close(); System.out.println("Total ids dumped of " + targetedUser + " are: " + totalIDs); // Remove file if ids not found if (totalIDs == 0) { System.out.println("No ids fetched so removing " + "file " + targetedUser); File fileToDelete = new File(outputPath + "/" + targetedUser); fileToDelete.delete(); } System.out.println(); } // while get records from single file } // read my single file catch (IOException e) { System.err.println("Failed to read lines from " + myFile); } // to write output in a file System.out.flush(); } // all my files share } catch (TwitterException te) { // te.printStackTrace(); System.err.println("Failed to get followers' ids: " + te.getMessage()); System.exit(-1); } System.out.println("!!!! DONE !!!!"); // Close System.out for this thread which will // flush and close this thread. System.out.close(); }
From source file:edu.harvard.med.iccbl.screensaver.soaputils.PubchemChembankQueryUtility.java
@SuppressWarnings("static-access") public static void main(String[] args) throws IOException, InterruptedException { final PubchemChembankQueryUtility app = new PubchemChembankQueryUtility(args); String[] option = LIBRARY_NAME; app.addCommandLineOption(//from w w w . j a v a 2s . c om OptionBuilder.hasArg().withArgName(option[ARG_INDEX]).withDescription(option[DESCRIPTION_INDEX]) .withLongOpt(option[LONG_OPTION_INDEX]).create(option[SHORT_OPTION_INDEX])); option = QUERY_ALL_LIBRARIES; app.addCommandLineOption( OptionBuilder.withArgName(option[ARG_INDEX]).withDescription(option[DESCRIPTION_INDEX]) .withLongOpt(option[LONG_OPTION_INDEX]).create(option[SHORT_OPTION_INDEX])); option = OUTPUT_FILE; app.addCommandLineOption( OptionBuilder.hasArg().withArgName(option[ARG_INDEX]).withDescription(option[DESCRIPTION_INDEX]) .withLongOpt(option[LONG_OPTION_INDEX]).create(option[SHORT_OPTION_INDEX])); option = TRY_LIMIT; app.addCommandLineOption( OptionBuilder.hasArg().withArgName(option[ARG_INDEX]).withDescription(option[DESCRIPTION_INDEX]) .withLongOpt(option[LONG_OPTION_INDEX]).create(option[SHORT_OPTION_INDEX])); option = INTERVAL_BETWEEN_TRIES; app.addCommandLineOption( OptionBuilder.hasArg().withArgName(option[ARG_INDEX]).withDescription(option[DESCRIPTION_INDEX]) .withLongOpt(option[LONG_OPTION_INDEX]).create(option[SHORT_OPTION_INDEX])); option = QUERY_PUBCHEM; app.addCommandLineOption( OptionBuilder.withArgName(option[ARG_INDEX]).withDescription(option[DESCRIPTION_INDEX]) .withLongOpt(option[LONG_OPTION_INDEX]).create(option[SHORT_OPTION_INDEX])); option = QUERY_CHEMBANK; app.addCommandLineOption( OptionBuilder.withArgName(option[ARG_INDEX]).withDescription(option[DESCRIPTION_INDEX]) .withLongOpt(option[LONG_OPTION_INDEX]).create(option[SHORT_OPTION_INDEX])); try { if (!app.processOptions(/* acceptDatabaseOptions= */true, /* showHelpOnError= */true)) { return; } final boolean queryPubchem = app.isCommandLineFlagSet(QUERY_PUBCHEM[SHORT_OPTION_INDEX]); final boolean 
queryChembank = app.isCommandLineFlagSet(QUERY_CHEMBANK[SHORT_OPTION_INDEX]); if (!(queryPubchem || queryChembank)) { log.error("Must specify either " + QUERY_PUBCHEM[LONG_OPTION_INDEX] + " or " + QUERY_CHEMBANK[LONG_OPTION_INDEX]); app.showHelp(); return; } if (!app.isCommandLineFlagSet(LIBRARY_NAME[SHORT_OPTION_INDEX]) && !app.isCommandLineFlagSet(QUERY_ALL_LIBRARIES[SHORT_OPTION_INDEX])) { log.error("Must specify either " + LIBRARY_NAME[LONG_OPTION_INDEX] + " or " + QUERY_ALL_LIBRARIES[LONG_OPTION_INDEX]); app.showHelp(); return; } if (app.isCommandLineFlagSet(LIBRARY_NAME[SHORT_OPTION_INDEX]) && app.isCommandLineFlagSet(QUERY_ALL_LIBRARIES[SHORT_OPTION_INDEX])) { log.error("Must specify either " + LIBRARY_NAME[LONG_OPTION_INDEX] + " or " + QUERY_ALL_LIBRARIES[LONG_OPTION_INDEX]); app.showHelp(); return; } if (app.isCommandLineFlagSet(QUERY_ALL_LIBRARIES[SHORT_OPTION_INDEX]) && app.isCommandLineFlagSet(OUTPUT_FILE[SHORT_OPTION_INDEX])) { log.error("option \"" + OUTPUT_FILE[LONG_OPTION_INDEX] + "\" not allowed with \"" + QUERY_ALL_LIBRARIES[LONG_OPTION_INDEX] + "\" option."); app.showHelp(); return; } // if(app.isCommandLineFlagSet(LIBRARY_NAME[SHORT_OPTION_INDEX]) // && !app.isCommandLineFlagSet(OUTPUT_FILE[SHORT_OPTION_INDEX])) { // log.error("option \"" + OUTPUT_FILE[LONG_OPTION_INDEX] + "\" must be specified with \"" + LIBRARY_NAME[LONG_OPTION_INDEX] + "\" option."); // app.showHelp(); // return; // } final GenericEntityDAO dao = (GenericEntityDAO) app.getSpringBean("genericEntityDao"); dao.doInTransaction(new DAOTransaction() { public void runTransaction() { PrintWriter writer = null; PrintWriter errorWriter = null; try { int intervalMs = PugSoapUtil.INTERVAL_BETWEEN_TRIES_MS; if (app.isCommandLineFlagSet(INTERVAL_BETWEEN_TRIES[SHORT_OPTION_INDEX])) { intervalMs = app.getCommandLineOptionValue(INTERVAL_BETWEEN_TRIES[SHORT_OPTION_INDEX], Integer.class); } int numberOfTries = PugSoapUtil.TRY_LIMIT; if (app.isCommandLineFlagSet(TRY_LIMIT[SHORT_OPTION_INDEX])) { 
numberOfTries = app.getCommandLineOptionValue(TRY_LIMIT[SHORT_OPTION_INDEX], Integer.class); } List<Library> libraries = Lists.newArrayList(); if (app.isCommandLineFlagSet(LIBRARY_NAME[SHORT_OPTION_INDEX])) { String temp = app.getCommandLineOptionValue(LIBRARY_NAME[SHORT_OPTION_INDEX]); for (String libraryName : temp.split(",")) { Library library = dao.findEntityByProperty(Library.class, "shortName", libraryName.trim()); if (library == null) { throw new IllegalArgumentException( "no library with short name: " + libraryName); } libraries.add(library); } // if there is only one library to query, then set these values from the command line option if (libraries.size() == 1) { String outputFilename = app .getCommandLineOptionValue(OUTPUT_FILE[SHORT_OPTION_INDEX]); writer = app.getOutputFile(outputFilename); errorWriter = app.getOutputFile(outputFilename + ".errors"); } } else if (app.isCommandLineFlagSet(QUERY_ALL_LIBRARIES[SHORT_OPTION_INDEX])) { libraries = dao.findEntitiesByProperty(Library.class, "screenType", ScreenType.SMALL_MOLECULE); for (Iterator<Library> iter = libraries.iterator(); iter.hasNext();) { Library library = iter.next(); if (library.getLibraryType() == LibraryType.ANNOTATION || library.getLibraryType() == LibraryType.NATURAL_PRODUCTS) { iter.remove(); } } } Collections.sort(libraries, new NullSafeComparator<Library>() { @Override protected int doCompare(Library o1, Library o2) { return o1.getShortName().compareTo(o2.getShortName()); } }); List<String> libraryNames = Lists.transform(libraries, new Function<Library, String>() { @Override public String apply(Library from) { return from.getShortName(); } }); log.info("libraries to process:\n" + libraryNames); int i = 0; for (Library library : libraries) { if (writer == null || i > 0) { writer = app.getOutputFile(library.getShortName()); } if (errorWriter == null || i > 0) { errorWriter = app.getOutputFile(library.getShortName() + ".errors"); } log.info("\nProcessing the library: " + 
library.getShortName() + "\nlong name: " + library.getLibraryName() + "\noutput file: " + library.getShortName() + ".csv"); app.query(library, queryPubchem, queryChembank, dao, intervalMs, numberOfTries, writer, errorWriter); i++; } } catch (Exception e) { throw new DAOTransactionRollbackException(e); } finally { if (writer != null) writer.close(); if (errorWriter != null) errorWriter.close(); } } }); System.exit(0); } catch (ParseException e) { log.error("error parsing command line options: " + e.getMessage()); } }
From source file:eu.fbk.utils.lsa.util.Anvur.java
public static void main(String[] args) throws Exception { String logConfig = System.getProperty("log-config"); if (logConfig == null) { logConfig = "log-config.txt"; }/*from w w w . j av a 2 s. c o m*/ PropertyConfigurator.configure(logConfig); /* if (args.length != 2) { log.println("Usage: java -mx512M eu.fbk.utils.lsa.util.Anvur in-file out-dir"); System.exit(1); } File l = new File(args[1]); if (!l.exists()) { l.mkdir(); } List<String[]> list = readText(new File(args[0])); String oldCategory = ""; for (int i=0;i<list.size();i++) { String[] s = list.get(i); if (!oldCategory.equals(s[0])) { File f = new File(args[1] + File.separator + s[0]); boolean b = f.mkdir(); logger.debug(f + " created " + b); } File g = new File(args[1] + File.separator + s[0] + File.separator + s[1] + ".txt"); logger.debug("writing " + g + "..."); PrintWriter pw = new PrintWriter(new FileWriter(g)); //pw.println(tokenize(s[1].substring(0, s[1].indexOf(".")).replace('_', ' ') + " " + s[2])); if (s.length == 5) { pw.println(tokenize(s[1].substring(0, s[1].indexOf(".")).replace('_', ' ') + " " + s[2] + " " + s[4].replace('_', ' '))); } else { pw.println(tokenize(s[1].substring(0, s[1].indexOf(".")).replace('_', ' ') + " " + s[2])); } pw.flush(); pw.close(); } // end for i */ if (args.length != 7) { System.out.println(args.length); System.out.println( "Usage: java -mx2G eu.fbk.utils.lsa.util.Anvur input threshold size dim idf in-file-csv fields\n\n"); System.exit(1); } // DecimalFormat dec = new DecimalFormat("#.00"); File Ut = new File(args[0] + "-Ut"); File Sk = new File(args[0] + "-S"); File r = new File(args[0] + "-row"); File c = new File(args[0] + "-col"); File df = new File(args[0] + "-df"); double threshold = Double.parseDouble(args[1]); int size = Integer.parseInt(args[2]); int dim = Integer.parseInt(args[3]); boolean rescaleIdf = Boolean.parseBoolean(args[4]); //"author_check"0, "authors"1, "title"2, "year"3, "pubtype"4, "publisher"5, "journal"6, "volume"7, "number"8, "pages"9, 
"abstract"10, "nauthors", "citedby" String[] labels = { "author_check", "authors", "title", "year", "pubtype", "publisher", "journal", "volume", "number", "pages", "abstract", "nauthors", "citedby" //author_id authors title year pubtype publisher journal volume number pages abstract nauthors citedby }; String name = buildName(labels, args[6]); File bwf = new File(args[5] + name + "-bow.txt"); PrintWriter bw = new PrintWriter( new BufferedWriter(new OutputStreamWriter(new FileOutputStream(bwf), "UTF-8"))); File bdf = new File(args[5] + name + "-bow.csv"); PrintWriter bd = new PrintWriter( new BufferedWriter(new OutputStreamWriter(new FileOutputStream(bdf), "UTF-8"))); File lwf = new File(args[5] + name + "-ls.txt"); PrintWriter lw = new PrintWriter( new BufferedWriter(new OutputStreamWriter(new FileOutputStream(lwf), "UTF-8"))); File ldf = new File(args[5] + name + "-ls.csv"); PrintWriter ld = new PrintWriter( new BufferedWriter(new OutputStreamWriter(new FileOutputStream(ldf), "UTF-8"))); File blwf = new File(args[5] + name + "-bow+ls.txt"); PrintWriter blw = new PrintWriter( new BufferedWriter(new OutputStreamWriter(new FileOutputStream(blwf), "UTF-8"))); File bldf = new File(args[5] + name + "-bow+ls.csv"); PrintWriter bld = new PrintWriter( new BufferedWriter(new OutputStreamWriter(new FileOutputStream(bldf), "UTF-8"))); File logf = new File(args[5] + name + ".log"); PrintWriter log = new PrintWriter( new BufferedWriter(new OutputStreamWriter(new FileOutputStream(logf), "UTF-8"))); //System.exit(0); LSM lsm = new LSM(Ut, Sk, r, c, df, dim, rescaleIdf); LSSimilarity lss = new LSSimilarity(lsm, size); List<String[]> list = readText(new File(args[5])); // author_check authors title year pubtype publisher journal volume number pages abstract nauthors citedby //header for (int i = 0; i < list.size(); i++) { String[] s1 = list.get(i); String t1 = s1[0].toLowerCase(); bw.print("\t"); lw.print("\t"); blw.print("\t"); bw.print(i + "(" + s1[0] + ")"); lw.print(i + "(" + 
s1[0] + ")"); blw.print(i + "(" + s1[0] + ")"); } // end for i bw.print("\n"); lw.print("\n"); blw.print("\n"); for (int i = 0; i < list.size(); i++) { logger.info(i + "\t"); String[] s1 = list.get(i); String t1 = buildText(s1, args[6]); BOW bow1 = new BOW(t1); logger.info(bow1); Vector d1 = lsm.mapDocument(bow1); d1.normalize(); log.println("d1:" + d1); Vector pd1 = lsm.mapPseudoDocument(d1); pd1.normalize(); log.println("pd1:" + pd1); Vector m1 = merge(pd1, d1); log.println("m1:" + m1); // write the orginal line for (int j = 0; j < s1.length; j++) { bd.print(s1[j]); bd.print("\t"); ld.print(s1[j]); ld.print("\t"); bld.print(s1[j]); bld.print("\t"); } // write the bow, ls, and bow+ls vectors bd.println(d1); ld.println(pd1); bld.println(m1); bw.print(i + "(" + s1[0] + ")"); lw.print(i + "(" + s1[0] + ")"); blw.print(i + "(" + s1[0] + ")"); for (int j = 0; j < i + 1; j++) { bw.print("\t"); lw.print("\t"); blw.print("\t"); } // end for j for (int j = i + 1; j < list.size(); j++) { logger.info(i + "\t" + j); String[] s2 = list.get(j); String t2 = buildText(s2, args[6]); BOW bow2 = new BOW(t2); log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") t1:" + t1); log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") t2:" + t2); log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") bow1:" + bow1); log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") bow2:" + bow2); Vector d2 = lsm.mapDocument(bow2); d2.normalize(); log.println("d2:" + d2); Vector pd2 = lsm.mapPseudoDocument(d2); pd2.normalize(); log.println("pd2:" + pd2); Vector m2 = merge(pd2, d2); log.println("m2:" + m2); float cosVSM = d1.dotProduct(d2) / (float) Math.sqrt(d1.dotProduct(d1) * d2.dotProduct(d2)); float cosLSM = pd1.dotProduct(pd2) / (float) Math.sqrt(pd1.dotProduct(pd1) * pd2.dotProduct(pd2)); float cosBOWLSM = m1.dotProduct(m2) / (float) Math.sqrt(m1.dotProduct(m1) * m2.dotProduct(m2)); bw.print("\t"); bw.print(dec.format(cosVSM)); lw.print("\t"); lw.print(dec.format(cosLSM)); 
blw.print("\t"); blw.print(dec.format(cosBOWLSM)); log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") bow\t" + cosVSM); log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") ls:\t" + cosLSM); log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") bow+ls:\t" + cosBOWLSM); } bw.print("\n"); lw.print("\n"); blw.print("\n"); } // end for i logger.info("wrote " + bwf); logger.info("wrote " + bwf); logger.info("wrote " + bdf); logger.info("wrote " + lwf); logger.info("wrote " + ldf); logger.info("wrote " + blwf); logger.info("wrote " + bldf); logger.info("wrote " + logf); ld.close(); bd.close(); bld.close(); bw.close(); lw.close(); blw.close(); log.close(); }
From source file:ch.epfl.lsir.xin.test.GlobalMeanTest.java
/** * @param args//from w ww . j av a 2s.c om */ public static void main(String[] args) throws Exception { // TODO Auto-generated method stub PrintWriter logger = new PrintWriter(".//results//GlobalMean"); PropertiesConfiguration config = new PropertiesConfiguration(); config.setFile(new File("conf//GlobalMean.properties")); try { config.load(); } catch (ConfigurationException e) { // TODO Auto-generated catch block e.printStackTrace(); } logger.println(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date()) + " Read rating data..."); DataLoaderFile loader = new DataLoaderFile(".//data//MoveLens100k.txt"); loader.readSimple(); DataSetNumeric dataset = loader.getDataset(); System.out.println("Number of ratings: " + dataset.getRatings().size() + " Number of users: " + dataset.getUserIDs().size() + " Number of items: " + dataset.getItemIDs().size()); logger.println("Number of ratings: " + dataset.getRatings().size() + ", Number of users: " + dataset.getUserIDs().size() + ", Number of items: " + dataset.getItemIDs().size()); double totalMAE = 0; double totalRMSE = 0; int F = 5; logger.println(F + "- folder cross validation."); logger.flush(); ArrayList<ArrayList<NumericRating>> folders = new ArrayList<ArrayList<NumericRating>>(); for (int i = 0; i < F; i++) { folders.add(new ArrayList<NumericRating>()); } while (dataset.getRatings().size() > 0) { int index = new Random().nextInt(dataset.getRatings().size()); int r = new Random().nextInt(F); folders.get(r).add(dataset.getRatings().get(index)); dataset.getRatings().remove(index); } for (int folder = 1; folder <= F; folder++) { System.out.println("Folder: " + folder); logger.println("Folder: " + folder); ArrayList<NumericRating> trainRatings = new ArrayList<NumericRating>(); ArrayList<NumericRating> testRatings = new ArrayList<NumericRating>(); for (int i = 0; i < folders.size(); i++) { if (i == folder - 1)//test data { testRatings.addAll(folders.get(i)); } else {//training data trainRatings.addAll(folders.get(i)); 
} } //create rating matrix HashMap<String, Integer> userIDIndexMapping = new HashMap<String, Integer>(); HashMap<String, Integer> itemIDIndexMapping = new HashMap<String, Integer>(); for (int i = 0; i < dataset.getUserIDs().size(); i++) { userIDIndexMapping.put(dataset.getUserIDs().get(i), i); } for (int i = 0; i < dataset.getItemIDs().size(); i++) { itemIDIndexMapping.put(dataset.getItemIDs().get(i), i); } RatingMatrix trainRatingMatrix = new RatingMatrix(dataset.getUserIDs().size(), dataset.getItemIDs().size()); for (int i = 0; i < trainRatings.size(); i++) { trainRatingMatrix.set(userIDIndexMapping.get(trainRatings.get(i).getUserID()), itemIDIndexMapping.get(trainRatings.get(i).getItemID()), trainRatings.get(i).getValue()); } RatingMatrix testRatingMatrix = new RatingMatrix(dataset.getUserIDs().size(), dataset.getItemIDs().size()); for (int i = 0; i < testRatings.size(); i++) { testRatingMatrix.set(userIDIndexMapping.get(testRatings.get(i).getUserID()), itemIDIndexMapping.get(testRatings.get(i).getItemID()), testRatings.get(i).getValue()); } System.out.println("Training: " + trainRatingMatrix.getTotalRatingNumber() + " vs Test: " + testRatingMatrix.getTotalRatingNumber()); logger.println("Initialize a recommendation model based on global average method."); GlobalAverage algo = new GlobalAverage(trainRatingMatrix); algo.setLogger(logger); algo.build(); algo.saveModel(".//localModels//" + config.getString("NAME")); logger.println("Save the model."); logger.flush(); System.out.println(trainRatings.size() + " vs. 
" + testRatings.size()); double RMSE = 0; double MAE = 0; int count = 0; for (int i = 0; i < testRatings.size(); i++) { NumericRating rating = testRatings.get(i); double prediction = algo.predict(rating.getUserID(), rating.getItemID()); if (Double.isNaN(prediction)) { System.out.println("no prediction"); continue; } MAE = MAE + Math.abs(rating.getValue() - prediction); RMSE = RMSE + Math.pow((rating.getValue() - prediction), 2); count++; } MAE = MAE / count; RMSE = Math.sqrt(RMSE / count); // System.out.println("MAE: " + MAE + " RMSE: " + RMSE); logger.println(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date()) + " MAE: " + MAE + " RMSE: " + RMSE); logger.flush(); totalMAE = totalMAE + MAE; totalRMSE = totalRMSE + RMSE; } System.out.println("MAE: " + totalMAE / F + " RMSE: " + totalRMSE / F); logger.println(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date()) + " Final results: MAE: " + totalMAE / F + " RMSE: " + totalRMSE / F); logger.flush(); logger.close(); //MAE: 0.9338607074893257 RMSE: 1.1170971131112037 (MovieLens1M) //MAE: 0.9446876509332618 RMSE: 1.1256517870920375 (MovieLens100K) }
From source file:ch.epfl.lsir.xin.test.UserAverageTest.java
/** * @param args/* w w w . ja v a 2 s. c om*/ */ public static void main(String[] args) throws Exception { // TODO Auto-generated method stub PrintWriter logger = new PrintWriter(".//results//UserAverage"); PropertiesConfiguration config = new PropertiesConfiguration(); config.setFile(new File(".//conf//UserAverage.properties")); try { config.load(); } catch (ConfigurationException e) { // TODO Auto-generated catch block e.printStackTrace(); } logger.println(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date()) + " Read rating data..."); DataLoaderFile loader = new DataLoaderFile(".//data//MoveLens100k.txt"); loader.readSimple(); DataSetNumeric dataset = loader.getDataset(); System.out.println("Number of ratings: " + dataset.getRatings().size() + " Number of users: " + dataset.getUserIDs().size() + " Number of items: " + dataset.getItemIDs().size()); logger.println("Number of ratings: " + dataset.getRatings().size() + " Number of users: " + dataset.getUserIDs().size() + " Number of items: " + dataset.getItemIDs().size()); logger.flush(); double totalMAE = 0; double totalRMSE = 0; int F = 5; logger.println(F + "- folder cross validation."); ArrayList<ArrayList<NumericRating>> folders = new ArrayList<ArrayList<NumericRating>>(); for (int i = 0; i < F; i++) { folders.add(new ArrayList<NumericRating>()); } while (dataset.getRatings().size() > 0) { int index = new Random().nextInt(dataset.getRatings().size()); int r = new Random().nextInt(F); folders.get(r).add(dataset.getRatings().get(index)); dataset.getRatings().remove(index); } for (int folder = 1; folder <= F; folder++) { logger.println("Folder: " + folder); System.out.println("Folder: " + folder); ArrayList<NumericRating> trainRatings = new ArrayList<NumericRating>(); ArrayList<NumericRating> testRatings = new ArrayList<NumericRating>(); for (int i = 0; i < folders.size(); i++) { if (i == folder - 1)//test data { testRatings.addAll(folders.get(i)); } else {//training data 
trainRatings.addAll(folders.get(i)); } } //create rating matrix HashMap<String, Integer> userIDIndexMapping = new HashMap<String, Integer>(); HashMap<String, Integer> itemIDIndexMapping = new HashMap<String, Integer>(); for (int i = 0; i < dataset.getUserIDs().size(); i++) { userIDIndexMapping.put(dataset.getUserIDs().get(i), i); } for (int i = 0; i < dataset.getItemIDs().size(); i++) { itemIDIndexMapping.put(dataset.getItemIDs().get(i), i); } RatingMatrix trainRatingMatrix = new RatingMatrix(dataset.getUserIDs().size(), dataset.getItemIDs().size()); for (int i = 0; i < trainRatings.size(); i++) { trainRatingMatrix.set(userIDIndexMapping.get(trainRatings.get(i).getUserID()), itemIDIndexMapping.get(trainRatings.get(i).getItemID()), trainRatings.get(i).getValue()); } trainRatingMatrix.calculateGlobalAverage(); RatingMatrix testRatingMatrix = new RatingMatrix(dataset.getUserIDs().size(), dataset.getItemIDs().size()); for (int i = 0; i < testRatings.size(); i++) { testRatingMatrix.set(userIDIndexMapping.get(testRatings.get(i).getUserID()), itemIDIndexMapping.get(testRatings.get(i).getItemID()), testRatings.get(i).getValue()); } System.out.println("Training: " + trainRatingMatrix.getTotalRatingNumber() + " vs Test: " + testRatingMatrix.getTotalRatingNumber()); logger.println("Initialize a recommendation model based on user average method."); UserAverage algo = new UserAverage(trainRatingMatrix); algo.setLogger(logger); algo.build(); algo.saveModel(".//localModels//" + config.getString("NAME")); logger.println("Save the model."); System.out.println(trainRatings.size() + " vs. 
" + testRatings.size()); double RMSE = 0; double MAE = 0; int count = 0; for (int i = 0; i < testRatings.size(); i++) { NumericRating rating = testRatings.get(i); double prediction = algo.predict(userIDIndexMapping.get(rating.getUserID()), itemIDIndexMapping.get(rating.getItemID())); if (Double.isNaN(prediction)) { System.out.println("no prediction"); continue; } MAE = MAE + Math.abs(rating.getValue() - prediction); RMSE = RMSE + Math.pow((rating.getValue() - prediction), 2); count++; } MAE = MAE / count; RMSE = Math.sqrt(RMSE / count); logger.println(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date()) + " MAE: " + MAE + " RMSE: " + RMSE); logger.flush(); totalMAE = totalMAE + MAE; totalRMSE = totalRMSE + RMSE; } System.out.println("MAE: " + totalMAE / F + " RMSE: " + totalRMSE / F); logger.println(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date()) + " Final results: MAE: " + totalMAE / F + " RMSE: " + totalRMSE / F); logger.flush(); logger.close(); //MAE: 0.8353035962363073 RMSE: 1.0422971886952053 (MovieLens 100k) }
From source file:edu.msu.cme.rdp.multicompare.Reprocess.java
/** * This class reprocesses the classification results (allrank output) and print out hierarchy output file, based on the confidence cutoff; * and print out only the detail classification results with assignment at certain rank with confidence above the cutoff or/and matching a given taxon. * @param args/* www. ja v a 2s .co m*/ * @throws Exception */ public static void main(String[] args) throws Exception { PrintWriter assign_out = new PrintWriter(new NullWriter()); float conf = 0.8f; PrintStream heir_out = null; String hier_out_filename = null; ClassificationResultFormatter.FORMAT format = ClassificationResultFormatter.FORMAT.allRank; String rank = null; String taxonFilterFile = null; String train_propfile = null; String gene = null; List<MCSample> samples = new ArrayList(); try { CommandLine line = new PosixParser().parse(options, args); if (line.hasOption(CmdOptions.HIER_OUTFILE_SHORT_OPT)) { hier_out_filename = line.getOptionValue(CmdOptions.HIER_OUTFILE_SHORT_OPT); heir_out = new PrintStream(hier_out_filename); } else { throw new Exception( "It make sense to provide output filename for " + CmdOptions.HIER_OUTFILE_LONG_OPT); } if (line.hasOption(CmdOptions.OUTFILE_SHORT_OPT)) { assign_out = new PrintWriter(line.getOptionValue(CmdOptions.OUTFILE_SHORT_OPT)); } if (line.hasOption(CmdOptions.RANK_SHORT_OPT)) { rank = line.getOptionValue(CmdOptions.RANK_SHORT_OPT); } if (line.hasOption(CmdOptions.TAXON_SHORT_OPT)) { taxonFilterFile = line.getOptionValue(CmdOptions.TAXON_SHORT_OPT); } if (line.hasOption(CmdOptions.BOOTSTRAP_SHORT_OPT)) { conf = Float.parseFloat(line.getOptionValue(CmdOptions.BOOTSTRAP_SHORT_OPT)); if (conf < 0 || conf > 1) { throw new IllegalArgumentException("Confidence must be in the range [0,1]"); } } if (line.hasOption(CmdOptions.FORMAT_SHORT_OPT)) { String f = line.getOptionValue(CmdOptions.FORMAT_SHORT_OPT); if (f.equalsIgnoreCase("allrank")) { format = ClassificationResultFormatter.FORMAT.allRank; } else if (f.equalsIgnoreCase("fixrank")) { 
format = ClassificationResultFormatter.FORMAT.fixRank; } else if (f.equalsIgnoreCase("db")) { format = ClassificationResultFormatter.FORMAT.dbformat; } else if (f.equalsIgnoreCase("filterbyconf")) { format = ClassificationResultFormatter.FORMAT.filterbyconf; } else { throw new IllegalArgumentException( "Not valid output format, only allrank, fixrank, filterbyconf and db allowed"); } } if (line.hasOption(CmdOptions.TRAINPROPFILE_SHORT_OPT)) { if (gene != null) { throw new IllegalArgumentException( "Already specified the gene from the default location. Can not specify train_propfile"); } else { train_propfile = line.getOptionValue(CmdOptions.TRAINPROPFILE_SHORT_OPT); } } if (line.hasOption(CmdOptions.GENE_SHORT_OPT)) { if (train_propfile != null) { throw new IllegalArgumentException( "Already specified train_propfile. Can not specify gene any more"); } gene = line.getOptionValue(CmdOptions.GENE_SHORT_OPT).toLowerCase(); if (!gene.equals(ClassifierFactory.RRNA_16S_GENE) && !gene.equals(ClassifierFactory.FUNGALLSU_GENE) && !gene.equals(ClassifierFactory.FUNGALITS_warcup_GENE) && !gene.equals(ClassifierFactory.FUNGALITS_unite_GENE)) { throw new IllegalArgumentException(gene + " is NOT valid, only allows " + ClassifierFactory.RRNA_16S_GENE + ", " + ClassifierFactory.FUNGALLSU_GENE + ", " + ClassifierFactory.FUNGALITS_warcup_GENE + " and " + ClassifierFactory.FUNGALITS_unite_GENE); } } args = line.getArgs(); if (args.length < 1) { throw new Exception("Incorrect number of command line arguments"); } for (String arg : args) { String[] inFileNames = arg.split(","); String inputFile = inFileNames[0]; File idmappingFile = null; if (inFileNames.length == 2) { idmappingFile = new File(inFileNames[1]); if (!idmappingFile.exists()) { System.err.println("Failed to find input file \"" + inFileNames[1] + "\""); return; } } MCSample nextSample = new MCSampleResult(inputFile, idmappingFile); samples.add(nextSample); } } catch (Exception e) { System.out.println("Command Error: " + 
e.getMessage()); new HelpFormatter().printHelp(120, "Reprocess [options] <Classification_allrank_result>[,idmappingfile] ...", "", options, ""); return; } if (train_propfile == null && gene == null) { gene = ClassifierFactory.RRNA_16S_GENE; } HashSet<String> taxonFilter = null; if (taxonFilterFile != null) { taxonFilter = readTaxonFilterFile(taxonFilterFile); } MultiClassifier multiClassifier = new MultiClassifier(train_propfile, gene); DefaultPrintVisitor printVisitor = new DefaultPrintVisitor(heir_out, samples); MultiClassifierResult result = multiClassifier.multiClassificationParser(samples, conf, assign_out, format, rank, taxonFilter); result.getRoot().topDownVisit(printVisitor); assign_out.close(); heir_out.close(); if (multiClassifier.hasCopyNumber()) { // print copy number corrected counts File cn_corrected_s = new File(new File(hier_out_filename).getParentFile(), "cncorrected_" + hier_out_filename); PrintStream cn_corrected_hier_out = new PrintStream(cn_corrected_s); printVisitor = new DefaultPrintVisitor(cn_corrected_hier_out, samples, true); result.getRoot().topDownVisit(printVisitor); cn_corrected_hier_out.close(); } }