List of usage examples for java.util.Queue remove()
E remove();
From source file: io.seldon.spark.actions.GroupActionsJob.java
public static void run(CmdLineArgs cmdLineArgs) { long unixDays = 0; try {//from w w w .j av a 2 s . c o m unixDays = JobUtils.dateToUnixDays(cmdLineArgs.input_date_string); } catch (ParseException e) { unixDays = 0; } System.out.println(String.format("--- started GroupActionsJob date[%s] unixDays[%s] ---", cmdLineArgs.input_date_string, unixDays)); System.out.println("Env: " + System.getenv()); System.out.println("Properties: " + System.getProperties()); SparkConf sparkConf = new SparkConf().setAppName("GroupActionsJob"); if (cmdLineArgs.debug_use_local_master) { System.out.println("Using 'local' master"); sparkConf.setMaster("local"); } Tuple2<String, String>[] sparkConfPairs = sparkConf.getAll(); System.out.println("--- sparkConf ---"); for (int i = 0; i < sparkConfPairs.length; i++) { Tuple2<String, String> kvPair = sparkConfPairs[i]; System.out.println(String.format("%s:%s", kvPair._1, kvPair._2)); } System.out.println("-----------------"); JavaSparkContext jsc = new JavaSparkContext(sparkConf); { // setup aws access Configuration hadoopConf = jsc.hadoopConfiguration(); hadoopConf.set("fs.s3.impl", "org.apache.hadoop.fs.s3native.NativeS3FileSystem"); if (cmdLineArgs.aws_access_key_id != null && !"".equals(cmdLineArgs.aws_access_key_id)) { hadoopConf.set("fs.s3n.awsAccessKeyId", cmdLineArgs.aws_access_key_id); hadoopConf.set("fs.s3n.awsSecretAccessKey", cmdLineArgs.aws_secret_access_key); } } // String output_path_dir = "./out/" + input_date_string + "-" + UUID.randomUUID(); JavaRDD<String> dataSet = jsc.textFile( JobUtils.getSourceDirFromDate(cmdLineArgs.input_path_pattern, cmdLineArgs.input_date_string)) .repartition(4); final ObjectMapper objectMapper = new ObjectMapper(); objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); final String single_client = cmdLineArgs.single_client; if (single_client != null) { Function<String, Boolean> clientFilter = new Function<String, Boolean>() { @Override public Boolean call(String t) throws 
Exception { ActionData actionData = JobUtils.getActionDataFromActionLogLine(objectMapper, t); return ((actionData.client != null) && (actionData.client.equals(single_client))); } }; dataSet = dataSet.filter(clientFilter); } JavaPairRDD<String, ActionData> pairs = dataSet.mapToPair(new PairFunction<String, String, ActionData>() { @Override public Tuple2<String, ActionData> call(String t) throws Exception { ActionData actionData = JobUtils.getActionDataFromActionLogLine(objectMapper, t); // String key = (actionData.userid == 0) ? "__no_userid__" : actionData.client; String key = actionData.client; return new Tuple2<String, ActionData>(key, actionData); } }).persist(StorageLevel.MEMORY_AND_DISK()); List<String> clientList = pairs.keys().distinct().collect(); Queue<ClientDetail> clientDetailQueue = new PriorityQueue<ClientDetail>(30, new Comparator<ClientDetail>() { @Override public int compare(ClientDetail o1, ClientDetail o2) { if (o1.itemCount > o2.itemCount) { return -1; } else if (o1.itemCount < o2.itemCount) { return 1; } return 0; } }); Queue<ClientDetail> clientDetailZeroQueue = new PriorityQueue<ClientDetail>(30, new Comparator<ClientDetail>() { @Override public int compare(ClientDetail o1, ClientDetail o2) { if (o1.itemCount > o2.itemCount) { return -1; } else if (o1.itemCount < o2.itemCount) { return 1; } return 0; } }); System.out.println("Client list " + clientList.toString()); for (String client : clientList) { if (client != null) { System.out.println("looking at client " + client); final String currentClient = client; JavaPairRDD<String, ActionData> filtered_by_client = pairs .filter(new Function<Tuple2<String, ActionData>, Boolean>() { @Override public Boolean call(Tuple2<String, ActionData> v1) throws Exception { if (currentClient.equalsIgnoreCase(v1._1)) { return Boolean.TRUE; } else { return Boolean.FALSE; } } }); JavaPairRDD<String, ActionData> nonZeroUserIds = filtered_by_client .filter(new Function<Tuple2<String, ActionData>, Boolean>() { 
@Override public Boolean call(Tuple2<String, ActionData> v1) throws Exception { if (v1._2.userid == 0) { return Boolean.FALSE; } else { return Boolean.TRUE; } } }); JavaPairRDD<String, Integer> userIdLookupRDD = nonZeroUserIds .mapToPair(new PairFunction<Tuple2<String, ActionData>, String, Integer>() { @Override public Tuple2<String, Integer> call(Tuple2<String, ActionData> t) throws Exception { String key = currentClient + "_" + t._2.client_userid; return new Tuple2<String, Integer>(key, t._2.userid); } }); Map<String, Integer> userIdLookupMap = userIdLookupRDD.collectAsMap(); Map<String, Integer> userIdLookupMap_wrapped = new HashMap<String, Integer>(userIdLookupMap); final Broadcast<Map<String, Integer>> broadcastVar = jsc.broadcast(userIdLookupMap_wrapped); JavaRDD<String> json_only_with_zeros = filtered_by_client .map(new Function<Tuple2<String, ActionData>, String>() { @Override public String call(Tuple2<String, ActionData> v1) throws Exception { Map<String, Integer> m = broadcastVar.getValue(); ActionData actionData = v1._2; if (actionData.userid == 0) { String key = currentClient + "_" + actionData.client_userid; if (m.containsKey(key)) { actionData.userid = m.get(key); } else { return ""; } } String json = JobUtils.getJsonFromActionData(actionData); return json; } }); JavaRDD<String> json_only = json_only_with_zeros.filter(new Function<String, Boolean>() { @Override public Boolean call(String v1) throws Exception { return (v1.length() == 0) ? 
Boolean.FALSE : Boolean.TRUE; } }); String outputPath = getOutputPath(cmdLineArgs.output_path_dir, unixDays, client); if (cmdLineArgs.gzip_output) { json_only.saveAsTextFile(outputPath, org.apache.hadoop.io.compress.GzipCodec.class); } else { json_only.saveAsTextFile(outputPath); } long json_only_count = json_only.count(); clientDetailZeroQueue .add(new ClientDetail(currentClient, json_only_with_zeros.count() - json_only_count)); clientDetailQueue.add(new ClientDetail(currentClient, json_only_count)); } else System.out.println("Found null client!"); } System.out.println("- Client Action (Zero Userid) Count -"); while (clientDetailZeroQueue.size() != 0) { GroupActionsJob.ClientDetail clientDetail = clientDetailZeroQueue.remove(); System.out.println(String.format("%s: %d", clientDetail.client, clientDetail.itemCount)); } System.out.println("- Client Action Count -"); while (clientDetailQueue.size() != 0) { GroupActionsJob.ClientDetail clientDetail = clientDetailQueue.remove(); System.out.println(String.format("%s: %d", clientDetail.client, clientDetail.itemCount)); } jsc.stop(); System.out.println(String.format("--- finished GroupActionsJob date[%s] unixDays[%s] ---", cmdLineArgs.input_date_string, unixDays)); }