List of usage examples for java.util.HashMap.get
public V get(Object key)
From source file:com.thed.zapi.cloud.sample.CycleExecutionReportByVersion.java
public static void main(String[] args) throws JSONException, URISyntaxException, ParseException, IOException { String API_GET_EXECUTIONS = "{SERVER}/public/rest/api/1.0/executions/search/cycle/"; String API_GET_CYCLES = "{SERVER}/public/rest/api/1.0/cycles/search?"; // Delimiter used in CSV file final String NEW_LINE_SEPARATOR = "\n"; final String fileName = "F:\\cycleExecutionReport.csv"; /** Declare JIRA,Zephyr URL,access and secret Keys */ // JIRA Cloud URL of the instance String jiraBaseURL = "https://demo.atlassian.net"; // Replace zephyr baseurl <ZAPI_Cloud_URL> shared with the user for ZAPI Cloud Installation String zephyrBaseUrl = "<ZAPI_Cloud_URL>"; // zephyr accessKey , we can get from Addons >> zapi section String accessKey = "YjE2MjdjMGEtNzExNy0zYjY1LWFkMzQtNjcwMDM3OTljFkbWluIGFkbWlu"; // zephyr secretKey , we can get from Addons >> zapi section String secretKey = "qufnbimi96Ob2hq3ISF08yZ8Qw4c1eHGeGlk"; /** Declare parameter values here */ String userName = "admin"; String versionId = "-1"; String projectId = "10100"; String projectName = "Support"; String versionName = "Unscheduled"; ZFJCloudRestClient client = ZFJCloudRestClient.restBuilder(zephyrBaseUrl, accessKey, secretKey, userName) .build();/*from w ww.j a v a2s. co m*/ /** * Get List of Cycles by Project and Version */ final String getCyclesUri = API_GET_CYCLES.replace("{SERVER}", zephyrBaseUrl) + "projectId=" + projectId + "&versionId=" + versionId; Map<String, String> cycles = getCyclesByProjectVersion(getCyclesUri, client, accessKey); // System.out.println("cycles :"+ cycles.toString()); /** * Iterating over the Cycles and writing the report to CSV * */ FileWriter fileWriter = null; System.out.println("Writing CSV file....."); try { fileWriter = new FileWriter(fileName); // Write the CSV file header fileWriter.append("Cycle Execution Report By Version and Project"); fileWriter.append(NEW_LINE_SEPARATOR); fileWriter.append("PROJECT:" + "," + projectName); fileWriter.append(NEW_LINE_SEPARATOR); fileWriter.append("VERSION:" + "," + versionName); fileWriter.append(NEW_LINE_SEPARATOR); JSONArray executions; int totalUnexecutedCount = 0; int totalExecutionCount = 0; for (String key : cycles.keySet()) { int executionCount = 0; int unexecutedCount = 0; final String getExecutionsUri = API_GET_EXECUTIONS.replace("{SERVER}", zephyrBaseUrl) + key + "?projectId=" + projectId + "&versionId=" + versionId; fileWriter.append("Cycle:" + "," + cycles.get(key)); fileWriter.append(NEW_LINE_SEPARATOR); executions = getExecutionsByCycleId(getExecutionsUri, client, accessKey); // System.out.println("executions :" + executions.toString()); HashMap<String, Integer> counter = new HashMap<String, Integer>(); String[] statusName = new String[executions.length()]; for (int i = 0; i < executions.length(); i++) { JSONObject executionObj = executions.getJSONObject(i).getJSONObject("execution"); // System.out.println("executionObj // "+executionObj.toString()); JSONObject statusObj = executionObj.getJSONObject("status"); // System.out.println("statusObj :"+statusObj.toString()); statusName[i] = statusObj.getString("name"); } if (statusName.length != 0) { // System.out.println(statusName.toString()); for (String a : statusName) { if (counter.containsKey(a)) { int oldValue = counter.get(a); counter.put(a, oldValue + 1); } else { counter.put(a, 1); } } for (String status : counter.keySet()) { fileWriter.append(" " + "," + " " + "," + status + "," + counter.get(status)); fileWriter.append(NEW_LINE_SEPARATOR); if (status.equalsIgnoreCase("UNEXECUTED")) { 
unexecutedCount += counter.get(status); } else { executionCount += counter.get(status); } } } totalExecutionCount += executionCount; totalUnexecutedCount += unexecutedCount; fileWriter.append(NEW_LINE_SEPARATOR); } fileWriter.append(NEW_LINE_SEPARATOR); fileWriter.append("TOTAL CYCLES:" + "," + cycles.size()); fileWriter.append(NEW_LINE_SEPARATOR); fileWriter.append("TOTAL EXECUTIONS:" + "," + totalExecutionCount); fileWriter.append(NEW_LINE_SEPARATOR); fileWriter.append("TOTAL ASSIGNED:" + "," + (totalUnexecutedCount + totalExecutionCount)); System.out.println("CSV file was created successfully !!!"); } catch (Exception e) { System.out.println("Error in CsvFileWriter !!!"); e.printStackTrace(); } finally { try { fileWriter.flush(); fileWriter.close(); } catch (IOException e) { System.out.println("Error while flushing/closing fileWriter !!!"); e.printStackTrace(); } } }
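The HashMap.get calls in this report generator implement a frequency counter over execution status names: containsKey guards the lookup, get reads the previous count, and put stores the incremented value. A minimal, self-contained sketch of that counting idiom (class name and sample statuses are invented for illustration):

import java.util.HashMap;
import java.util.Map;

public class StatusCounter {
    public static void main(String[] args) {
        String[] statusName = { "PASS", "FAIL", "UNEXECUTED", "PASS", "UNEXECUTED" }; // sample data
        Map<String, Integer> counter = new HashMap<>();
        for (String status : statusName) {
            if (counter.containsKey(status)) {
                int oldValue = counter.get(status);   // read the current count
                counter.put(status, oldValue + 1);    // write it back incremented
            } else {
                counter.put(status, 1);               // first occurrence of this status
            }
            // Since Java 8 the same update can be written without the explicit get:
            // counter.merge(status, 1, Integer::sum);
        }
        counter.forEach((status, count) -> System.out.println(status + " = " + count));
    }
}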
From source file:com.yahoo.semsearch.fastlinking.EntityContextFastEntityLinker.java
/** * Context-aware command line entity linker * @param args arguments (see -help for further info) * @throws Exception/*from ww w . jav a2 s . c o m*/ */ public static void main(String args[]) throws Exception { SimpleJSAP jsap = new SimpleJSAP(EntityContextFastEntityLinker.class.getName(), "Interactive mode for entity linking", new Parameter[] { new FlaggedOption("hash", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'h', "hash", "quasi succint hash"), new FlaggedOption("vectors", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'v', "vectors", "Word vectors file"), new FlaggedOption("labels", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'l', "labels", "File containing query2entity labels"), new FlaggedOption("id2type", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'i', "id2type", "File with the id2type mapping"), new Switch("centroid", 'c', "centroid", "Use centroid-based distances and not LR"), new FlaggedOption("map", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'm', "map", "Entity 2 type mapping "), new FlaggedOption("threshold", JSAP.STRING_PARSER, "-20", JSAP.NOT_REQUIRED, 'd', "threshold", "Score threshold value "), new FlaggedOption("entities", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'e', "entities", "Entities word vectors file"), }); JSAPResult jsapResult = jsap.parse(args); if (jsap.messagePrinted()) return; double threshold = Double.parseDouble(jsapResult.getString("threshold")); QuasiSuccinctEntityHash hash = (QuasiSuccinctEntityHash) BinIO.loadObject(jsapResult.getString("hash")); EntityContext queryContext; if (!jsapResult.getBoolean("centroid")) { queryContext = new LREntityContext(jsapResult.getString("vectors"), jsapResult.getString("entities"), hash); } else { queryContext = new CentroidEntityContext(jsapResult.getString("vectors"), jsapResult.getString("entities"), hash); } HashMap<String, ArrayList<EntityRelevanceJudgment>> labels = null; if (jsapResult.getString("labels") != null) { labels = readTrainingData(jsapResult.getString("labels")); } String map = jsapResult.getString("map"); HashMap<String, String> entities2Type = null; if (map != null) entities2Type = readEntity2IdFile(map); EntityContextFastEntityLinker linker = new EntityContextFastEntityLinker(hash, queryContext); final BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); String q; for (;;) { System.out.print(">"); q = br.readLine(); if (q == null) { System.err.println(); break; // CTRL-D } if (q.length() == 0) continue; long time = -System.nanoTime(); try { List<EntityResult> results = linker.getResults(q, threshold); //List<EntityResult> results = linker.getResultsGreedy( q, 5 ); //int rank = 0; for (EntityResult er : results) { if (entities2Type != null) { String name = er.text.toString().trim(); String newType = entities2Type.get(name); if (newType == null) newType = "NF"; System.out.println(q + "\t span: \u001b[1m [" + er.text + "] \u001b[0m eId: " + er.id + " ( t= " + newType + ")" + " score: " + er.score + " ( " + er.s.span + " ) "); //System.out.println( newType + "\t" + q + "\t" + StringUtils.remove( q, er.s.span.toString() ) + " \t " + er.text ); break; /* } else { System.out.print( "[" + er.text + "(" + String.format("%.2f",er.score) +")] "); System.out.println( "span: \u001b[1m [" + er.text + "] \u001b[0m eId: " + er.id + " ( t= " + typeMapping.get( hash.getEntity( er.id ).type ) + " score: " + er.score + " ( " + er.s.span + " ) " ); } rank++; */ } else { if (labels == null) { System.out.println(q + "\t" + er.text + "\t" 
+ er.score); } else { ArrayList<EntityRelevanceJudgment> jds = labels.get(q); String label = "NF"; if (jds != null) { EntityRelevanceJudgment relevanceOfEntity = relevanceOfEntity(er.text, jds); label = relevanceOfEntity.label; } System.out.println(q + "\t" + er.text + "\t" + label + "\t" + er.score); break; } } System.out.println(); } time += System.nanoTime(); System.out.println("Time to rank and print the candidates:" + time / 1000000. + " ms"); } catch (Exception e) { e.printStackTrace(); } } }
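Here HashMap.get is used as a lookup that may legitimately miss: entities2Type.get(name) returns null when an entity has no known type, and the code substitutes the fallback label "NF". A small sketch of that null-then-default pattern, with invented map contents:

import java.util.HashMap;
import java.util.Map;

public class TypeLookup {
    public static void main(String[] args) {
        Map<String, String> entities2Type = new HashMap<>();
        entities2Type.put("Barcelona", "LOCATION");  // sample mapping

        String name = "Messi";                       // key that is not in the map
        String newType = entities2Type.get(name);    // get returns null for absent keys
        if (newType == null) {
            newType = "NF";                          // "not found" fallback, as in the linker
        }
        System.out.println(name + " -> " + newType);

        // Java 8 shorthand for the same null handling:
        System.out.println(name + " -> " + entities2Type.getOrDefault(name, "NF"));
    }
}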
From source file:com.ibm.crail.storage.StorageServer.java
public static void main(String[] args) throws Exception { Logger LOG = CrailUtils.getLogger(); CrailConfiguration conf = new CrailConfiguration(); CrailConstants.updateConstants(conf); CrailConstants.printConf();/*from ww w .j a v a 2 s. c om*/ CrailConstants.verify(); int splitIndex = 0; for (String param : args) { if (param.equalsIgnoreCase("--")) { break; } splitIndex++; } //default values StringTokenizer tokenizer = new StringTokenizer(CrailConstants.STORAGE_TYPES, ","); if (!tokenizer.hasMoreTokens()) { throw new Exception("No storage types defined!"); } String storageName = tokenizer.nextToken(); int storageType = 0; HashMap<String, Integer> storageTypes = new HashMap<String, Integer>(); storageTypes.put(storageName, storageType); for (int type = 1; tokenizer.hasMoreElements(); type++) { String name = tokenizer.nextToken(); storageTypes.put(name, type); } int storageClass = -1; //custom values if (args != null) { Option typeOption = Option.builder("t").desc("storage type to start").hasArg().build(); Option classOption = Option.builder("c").desc("storage class the server will attach to").hasArg() .build(); Options options = new Options(); options.addOption(typeOption); options.addOption(classOption); CommandLineParser parser = new DefaultParser(); try { CommandLine line = parser.parse(options, Arrays.copyOfRange(args, 0, splitIndex)); if (line.hasOption(typeOption.getOpt())) { storageName = line.getOptionValue(typeOption.getOpt()); storageType = storageTypes.get(storageName).intValue(); } if (line.hasOption(classOption.getOpt())) { storageClass = Integer.parseInt(line.getOptionValue(classOption.getOpt())); } } catch (ParseException e) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("Storage tier", options); System.exit(-1); } } if (storageClass < 0) { storageClass = storageType; } StorageTier storageTier = StorageTier.createInstance(storageName); if (storageTier == null) { throw new Exception("Cannot instantiate datanode of type " + storageName); } String extraParams[] = null; splitIndex++; if (args.length > splitIndex) { extraParams = new String[args.length - splitIndex]; for (int i = splitIndex; i < args.length; i++) { extraParams[i - splitIndex] = args[i]; } } storageTier.init(conf, extraParams); storageTier.printConf(LOG); RpcClient rpcClient = RpcClient.createInstance(CrailConstants.NAMENODE_RPC_TYPE); rpcClient.init(conf, args); rpcClient.printConf(LOG); ConcurrentLinkedQueue<InetSocketAddress> namenodeList = CrailUtils.getNameNodeList(); ConcurrentLinkedQueue<RpcConnection> connectionList = new ConcurrentLinkedQueue<RpcConnection>(); while (!namenodeList.isEmpty()) { InetSocketAddress address = namenodeList.poll(); RpcConnection connection = rpcClient.connect(address); connectionList.add(connection); } RpcConnection rpcConnection = connectionList.peek(); if (connectionList.size() > 1) { rpcConnection = new RpcDispatcher(connectionList); } LOG.info("connected to namenode(s) " + rpcConnection.toString()); StorageServer server = storageTier.launchServer(); StorageRpcClient storageRpc = new StorageRpcClient(storageType, CrailStorageClass.get(storageClass), server.getAddress(), rpcConnection); HashMap<Long, Long> blockCount = new HashMap<Long, Long>(); long sumCount = 0; while (server.isAlive()) { StorageResource resource = server.allocateResource(); if (resource == null) { break; } else { storageRpc.setBlock(resource.getAddress(), resource.getLength(), resource.getKey()); DataNodeStatistics stats = storageRpc.getDataNode(); long newCount = 
stats.getFreeBlockCount(); long serviceId = stats.getServiceId(); long oldCount = 0; if (blockCount.containsKey(serviceId)) { oldCount = blockCount.get(serviceId); } long diffCount = newCount - oldCount; blockCount.put(serviceId, newCount); sumCount += diffCount; LOG.info("datanode statistics, freeBlocks " + sumCount); } } while (server.isAlive()) { DataNodeStatistics stats = storageRpc.getDataNode(); long newCount = stats.getFreeBlockCount(); long serviceId = stats.getServiceId(); long oldCount = 0; if (blockCount.containsKey(serviceId)) { oldCount = blockCount.get(serviceId); } long diffCount = newCount - oldCount; blockCount.put(serviceId, newCount); sumCount += diffCount; LOG.info("datanode statistics, freeBlocks " + sumCount); Thread.sleep(2000); } }
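The blockCount map in this server loop uses containsKey followed by get to recover the previously reported free-block count per serviceId, computes the delta against the new report, and stores the new value with put. A compact sketch of that previous-value bookkeeping, with simulated statistics instead of the RPC calls:

import java.util.HashMap;
import java.util.Map;

public class BlockCountDelta {
    public static void main(String[] args) {
        Map<Long, Long> blockCount = new HashMap<>();
        long sumCount = 0;

        // Simulated reports of (serviceId, freeBlockCount) pairs.
        long[][] reports = { { 1L, 100L }, { 2L, 50L }, { 1L, 120L } };
        for (long[] report : reports) {
            long serviceId = report[0];
            long newCount = report[1];
            long oldCount = 0;
            if (blockCount.containsKey(serviceId)) {
                oldCount = blockCount.get(serviceId); // previous count for this datanode
            }
            long diffCount = newCount - oldCount;     // change since the last report
            blockCount.put(serviceId, newCount);      // remember the latest value
            sumCount += diffCount;
            System.out.println("datanode statistics, freeBlocks " + sumCount);
        }
    }
}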
From source file:edu.ucla.cs.scai.swim.qa.ontology.dbpedia.tipicality.Test.java
public static void main(String[] args) throws IOException, ClassNotFoundException { String path = DBpediaOntology.DBPEDIA_CSV_FOLDER; if (args != null && args.length > 0) { path = args[0];/*from w ww. j a v a 2s . c o m*/ if (!path.endsWith("/")) { path = path + "/"; } } stopAttributes.add("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"); stopAttributes.add("http://www.w3.org/2002/07/owl#sameAs"); stopAttributes.add("http://dbpedia.org/ontology/wikiPageRevisionID"); stopAttributes.add("http://dbpedia.org/ontology/wikiPageID"); stopAttributes.add("http://purl.org/dc/elements/1.1/description"); stopAttributes.add("http://dbpedia.org/ontology/thumbnail"); stopAttributes.add("http://dbpedia.org/ontology/type"); try (ObjectInputStream ois = new ObjectInputStream(new FileInputStream(path + "counts.bin"))) { categories = (HashSet<String>) ois.readObject(); attributes = (HashSet<String>) ois.readObject(); categoryCount = (HashMap<String, Integer>) ois.readObject(); attributeCount = (HashMap<String, Integer>) ois.readObject(); categoryAttributeCount = (HashMap<String, HashMap<String, Integer>>) ois.readObject(); attributeCategoryCount = (HashMap<String, HashMap<String, Integer>>) ois.readObject(); } System.out.println(categories.size() + " categories found"); System.out.println(attributes.size() + " attributes found"); n = 0; for (Map.Entry<String, Integer> e : categoryCount.entrySet()) { n += e.getValue(); } System.out.println(n); HashMap<String, ArrayList<Pair>> sortedCategoryAttributes = new HashMap<>(); for (String category : categories) { //System.out.println(category); //System.out.println("-----------"); ArrayList<Pair> attributesRank = new ArrayList<Pair>(); Integer c = categoryCount.get(category); if (c == null || c == 0) { continue; } HashMap<String, Integer> thisCategoryAttributeCount = categoryAttributeCount.get(category); for (Map.Entry<String, Integer> e : thisCategoryAttributeCount.entrySet()) { attributesRank.add(new Pair(e.getKey(), 1.0 * e.getValue() / c)); } Collections.sort(attributesRank); for (Pair p : attributesRank) { //System.out.println("A:" + p.getS() + "\t" + p.getP()); } //System.out.println("==============================="); sortedCategoryAttributes.put(category, attributesRank); } for (String attribute : attributes) { //System.out.println(attribute); //System.out.println("-----------"); ArrayList<Pair> categoriesRank = new ArrayList<>(); Integer a = attributeCount.get(attribute); if (a == null || a == 0) { continue; } HashMap<String, Integer> thisAttributeCategoryCount = attributeCategoryCount.get(attribute); for (Map.Entry<String, Integer> e : thisAttributeCategoryCount.entrySet()) { categoriesRank.add(new Pair(e.getKey(), 1.0 * e.getValue() / a)); } Collections.sort(categoriesRank); for (Pair p : categoriesRank) { //System.out.println("C:" + p.getS() + "\t" + p.getP()); } //System.out.println("==============================="); } HashMap<Integer, Integer> histogram = new HashMap<>(); histogram.put(0, 0); histogram.put(1, 0); histogram.put(2, 0); histogram.put(Integer.MAX_VALUE, 0); int nTest = 0; if (args != null && args.length > 0) { path = args[0]; if (!path.endsWith("/")) { path = path + "/"; } } for (File f : new File(path).listFiles()) { if (f.isFile() && f.getName().endsWith(".csv")) { String category = f.getName().replaceFirst("\\.csv", ""); System.out.println("Category: " + category); ArrayList<HashSet<String>> entities = extractEntities(f, 2); for (HashSet<String> attributesOfThisEntity : entities) { nTest++; ArrayList<String> rankedCategories = 
rankedCategories(attributesOfThisEntity); boolean found = false; for (int i = 0; i < rankedCategories.size() && !found; i++) { if (rankedCategories.get(i).equals(category)) { Integer count = histogram.get(i); if (count == null) { histogram.put(i, 1); } else { histogram.put(i, count + 1); } found = true; } } if (!found) { histogram.put(Integer.MAX_VALUE, histogram.get(Integer.MAX_VALUE) + 1); } } System.out.println("Tested entities: " + nTest); System.out.println("1: " + histogram.get(0)); System.out.println("2: " + histogram.get(1)); System.out.println("3: " + histogram.get(2)); System.out.println("+3: " + (nTest - histogram.get(2) - histogram.get(1) - histogram.get(0) - histogram.get(Integer.MAX_VALUE))); System.out.println("NF: " + histogram.get(Integer.MAX_VALUE)); } } }
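The histogram here is an Integer-keyed HashMap where get can return null for a rank that has not been seen yet, while the pre-populated sentinel key Integer.MAX_VALUE counts entities whose category was never found. A stripped-down sketch of that null-checked histogram update, using made-up rank data:

import java.util.HashMap;
import java.util.Map;

public class RankHistogram {
    public static void main(String[] args) {
        Map<Integer, Integer> histogram = new HashMap<>();
        histogram.put(0, 0);
        histogram.put(1, 0);
        histogram.put(2, 0);
        histogram.put(Integer.MAX_VALUE, 0);           // sentinel bucket for "not found"

        int[] observedRanks = { 0, 0, 3, 1, -1 };      // sample ranks, -1 meaning "not found"
        for (int rank : observedRanks) {
            if (rank < 0) {
                histogram.put(Integer.MAX_VALUE, histogram.get(Integer.MAX_VALUE) + 1);
                continue;
            }
            Integer count = histogram.get(rank);       // null for ranks never seen before
            if (count == null) {
                histogram.put(rank, 1);
            } else {
                histogram.put(rank, count + 1);
            }
        }
        System.out.println("1: " + histogram.get(0));
        System.out.println("2: " + histogram.get(1));
        System.out.println("3: " + histogram.get(2));
        System.out.println("NF: " + histogram.get(Integer.MAX_VALUE));
    }
}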
From source file:at.tuwien.ifs.somtoolbox.apps.VisualisationImageSaver.java
public static void main(String[] args) { JSAPResult res = OptionFactory.parseResults(args, OPTIONS); String uFile = res.getString("unitDescriptionFile"); String wFile = res.getString("weightVectorFile"); String dwmFile = res.getString("dataWinnerMappingFile"); String cFile = res.getString("classInformationFile"); String vFile = res.getString("inputVectorFile"); String tFile = res.getString("templateVectorFile"); String ftype = res.getString("filetype"); boolean unitGrid = res.getBoolean("unitGrid"); String basename = res.getString("basename"); if (basename == null) { basename = FileUtils.extractSOMLibInputPrefix(uFile); }/*from w w w . j av a2s . com*/ basename = new File(basename).getAbsolutePath(); int unitW = res.getInt("width"); int unitH = res.getInt("height", unitW); String[] vizs = res.getStringArray("vis"); GrowingSOM gsom = null; CommonSOMViewerStateData state = CommonSOMViewerStateData.getInstance(); try { SOMLibFormatInputReader inputReader = new SOMLibFormatInputReader(wFile, uFile, null); gsom = new GrowingSOM(inputReader); SharedSOMVisualisationData d = new SharedSOMVisualisationData(cFile, null, null, dwmFile, vFile, tFile, null); d.readAvailableData(); state.inputDataObjects = d; gsom.setSharedInputObjects(d); Visualizations.initVisualizations(d, inputReader, 0, Palettes.getDefaultPalette(), Palettes.getAvailablePalettes()); } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); System.exit(1); } catch (SOMLibFileFormatException e) { // TODO Auto-generated catch block e.printStackTrace(); System.exit(1); } if (ArrayUtils.isEmpty(vizs)) { System.out.println("No specific visualisation specified - saving all available visualisations."); vizs = Visualizations.getReadyVisualizationNames(); System.out.println("Found " + vizs.length + ": " + Arrays.toString(vizs)); } for (String viz : vizs) { BackgroundImageVisualizerInstance v = Visualizations.getVisualizationByName(viz); if (v == null) { System.out.println("Visualization '" + viz + "' not found!"); continue; } BackgroundImageVisualizer i = v.getVis(); GrowingLayer layer = gsom.getLayer(); try { int height = unitH * layer.getYSize(); int width = unitW * layer.getXSize(); HashMap<String, BufferedImage> visualizationFlavours = i.getVisualizationFlavours(v.getVariant(), gsom, width, height); ArrayList<String> keys = new ArrayList<String>(visualizationFlavours.keySet()); Collections.sort(keys); // if the visualisation has more than 5 flavours, we create a sub-dir for it String subDirName = ""; String oldBasename = basename; // save original base name for later if (keys.size() > 5) { String parentDir = new File(basename).getParentFile().getPath(); // get the parent path String filePrefix = basename.substring(parentDir.length()); // end the file name prefix subDirName = parentDir + File.separator + filePrefix + "_" + viz + File.separator; // compose a new // subdir name new File(subDirName).mkdir(); // create the dir basename = subDirName + filePrefix; // and extend the base name by the subdir } for (String key : keys) { File out = new File(basename + "_" + viz + key + "." 
+ ftype); System.out.println("Generating visualisation '" + viz + "' as '" + out.getPath() + "'."); BufferedImage image = visualizationFlavours.get(key); if (unitGrid) { VisualisationUtils.drawUnitGrid(image, gsom, width, height); } ImageIO.write(image, ftype, out); } basename = oldBasename; // reset base name } catch (SOMToolboxException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } System.exit(0); }
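The visualisation flavours come back as a HashMap, and the code copies the keySet into a list, sorts it, and calls get for each key so the images are written in a deterministic order. A minimal sketch of that sorted-key traversal, with plain strings standing in for the BufferedImage values:

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

public class SortedFlavours {
    public static void main(String[] args) {
        // Stand-in for the BufferedImage flavours returned by the visualizer.
        Map<String, String> visualizationFlavours = new HashMap<>();
        visualizationFlavours.put("_flavourB", "imageB");
        visualizationFlavours.put("_flavourA", "imageA");

        ArrayList<String> keys = new ArrayList<>(visualizationFlavours.keySet());
        Collections.sort(keys);                            // deterministic output order
        for (String key : keys) {
            String image = visualizationFlavours.get(key); // fetch each flavour by its key
            System.out.println(key + " -> " + image);
        }
    }
}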
From source file:TwitterClustering.java
public static void main(String[] args) throws FileNotFoundException, IOException { // TODO code application logic here File outFile = new File(args[3]); Scanner s = new Scanner(new File(args[1])).useDelimiter(","); JSONParser parser = new JSONParser(); Set<Cluster> clusterSet = new HashSet<Cluster>(); HashMap<String, Tweet> tweets = new HashMap(); FileWriter fw = new FileWriter(outFile.getAbsoluteFile()); BufferedWriter bw = new BufferedWriter(fw); // init//w w w . j a va2 s . c om try { Object obj = parser.parse(new FileReader(args[2])); JSONArray jsonArray = (JSONArray) obj; for (int i = 0; i < jsonArray.size(); i++) { Tweet twt = new Tweet(); JSONObject jObj = (JSONObject) jsonArray.get(i); String text = jObj.get("text").toString(); long sum = 0; for (int y = 0; y < text.toCharArray().length; y++) { sum += (int) text.toCharArray()[y]; } String[] token = text.split(" "); String tID = jObj.get("id").toString(); Set<String> mySet = new HashSet<String>(Arrays.asList(token)); twt.setAttributeValue(sum); twt.setText(mySet); twt.setTweetID(tID); tweets.put(tID, twt); } // preparing initial clusters int i = 0; while (s.hasNext()) { String id = s.next();// id Tweet t = tweets.get(id.trim()); clusterSet.add(new Cluster(i + 1, t, new LinkedList())); i++; } Iterator it = tweets.entrySet().iterator(); for (int l = 0; l < 2; l++) { // limit to 25 iterations while (it.hasNext()) { Map.Entry me = (Map.Entry) it.next(); // calculate distance to each centroid Tweet p = (Tweet) me.getValue(); HashMap<Cluster, Float> distMap = new HashMap(); for (Cluster clust : clusterSet) { distMap.put(clust, jaccardDistance(p.getText(), clust.getCentroid().getText())); } HashMap<Cluster, Float> sorted = (HashMap<Cluster, Float>) sortByValue(distMap); sorted.keySet().iterator().next().getMembers().add(p); } // calculate new centroid and update Clusterset for (Cluster clust : clusterSet) { TreeMap<String, Long> tDistMap = new TreeMap(); Tweet newCentroid = null; Long avgSumDist = new Long(0); for (int j = 0; j < clust.getMembers().size(); j++) { avgSumDist += clust.getMembers().get(j).getAttributeValue(); tDistMap.put(clust.getMembers().get(j).getTweetID(), clust.getMembers().get(j).getAttributeValue()); } if (clust.getMembers().size() != 0) { avgSumDist /= (clust.getMembers().size()); } ArrayList<Long> listValues = new ArrayList<Long>(tDistMap.values()); if (tDistMap.containsValue(findClosestNumber(listValues, avgSumDist))) { // found closest newCentroid = tweets .get(getKeyByValue(tDistMap, findClosestNumber(listValues, avgSumDist))); clust.setCentroid(newCentroid); } } } // create an iterator Iterator iterator = clusterSet.iterator(); // check values while (iterator.hasNext()) { Cluster c = (Cluster) iterator.next(); bw.write(c.getId() + "\t"); System.out.print(c.getId() + "\t"); for (Tweet t : c.getMembers()) { bw.write(t.getTweetID() + ", "); System.out.print(t.getTweetID() + ","); } bw.write("\n"); System.out.println(""); } System.out.println(""); System.out.println("SSE " + sumSquaredErrror(clusterSet)); } catch (Exception e) { e.printStackTrace(); } finally { bw.close(); fw.close(); } }
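The clustering code builds a HashMap from tweet id to Tweet and later resolves seed ids with tweets.get(id.trim()); because get returns null for an unknown or untrimmed key, a missing id would surface later as a NullPointerException. A small sketch of that id-to-object lookup, with a hypothetical Tweet stand-in:

import java.util.HashMap;
import java.util.Map;

public class TweetLookup {
    static class Tweet {
        final String id;
        Tweet(String id) { this.id = id; }
    }

    public static void main(String[] args) {
        Map<String, Tweet> tweets = new HashMap<>();
        tweets.put("42", new Tweet("42"));

        String token = " 42 ";                 // raw, comma-delimited token from the seed list
        Tweet t = tweets.get(token.trim());    // keys must match exactly, hence the trim()
        if (t == null) {
            System.out.println("no tweet with id " + token.trim());
        } else {
            System.out.println("seed tweet: " + t.id);
        }
    }
}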
From source file:enrichment.Disambiguate.java
/**prerequisites: * cd silk_2.5.3/*_links//from w w w .jav a 2 s.co m * cat *.nt|sort -t' ' -k3 > $filename * * @param args $filename * @throws IOException * @throws URISyntaxException */ public static void main(String[] args) { File file = new File(args[0]); if (file.isDirectory()) { args = file.list(new OnlyExtFilenameFilter("nt")); } BufferedReader in; for (int q = 0; q < args.length; q++) { String filename = null; if (file.isDirectory()) { filename = file.getPath() + File.separator + args[q]; } else { filename = args[q]; } try { FileWriter output = new FileWriter(filename + "_disambiguated.nt"); String prefix = "@prefix rdrel: <http://rdvocab.info/RDARelationshipsWEMI/> .\n" + "@prefix dbpedia: <http://de.dbpedia.org/resource/> .\n" + "@prefix frbr: <http://purl.org/vocab/frbr/core#> .\n" + "@prefix lobid: <http://lobid.org/resource/> .\n" + "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n" + "@prefix foaf: <http://xmlns.com/foaf/0.1/> .\n" + "@prefix mo: <http://purl.org/ontology/mo/> .\n" + "@prefix wikipedia: <https://de.wikipedia.org/wiki/> ."; output.append(prefix + "\n\n"); in = new BufferedReader(new InputStreamReader(new FileInputStream(filename))); HashMap<String, HashMap<String, ArrayList<String>>> hm = new HashMap<String, HashMap<String, ArrayList<String>>>(); String s; HashMap<String, ArrayList<String>> hmLobid = new HashMap<String, ArrayList<String>>(); Stack<String> old_object = new Stack<String>(); while ((s = in.readLine()) != null) { String[] triples = s.split(" "); String object = triples[2].substring(1, triples[2].length() - 1); if (old_object.size() > 0 && !old_object.firstElement().equals(object)) { hmLobid = new HashMap<String, ArrayList<String>>(); old_object = new Stack<String>(); } old_object.push(object); String subject = triples[0].substring(1, triples[0].length() - 1); System.out.print("\nSubject=" + object); System.out.print("\ntriples[2]=" + triples[2]); hmLobid.put(subject, getAllCreators(new URI(subject))); hm.put(object, hmLobid); } // get all dbpedia resources for (String key_one : hm.keySet()) { System.out.print("\n==============\n==== " + key_one + "\n==============="); int resources_cnt = hm.get(key_one).keySet().size(); ArrayList<String>[] creators = new ArrayList[resources_cnt]; HashMap<String, Integer> creators_backed = new HashMap<String, Integer>(); int x = 0; // get all lobid_resources subsumed under the dbpedia resource for (String subject_uri : hm.get(key_one).keySet()) { creators[x] = new ArrayList<String>(); System.out.print("\n subject_uri=" + subject_uri); Iterator<String> ite = hm.get(key_one).get(subject_uri).iterator(); int y = 0; // get all creators of the lobid resource while (ite.hasNext()) { String creator = ite.next(); System.out.print("\n " + creator); if (creators_backed.containsKey(creator)) { y = creators_backed.get(creator); } else { y = creators_backed.size(); creators_backed.put(creator, y); } while (creators[x].size() <= y) { creators[x].add("-"); } creators[x].set(y, creator); y++; } x++; } if (creators_backed.size() == 1) { System.out .println("\n" + "Every resource pointing to " + key_one + " has the same creator!"); for (String key_two : hm.get(key_one).keySet()) { output.append("<" + key_two + "> rdrel:workManifested <" + key_one + "> .\n"); output.append("<" + key_two + "> mo:wikipedia <" + key_one.replaceAll("dbpedia\\.org/resource", "wikipedia\\.org/wiki") + "> .\n"); } } /*else { for (int a = 0; a < creators.length; a++) { System.out.print(creators[a].toString()+","); } }*/ } output.flush(); if 
(output != null) { output.close(); } } catch (Exception e) { System.out.print("Exception while working on " + filename + ": \n"); e.printStackTrace(System.out); } } }
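The disambiguation step nests HashMaps (dbpedia resource to lobid resource to creators) and chains get calls such as hm.get(key_one).get(subject_uri). A minimal sketch of that nested lookup with null guards at each level; the URIs and creator names are placeholders:

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class NestedMapLookup {
    public static void main(String[] args) {
        // dbpedia resource -> (lobid resource -> creators), as in the example above.
        Map<String, Map<String, List<String>>> hm = new HashMap<>();
        Map<String, List<String>> hmLobid = new HashMap<>();
        hmLobid.put("http://lobid.org/resource/X", Arrays.asList("Creator A"));
        hm.put("http://de.dbpedia.org/resource/Y", hmLobid);

        String dbpedia = "http://de.dbpedia.org/resource/Y";
        String lobid = "http://lobid.org/resource/X";

        Map<String, List<String>> inner = hm.get(dbpedia);   // null if the outer key is absent
        if (inner != null) {
            List<String> creators = inner.get(lobid);        // null if the inner key is absent
            System.out.println(dbpedia + " / " + lobid + " -> " + creators);
        }
    }
}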
From source file:licenseUtil.LicenseUtil.java
public static void main(String[] args) throws IOException, IncompleteLicenseObjectException { if (args.length == 0) { logger.error("Missing parameters. Use --help to get a list of the possible options."); } else if (args[0].equals("--addPomToTsv")) { if (args.length < 4) logger.error(/*from w w w . j av a 2 s.c o m*/ "Missing arguments for option --addPomToTsv. Please specify <pomFileName> <licenses.stub.tsv> <currentVersion> or use the option --help for further information."); String pomFN = args[1]; String spreadSheetFN = args[2]; String currentVersion = args[3]; MavenProject project = null; try { project = Utils.readPom(new File(pomFN)); } catch (XmlPullParserException e) { logger.error("Could not parse pom file: \"" + pomFN + "\""); } LicensingList licensingList = new LicensingList(); File f = new File(spreadSheetFN); if (f.exists() && !f.isDirectory()) { licensingList.readFromSpreadsheet(spreadSheetFN); } licensingList.addMavenProject(project, currentVersion); licensingList.writeToSpreadsheet(spreadSheetFN); } else if (args[0].equals("--writeLicense3rdParty")) { if (args.length < 4) logger.error( "Missing arguments for option --writeLicense3rdParty. Please provide <licenses.enhanced.tsv> <processModule> <currentVersion> [and <targetDir>] or use the option --help for further information."); String spreadSheetFN = args[1]; String processModule = args[2]; String currentVersion = args[3]; HashMap<String, String> targetDirs = new HashMap<>(); if (args.length > 4) { File targetDir = new File(args[4]); logger.info("scan pom files in direct subdirectories of \"" + targetDir.getPath() + "\" to obtain target locations for 3rd party license files..."); File[] subdirs = targetDir.listFiles((FileFilter) DirectoryFileFilter.DIRECTORY); for (File subdir : subdirs) { File pomFile = new File(subdir.getPath() + File.separator + POM_FN); if (!pomFile.exists()) continue; MavenProject mavenProject; try { mavenProject = Utils.readPom(pomFile); } catch (Exception e) { logger.warn("Could not read from pom file: \"" + pomFile.getPath() + "\" because of " + e.getMessage()); continue; } targetDirs.put(mavenProject.getModel().getArtifactId(), subdir.getAbsolutePath()); } } LicensingList licensingList = new LicensingList(); licensingList.readFromSpreadsheet(spreadSheetFN); if (processModule.toUpperCase().equals("ALL")) { for (String module : licensingList.getNonFixedHeaders()) { try { writeLicense3rdPartyFile(module, licensingList, currentVersion, targetDirs.get(module)); } catch (NoLicenseTemplateSetException e) { logger.error("Could not write file for module \"" + module + "\". There is no template specified for \"" + e.getLicensingObject() + "\". Please add an existing template filename to the column \"" + LicensingObject.ColumnHeader.LICENSE_TEMPLATE.value() + "\" of \"" + spreadSheetFN + "\"."); } } } else { try { writeLicense3rdPartyFile(processModule, licensingList, currentVersion, targetDirs.get(processModule)); } catch (NoLicenseTemplateSetException e) { logger.error("Could not write file for module \"" + processModule + "\". There is no template specified for \"" + e.getLicensingObject() + "\". 
Please add an existing template filename to the column \"" + LicensingObject.ColumnHeader.LICENSE_TEMPLATE.value() + "\" of \"" + spreadSheetFN + "\"."); } } } else if (args[0].equals("--buildEffectivePom")) { Utils.writeEffectivePom(new File(args[1]), (new File(EFFECTIVE_POM_FN)).getAbsolutePath()); } else if (args[0].equals("--updateTsvWithProjectsInFolder")) { if (args.length < 4) logger.error( "Missing arguments for option --processProjectsInFolder. Please provide <superDirectory> <licenses.stub.tsv> and <currentVersion> or use the option --help for further information."); File directory = new File(args[1]); String spreadSheetFN = args[2]; String currentVersion = args[3]; LicensingList licensingList = new LicensingList(); File f = new File(spreadSheetFN); if (f.exists() && !f.isDirectory()) { licensingList.readFromSpreadsheet(spreadSheetFN); } licensingList.addAll(processProjectsInFolder(directory, currentVersion, false)); licensingList.writeToSpreadsheet(spreadSheetFN); } else if (args[0].equals("--purgeTsv")) { if (args.length < 3) logger.error( "Missing arguments for option --purgeTsv. Please provide <spreadSheetIN.tsv>, <spreadSheetOUT.tsv> and <currentVersion> or use the option --help for further information."); String spreadSheetIN = args[1]; String spreadSheetOUT = args[2]; String currentVersion = args[3]; LicensingList licensingList = new LicensingList(); licensingList.readFromSpreadsheet(spreadSheetIN); licensingList.purge(currentVersion); licensingList.writeToSpreadsheet(spreadSheetOUT); } else if (args[0].equals("--help")) { InputStream in = LicenseUtil.class.getClassLoader().getResourceAsStream(README_PATH); BufferedReader reader = new BufferedReader(new InputStreamReader(in)); String line; while ((line = reader.readLine()) != null) { System.out.println(line); } } else { logger.error("Unknown parameter: " + args[0] + ". Use --help to get a list of the possible options."); } }
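targetDirs.get(module) in this branch may return null when no pom was found for a module, and that null is deliberately passed on so the callee falls back to a default output location. A small sketch of that pattern; writeThirdPartyFile is a hypothetical stand-in for writeLicense3rdPartyFile:

import java.util.HashMap;
import java.util.Map;

public class TargetDirLookup {
    // Hypothetical stand-in: a null targetDir means "write into the default location".
    static void writeThirdPartyFile(String module, String targetDir) {
        String dir = (targetDir != null) ? targetDir : ".";
        System.out.println("writing third-party license file for " + module + " into " + dir);
    }

    public static void main(String[] args) {
        Map<String, String> targetDirs = new HashMap<>();
        targetDirs.put("core", "/work/project/core");   // only "core" has a scanned pom

        // For "cli" the map has no entry, so get() returns null and the callee
        // falls back to its default location instead of failing.
        for (String module : new String[] { "core", "cli" }) {
            writeThirdPartyFile(module, targetDirs.get(module));
        }
    }
}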
From source file:eu.fbk.dkm.sectionextractor.PageClassMerger.java
public static void main(String args[]) throws IOException {
    CommandLineWithLogger commandLineWithLogger = new CommandLineWithLogger();
    commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg()
            .withDescription("WikiData ID file").isRequired().withLongOpt("wikidata-id").create("i"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg()
            .withDescription("Airpedia Person file").isRequired().withLongOpt("airpedia").create("a"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("Output file")
            .isRequired().withLongOpt("output").create("o"));

    CommandLine commandLine = null;
    try {
        commandLine = commandLineWithLogger.getCommandLine(args);
        PropertyConfigurator.configure(commandLineWithLogger.getLoggerProps());
    } catch (Exception e) {
        System.exit(1);
    }

    String wikiIDFileName = commandLine.getOptionValue("wikidata-id");
    String airpediaFileName = commandLine.getOptionValue("airpedia");
    String outputFileName = commandLine.getOptionValue("output");

    HashMap<Integer, String> wikiIDs = new HashMap<>();
    HashSet<Integer> airpediaClasses = new HashSet<>();

    List<String> strings;

    logger.info("Loading file " + wikiIDFileName);
    strings = Files.readLines(new File(wikiIDFileName), Charsets.UTF_8);
    for (String line : strings) {
        line = line.trim();
        if (line.length() == 0) {
            continue;
        }
        if (line.startsWith("#")) {
            continue;
        }
        String[] parts = line.split("\t");
        if (parts.length < 2) {
            continue;
        }
        int id;
        try {
            id = Integer.parseInt(parts[0]);
        } catch (Exception e) {
            continue;
        }
        wikiIDs.put(id, parts[1]);
    }

    logger.info("Loading file " + airpediaFileName);
    strings = Files.readLines(new File(airpediaFileName), Charsets.UTF_8);
    for (String line : strings) {
        line = line.trim();
        if (line.length() == 0) {
            continue;
        }
        if (line.startsWith("#")) {
            continue;
        }
        String[] parts = line.split("\t");
        if (parts.length < 2) {
            continue;
        }
        int id;
        try {
            id = Integer.parseInt(parts[0]);
        } catch (Exception e) {
            continue;
        }
        airpediaClasses.add(id);
    }

    logger.info("Saving information");
    BufferedWriter writer = new BufferedWriter(new FileWriter(outputFileName));
    for (int i : wikiIDs.keySet()) {
        if (!airpediaClasses.contains(i)) {
            continue;
        }
        writer.append(wikiIDs.get(i)).append("\n");
    }
    writer.close();
}
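The final loop walks wikiIDs.keySet(), skips ids that are not in the airpediaClasses set, and calls wikiIDs.get(i) only for the survivors. A self-contained sketch of that filter-then-get traversal, with invented ids and labels:

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class FilteredMapDump {
    public static void main(String[] args) {
        Map<Integer, String> wikiIDs = new HashMap<>();
        wikiIDs.put(5, "Person");
        wikiIDs.put(7, "Place");

        Set<Integer> airpediaClasses = new HashSet<>();
        airpediaClasses.add(5);                        // only id 5 passes the filter

        StringBuilder out = new StringBuilder();
        for (int i : wikiIDs.keySet()) {
            if (!airpediaClasses.contains(i)) {
                continue;                              // skip ids missing from the second file
            }
            out.append(wikiIDs.get(i)).append("\n");   // value lookup for each surviving key
        }
        System.out.print(out);
    }
}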
From source file:edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques.java
/** * @param args//from w w w . ja va 2 s .com * @throws ParseException */ @SuppressWarnings({ "deprecation" }) public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Options options = new Options(); Option forceOption = new Option("f", "force", false, "force the computation of the relationship " + "even if files already exist"); forceOption.setRequired(false); options.addOption(forceOption); Option g1Option = new Option("g1", "first-group", true, "set first group of datasets"); g1Option.setRequired(true); g1Option.setArgName("FIRST GROUP"); g1Option.setArgs(Option.UNLIMITED_VALUES); options.addOption(g1Option); Option g2Option = new Option("g2", "second-group", true, "set second group of datasets"); g2Option.setRequired(false); g2Option.setArgName("SECOND GROUP"); g2Option.setArgs(Option.UNLIMITED_VALUES); options.addOption(g2Option); Option machineOption = new Option("m", "machine", true, "machine identifier"); machineOption.setRequired(true); machineOption.setArgName("MACHINE"); machineOption.setArgs(1); options.addOption(machineOption); Option nodesOption = new Option("n", "nodes", true, "number of nodes"); nodesOption.setRequired(true); nodesOption.setArgName("NODES"); nodesOption.setArgs(1); options.addOption(nodesOption); Option s3Option = new Option("s3", "s3", false, "data on Amazon S3"); s3Option.setRequired(false); options.addOption(s3Option); Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true, "aws access key id; " + "this is required if the execution is on aws"); awsAccessKeyIdOption.setRequired(false); awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID"); awsAccessKeyIdOption.setArgs(1); options.addOption(awsAccessKeyIdOption); Option awsSecretAccessKeyOption = new Option("aws_key", "aws-id", true, "aws secrect access key; " + "this is required if the execution is on aws"); awsSecretAccessKeyOption.setRequired(false); awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY"); awsSecretAccessKeyOption.setArgs(1); options.addOption(awsSecretAccessKeyOption); Option bucketOption = new Option("b", "s3-bucket", true, "bucket on s3; " + "this is required if the execution is on aws"); bucketOption.setRequired(false); bucketOption.setArgName("S3-BUCKET"); bucketOption.setArgs(1); options.addOption(bucketOption); Option helpOption = new Option("h", "help", false, "display this message"); helpOption.setRequired(false); options.addOption(helpOption); HelpFormatter formatter = new HelpFormatter(); CommandLineParser parser = new PosixParser(); CommandLine cmd = null; try { cmd = parser.parse(options, args); } catch (ParseException e) { formatter.printHelp( "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques", options, true); System.exit(0); } if (cmd.hasOption("h")) { formatter.printHelp( "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques", options, true); System.exit(0); } boolean s3 = cmd.hasOption("s3"); String s3bucket = ""; String awsAccessKeyId = ""; String awsSecretAccessKey = ""; if (s3) { if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) { System.out.println( "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS."); formatter.printHelp( "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques", options, true); System.exit(0); } s3bucket = cmd.getOptionValue("b"); awsAccessKeyId = 
cmd.getOptionValue("aws_id"); awsSecretAccessKey = cmd.getOptionValue("aws_key"); } boolean snappyCompression = false; boolean bzip2Compression = false; String machine = cmd.getOptionValue("m"); int nbNodes = Integer.parseInt(cmd.getOptionValue("n")); Configuration s3conf = new Configuration(); if (s3) { s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId); s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey); s3conf.set("bucket", s3bucket); } Path path = null; FileSystem fs = FileSystem.get(new Configuration()); ArrayList<String> shortDataset = new ArrayList<String>(); ArrayList<String> firstGroup = new ArrayList<String>(); ArrayList<String> secondGroup = new ArrayList<String>(); HashMap<String, String> datasetAgg = new HashMap<String, String>(); boolean removeExistingFiles = cmd.hasOption("f"); String[] firstGroupCmd = cmd.getOptionValues("g1"); String[] secondGroupCmd = cmd.hasOption("g2") ? cmd.getOptionValues("g2") : new String[0]; addDatasets(firstGroupCmd, firstGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket); addDatasets(secondGroupCmd, secondGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket); if (shortDataset.size() == 0) { System.out.println("No datasets to process."); System.exit(0); } if (firstGroup.isEmpty()) { System.out.println("First group of datasets (G1) is empty. " + "Doing G1 = G2."); firstGroup.addAll(secondGroup); } if (secondGroup.isEmpty()) { System.out.println("Second group of datasets (G2) is empty. " + "Doing G2 = G1."); secondGroup.addAll(firstGroup); } // getting dataset ids String datasetNames = ""; String datasetIds = ""; HashMap<String, String> datasetId = new HashMap<String, String>(); Iterator<String> it = shortDataset.iterator(); while (it.hasNext()) { datasetId.put(it.next(), null); } if (s3) { path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir); fs = FileSystem.get(path.toUri(), s3conf); } else { path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir); } BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path))); String line = br.readLine(); while (line != null) { String[] dt = line.split("\t"); if (datasetId.containsKey(dt[0])) { datasetId.put(dt[0], dt[1]); datasetNames += dt[0] + ","; datasetIds += dt[1] + ","; } line = br.readLine(); } br.close(); if (s3) fs.close(); datasetNames = datasetNames.substring(0, datasetNames.length() - 1); datasetIds = datasetIds.substring(0, datasetIds.length() - 1); it = shortDataset.iterator(); while (it.hasNext()) { String dataset = it.next(); if (datasetId.get(dataset) == null) { System.out.println("No dataset id for " + dataset); System.exit(0); } } String firstGroupStr = ""; String secondGroupStr = ""; for (String dataset : firstGroup) { firstGroupStr += datasetId.get(dataset) + ","; } for (String dataset : secondGroup) { secondGroupStr += datasetId.get(dataset) + ","; } firstGroupStr = firstGroupStr.substring(0, firstGroupStr.length() - 1); secondGroupStr = secondGroupStr.substring(0, secondGroupStr.length() - 1); FrameworkUtils.createDir(s3bucket + FrameworkUtils.correlationTechniquesDir, s3conf, s3); String dataAttributesInputDirs = ""; String noRelationship = ""; HashSet<String> dirs = new HashSet<String>(); String dataset1; String dataset2; String datasetId1; String datasetId2; for (int i = 0; i < firstGroup.size(); i++) { for (int j = 0; j < secondGroup.size(); j++) { if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer .parseInt(datasetId.get(secondGroup.get(j)))) { dataset1 = firstGroup.get(i); dataset2 = 
secondGroup.get(j); } else { dataset1 = secondGroup.get(j); dataset2 = firstGroup.get(i); } datasetId1 = datasetId.get(dataset1); datasetId2 = datasetId.get(dataset2); if (dataset1.equals(dataset2)) continue; String correlationOutputFileName = s3bucket + FrameworkUtils.correlationTechniquesDir + "/" + dataset1 + "-" + dataset2 + "/"; if (removeExistingFiles) { FrameworkUtils.removeFile(correlationOutputFileName, s3conf, s3); } if (!FrameworkUtils.fileExists(correlationOutputFileName, s3conf, s3)) { dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset1); dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset2); } else { noRelationship += datasetId1 + "-" + datasetId2 + ","; } } } if (dirs.isEmpty()) { System.out.println("All the relationships were already computed."); System.out.println("Use -f in the beginning of the command line to force the computation."); System.exit(0); } for (String dir : dirs) { dataAttributesInputDirs += dir + ","; } Configuration conf = new Configuration(); Machine machineConf = new Machine(machine, nbNodes); String jobName = "correlation"; String correlationOutputDir = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/"; FrameworkUtils.removeFile(correlationOutputDir, s3conf, s3); for (int i = 0; i < shortDataset.size(); i++) { conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg", datasetAgg.get(shortDataset.get(i))); } for (int i = 0; i < shortDataset.size(); i++) { conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg-size", Integer.toString(datasetAgg.get(shortDataset.get(i)).split(",").length)); } conf.set("dataset-keys", datasetIds); conf.set("dataset-names", datasetNames); conf.set("first-group", firstGroupStr); conf.set("second-group", secondGroupStr); conf.set("main-dataset-id", datasetId.get(shortDataset.get(0))); if (noRelationship.length() > 0) { conf.set("no-relationship", noRelationship.substring(0, noRelationship.length() - 1)); } conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks())); conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks())); conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1"); conf.set("mapreduce.reduce.shuffle.parallelcopies", "20"); conf.set("mapreduce.input.fileinputformat.split.minsize", "0"); conf.set("mapreduce.task.io.sort.mb", "200"); conf.set("mapreduce.task.io.sort.factor", "100"); conf.set("mapreduce.task.timeout", "2400000"); if (s3) { machineConf.setMachineConfiguration(conf); conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId); conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey); conf.set("bucket", s3bucket); } if (snappyCompression) { conf.set("mapreduce.map.output.compress", "true"); conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec"); //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec"); } if (bzip2Compression) { conf.set("mapreduce.map.output.compress", "true"); conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); } Job job = new Job(conf); job.setJobName(jobName); job.setMapOutputKeyClass(PairAttributeWritable.class); job.setMapOutputValueClass(SpatioTemporalValueWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(CorrelationTechniquesMapper.class); 
job.setReducerClass(CorrelationTechniquesReducer.class); job.setNumReduceTasks(machineConf.getNumberReduces()); job.setInputFormatClass(SequenceFileInputFormat.class); LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); FileInputFormat.setInputDirRecursive(job, true); FileInputFormat.setInputPaths(job, dataAttributesInputDirs.substring(0, dataAttributesInputDirs.length() - 1)); FileOutputFormat.setOutputPath(job, new Path(correlationOutputDir)); job.setJarByClass(CorrelationTechniques.class); long start = System.currentTimeMillis(); job.submit(); job.waitForCompletion(true); System.out.println(jobName + "\t" + (System.currentTimeMillis() - start)); // moving files to right place for (int i = 0; i < firstGroup.size(); i++) { for (int j = 0; j < secondGroup.size(); j++) { if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer .parseInt(datasetId.get(secondGroup.get(j)))) { dataset1 = firstGroup.get(i); dataset2 = secondGroup.get(j); } else { dataset1 = secondGroup.get(j); dataset2 = firstGroup.get(i); } if (dataset1.equals(dataset2)) continue; String from = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/" + dataset1 + "-" + dataset2 + "/"; String to = s3bucket + FrameworkUtils.correlationTechniquesDir + "/" + dataset1 + "-" + dataset2 + "/"; FrameworkUtils.renameFile(from, to, s3conf, s3); } } }
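The datasetId map here is first populated with null values for every requested dataset, then filled in from the index file, so a later datasetId.get(dataset) == null check detects datasets whose id was never resolved. A reduced sketch of that register-then-resolve pattern, using invented index lines:

import java.util.HashMap;
import java.util.Map;

public class DatasetIdIndex {
    public static void main(String[] args) {
        Map<String, String> datasetId = new HashMap<>();
        // Register the datasets we care about with a null id ("not resolved yet").
        datasetId.put("taxi", null);
        datasetId.put("weather", null);

        // Simulated lines of the datasets index file: "<name>\t<id>".
        String[] indexLines = { "taxi\t3", "crime\t9" };
        for (String line : indexLines) {
            String[] dt = line.split("\t");
            if (datasetId.containsKey(dt[0])) {
                datasetId.put(dt[0], dt[1]);           // resolve the id for a requested dataset
            }
        }

        // get() returning null now means "requested but never found in the index".
        for (String dataset : datasetId.keySet()) {
            if (datasetId.get(dataset) == null) {
                System.out.println("No dataset id for " + dataset);
            } else {
                System.out.println(dataset + " -> " + datasetId.get(dataset));
            }
        }
    }
}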