// Java tutorial
package de.iisys.schub.processMining;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;

import org.json.JSONObject;

import de.iisys.schub.processMining.activities.ActivityController;
import de.iisys.schub.processMining.activities.model.ProcessCycle;
import de.iisys.schub.processMining.activities.network.LiferayConnector;
import de.iisys.schub.processMining.activities.network.ShindigRESTConnector;
import de.iisys.schub.processMining.similarity.AlgoController;

/**
 * Main class to start the cosine similarity calculation
 * of a main document (.docx) with chapters and other documents.
 *
 * <p>NOTE: the similarity pipeline call in {@link #main(String[])} is currently
 * disabled (kept as a comment); as written, {@code main} only parses its
 * arguments and runs the Liferay connection smoke test.
 *
 * <p>This application was written
 * for the project "Social Collaboration Hub" (www.sc-hub.de)
 * at the Institute of Information Systems (www.iisys.de),
 * which is part of Hof University, Germany.
 *
 * <p>LICENSE:
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * <p>http://www.apache.org/licenses/LICENSE-2.0
 *
 * <p>Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @author Christian Ochsenkühn
 */
public class MiningMain {

    /** Pattern for the cycle start/end dates; {@code HH} is the 24-hour field (0-23). */
    private static final String CYCLE_DATE_PATTERN = "yyyy-MM-dd HH:mm";

    /** Number of filler activities appended to each test cycle. */
    private static final int FILLER_ACTIVITY_COUNT = 24;

    // Generator ids / display names shared by several fixture activities.
    private static final String GEN_WIKIS_ID = "application:liferay-wikis";
    private static final String GEN_WIKIS_NAME = "Liferay Wikis";
    private static final String GEN_BLOGS_ID = "application:liferay-blogs";
    private static final String GEN_BLOGS_NAME = "Liferay Blogs";
    private static final String GEN_BOARDS_ID = "application:liferay-messageboards";
    private static final String GEN_BOARDS_NAME = "Liferay Message Boards";

    // NOTE(review): "Brbel" (and words such as "Ablsung", "fr", "lsst" in the titles
    // below) look like they lost their umlauts to an encoding problem. The literals are
    // deliberately left untouched here because they are runtime data - confirm the
    // intended spelling against the source system before changing them.
    private static final String BAERBEL_NAME = "Brbel Bitte";
    private static final String ANNA_NAME = "Anna Alster";
    private static final String ERIKA_NAME = "Erika Ernst";
    private static final String ZOLTAN_NAME = "Zoltan Zorn";

    /**
     * Main method.
     *
     * @param args
     *     args[0]: Version
     *         0 = Save all cosine similarities between mainDoc and compareDocs to an output file (default)
     *         1 = Save only similarities between similar docs (cosSim &gt; percentile) to an output file
     *     args[1]: path to main document (default: mainDoc.docx)
     *     args[2]: path to folder of documents to compare (default: docs)
     * @throws FileNotFoundException declared for the (currently disabled) document pipeline
     * @throws IOException declared for the (currently disabled) document pipeline
     * @throws NumberFormatException if args[0] is non-empty but not a valid integer
     */
    public static void main(String[] args) throws FileNotFoundException, IOException {
        // For .jar use: the three settings below feed the disabled pipeline further down.
        int version = 0;
        if (args.length > 0 && !args[0].isEmpty()) {
            version = Integer.parseInt(args[0]);
        }

        String mainDocPath = "mainDoc.docx";
        if (args.length > 1 && !args[1].isEmpty()) {
            mainDocPath = args[1];
        }

        String docsPath = "docs";
        if (args.length > 2 && !args[2].isEmpty()) {
            docsPath = args[2];
        }

        // Manual test switches (enable one at a time):
        // test_ProcessMining();
        // mainDocPath = "TextMiningTest_Highlights der IFA 2015.docx";
        // String nuxeoDocId = "0639a687-01e5-49dd-910c-7040111d80a2";
        // version = 1;
        test_LiferayConnection();
        // CamundaConnector.testConnection();
        // ElasticSearchConnector.test();

        // Disabled similarity pipeline:
        // AlgoController algo = new AlgoController();
        // if (version == 0)
        //     algo.pipelineCosineSimilarity(mainDocPath, docsPath);
        // else if (version == 1)
        //     algo.pipelineSimilarDocs(mainDocPath, docsPath);
        // // else if (version == 2)
        // //     algo.pipelineNuxeoSimilarDocs(nuxeoDocId, docsPath);
    }

    /**
     * Smoke test for the Liferay connection: requests one web content template
     * and reports an {@link IOException} on stderr instead of propagating it.
     */
    private static void test_LiferayConnection() {
        LiferayConnector life = new LiferayConnector();

        System.out.println("Connecting to Liferay...");
        try {
            // The two ids are hard-coded test values; their exact meaning is defined by
            // LiferayConnector.getWebContentTemplate - TODO confirm against that class.
            life.getWebContentTemplate("25412", "20181");
        } catch (IOException e) {
            System.err.println("Liferay connection test failed: " + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Builds two hard-coded {@link ProcessCycle} fixtures from hand-written
     * activity entries and runs them through an {@link ActivityController}.
     *
     * <p>Two of the activities of the second cycle are fetched via
     * {@link ShindigRESTConnector#getActivity}; when that returns {@code null}
     * an error line is printed and the activity is skipped.
     */
    private static void test_ProcessMining() {
        DateFormat df = new SimpleDateFormat(CYCLE_DATE_PATTERN);
        List<ProcessCycle> cycles = new ArrayList<ProcessCycle>();

        cycles.add(buildFirstCycle(df));
        cycles.add(buildSecondCycle(df));

        ActivityController controller = new ActivityController(cycles);
        controller.startPipeline();
    }

    /** Builds the first fixture cycle (doc: "Projektvorschlag - Smarte Uhren vernetzen.docx"). */
    private static ProcessCycle buildFirstCycle(DateFormat df) {
        List<JSONObject> activities = new ArrayList<JSONObject>();

        activities.add(newActivity(
                getActor("baerbel", BAERBEL_NAME),
                getGenerator(GEN_WIKIS_ID, GEN_WIKIS_NAME),
                "activityentry:315",
                "Brbel Bitte hat die Wikiseite Huawei Watch kostet so viel wie Apple Watch erstellt.",
                "post",
                "2015-11-13T10:37:41.660Z",
                newDetailedObject("liferay-wiki-page:55604", "liferay-wiki-page", "version 1.0")));

        activities.add(newActivity(
                getActor("anna", ANNA_NAME),
                getGenerator(GEN_BLOGS_ID, GEN_BLOGS_NAME),
                "activityentry:316",
                "Anna Alster hat den Blogeintrag target erstellt.",
                "post",
                "2015-11-13T11:11:44.359Z",
                newObject("liferay-blog-entry:55701", "liferay-blog-entry")));

        activities.add(newActivity(
                getActor("anna", ANNA_NAME),
                getGenerator(GEN_BOARDS_ID, GEN_BOARDS_NAME),
                "activityentry:317",
                "Anna Alster hat den Forenthread target erstellt.",
                "add",
                "2015-11-13T11:36:01.817Z",
                newObject("liferay-message-board-entry:55717", "liferay-message-board-entry")));

        activities.add(newActivity(
                getActor("anna", ANNA_NAME),
                getGenerator("nuxeo", "Nuxeo"),
                "activityentry:335",
                "Dokument erstellt",
                "add",
                "2015-11-19T13:31:15.518Z",
                newDetailedObject("25ede30f-5c5e-457b-9d84-58ef14bf73e5", "Document",
                        "type: File\nname: Vernetzt und mobil ins i")));

        activities.add(newActivity(
                getActor("zoltan", ZOLTAN_NAME),
                getGenerator("application:liferay-journal", "Liferay Journal"),
                "activityentry:319",
                "Zoltan Zorn hat den Webcontent-Artikel ... erstellt.",
                "add",
                "2015-11-13T13:04:14.550Z",
                newObject("liferay-journal-entry:55735", "liferay-journal-entry")));

        // activities which are NOT similar
        addFillerActivities(activities);

        ProcessCycle cycle = new ProcessCycle(
                "a8953f85-d81c-4cdc-b701-9b653d0008e1", "Project Proposal", activities);
        cycle.addUserId("baerbel");
        cycle.addUserId("anna");
        setCyclePeriod(cycle, df, "2015-07-13 12:12", "2015-12-11 13:45");
        return cycle;
    }

    /** Builds the second fixture cycle, including the two activities loaded via Shindig. */
    private static ProcessCycle buildSecondCycle(DateFormat df) {
        List<JSONObject> activities = new ArrayList<JSONObject>();

        activities.add(newActivity(
                getActor("baerbel", BAERBEL_NAME),
                getGenerator(GEN_WIKIS_ID, GEN_WIKIS_NAME),
                "activityentry:333",
                "Brbel Bitte hat die Wikiseite Telekom erwartet Ablsung der klassischen SIM-Karte fr 2016 erstellt.",
                "post",
                "2015-11-18T14:12:00.421Z",
                newObject("liferay-wiki-page:56923", "liferay-wiki-page")));

        activities.add(newActivity(
                getActor("erika", ERIKA_NAME),
                getGenerator(GEN_BOARDS_ID, GEN_BOARDS_NAME),
                "activityentry:337",
                "Erika Ernst hat den Forenthread target erstellt.",
                "add",
                "2015-11-20T10:06:01.624Z",
                newObject("liferay-message-board-entry:57348", "liferay-message-board-entry")));

        activities.add(newActivity(
                getActor("anna", ANNA_NAME),
                getGenerator(GEN_BLOGS_ID, GEN_BLOGS_NAME),
                "activityentry:328",
                "Anna Alster hat den Blogeintrag target erstellt.",
                "post",
                "2015-11-17T15:01:33.031Z",
                newObject("liferay-blog-entry:56353", "liferay-blog-entry")));

        activities.add(newActivity(
                getActor("anna", ANNA_NAME),
                getGenerator(GEN_BLOGS_ID, GEN_BLOGS_NAME),
                "activityentry:329",
                "Anna Alster hat den Blogeintrag target erstellt.",
                "post",
                "2015-11-17T15:07:31.825Z",
                newObject("liferay-blog-entry:56362", "liferay-blog-entry")));

        activities.add(newActivity(
                getActor("zoltan", ZOLTAN_NAME),
                getGenerator(GEN_BOARDS_ID, GEN_BOARDS_NAME),
                "activityentry:330",
                "Zoltan Zorn hat den Forenthread target erstellt.",
                "add",
                "2015-11-17T15:29:52.109Z",
                newObject("liferay-message-board-entry:56372", "liferay-message-board-entry")));

        activities.add(newActivity(
                getActor("erika", ERIKA_NAME),
                getGenerator(GEN_BLOGS_ID, GEN_BLOGS_NAME),
                "activityentry:332",
                "Erika Ernst hat den Blogeintrag target erstellt.",
                "post",
                "2015-11-17T15:36:11.247Z",
                newObject("liferay-blog-entry:56395", "liferay-blog-entry")));

        activities.add(newActivity(
                getActor("baerbel", BAERBEL_NAME),
                getGenerator(GEN_WIKIS_ID, GEN_WIKIS_NAME),
                "activityentry:339",
                "Brbel Bitte hat die Wikiseite Kein anderes Smartphone lsst sich so leicht reparieren wie das Fairphone 2 erstellt.",
                "post",
                "2015-11-20T10:48:26.810Z",
                newObject("liferay-wiki-page:57360", "liferay-wiki-page")));

        // Social message: the only hand-written entry that also carries a "target".
        JSONObject message = newActivity(
                getActor("baerbel", BAERBEL_NAME),
                getGenerator("shindig-socialmessaging", "Social Messenger"),
                "activityentry:345",
                "Fragen zur eSim-Karte: Hallo Zoltan,\ndu kennst ...",
                "send",
                "2015-11-26T13:18:14.700Z",
                newObject("messages:22", "message"));
        message.put("target", newObject("zoltan", "person"));
        activities.add(message);

        // social messages loaded from Shindig
        addRemoteActivity(activities, "anna", "activityentry:346", "a2_m2");
        addRemoteActivity(activities, "erika", "activityentry:347", "a2_m3");

        // activities which are NOT similar
        addFillerActivities(activities);

        ProcessCycle cycle = new ProcessCycle(
                "08257161-2e56-4299-80da-d41b77968beb", "Project Proposal", activities);
        cycle.addUserId("baerbel");
        cycle.addUserId("anna");
        setCyclePeriod(cycle, df, "2015-10-25 12:32", "2015-12-11 13:45");
        return cycle;
    }

    /**
     * Assembles one activity entry.
     *
     * @param id activity id, or {@code null} to create an entry without an "id" key
     */
    private static JSONObject newActivity(JSONObject actor, JSONObject generator, String id,
            String title, String verb, String published, JSONObject object) {
        JSONObject activity = new JSONObject();
        activity.put("actor", actor);
        activity.put("generator", generator);
        if (id != null) {
            activity.put("id", id);
        }
        activity.put("title", title);
        activity.put("verb", verb);
        activity.put("published", published);
        activity.put("object", object);
        return activity;
    }

    /** Creates a minimal object reference holding only "id" and "objectType". */
    private static JSONObject newObject(String id, String objectType) {
        return new JSONObject().put("id", id).put("objectType", objectType);
    }

    /** Creates an object reference with empty "displayName"/"url" and the given "content". */
    private static JSONObject newDetailedObject(String id, String objectType, String content) {
        return new JSONObject()
                .put("id", id)
                .put("displayName", "")
                .put("objectType", objectType)
                .put("url", "")
                .put("content", content);
    }

    /**
     * Appends {@value #FILLER_ACTIVITY_COUNT} id-less blog activities that differ only in
     * the minute of their "published" timestamp (the "NOT similar" fixtures).
     */
    private static void addFillerActivities(List<JSONObject> activities) {
        for (int minute = 0; minute < FILLER_ACTIVITY_COUNT; minute++) {
            // Zero-pad so single-digit minutes yield "11:05:44" rather than "11:5:44".
            String paddedMinute = (minute < 10 ? "0" : "") + minute;
            activities.add(newActivity(
                    getActor("anna", ANNA_NAME),
                    getGenerator(GEN_BLOGS_ID, GEN_BLOGS_NAME),
                    null,
                    "Anna Alster hat den Blogeintrag target erstellt.",
                    "post",
                    "2015-11-13T11:" + paddedMinute + ":44.359Z",
                    newObject("liferay-blog-entry:57215", "liferay-blog-entry")));
        }
    }

    /**
     * Fetches one activity via {@link ShindigRESTConnector#getActivity} and appends it;
     * prints {@code "Error loading <label>"} when the connector returns {@code null}.
     */
    private static void addRemoteActivity(List<JSONObject> activities, String userId,
            String activityId, String label) {
        JSONObject remote = ShindigRESTConnector.getActivity(userId, activityId);
        if (remote != null) {
            activities.add(remote);
        } else {
            System.out.println("Error loading " + label);
        }
    }

    /**
     * Parses {@code start} and {@code end} with {@code df} and sets them on the cycle.
     * A parse failure is reported on stderr; dates not yet set at that point stay unset.
     */
    private static void setCyclePeriod(ProcessCycle cycle, DateFormat df, String start, String end) {
        try {
            cycle.setStartDate(df.parse(start));
            cycle.setEndDate(df.parse(end));
        } catch (ParseException e) {
            System.err.println("Could not parse cycle period '" + start + "' - '" + end + "'");
            e.printStackTrace();
        }
    }

    /** Creates an actor object with the given id and display name and objectType "person". */
    private static JSONObject getActor(String id, String name) {
        JSONObject actor = new JSONObject();
        actor.put("id", id);
        actor.put("displayName", name);
        actor.put("objectType", "person");
        return actor;
    }

    /** Creates a generator object with the given id and display name and objectType "application". */
    private static JSONObject getGenerator(String id, String name) {
        JSONObject gen = new JSONObject();
        gen.put("id", id);
        gen.put("displayName", name);
        gen.put("objectType", "application");
        return gen;
    }
}