// Java tutorial
/* * Copyright 2015 Themistoklis Mavridis <themis.mavridis@issel.ee.auth.gr>. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.thesmartweb.swebrank; import java.net.*; import java.io.*; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; import org.apache.commons.io.FileUtils; /** * class for diffbot article apiu usage * @author Themistoklis Mavridis */ public class Diffbot { /** * url connection */ public HttpURLConnection httpCon; /** * Method to get the words recognized by Diffbot as important in given urls * @param links the urls to analyzes * @param directory the directory to save the output * @param config_path the configuration path to get the diffbot key * @return a list of the words */ public List<String> compute(String[] links, String directory, String config_path) { List<String> wordList = null; try { URL diff_url = null; String stringtosplit = ""; String token = GetToken(config_path); for (String link : links) { if (!(link == null)) { diff_url = new URL( "http://api.diffbot.com/v2/article?token=" + token + "&fields=tags,meta&url=" + link); APIconn apiconn = new APIconn(); String line = apiconn.connect(diff_url); JSONparsing jp = new JSONparsing(); stringtosplit = jp.DiffbotParsing(line); if (!(stringtosplit == null) && (!(stringtosplit.equalsIgnoreCase("")))) { stringtosplit = 
stringtosplit.replaceAll("[\\W&&[^\\s]]", ""); if (!(stringtosplit == null) && (!(stringtosplit.equalsIgnoreCase("")))) { String[] tokenizedTerms = stringtosplit.split("\\W+"); //to get individual terms for (String tokenizedTerm : tokenizedTerms) { if (!(tokenizedTerm == null) && (!(tokenizedTerm.equalsIgnoreCase("")))) { wordList.add(tokenizedTerm); } } } } } } File file_words = new File(directory + "words.txt"); FileUtils.writeLines(file_words, wordList); return wordList; } catch (MalformedURLException ex) { Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex); return wordList; } catch (IOException ex) { Logger.getLogger(Diffbot.class.getName()).log(Level.SEVERE, null, ex); return wordList; } } /** * Method to the token of diffbot * @param config_path the configuration path to get the diffbot key * @return the token in a string */ public String GetToken(String config_path) { Path input_path = Paths.get(config_path); DataManipulation getfiles = new DataManipulation();//class responsible for the extraction of paths Collection<File> inputs_files;//array to include the paths of the txt files inputs_files = getfiles.getinputfiles(input_path.toString(), "txt");//method to retrieve all the path of the input documents List<String> tokenList = new ArrayList<>(); ReadInput ri = new ReadInput(); for (File input : inputs_files) { if (input.getName().contains("diffbottoken")) { tokenList = ri.readLinesConfig(input); } } if (tokenList.size() > 0) { return tokenList.get(0); } else { String output = ""; return output; } } }