gov.nasa.jpl.analytics.util.CommonUtil.java Source code

Java tutorial

Introduction

Here is the source code for gov.nasa.jpl.analytics.util.CommonUtil.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gov.nasa.jpl.analytics.util;

import com.google.gson.Gson;
import gov.nasa.jpl.analytics.model.CdrDumpParam;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.nutch.segment.SegmentMerger;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.TableUtil;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.HashMap;
import java.util.Map;

/**
 * Created by karanjeetsingh on 8/31/16.
 */
public class CommonUtil {

    private static Logger LOG = LoggerFactory.getLogger(CommonUtil.class);
    private static boolean SIMPLE_DATE_FORMAT = true;
    private static JSONParser jsonParser = new JSONParser();
    public static Map<String, String> statusMap = new HashMap<>();

    static {
        statusMap.put("db_fetched", "FETCHED");
    }

    public static String formatTimestamp(String sdate) {
        if (SIMPLE_DATE_FORMAT) {
            String timestamp = null;
            try {
                long epoch = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss z").parse(ifNullString(sdate))
                        .getTime();
                timestamp = String.valueOf(epoch);
            } catch (ParseException pe) {
                LOG.warn(pe.getMessage());
            }
            return timestamp;
        } else {
            return ifNullString(sdate);
        }
    }

    public static String toSolrTimestamp(Long epoch) {
        SimpleDateFormat out = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
        return out.format(epoch);
    }

    public static String reverseUrl(String url) {
        String reversedURLPath = "";
        try {
            String[] reversedURL = TableUtil.reverseUrl(url).split(":");
            reversedURL[0] = reversedURL[0].replace('.', '/');

            reversedURLPath = reversedURL[0] + "/" + hashString(url);
        } catch (MalformedURLException e) {
            LOG.error("Error occurred while reversing the URL " + url);
            e.printStackTrace();
        }
        return reversedURLPath;
    }

    public static String hashString(String str) {
        String hash = "";
        try {
            MessageDigest md = MessageDigest.getInstance("SHA-256");
            md.update(str.getBytes("UTF-8"));
            byte[] digest = md.digest();
            hash = String.format("%064x", new java.math.BigInteger(1, digest)).toUpperCase();
        } catch (NoSuchAlgorithmException e) {
            LOG.error("Not a valid Hash Algorithm for String " + str);
            e.printStackTrace();
        } catch (UnsupportedEncodingException e) {
            LOG.error("Not a valid Encoding for String " + str);
            e.printStackTrace();
        }
        return hash;
    }

    public static HashMap<String, Integer> termFrequency(Iterable<String> terms) {
        HashMap<String, Integer> map = new HashMap();
        for (String term : terms) {
            if (!map.containsKey(term)) {
                map.put(term, 1);
            } else {
                map.put(term, map.get(term) + 1);
            }
        }
        return map;
    }

    public static String getHost(String url) {
        //return InternetDomainName.from(url).topPrivateDomain().toString();
        /*if () {
        return InternetDomainName.from(url).topPrivateDomain().toString();
        } else {
        System.out.println("Omg, It's not a valid URL: " + url);
        return "";
        }*/

        URI uri = null;
        try {
            uri = new URI(url);
        } catch (URISyntaxException e) {
            System.out.println("Omg, It's not a valid URL: " + url);
            // Do Nothing & Return Empty ""
            return "";
        }
        String host = uri.getHost();
        if (host != null) {
            if (host.startsWith("www.")) {
                return host.substring(4);
            } else {
                return host;
            }
        } else {
            System.out.println("Omg, www not present: " + url);
            return "";
        }
    }

    public static void makeSafeDir(String dirPath) throws Exception {
        File dir = new File(dirPath);
        if (!dir.exists()) {
            dir.mkdirs();
        }
    }

    public static void mergeSegments(Path out, Path[] segments, Configuration config) {
        Configuration myConfig = NutchConfiguration.create();
        System.out.println(segments.toString());
        SegmentMerger merger = new SegmentMerger(NutchConfiguration.create());
        try {
            merger.merge(out, segments, false, false, 0);
        } catch (Exception e) {
            System.out.println("Merging Failed");
            e.printStackTrace();
        }
    }

    private static String ifNullString(String value) {
        return (value != null) ? value : "";
    }

    public static JSONObject toJson(String json) throws org.json.simple.parser.ParseException {
        //return (JSONObject) jsonParser.parse(json);
        return (new JSONObject(new Gson().fromJson(json, Map.class)));
    }

    public static String joinString(CdrDumpParam s1, String s2) {
        return s1 + "-" + s2;
    }

    public static void main(String[] args) {
    }

}