mobisocial.musubi.util.OGUtil.java Source code

Java tutorial

Introduction

Here is the source code for mobisocial.musubi.util.OGUtil.java

Source

/*
 * Copyright 2012 The Stanford MobiSocial Laboratory
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package mobisocial.musubi.util;

import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.net.URL;
import java.util.HashSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;

import android.graphics.Bitmap;
import android.graphics.Bitmap.CompressFormat;
import android.graphics.BitmapFactory;
import android.util.Log;

/**
 * Fetches a URL and extracts Open Graph style metadata (title, description,
 * canonical url and a thumbnail image) from it, falling back to guesses from
 * the plain HTML (the {@code <title>} tag, the {@code description} meta tag and
 * the largest {@code <img>}) when the page has no og tags.
 */
public class OGUtil {
    /**
     * Result of {@link OGUtil#getOrGuess(String)}. Any field may be
     * {@code null} when the corresponding piece of data could not be found.
     */
    public static class OGData {
        /** Page title, from og:title if present, otherwise from the title tag. */
        String mTitle;
        /** Value of the og:url meta tag. */
        String mUrl;
        /** PNG encoded thumbnail, at most 200 pixels on its longest edge. */
        byte[] mImage;
        /** og:description if present, otherwise the plain description meta tag. */
        String mDescription;
        /** Raw value of the Content-Type header of the fetched url. */
        String mMimeType;
    }

    private static final String TAG = "OGUtil";
    /** Longest edge, in pixels, of the generated thumbnails. */
    private static final int MAX_THUMBNAIL_EDGE = 200;
    /** Page images narrower or shorter than this are ignored when guessing. */
    private static final int MIN_IMAGE_EDGE = 32;

    //TODO: these could be better, unit tests as well
    private static final Pattern sTitleRegex = Pattern.compile("<\\s*title\\s*>([^<]+)<\\s*/title\\s*>",
            Pattern.CASE_INSENSITIVE);
    private static final Pattern sImageRegex = Pattern.compile("<\\s*img\\s+[^>]+>", Pattern.CASE_INSENSITIVE);
    private static final Pattern sMetaRegex = Pattern.compile("<\\s*meta\\s+[^>]+>", Pattern.CASE_INSENSITIVE);
    private static final Pattern sPropertyOfMeta = Pattern.compile(
            "\\b(?:name|property)\\s*=\\s*(\"[^\"]+\"|'[^']+')", Pattern.CASE_INSENSITIVE);
    private static final Pattern sContentOfMeta = Pattern.compile("\\bcontent\\s*=\\s*(\"[^\"]+\"|'[^']+')",
            Pattern.CASE_INSENSITIVE);
    private static final Pattern sSrcOfImage = Pattern.compile("\\bsrc\\s*=\\s*(\"[^\"]+\"|'[^']+')",
            Pattern.CASE_INSENSITIVE);

    /**
     * Downloads {@code url} and builds an {@link OGData} describing it.
     * <ul>
     * <li>If the url is an image, only the mime type and a thumbnail are set.</li>
     * <li>If it is HTML/XHTML, the og meta tags are read; a missing og:image is
     * replaced by the largest image referenced from an img tag.</li>
     * <li>Any other content type yields an object with only the mime type set.</li>
     * </ul>
     *
     * @param url absolute url of the shared item
     * @return the extracted data, or {@code null} if the url could not be
     *         fetched, had no body or content type, or was an image that
     *         could not be decoded
     */
    public static OGData getOrGuess(String url) {
        DefaultHttpClient hc = new DefaultHttpClient();
        try {
            return fetchAndParse(hc, url);
        } finally {
            // Frees every connection still held by this one-shot client,
            // including those of responses abandoned on an early return.
            hc.getConnectionManager().shutdown();
        }
    }

    /** Does the actual work of {@link #getOrGuess(String)} using the given client. */
    private static OGData fetchAndParse(DefaultHttpClient hc, String url) {
        HttpResponse res;
        try {
            res = hc.execute(new HttpGet(url));
        } catch (Exception e) {
            Log.e(TAG, "unable to fetch page to get og tags", e);
            return null;
        }
        String location = url;
        //TODO: if some kind of redirect magic happened, then
        //make the location match that

        HttpEntity he = res.getEntity();
        if (he == null) {
            // e.g. a 204/304 response: nothing to inspect
            Log.e(TAG, "page has no content ..abandoning: " + url);
            return null;
        }
        //TODO: check the content directly if they forget the type header
        String mime_type = contentTypeOf(he);
        if (mime_type == null) {
            Log.e(TAG, "page missing content type ..abandoning: " + url);
            return null;
        }
        OGData og = new OGData();
        og.mMimeType = mime_type;

        //just make a thumbnail if the shared item is an image
        if (og.mMimeType.startsWith("image/")) {
            Bitmap b = decodeEntity(he);
            if (b == null) {
                return null;
            }
            try {
                og.mImage = toThumbnailPng(b);
            } catch (Exception e) {
                Log.e(TAG, "failed to build thumbnail for " + url, e);
                return null;
            }
            return og;
        }
        //if its not html, we can't extract more details, the caller
        //should rely on what they already know.
        if (!og.mMimeType.startsWith("text/html") && !og.mMimeType.startsWith("application/xhtml")) {
            Log.e(TAG, "shared content is not a known type for meta data processing " + og.mMimeType);
            return og;
        }

        String html;
        InputStream in = null;
        try {
            in = he.getContent();
            html = IOUtils.toString(in);
        } catch (Exception e) {
            Log.e(TAG, "failed to read html content", e);
            return og;
        } finally {
            // closing the stream hands the connection back for the image requests
            IOUtils.closeQuietly(in);
        }

        Matcher m = sTitleRegex.matcher(html);
        if (m.find()) {
            og.mTitle = StringEscapeUtils.unescapeHtml4(m.group(1));
        }

        String raw_description = null;
        m = sMetaRegex.matcher(html);
        while (m.find()) {
            String meta_tag = m.group();
            String type = quotedAttribute(sPropertyOfMeta, meta_tag);
            if (type == null) {
                continue;
            }
            String data = quotedAttribute(sContentOfMeta, meta_tag);
            if (data == null) {
                continue;
            }
            data = StringEscapeUtils.unescapeHtml4(data);
            if (type.equalsIgnoreCase("og:title")) {
                og.mTitle = data;
            } else if (type.equalsIgnoreCase("og:image")) {
                HttpResponse resi;
                try {
                    // og:image is normally absolute, but resolve it against the
                    // page so that relative values work like <img src> does
                    HttpGet hgi = new HttpGet(new URL(new URL(location), data).toString());
                    resi = hc.execute(hgi);
                } catch (Exception e) {
                    Log.e(TAG, "unable to fetch og image url", e);
                    continue;
                }
                HttpEntity hei = resi.getEntity();
                if (hei == null) {
                    Log.e(TAG, "og image response has no content: " + data);
                    continue;
                }
                String image_type = contentTypeOf(hei);
                if (image_type == null || !image_type.startsWith("image/")) {
                    // only a warning: the decoder gets the final say
                    Log.e(TAG, "image og tag points to non image data" + image_type);
                }
                // A broken og:image must not discard the metadata gathered so
                // far, so failures here skip the tag instead of returning null.
                Bitmap b = decodeEntity(hei);
                if (b == null) {
                    continue;
                }
                try {
                    byte[] png = toThumbnailPng(b);
                    if (png != null) {
                        og.mImage = png;
                    }
                } catch (Exception e) {
                    Log.e(TAG, "failed to fetch image for og", e);
                }
            } else if (type.equalsIgnoreCase("description")) {
                raw_description = data;
            } else if (type.equalsIgnoreCase("og:description")) {
                og.mDescription = data;
            } else if (type.equalsIgnoreCase("og:url")) {
                og.mUrl = data;
            }
        }

        if (og.mImage == null) {
            //no og image, so guess: use the biggest image on the page
            HashSet<String> already_fetched = new HashSet<String>();
            // area of the best candidate so far, measured BEFORE scaling so that
            // candidates are compared by their real size
            int max_area = 0;
            m = sImageRegex.matcher(html);
            while (m.find()) {
                String img_src = quotedAttribute(sSrcOfImage, m.group());
                if (img_src == null) {
                    continue;
                }
                img_src = StringEscapeUtils.unescapeHtml4(img_src);
                //don't fetch an image twice (like little 1x1 images)
                if (!already_fetched.add(img_src)) {
                    continue;
                }
                HttpResponse resi;
                try {
                    HttpGet hgi = new HttpGet(new URL(new URL(location), img_src).toString());
                    resi = hc.execute(hgi);
                } catch (Exception e) {
                    Log.e(TAG, "unable to fetch image url for biggest image search" + img_src, e);
                    continue;
                }
                HttpEntity hei = resi.getEntity();
                if (hei == null) {
                    Log.w(TAG, "image missing entity response ..skipping: " + img_src);
                    continue;
                }
                String image_type = contentTypeOf(hei);
                if (image_type == null) {
                    Log.w(TAG, "image missing content type ..trying anyway: " + url);
                } else if (!image_type.startsWith("image/")) {
                    Log.w(TAG, "image tag points to non image data " + image_type + " " + img_src);
                }
                Bitmap b = decodeEntity(hei);
                if (b == null) {
                    continue;
                }
                int w = b.getWidth();
                int h = b.getHeight();
                int area = w * h;
                //skip anything not bigger than the current best, and dinky crap
                if (area <= max_area || w < MIN_IMAGE_EDGE || h < MIN_IMAGE_EDGE) {
                    b.recycle();
                    continue;
                }
                try {
                    byte[] png = toThumbnailPng(b);
                    if (png != null) {
                        og.mImage = png;
                        max_area = area;
                    }
                } catch (Exception e) {
                    Log.e(TAG, "failed to fetch image for og", e);
                }
            }
        }
        if (og.mDescription == null) {
            og.mDescription = raw_description;
        }
        return og;
    }

    /** Returns the Content-Type value of the entity, or null if it has none. */
    private static String contentTypeOf(HttpEntity entity) {
        Header header = entity.getContentType();
        return header == null ? null : header.getValue();
    }

    /**
     * Returns the value captured by group 1 of {@code attribute} inside
     * {@code tag} with its surrounding quotes removed, or null if the
     * attribute does not occur in the tag.
     */
    private static String quotedAttribute(Pattern attribute, String tag) {
        Matcher matcher = attribute.matcher(tag);
        if (!matcher.find()) {
            return null;
        }
        String quoted = matcher.group(1);
        return quoted.substring(1, quoted.length() - 1);
    }

    /**
     * Decodes the body of the entity as a bitmap and always closes the
     * stream. Returns null if the body cannot be read or is not a decodable
     * image (BitmapFactory reports the latter by returning null, not by
     * throwing).
     */
    private static Bitmap decodeEntity(HttpEntity entity) {
        InputStream in = null;
        try {
            in = entity.getContent();
            Bitmap decoded = BitmapFactory.decodeStream(in);
            if (decoded == null) {
                Log.e(TAG, "content could not be decoded as an image");
            }
            return decoded;
        } catch (Exception e) {
            Log.e(TAG, "failed to read image content", e);
            return null;
        } finally {
            IOUtils.closeQuietly(in);
        }
    }

    /**
     * Shrinks the bitmap so its longest edge is at most
     * {@link #MAX_THUMBNAIL_EDGE} (smaller images are left at their size, the
     * aspect ratio is kept) and encodes it as PNG. The source bitmap is
     * recycled in all cases.
     *
     * @return the PNG bytes, or null if the encoder reported failure
     */
    private static byte[] toThumbnailPng(Bitmap source) {
        Bitmap scaled = source;
        try {
            int w = source.getWidth();
            int h = source.getHeight();
            int longest = Math.max(w, h);
            if (longest > MAX_THUMBNAIL_EDGE) {
                // clamp to 1 so very elongated images don't scale to a zero edge,
                // which createScaledBitmap rejects
                int tw = Math.max(1, w * MAX_THUMBNAIL_EDGE / longest);
                int th = Math.max(1, h * MAX_THUMBNAIL_EDGE / longest);
                scaled = Bitmap.createScaledBitmap(source, tw, th, true);
            }
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            if (!scaled.compress(CompressFormat.PNG, 100, baos)) {
                Log.e(TAG, "failed to compress thumbnail");
                return null;
            }
            return baos.toByteArray();
        } finally {
            // createScaledBitmap may hand back the source itself, so only
            // recycle the scaled copy when it really is a different bitmap
            if (scaled != source) {
                scaled.recycle();
            }
            source.recycle();
        }
    }
}