Example usage for twitter4j Status getLang

List of usage examples for twitter4j Status getLang

Introduction

On this page you can find example usages of twitter4j Status getLang.

Prototype

String getLang();

Source Link

Document

Returns the lang of the status text if available.

Usage

From source file:io.rakam.datasource.twitter.TweetProcessor.java

License:Apache License

/**
 * Flattens an incoming status into a property map, wraps it in an {@code Event} for
 * the configured collection, queues it in the buffer and then calls
 * {@code commitIfNecessary()}.
 *
 * @param status the status delivered by the stream listener
 */
@Override
public void onStatus(Status status) {
    Map<String, Object> properties = new HashMap<>();

    // Coordinates are only recorded when the status carries a geo location.
    GeoLocation coordinates = status.getGeoLocation();
    if (coordinates != null) {
        properties.put("latitude", coordinates.getLatitude());
        properties.put("longitude", coordinates.getLongitude());
    }

    properties.put("_time", status.getCreatedAt().getTime());

    // Place details are optional as well.
    Place venue = status.getPlace();
    if (venue != null) {
        properties.put("country_code", venue.getCountryCode());
        properties.put("place", venue.getName());
        properties.put("place_type", venue.getPlaceType());
        properties.put("place_id", venue.getId());
    }

    // Author attributes.
    User author = status.getUser();
    properties.put("_user", author.getId());
    properties.put("user_lang", author.getLang());
    properties.put("user_created", author.getCreatedAt());
    properties.put("user_followers", author.getFollowersCount());
    properties.put("user_status_count", author.getStatusesCount());
    properties.put("user_verified", author.isVerified());

    // Attributes of the status itself.
    properties.put("id", status.getId());
    boolean isReply = status.getInReplyToUserId() > -1;
    properties.put("is_reply", isReply);
    properties.put("is_retweet", status.isRetweet());
    boolean hasMedia = status.getMediaEntities().length > 0;
    properties.put("has_media", hasMedia);

    // Entity texts are collected into lists.
    properties.put("urls",
            Arrays.stream(status.getURLEntities())
                    .map(URLEntity::getText)
                    .collect(Collectors.toList()));
    properties.put("hashtags",
            Arrays.stream(status.getHashtagEntities())
                    .map(HashtagEntity::getText)
                    .collect(Collectors.toList()));
    properties.put("user_mentions",
            Arrays.stream(status.getUserMentionEntities())
                    .map(UserMentionEntity::getText)
                    .collect(Collectors.toList()));

    // The language code "und" is stored as null instead of the literal code.
    if ("und".equals(status.getLang())) {
        properties.put("language", null);
    } else {
        properties.put("language", status.getLang());
    }
    properties.put("is_positive", classifier.isPositive(status.getText()));

    buffer.add(new Event().properties(properties).collection(collection));

    commitIfNecessary();
}

From source file:mapper.TweetDataMapper.java

/**
 * Builds a {@link Tweet} from a {@link Status} by copying its creation date, language,
 * retweet count and text, plus latitude/longitude when a geo location is present.
 *
 * @param status the status to convert; must not be {@code null}
 * @return the newly created {@link Tweet}
 * @throws IllegalArgumentException if {@code status} is {@code null}
 */
@Override
public Tweet transform(Status status) {
    if (status == null) {
        throw new IllegalArgumentException("Cannot transform a null value");
    }

    Tweet converted = new Tweet();
    converted.setCreateAt(status.getCreatedAt());
    converted.setLang(status.getLang());

    // Coordinates are left untouched when the status has no geo location.
    if (status.getGeoLocation() != null) {
        converted.setLat(status.getGeoLocation().getLatitude());
        converted.setLon(status.getGeoLocation().getLongitude());
    }

    converted.setReTweetCount(status.getRetweetCount());
    converted.setText(status.getText());
    return converted;
}

From source file:nl.utwente.bigdata.bolts.FileOutputBolt.java

License:Apache License

/**
 * Prints one semicolon-separated record for the tuple to {@code pw} and then flushes
 * {@code writer}.
 *
 * <p>The record holds, in order: the tweet language, the tweet creation date as a GMT
 * string, and the tuple fields {@code normalized_text}, {@code sentiment},
 * {@code home} and {@code away}.
 *
 * @param tuple     the incoming tuple; its {@code tweet} field must hold a {@code Status}
 * @param collector the output collector (not used by this method)
 */
@Override
public void execute(Tuple tuple, BasicOutputCollector collector) {
    Status tweet = (Status) tuple.getValueByField("tweet");

    String record = String.format("%s;%s;%s;%s;%s;%s",
            tweet.getLang(),
            tweet.getCreatedAt().toGMTString(),
            tuple.getStringByField("normalized_text"),
            tuple.getIntegerByField("sentiment"),
            tuple.getStringByField("home"),
            tuple.getStringByField("away"));
    this.pw.println(record);

    try {
        this.writer.flush();
    } catch (IOException flushFailure) {
        // Best effort: a failed flush is reported but does not fail the tuple.
        flushFailure.printStackTrace();
    }
}

From source file:nl.utwente.bigdata.bolts.FilterLanguageBolt.java

License:Apache License

/**
 * Forwards the tuple when the language of its tweet is one of the configured
 * {@code languages}; tuples in any other language are dropped silently.
 *
 * @param tuple     the incoming tuple; its {@code tweet} field must hold a {@code Status}
 * @param collector the collector the matching tuple's values are emitted on
 */
@Override
public void execute(Tuple tuple, BasicOutputCollector collector) {
    Status status = (Status) tuple.getValueByField("tweet");
    String tweetLang = status.getLang();

    if (Arrays.asList(this.languages).contains(tweetLang)) {
        // The language code is passed as emit's first argument
        // (presumably the stream id - confirm against the Storm API in use).
        collector.emit(tweetLang, tuple.getValues());
    }
}

From source file:nl.utwente.bigdata.bolts.NormalizerBolt.java

License:Apache License

/**
 * Normalizes the text of the tuple's tweet and emits the tweet, the normalized text
 * and the tweet language.
 *
 * <p>Normalization decomposes the text (NFD), lower-cases it, strips combining
 * diacritical marks and removes line feeds and carriage returns. Tweets whose
 * normalized text starts with {@code "rt"} are not emitted.
 *
 * @param tuple     the incoming tuple; its {@code tweet} field must hold a {@code Status}
 * @param collector the collector the normalized values are emitted on
 */
@Override
public void execute(Tuple tuple, BasicOutputCollector collector) {
    Status tweet = (Status) tuple.getValueByField("tweet");

    // from: http://stackoverflow.com/questions/1008802/converting-symbols-accent-letters-to-english-alphabet
    Pattern diacritics = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
    String decomposed = Normalizer.normalize(tweet.getText(), Normalizer.Form.NFD);

    // Lower-case with a fixed locale so the result does not depend on the JVM's
    // default locale (e.g. the Turkish dotless i).
    String normalizedTweet = diacritics.matcher(decomposed.toLowerCase(java.util.Locale.ROOT))
            .replaceAll("")
            .replace("\n", "")
            .replace("\r", "");

    // Skip texts prefixed with "rt" (presumably retweets - confirm with the topology owner).
    if (!normalizedTweet.startsWith("rt")) {
        collector.emit(new Values(tweet, normalizedTweet, tweet.getLang()));
    }
}

From source file:nlptexthatespeechdetection.dataCollection.GetTwitterDoc2VecTrainingData.java

/**
 * Streams tweets and appends the text of every tweet whose language is {@code "in"}
 * to {@code folderName/fileName}, exiting the JVM once the number of tweets read from
 * standard input has been written.
 *
 * @param args command line arguments (not used)
 * @throws FileNotFoundException declared for callers; see {@link IOException}
 * @throws IOException if the output file cannot be created or opened for appending
 */
public static void main(String[] args) throws FileNotFoundException, IOException {
    File dir = new File(folderName);
    if (!dir.exists()) {
        dir.mkdir();
    }
    // Also covers a failed mkdir(): the path is then still not a directory.
    if (!dir.isDirectory()) {
        System.out.println(folderName + " is not a directory");
        return;
    }

    System.out.println("number of tweets required: ");
    int numTweetsRequired = (new Scanner(System.in)).nextInt();

    String path = folderName + "/" + fileName;
    File file = new File(path);
    if (!file.exists()) {
        file.createNewFile();
    }
    // Opened in append mode so earlier runs are preserved.
    FileWriter writer = new FileWriter(path, true);

    TwitterStream twitterStream = new TwitterStreamFactory().getInstance();
    StatusListener listener = new StatusListener() {
        int numTweets = 0;

        @Override
        public void onStatus(Status status) {
            // getLang() is only populated "if available"; comparing from the constant
            // side avoids a NullPointerException when it is missing.
            if ("in".equals(status.getLang())) {
                try {
                    String statusText = status.getText();
                    writer.write("\n");
                    writer.write(statusText);
                    numTweets++;
                    System.out.println("numTweets: " + numTweets);

                    // Quota reached: close the file and terminate the process.
                    if (numTweets >= numTweetsRequired) {
                        writer.close();
                        System.exit(0);
                    }
                } catch (IOException ex) {
                    Logger.getLogger(GetTwitterDoc2VecTrainingData.class.getName()).log(Level.SEVERE, null, ex);
                }

            }
        }

        @Override
        public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
            // Deletion notices are intentionally ignored.
        }

        @Override
        public void onTrackLimitationNotice(int numberOfLimitedStatuses) {
            System.out.println("Got track limitation notice:" + numberOfLimitedStatuses);
        }

        @Override
        public void onScrubGeo(long userId, long upToStatusId) {
            System.out.println("Got scrub_geo event userId:" + userId + " upToStatusId:" + upToStatusId);
        }

        @Override
        public void onStallWarning(StallWarning warning) {
            System.out.println("Got stall warning:" + warning);
        }

        @Override
        public void onException(Exception ex) {
            ex.printStackTrace();
        }
    };

    twitterStream.addListener(listener);

    // Track the five vowels and restrict the stream to language "in".
    FilterQuery filterQuery = new FilterQuery();
    filterQuery.track(new String[] { "a", "i", "u", "e", "o" });
    filterQuery.language("in");
    twitterStream.filter(filterQuery);

}

From source file:nlptexthatespeechdetection.dataCollection.TwitterStreamingAnnotator.java

/**
 * Interactive annotation tool: streams tweets, shows every tweet whose language is
 * {@code "in"} on the console and asks the annotator whether it is hate speech.
 * Answers {@code y} and {@code n} are saved through the {@code AnnotatedDataFolder};
 * any other answer saves nothing.
 *
 * @param args command line arguments (not used)
 * @throws NotDirectoryException presumably raised when {@code dataFolderName} is not a
 *         directory - confirm against {@code AnnotatedDataFolder}
 */
public static void main(String[] args) throws NotDirectoryException {
    Scanner sc = new Scanner(System.in);
    System.out.println("Nama Anda (sebagai anotator): ");
    String namaAnotator = sc.nextLine();
    AnnotatedDataFolder annotatedDataFolder = new AnnotatedDataFolder(dataFolderName);

    TwitterStream twitterStream = new TwitterStreamFactory().getInstance();
    StatusListener listener = new StatusListener() {
        @Override
        public void onStatus(Status status) {
            // getLang() is only populated "if available"; comparing from the constant
            // side avoids a NullPointerException when it is missing.
            if ("in".equals(status.getLang())) {
                System.out.println();
                System.out.println();
                System.out.println("=======ANOTASI=======");
                System.out.println("status: " + status.getText());
                System.out.println();
                System.out.println("is this a hate speech?(y/n. any other if you do not know)");
                String annotatorResponse = sc.nextLine().trim().toLowerCase();

                Date date = new Date();
                String dateString = dateFormat.format(date);

                try {
                    if (annotatorResponse.equals("y")) {
                        String filePath = annotatedDataFolder.saveHateSpeechString(namaAnotator, dateString,
                                status.getText());
                        System.out.println("Saved data to: " + filePath);
                    } else if (annotatorResponse.equals("n")) {
                        String filePath = annotatedDataFolder.saveNotHateSpeechString(namaAnotator, dateString,
                                status.getText());
                        System.out.println("Saved data to: " + filePath);
                    }
                    System.out.println("thank you!");
                } catch (FileNotFoundException ex) {
                    ex.printStackTrace();
                } catch (IOException ex) {
                    Logger.getLogger(TwitterStreamingAnnotator.class.getName()).log(Level.SEVERE, null, ex);
                }

            } else {
                System.out.println("ignoring non-indonesian tweet");
            }
        }

        @Override
        public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
            // Deletion notices are intentionally ignored.
        }

        @Override
        public void onTrackLimitationNotice(int numberOfLimitedStatuses) {
            System.out.println("Got track limitation notice:" + numberOfLimitedStatuses);
        }

        @Override
        public void onScrubGeo(long userId, long upToStatusId) {
            System.out.println("Got scrub_geo event userId:" + userId + " upToStatusId:" + upToStatusId);
        }

        @Override
        public void onStallWarning(StallWarning warning) {
            System.out.println("Got stall warning:" + warning);
        }

        @Override
        public void onException(Exception ex) {
            ex.printStackTrace();
        }
    };

    twitterStream.addListener(listener);

    // Track the five vowels and restrict the stream to language "in".
    FilterQuery filterQuery = new FilterQuery();
    filterQuery.track(new String[] { "a", "i", "u", "e", "o" });
    filterQuery.language("in");
    twitterStream.filter(filterQuery);
}

From source file:nlptexthatespeechdetection.NLPTextHateSpeechDetection.java

/**
 * Trains a {@code HateSpeechClassifier1} on the annotated data in the {@code data}
 * folder, then streams tweets and prints every tweet with language {@code "in"} that
 * the classifier flags as hate speech (threshold {@code 0.5}), together with running
 * counters.
 *
 * @param args the command line arguments (not used)
 * @throws TwitterException declared by the original signature
 * @throws NotDirectoryException presumably raised when {@code data} is not a
 *         directory - confirm against {@code AnnotatedDataFolder}
 * @throws IOException declared by the original signature
 */
public static void main(String[] args) throws TwitterException, NotDirectoryException, IOException {
    HateSpeechClassifier1 classifier = new HateSpeechClassifier1();
    AnnotatedDataFolder data = new AnnotatedDataFolder("data");
    boolean overSampling = false;
    classifier.train(data.getDateSortedLabeledData(overSampling));

    TwitterStream twitterStream = new TwitterStreamFactory().getInstance();
    StatusListener listener = new StatusListener() {
        int numHateSpeech = 0;
        int numTweets = 0;

        @Override
        public void onStatus(Status status) {
            // getLang() is only populated "if available"; comparing from the constant
            // side avoids a NullPointerException when it is missing.
            if ("in".equals(status.getLang())) {
                numTweets++;
                if (classifier.isHateSpeech(status.getText(), 0.5)) {
                    // Count the detection before reporting so the printed total
                    // includes the tweet being shown, like numTweets does.
                    numHateSpeech++;
                    System.out.println("@" + status.getUser().getScreenName() + " - " + status.getText() + " * "
                            + status.getId() + " # " + status.getLang() + " $ "
                            + (status.getGeoLocation() == null ? "NULLGEO"
                                    : status.getGeoLocation().toString()));
                    System.out.println();
                    System.out.println("lang: " + status.getLang());
                    System.out.println("number of detected hate speech: " + numHateSpeech);
                    System.out.println("total number of streamed tweets: " + numTweets);
                    System.out.println();
                    System.out.println();
                }
            } else {
                System.out.println("ignoring non-Indonesian tweet");
            }
        }

        @Override
        public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
            // Deletion notices are intentionally ignored.
        }

        @Override
        public void onTrackLimitationNotice(int numberOfLimitedStatuses) {
            System.out.println("Got track limitation notice:" + numberOfLimitedStatuses);
        }

        @Override
        public void onScrubGeo(long userId, long upToStatusId) {
            System.out.println("Got scrub_geo event userId:" + userId + " upToStatusId:" + upToStatusId);
        }

        @Override
        public void onStallWarning(StallWarning warning) {
            System.out.println("Got stall warning:" + warning);
        }

        @Override
        public void onException(Exception ex) {
            ex.printStackTrace();
        }
    };

    twitterStream.addListener(listener);

    // Track the five vowels and restrict the stream to language "in".
    FilterQuery filterQuery = new FilterQuery();
    filterQuery.track(new String[] { "a", "i", "u", "e", "o" });
    filterQuery.language("in");
    twitterStream.filter(filterQuery);

    // NOTE(review): filter(...) and sample() are both started on the same stream;
    // confirm against the twitter4j documentation whether the second call supersedes
    // the filtered stream, and drop whichever call is not intended.
    twitterStream.sample();
}

From source file:org.apache.solr.handler.dataimport.TwitterEntityProcessor.java

License:Apache License

/**
 * Returns the next tweet of the search as a row, fetching the first page lazily and
 * moving on to the next page once the current one is exhausted.
 *
 * <p>The row holds the tweet id, language, user name, user screen name, text and
 * creation date (as {@code Date.toString()}) under the {@code MESSAGE_*} keys.
 *
 * @return the row for the next tweet, or {@code null} when the processor is not
 *         configured, no further tweet is available, or the search fails
 */
@Override
public Map<String, Object> nextRow() {

    if (twitter == null || query == null) {
        return null;
    }

    Map<String, Object> row = new HashMap<>();

    try {
        // First call: run the initial search.
        if (results == null) {
            results = twitter.search(query);
            if (results == null || results.getCount() == 0) {
                return null;
            }
        }
        if (tweets == null) {
            tweets = (ArrayList<Status>) results.getTweets();
        }

        Status tweet = null;
        if (index < tweets.size()) {
            tweet = (Status) tweets.get(index++);
        } else {
            // Current page exhausted: ask for the follow-up query, if any.
            query = results.nextQuery();
            if (query != null) {
                results = twitter.search(query);
                if (results == null || results.getCount() == 0) {
                    return null;
                }
                tweets = (ArrayList<Status>) results.getTweets();
                index = 0;
                // The getCount() check above does not guarantee a non-empty tweet list;
                // guard so an empty page ends the iteration instead of throwing
                // IndexOutOfBoundsException.
                if (!tweets.isEmpty()) {
                    tweet = (Status) tweets.get(index++);
                }
            }
        }
        if (tweet == null) {
            return null;
        }

        // id
        row.put(MESSAGE_ID, tweet.getId());

        // lang
        row.put(MESSAGE_LANG, tweet.getLang());

        // user
        User user = tweet.getUser();

        // name
        row.put(MESSAGE_USER, user.getName());

        // pseudo
        row.put(MESSAGE_PSEUDO, user.getScreenName());

        // text
        row.put(MESSAGE_TEXT, tweet.getText());

        // date
        Date date = tweet.getCreatedAt();
        row.put(MESSAGE_DATE, date.toString());

    } catch (TwitterException e) {
        // A failed search ends the import for this entity; the error is only reported.
        e.printStackTrace();
        return null;
    }

    return row;
}

From source file:org.bireme.interop.toJson.Twitter2Json.java

License:Open Source License

/**
 * Serializes a status into a flat {@code JSONObject}.
 *
 * <p>Always written: creation date, id, language, source, text and the truncated /
 * retweet flags. Location, place and user attributes are written only when the
 * corresponding part of the status is present.
 *
 * @param status the status to serialize; asserted to be non-null
 * @return the JSON document describing the status
 */
private JSONObject getDocument(final Status status) {
    assert status != null;

    final JSONObject doc = new JSONObject();
    final GeoLocation coordinates = status.getGeoLocation();
    final Place venue = status.getPlace();
    final User author = status.getUser();

    doc.put("createdAt", status.getCreatedAt())
            .put("id", status.getId())
            .put("lang", status.getLang());

    if (coordinates != null) {
        doc.put("location_latitude", coordinates.getLatitude())
                .put("location_longitude", coordinates.getLongitude());
    }

    if (venue != null) {
        doc.put("place_country", venue.getCountry())
                .put("place_fullName", venue.getFullName())
                .put("place_id", venue.getId())
                .put("place_name", venue.getName())
                .put("place_type", venue.getPlaceType())
                .put("place_streetAddress", venue.getStreetAddress())
                .put("place_url", venue.getURL());
    }

    doc.put("source", status.getSource())
            .put("text", status.getText());

    if (author != null) {
        doc.put("user_description", author.getDescription())
                .put("user_id", author.getId())
                .put("user_lang", author.getLang())
                .put("user_location", author.getLocation())
                .put("user_name", author.getName())
                .put("user_url", author.getURL());
    }

    doc.put("isTruncated", status.isTruncated())
            .put("isRetweet", status.isRetweet());

    return doc;
}