List of usage examples for twitter4j Status getLang
String getLang();
From source file:io.rakam.datasource.twitter.TweetProcessor.java
License:Apache License
@Override public void onStatus(Status status) { Map<String, Object> map = new HashMap<>(); GeoLocation geoLocation = status.getGeoLocation(); if (geoLocation != null) { map.put("latitude", geoLocation.getLatitude()); map.put("longitude", geoLocation.getLongitude()); }// w w w . j ava2s .c o m map.put("_time", status.getCreatedAt().getTime()); Place place = status.getPlace(); if (place != null) { map.put("country_code", place.getCountryCode()); map.put("place", place.getName()); map.put("place_type", place.getPlaceType()); map.put("place_id", place.getId()); } User user = status.getUser(); map.put("_user", user.getId()); map.put("user_lang", user.getLang()); map.put("user_created", user.getCreatedAt()); map.put("user_followers", user.getFollowersCount()); map.put("user_status_count", user.getStatusesCount()); map.put("user_verified", user.isVerified()); map.put("id", status.getId()); map.put("is_reply", status.getInReplyToUserId() > -1); map.put("is_retweet", status.isRetweet()); map.put("has_media", status.getMediaEntities().length > 0); map.put("urls", Arrays.stream(status.getURLEntities()).map(URLEntity::getText).collect(Collectors.toList())); map.put("hashtags", Arrays.stream(status.getHashtagEntities()).map(HashtagEntity::getText) .collect(Collectors.toList())); map.put("user_mentions", Arrays.stream(status.getUserMentionEntities()).map(UserMentionEntity::getText) .collect(Collectors.toList())); map.put("language", "und".equals(status.getLang()) ? null : status.getLang()); map.put("is_positive", classifier.isPositive(status.getText())); Event event = new Event().properties(map).collection(collection); buffer.add(event); commitIfNecessary(); }
From source file:mapper.TweetDataMapper.java
/**
 * Builds a {@link Tweet} from a {@link Status}.
 *
 * <p>The creation date, language, retweet count and text are always copied.
 * Latitude and longitude are copied only when the status has a geo location.
 *
 * @param status the status to convert
 * @return a newly created {@link Tweet} populated from {@code status}
 * @throws IllegalArgumentException if {@code status} is {@code null}
 */
@Override
public Tweet transform(Status status) {
    if (status == null) {
        throw new IllegalArgumentException("Cannot transform a null value");
    }

    final Tweet result = new Tweet();
    result.setCreateAt(status.getCreatedAt());
    result.setLang(status.getLang());

    final boolean hasCoordinates = status.getGeoLocation() != null;
    if (hasCoordinates) {
        result.setLat(status.getGeoLocation().getLatitude());
        result.setLon(status.getGeoLocation().getLongitude());
    }

    result.setReTweetCount(status.getRetweetCount());
    result.setText(status.getText());
    return result;
}
From source file:nl.utwente.bigdata.bolts.FileOutputBolt.java
License:Apache License
@Override public void execute(Tuple tuple, BasicOutputCollector collector) { Status tweet = (Status) tuple.getValueByField("tweet"); this.pw.println(String.format("%s;%s;%s;%s;%s;%s", tweet.getLang(), tweet.getCreatedAt().toGMTString(), tuple.getStringByField("normalized_text"), tuple.getIntegerByField("sentiment"), tuple.getStringByField("home"), tuple.getStringByField("away"))); // logger.info("Wrote to " + this.f.getAbsolutePath()); try {//from w w w . jav a 2 s .co m this.writer.flush(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:nl.utwente.bigdata.bolts.FilterLanguageBolt.java
License:Apache License
@Override public void execute(Tuple tuple, BasicOutputCollector collector) { String lang = tuple.getStringByField("lang"); Status status = (Status) tuple.getValueByField("tweet"); if (Arrays.asList(this.languages).contains(status.getLang())) { collector.emit(status.getLang(), tuple.getValues()); } else {/*from w ww . ja va 2 s . c o m*/ //logger.info(lang + " not found"); } }
From source file:nl.utwente.bigdata.bolts.NormalizerBolt.java
License:Apache License
@Override public void execute(Tuple tuple, BasicOutputCollector collector) { Status tweet; tweet = (Status) tuple.getValueByField("tweet"); // from: http://stackoverflow.com/questions/1008802/converting-symbols-accent-letters-to-english-alphabet Pattern pattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+"); String nfdNormalizedString = ""; nfdNormalizedString = Normalizer.normalize(tweet.getText(), Normalizer.Form.NFD); String normalizedTweet = (String) pattern.matcher(nfdNormalizedString.toLowerCase()).replaceAll("") .replace("\n", "").replace("\r", ""); // Also remove prefixed with rt if (!normalizedTweet.startsWith("rt")) { collector.emit(new Values(tweet, normalizedTweet, tweet.getLang())); }//from ww w . j a v a2 s. c o m }
From source file:nlptexthatespeechdetection.dataCollection.GetTwitterDoc2VecTrainingData.java
public static void main(String[] args) throws FileNotFoundException, IOException { File dir = new File(folderName); if (!dir.exists()) dir.mkdir();/*ww w . jav a2 s . co m*/ if (!dir.isDirectory()) { System.out.println(folderName + " is not a directory"); return; } System.out.println("number of tweets required: "); int numTweetsRequired = (new Scanner(System.in)).nextInt(); String path = folderName + "/" + fileName; File file = new File(path); if (!file.exists()) file.createNewFile(); FileWriter writer = new FileWriter(path, true); TwitterStream twitterStream = new TwitterStreamFactory().getInstance(); StatusListener listener = new StatusListener() { int numTweets = 0; @Override public void onStatus(Status status) { if (status.getLang().equals("in")) { try { String statusText = status.getText(); writer.write("\n"); writer.write(statusText); numTweets++; System.out.println("numTweets: " + numTweets); if (numTweets >= numTweetsRequired) { writer.close(); System.exit(0); } } catch (IOException ex) { Logger.getLogger(GetTwitterDoc2VecTrainingData.class.getName()).log(Level.SEVERE, null, ex); } } } @Override public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) { // System.out.println("Got a status deletion notice id:" + statusDeletionNotice.getStatusId()); } @Override public void onTrackLimitationNotice(int numberOfLimitedStatuses) { System.out.println("Got track limitation notice:" + numberOfLimitedStatuses); } @Override public void onScrubGeo(long userId, long upToStatusId) { System.out.println("Got scrub_geo event userId:" + userId + " upToStatusId:" + upToStatusId); } @Override public void onStallWarning(StallWarning warning) { System.out.println("Got stall warning:" + warning); } @Override public void onException(Exception ex) { ex.printStackTrace(); } }; twitterStream.addListener(listener); FilterQuery filterQuery = new FilterQuery(); filterQuery.track(new String[] { "a", "i", "u", "e", "o" }); filterQuery.language("in"); 
twitterStream.filter(filterQuery); }
From source file:nlptexthatespeechdetection.dataCollection.TwitterStreamingAnnotator.java
public static void main(String[] args) throws NotDirectoryException { Scanner sc = new Scanner(System.in); System.out.println("Nama Anda (sebagai anotator): "); String namaAnotator = sc.nextLine(); AnnotatedDataFolder annotatedDataFolder = new AnnotatedDataFolder(dataFolderName); TwitterStream twitterStream = new TwitterStreamFactory().getInstance(); StatusListener listener = new StatusListener() { @Override/* www .j av a2s. co m*/ public void onStatus(Status status) { if (status.getLang().equals("in")) { System.out.println(); System.out.println(); System.out.println("=======ANOTASI======="); System.out.println("status: " + status.getText()); System.out.println(); System.out.println("is this a hate speech?(y/n. any other if you do not know)"); String annotatorResponse = sc.nextLine().trim().toLowerCase(); Date date = new Date(); String dateString = dateFormat.format(date); try { if (annotatorResponse.equals("y")) { String filePath = annotatedDataFolder.saveHateSpeechString(namaAnotator, dateString, status.getText()); System.out.println("Saved data to: " + filePath); } else if (annotatorResponse.equals("n")) { String filePath = annotatedDataFolder.saveNotHateSpeechString(namaAnotator, dateString, status.getText()); System.out.println("Saved data to: " + filePath); } System.out.println("thank you!"); } catch (FileNotFoundException ex) { ex.printStackTrace(); } catch (IOException ex) { Logger.getLogger(TwitterStreamingAnnotator.class.getName()).log(Level.SEVERE, null, ex); } } else { System.out.println("ignoring non-indonesian tweet"); } // if (status.getGeoLocation() != null) { // System.out.println("@" + status.getUser().getScreenName() + " - " + status.getText() + " * " + status.getId() + " $ " + status.getGeoLocation().toString()); // } // if (status.getLang().equals("id")) { // System.out.println("@" + status.getUser().getScreenName() + " - " + status.getText() + " * " + status.getId() + " # " + status.getLang() + " $ " + (status.getGeoLocation() == null ? 
"NULLGEO" : status.getGeoLocation().toString())); // } } @Override public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) { // System.out.println("Got a status deletion notice id:" + statusDeletionNotice.getStatusId()); } @Override public void onTrackLimitationNotice(int numberOfLimitedStatuses) { System.out.println("Got track limitation notice:" + numberOfLimitedStatuses); } @Override public void onScrubGeo(long userId, long upToStatusId) { System.out.println("Got scrub_geo event userId:" + userId + " upToStatusId:" + upToStatusId); } @Override public void onStallWarning(StallWarning warning) { System.out.println("Got stall warning:" + warning); } @Override public void onException(Exception ex) { ex.printStackTrace(); } }; twitterStream.addListener(listener); FilterQuery filterQuery = new FilterQuery(); filterQuery.track(new String[] { "a", "i", "u", "e", "o" }); filterQuery.language("in"); twitterStream.filter(filterQuery); }
From source file:nlptexthatespeechdetection.NLPTextHateSpeechDetection.java
/** * @param args the command line arguments *///from w w w .j av a 2 s . c o m public static void main(String[] args) throws TwitterException, NotDirectoryException, IOException { HateSpeechClassifier1 classifier = new HateSpeechClassifier1(); AnnotatedDataFolder data = new AnnotatedDataFolder("data"); boolean overSampling = false; classifier.train(data.getDateSortedLabeledData(overSampling)); TwitterStream twitterStream = new TwitterStreamFactory().getInstance(); StatusListener listener = new StatusListener() { int numHateSpeech = 0; int numTweets = 0; @Override public void onStatus(Status status) { if (status.getLang().equals("in")) { numTweets++; if (classifier.isHateSpeech(status.getText(), 0.5)) { System.out.println("@" + status.getUser().getScreenName() + " - " + status.getText() + " * " + status.getId() + " # " + status.getLang() + " $ " + (status.getGeoLocation() == null ? "NULLGEO" : status.getGeoLocation().toString())); System.out.println(); System.out.println("lang: " + status.getLang()); System.out.println("number of detected hate speech: " + numHateSpeech); System.out.println("total number of streamed tweets: " + numTweets); System.out.println(); System.out.println(); numHateSpeech++; } } else { System.out.println("ignoring non-Indonesian tweet"); } // if (status.getGeoLocation() != null) { // System.out.println("@" + status.getUser().getScreenName() + " - " + status.getText() + " * " + status.getId() + " $ " + status.getGeoLocation().toString()); // } // if (status.getLang().equals("id")) { // System.out.println("@" + status.getUser().getScreenName() + " - " + status.getText() + " * " + status.getId() + " # " + status.getLang() + " $ " + (status.getGeoLocation() == null ? 
"NULLGEO" : status.getGeoLocation().toString())); // } } @Override public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) { // System.out.println("Got a status deletion notice id:" + statusDeletionNotice.getStatusId()); } @Override public void onTrackLimitationNotice(int numberOfLimitedStatuses) { System.out.println("Got track limitation notice:" + numberOfLimitedStatuses); } @Override public void onScrubGeo(long userId, long upToStatusId) { System.out.println("Got scrub_geo event userId:" + userId + " upToStatusId:" + upToStatusId); } @Override public void onStallWarning(StallWarning warning) { System.out.println("Got stall warning:" + warning); } @Override public void onException(Exception ex) { ex.printStackTrace(); } }; twitterStream.addListener(listener); FilterQuery filterQuery = new FilterQuery(); filterQuery.track(new String[] { "a", "i", "u", "e", "o" }); filterQuery.language("in"); twitterStream.filter(filterQuery); twitterStream.sample(); }
From source file:org.apache.solr.handler.dataimport.TwitterEntityProcessor.java
License:Apache License
@Override public Map<String, Object> nextRow() { Map<String, Object> row = new HashMap<>(); if (twitter == null || query == null) return null; try {/* w w w .j av a 2 s . c om*/ if (results == null) { results = twitter.search(query); if (results == null || results.getCount() == 0) return null; } if (tweets == null) tweets = (ArrayList<Status>) results.getTweets(); Status tweet = null; if (index < tweets.size()) { tweet = (Status) tweets.get(index++); } else { query = results.nextQuery(); if (query != null) { results = twitter.search(query); if (results == null || results.getCount() == 0) return null; tweets = (ArrayList<Status>) results.getTweets(); index = 0; tweet = (Status) tweets.get(index++); } } if (tweet == null) return null; // id row.put(MESSAGE_ID, tweet.getId()); // lang row.put(MESSAGE_LANG, tweet.getLang()); // user User user = tweet.getUser(); // name row.put(MESSAGE_USER, user.getName()); // pseudo row.put(MESSAGE_PSEUDO, tweet.getUser().getScreenName()); // text row.put(MESSAGE_TEXT, tweet.getText()); // date Date date = tweet.getCreatedAt(); row.put(MESSAGE_DATE, date.toString()); } catch (TwitterException e) { e.printStackTrace(); return null; } return row; }
From source file:org.bireme.interop.toJson.Twitter2Json.java
License:Open Source License
/**
 * Converts a status into a flat JSON document.
 *
 * <p>Status-level fields are always written. The {@code location_*}, {@code place_*} and
 * {@code user_*} groups are written only when the status has a geo location, a place or
 * a user, respectively.
 *
 * @param status the status to convert; asserted to be non-null
 * @return a new {@code JSONObject} describing {@code status}
 */
private JSONObject getDocument(final Status status) {
    assert status != null;

    final JSONObject document = new JSONObject();
    final GeoLocation coordinates = status.getGeoLocation();
    final Place location = status.getPlace();
    final User author = status.getUser();

    document.put("createdAt", status.getCreatedAt());
    document.put("id", status.getId());
    document.put("lang", status.getLang());

    if (coordinates != null) {
        document.put("location_latitude", coordinates.getLatitude());
        document.put("location_longitude", coordinates.getLongitude());
    }

    if (location != null) {
        document.put("place_country", location.getCountry());
        document.put("place_fullName", location.getFullName());
        document.put("place_id", location.getId());
        document.put("place_name", location.getName());
        document.put("place_type", location.getPlaceType());
        document.put("place_streetAddress", location.getStreetAddress());
        document.put("place_url", location.getURL());
    }

    document.put("source", status.getSource());
    document.put("text", status.getText());

    if (author != null) {
        document.put("user_description", author.getDescription());
        document.put("user_id", author.getId());
        document.put("user_lang", author.getLang());
        document.put("user_location", author.getLocation());
        document.put("user_name", author.getName());
        document.put("user_url", author.getURL());
    }

    document.put("isTruncated", status.isTruncated());
    document.put("isRetweet", status.isRetweet());

    return document;
}