Chapter 4 Search tweets
Chapter progress bar
██████░░░░░░░░░░░░░░░░░░░░░░░░ 20%
Let’s first specify where we plan to store our JSON data. We also take care of programmatically creating the directory if it doesn’t already exist (the `!` in front of `dir.exists` is a logical negation, i.e. NOT).
# Directory (relative to the working directory) where the downloaded JSON
# files are stored.
json_data_dir <- "json_data"

# Create the directory only when it is missing.
if (!dir.exists(json_data_dir)) {
  dir.create(json_data_dir)
}
4.1 Tweets from a given account
# Endpoint queried by the requests below.
url <- "https://api.twitter.com/2/tweets/search/all"

# Authorization header; the bearer token is read from the BEARER_TOKEN
# environment variable so that it never appears in the script itself.
headers <- c(Authorization = sprintf("Bearer %s", Sys.getenv("BEARER_TOKEN")))

# Query-string parameters sent with every request: the search query, the
# start of the time window, the fields/expansions to include in the response
# and the number of results requested per page.
params <- list(
  query = "from:matteosalvinimi",
  start_time = "2022-03-01T00:00:00Z",
  tweet.fields = "attachments,author_id,context_annotations,conversation_id,created_at,entities,geo,id,in_reply_to_user_id,lang,public_metrics,possibly_sensitive,referenced_tweets,reply_settings,source,text,withheld",
  expansions = "attachments.poll_ids,attachments.media_keys,author_id,entities.mentions.username,geo.place_id,in_reply_to_user_id,referenced_tweets.id,referenced_tweets.id.author_id",
  user.fields = "created_at,description,entities,id,location,name,pinned_tweet_id,profile_image_url,protected,public_metrics,url,username,verified,withheld",
  poll.fields = "duration_minutes,end_datetime,id,options,voting_status",
  place.fields = "contained_within,country,country_code,full_name,geo,id,name,place_type",
  media.fields = "duration_ms,height,media_key,preview_image_url,type,url,width,public_metrics,alt_text",
  max_results = 100
)
If we don’t set an `end_time`, it defaults to 30 seconds before the time of the request.
# Request the first page of results.
res <- httr::GET(url,
                 httr::add_headers(.headers = headers),
                 query = params)

# Fail loudly on any HTTP error (bad token, rate limit, malformed query, ...)
# instead of trying to parse and save an error payload.
httr::stop_for_status(res)

# Parse the JSON body; `obj.r` is used below to read the paging metadata.
# The encoding is given explicitly so that httr does not have to guess it.
obj.r <- jsonlite::fromJSON(httr::content(res, as = "text", encoding = "UTF-8"))

# Save the page as "<json_data_dir>/<oldest_id>_<newest_id>.json".
# When `oldest_id` is absent, sprintf() would return a zero-length path and
# write_json() would fail, so the write is skipped in that case.
if (is.null(obj.r$meta$oldest_id)) {
  message("No tweets in this page; nothing written.")
} else {
  jsonlite::write_json(httr::content(res, as = "parsed"),
                       path = sprintf("%s/%s_%s.json",
                                      json_data_dir,
                                      obj.r$meta$oldest_id,
                                      obj.r$meta$newest_id))
}
Do we have additional pages?
# Keep requesting pages for as long as the previous response carried a
# `next_token`; each page is saved to its own JSON file.
while (!is.null(obj.r$meta$next_token)) {
  # Ask for the page that follows the one we just received.
  params[["pagination_token"]] <- obj.r$meta$next_token
  print(sprintf("Next token: %s...", obj.r$meta$next_token))

  repeat {
    res <- httr::GET(url,
                     httr::add_headers(.headers = headers),
                     query = params)

    # Anything other than HTTP 429 (rate limited) ends the retry loop.
    if (httr::status_code(res) != 429) {
      break
    }

    # Rate limited: print the title reported in the response body, wait a
    # minute and retry the very same page.
    print(jsonlite::fromJSON(
      httr::content(res, as = "text", encoding = "UTF-8")
    )$title)
    Sys.sleep(60)
  }

  # Errors other than 429 cannot be fixed by waiting, so stop here rather
  # than retrying forever or saving an error payload.
  httr::stop_for_status(res)

  obj.r <- jsonlite::fromJSON(
    httr::content(res, as = "text", encoding = "UTF-8")
  )

  # Save the page as "<json_data_dir>/<oldest_id>_<newest_id>.json".
  # When `oldest_id` is absent, sprintf() would return a zero-length path and
  # write_json() would fail, so the write is skipped in that case.
  if (is.null(obj.r$meta$oldest_id)) {
    message("No tweets in this page; nothing written.")
  } else {
    jsonlite::write_json(httr::content(res, as = "parsed"),
                         path = sprintf("%s/%s_%s.json",
                                        json_data_dir,
                                        obj.r$meta$oldest_id,
                                        obj.r$meta$newest_id))
  }
}