@fcorowe
This notebook contains the code to obtain sentiment analysis scores for a sample of tweets about public opinion on migration, posted from the United Kingdom between January 1st and December 31st, 2019.
# Load the geotagged UK tweet sample.
# The identifier columns are read as character on purpose: readr guesses them
# as doubles, but tweet IDs (~1e18) are larger than 2^53, so a double cannot
# represent them exactly and the IDs would be silently rounded before export.
df <- read_csv(
  "./data/uk_geo_tweets_01012019_31012019.csv",
  col_types = cols(
    tweet_id = col_character(),
    author_id = col_character(),
    conversation_id = col_character()
  )
)
## Rows: 3919 Columns: 38
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (20): place_id, place_name, full_place_name, bbox, place_type, country_...
## dbl (15): tweet_id, author_id, lat, long, retweet_count, reply_count, like_...
## lgl (2): exact_coords, possibly_sensitive
## dttm (1): created_at
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows to sanity-check the import.
head(df, n = 6L)
## # A tibble: 6 × 38
## tweet_id created_at author_id place_id place_name full_place_name
## <dbl> <dttm> <dbl> <chr> <chr> <chr>
## 1 1.08e18 2019-01-01 23:57:21 246459126 544762ebf7… Islington Islington, Lon…
## 2 1.08e18 2019-01-01 23:24:20 1934557914 40b868fc3f… Haslingden Haslingden, En…
## 3 1.08e18 2019-01-01 23:03:35 460397753 0af014accd… Scotland Scotland, Unit…
## 4 1.08e18 2019-01-01 23:02:35 20471144 44225138ca… Loughboro… Loughborough, …
## 5 1.08e18 2019-01-01 22:34:35 1024602584 00a55bd976… West End West End, Engl…
## 6 1.08e18 2019-01-01 22:19:47 2951504181 03a5820911… Southend-… Southend-on-Se…
## # … with 32 more variables: lat <dbl>, long <dbl>, exact_coords <lgl>,
## # bbox <chr>, place_type <chr>, country_code <chr>, country <chr>,
## # lang <chr>, retweet_count <dbl>, reply_count <dbl>, like_count <dbl>,
## # quote_count <dbl>, text <chr>, username <chr>, user_name <chr>,
## # followers_count <dbl>, following_count <dbl>, tweet_count <dbl>,
## # listed_count <dbl>, user_url <chr>, user_loc <chr>, user_desc <chr>,
## # source <chr>, conversation_id <dbl>, reply_settings <chr>, …
# Score the text of every tweet with vader_df(); the result is column-bound to
# the tweet metadata below, so it is expected to have one row per tweet.
vader_sentiment <- vader_df(df$text)

# Metadata columns to carry into the output. Selecting them by name keeps each
# column's original name, so no renaming step is needed. The previous
# cbind()-then-rename() approach mapped `df$long` to `lat` and `df$lat` to
# `long`, which swapped the coordinates in the exported file.
keep_cols <- c(
  "tweet_id", "created_at", "place_name", "full_place_name",
  "lat", "long", "exact_coords", "place_type", "country_code", "username"
)

# Place the sentiment columns alongside the selected tweet metadata.
final_df <- cbind(df[keep_cols], vader_sentiment)

# Persist the combined table for downstream analysis.
write_csv(final_df, "./data/uk-sentiment-data.csv")