|
| 1 | +--- |
| 2 | +title: "Sentiment and Emotion Analysis of Comments" |
| 3 | +editor: visual |
| 4 | +--- |
| 5 | + |
| 6 | +## Setup: Installing and Loading Packages |
| 7 | + |
| 8 | +```{r} |
# Install only the required packages that are not already available.
# Every package attached with library() in this document is listed, so a fresh
# machine gets all of them, while an up-to-date machine skips the
# download entirely instead of reinstalling on every render.
required_packages <- c(
  "sentimentr", "syuzhet", "dplyr", "tidyr",
  "readr", "ggplot2", "RColorBrewer", "stringr"
)

# requireNamespace() returns TRUE/FALSE without attaching the package.
is_installed <- vapply(
  required_packages, requireNamespace, logical(1),
  quietly = TRUE
)
missing_packages <- required_packages[!is_installed]

if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
| 18 | +``` |
| 19 | + |
| 20 | +```{r} |
| 21 | +# Load all packages |
| 22 | +library(sentimentr) |
| 23 | +library(syuzhet) |
| 24 | +library(dplyr) |
| 25 | +library(tidyr) |
| 26 | +library(readr) |
| 27 | +library(ggplot2) |
| 28 | +library(RColorBrewer) |
| 29 | +library(stringr) |
| 30 | +``` |
| 31 | + |
| 32 | +### Polarity Analysis |
| 33 | + |
| 34 | +```{r} |
# Load the preprocessed comments. The code below reads the `comments` column
# as the text to score.
comments <- readr::read_csv("./data/comments_preprocessed.csv")

# Split every comment into sentences once, up front. sentiment_by() would
# otherwise redo this split internally (and warn about it) each time it is
# handed a raw character vector. The call is namespace-qualified because
# syuzhet also exports a get_sentences() with a different return shape.
comment_sentences <- sentimentr::get_sentences(comments$comments)

# Average sentiment per comment
sentiment_scores <- sentiment_by(comment_sentences)

# The scores are attached by position, so the row counts must agree.
stopifnot(nrow(sentiment_scores) == nrow(comments))

# Add scores and labels to the original dataset.
# Anything not matched by the first two rules (including a missing score)
# falls through to "neutral".
polarity <- comments %>%
  mutate(
    score = sentiment_scores$ave_sentiment,
    sentiment_label = case_when(
      score > 0.1 ~ "positive",
      score < -0.1 ~ "negative",
      TRUE ~ "neutral"
    )
  )

# Check first rows with results
head(polarity)

# Number of comments per label
table(polarity$sentiment_label)
| 55 | +``` |
| 56 | + |
| 57 | +#### Plotting |
| 58 | + |
| 59 | +```{r} |
# Histogram of the per-comment sentiment score, in bins 0.1 wide
ggplot(data = polarity, mapping = aes(x = score)) +
  geom_histogram(
    binwidth = 0.1,
    fill = "skyblue",
    color = "white"
  ) +
  labs(
    title = "Sentiment Score Distribution",
    x = "Average Sentiment",
    y = "Count"
  ) +
  theme_minimal()
| 65 | +
|
# Tag each row with its season: the first "s" + digits match found in `id`
# (e.g. s1, s2); rows without a match get NA.
polarity_seasons <- polarity %>%
  mutate(season = str_extract(id, "s\\d+"))

# Side-by-side histograms per season. Plotting density instead of raw counts
# keeps the seasons comparable.
ggplot(data = polarity_seasons, mapping = aes(x = score, fill = season)) +
  geom_histogram(
    mapping = aes(y = after_stat(density)),
    binwidth = 0.1,
    position = "dodge",
    color = "white"
  ) +
  scale_fill_brewer(palette = "Set1") +
  labs(
    title = "Sentiment Score Distribution by Season (Normalized)",
    x = "Average Sentiment Score (Polarity)",
    y = "Density (Proportion of Comments)"
  ) +
  theme_minimal()
| 81 | +
|
# Save results
# write_csv() does not create missing folders, so make sure the target
# directory exists first (a no-op when it is already there).
dir.create("output", showWarnings = FALSE, recursive = TRUE)
write_csv(polarity, "output/polarity_results.csv")
| 84 | +``` |
| 85 | + |
| 86 | +### Emotion Detection with Syuzhet's NRC Lexicon |
| 87 | + |
| 88 | +```{r} |
# Detect emotions per comment.
# Each comment is scored as a whole rather than being split into sentences
# first: a flattened sentence vector has more elements than there are comments
# as soon as one comment holds several sentences, and the scores could then no
# longer be bound row-by-row to the comment ids. Scoring the column directly
# also leaves `comments` unmodified for the other chunks.
# Assign NRC emotion scores (anger, joy, etc.) + positive/negative
emotion_score <- get_nrc_sentiment(comments$comments)

# Review summary of emotion scores
summary(emotion_score)

# Regroup with original comments/ids. The bind is positional, so both tables
# must have exactly one row per comment.
stopifnot(nrow(emotion_score) == nrow(comments))
emotion_data <- bind_cols(comments, emotion_score)

# Summarize overall emotion counts: total per emotion, largest first
emotion_summary <- emotion_data %>%
  select(anger:trust) %>% # only emotion columns, not positive/negative
  summarise(across(everything(), sum)) %>%
  pivot_longer(
    cols = everything(),
    names_to = "emotion",
    values_to = "count"
  ) %>%
  arrange(desc(count))
| 109 | +``` |
| 110 | + |
#### Plotting Emotion Results
| 112 | + |
| 113 | +```{r} |
# Plot overall emotion distribution.
# The x axis is reordered by count: the row order of `emotion_summary` has no
# effect on a character axis, which ggplot would otherwise sort alphabetically.
# After coord_flip() the most frequent emotion ends up at the top. `fill`
# still maps the plain emotion name, so each emotion keeps its colour.
ggplot(
  emotion_summary,
  aes(x = reorder(emotion, count), y = count, fill = emotion)
) +
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = count), hjust = -0.2, size = 2) +
  scale_fill_manual(values = brewer.pal(10, "Paired")) +
  theme_minimal(base_size = 12) +
  labs(
    title = "Overall Emotion Distribution",
    x = "Emotion",
    y = "Total Count"
  ) +
  coord_flip()
| 122 | +
|
# Add "season" variable and summarize by season.
# The season is derived from the id prefix: ids starting with "s1_" or "s2_"
# are tagged accordingly, everything else is NA.
emotion_seasons <- emotion_data %>%
  mutate(season = case_when(
    grepl("^s1_", id) ~ "s1",
    grepl("^s2_", id) ~ "s2",
    TRUE ~ NA_character_
  ))

# Aggregate emotion counts per season (columns anger through positive)
emotion_by_season <- emotion_seasons %>%
  group_by(season) %>%
  summarise(
    across(anger:positive, ~ sum(., na.rm = TRUE))
  )

# Compare emotions by season: one row per season/emotion pair
emotion_long <- emotion_by_season %>%
  pivot_longer(
    cols = anger:positive,
    names_to = "emotion",
    values_to = "count"
  )

# The labels need the same dodge as the bars; without it the labels of all
# seasons are drawn on top of each other at the centre of each emotion.
# 0.9 is the default bar width used by geom_col().
ggplot(
  emotion_long,
  aes(x = reorder(emotion, -count), y = count, fill = season)
) +
  geom_col(position = "dodge") +
  geom_text(
    aes(label = count),
    position = position_dodge(width = 0.9),
    hjust = -0.2,
    size = 2
  ) +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal(base_size = 12) +
  labs(
    title = "Emotion Distribution by Season",
    x = "Emotion",
    y = "Total Count",
    fill = "Season"
  ) +
  coord_flip()
| 147 | +
|
# Emotion co-occurrence heatmap
# Compute pairwise Pearson correlations between the emotion score columns
emotion_matrix <- emotion_data %>% select(anger:trust)
co_occurrence <- cor(emotion_matrix, method = "pearson")
diag(co_occurrence) <- NA # remove self-correlations

# Convert to long format for plotting: one row per emotion pair
co_occurrence_long <- as.data.frame(as.table(co_occurrence))
colnames(co_occurrence_long) <- c("emotion1", "emotion2", "correlation")

# Plot heatmap.
# The fill scale spans the full correlation range [-1, 1]. With a lower limit
# of 0, negative correlations fall outside the scale and are drawn in the
# `na.value` colour, indistinguishable from the blanked diagonal.
ggplot(
  co_occurrence_long,
  aes(x = emotion1, y = emotion2, fill = correlation)
) +
  geom_tile(color = "white") +
  scale_fill_gradient2(
    low = "blue", mid = "white", high = "red", midpoint = 0,
    limits = c(-1, 1), na.value = "grey95", name = "Correlation"
  ) +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Emotion Co-occurrence Heatmap",
    x = "Emotion",
    y = "Emotion"
  )
| 166 | +
|
# Save results
# write_csv() does not create missing folders, so make sure the target
# directory exists first (a no-op when it is already there).
dir.create("output", showWarnings = FALSE, recursive = TRUE)
write_csv(emotion_data, "output/sentiment_emotion_results.csv")
| 169 | +``` |
0 commit comments