Skip to content

Commit ca51d9e

Browse files
authored
Add files via upload
1 parent 832d0f0 commit ca51d9e

File tree

2 files changed

+337
-0
lines changed

2 files changed

+337
-0
lines changed
Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
---
2+
title: "Sentiment and Emotion Analysis of Comments"
3+
editor: visual
4+
---
5+
6+
## Setup: Installing and Loading Packages
7+
8+
```{r}
9+
# Install the text-analysis packages only when they are not already available.
# An unconditional install.packages() call re-installs the packages on every
# render and errors in a non-interactive session that has no CRAN mirror set.
required_pkgs <- c("sentimentr", "syuzhet")
is_installed <- vapply(
  required_pkgs,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_pkgs[!is_installed])
}
# Uncomment any of these if the package is missing from your library:
# install.packages("dplyr")
# install.packages("tidyr")
# install.packages("readr")
# install.packages("ggplot2")
# install.packages("RColorBrewer")
# install.packages("stringr")
18+
```
19+
20+
```{r}
21+
# Load all packages
22+
library(sentimentr)
23+
library(syuzhet)
24+
library(dplyr)
25+
library(tidyr)
26+
library(readr)
27+
library(ggplot2)
28+
library(RColorBrewer)
29+
library(stringr)
30+
```
31+
32+
### Polarity Analysis
33+
34+
```{r}
35+
# Read the preprocessed comments
comments <- readr::read_csv("./data/comments_preprocessed.csv")

# Average sentiment for the text in each row; the mutate() below relies on
# getting back exactly one `ave_sentiment` value per row of `comments`
sentiment_scores <- sentiment_by(comments$comments)

# Attach the score first, then bucket it into three polarity labels
polarity <- comments %>%
  mutate(score = sentiment_scores$ave_sentiment) %>%
  mutate(
    sentiment_label = case_when(
      score > 0.1 ~ "positive",
      score < -0.1 ~ "negative",
      TRUE ~ "neutral"
    )
  )

# Peek at the first rows of the labelled data
head(polarity)

# Number of comments per label
table(polarity$sentiment_label)
55+
```
56+
57+
#### Plotting
58+
59+
```{r}
60+
# Histogram of the sentiment scores over all comments
polarity %>%
  ggplot(aes(x = score)) +
  geom_histogram(binwidth = 0.1, fill = "skyblue", color = "white") +
  labs(
    title = "Sentiment Score Distribution",
    x = "Average Sentiment",
    y = "Count"
  ) +
  theme_minimal()

# Pull the season tag (first "s" followed by digits) out of the id column
polarity_seasons <- polarity %>%
  mutate(season = str_extract(id, "s\\d+"))

# Same histogram split by season; density on the y axis makes seasons with
# different numbers of comments comparable
polarity_seasons %>%
  ggplot(aes(x = score, fill = season)) +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 0.1,
    position = "dodge",
    color = "white"
  ) +
  scale_fill_brewer(palette = "Set1") +
  labs(
    title = "Sentiment Score Distribution by Season (Normalized)",
    x = "Average Sentiment Score (Polarity)",
    y = "Density (Proportion of Comments)"
  ) +
  theme_minimal()
81+
82+
# Save results
# write_csv() does not create missing folders, so make sure "output/" exists
# first (this is a no-op when the folder is already there).
dir.create("output", showWarnings = FALSE, recursive = TRUE)
write_csv(polarity, "output/polarity_results.csv")
84+
```
85+
86+
### Emotion Detection with Syuzhet's NRC Lexicon
87+
88+
```{r}
89+
# Detect emotions per comment
# Score each comment as a whole with the NRC lexicon. Splitting the text with
# get_sentences() first returns one element per sentence, so any comment that
# holds more than one sentence makes the scores longer than `comments` and the
# regrouping step below fails.
# The result has the emotion columns (anger ... trust) plus negative/positive.
emotion_score <- get_nrc_sentiment(as.character(comments$comments))

# Review summary of emotion scores
summary(emotion_score)

# Regroup with original comments/ids: one score row per comment row
emotion_data <- bind_cols(comments, emotion_score)

# Summarize overall emotion counts, largest first
emotion_summary <- emotion_data %>%
  select(anger:trust) %>% # only emotion columns
  summarise(across(everything(), sum)) %>%
  pivot_longer(
    cols = everything(),
    names_to = "emotion",
    values_to = "count"
  ) %>%
  arrange(desc(count))
109+
```
110+
111+
### Plotting Emotion Results
112+
113+
```{r}
114+
# Horizontal bar chart of the total count for every emotion
emotion_summary %>%
  ggplot(aes(x = emotion, y = count, fill = emotion)) +
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = count), size = 2, hjust = -0.2) +
  coord_flip() +
  scale_fill_manual(values = brewer.pal(10, "Paired")) +
  labs(
    title = "Overall Emotion Distribution",
    x = "Emotion",
    y = "Total Count"
  ) +
  theme_minimal(base_size = 12)
122+
123+
# Tag every row with its season
# Ids starting with "s1_" or "s2_" get that season; anything else becomes NA
emotion_seasons <- mutate(
  emotion_data,
  season = ifelse(
    grepl("^s1_", id),
    "s1",
    ifelse(grepl("^s2_", id), "s2", NA)
  )
)
128+
129+
# Sum every score column (anger through positive) within each season
emotion_by_season <- summarise(
  group_by(emotion_seasons, season),
  across(anger:positive, function(col) sum(col, na.rm = TRUE))
)

# Reshape to one row per season/emotion pair for plotting
emotion_long <- pivot_longer(
  emotion_by_season,
  cols = anger:positive,
  names_to = "emotion",
  values_to = "count"
)
139+
140+
# Side-by-side bars comparing the emotion totals of the seasons.
# Bars and count labels share one position_dodge() so each label sits at the
# end of its own season's bar; without dodging the text, all labels of an
# emotion are drawn at the same position in the middle of the bar group.
season_dodge <- position_dodge(width = 0.9)

ggplot(
  emotion_long,
  aes(x = reorder(emotion, -count), y = count, fill = season)
) +
  geom_col(position = season_dodge) +
  geom_text(
    aes(label = count),
    position = season_dodge,
    hjust = -0.2,
    size = 2
  ) +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal(base_size = 12) +
  labs(
    title = "Emotion Distribution by Season",
    x = "Emotion",
    y = "Total Count",
    fill = "Season"
  ) +
  coord_flip()
147+
148+
# Emotion co-occurrence: pairwise Pearson correlations between the emotion
# columns (anger through trust)
emotion_matrix <- select(emotion_data, anger:trust)
co_occurrence <- cor(emotion_matrix, method = "pearson")

# Blank the diagonal so self-correlations are not plotted
diag(co_occurrence) <- NA

# One row per emotion pair, ready for geom_tile()
co_occurrence_long <- setNames(
  as.data.frame(as.table(co_occurrence)),
  c("emotion1", "emotion2", "correlation")
)
157+
158+
# Plot heatmap
# Pearson correlations lie in [-1, 1]. With limits of c(0, 1) every negative
# correlation is out of bounds and turned into NA, i.e. drawn in the same grey
# as the blanked diagonal. Use the full range with a diverging
# blue-white-red scale instead.
ggplot(
  co_occurrence_long,
  aes(x = emotion1, y = emotion2, fill = correlation)
) +
  geom_tile(color = "white") +
  scale_fill_gradient2(
    low = "blue",
    mid = "white",
    high = "red",
    midpoint = 0,
    limits = c(-1, 1),
    na.value = "grey95",
    name = "Correlation"
  ) +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Emotion Co-occurrence Heatmap", x = "Emotion", y = "Emotion")
166+
167+
# Save Results
# write_csv() does not create missing folders, so make sure "output/" exists
# first (this is a no-op when the folder is already there).
dir.create("output", showWarnings = FALSE, recursive = TRUE)
write_csv(emotion_data, "output/sentiment_emotion_results.csv")
169+
```
Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
---
2+
title: "Sentiment and Emotion Analysis of Comments"
3+
editor: visual
4+
---
5+
6+
## Setup: Installing and Loading Packages
7+
8+
```{r}
9+
# Install the text-analysis packages only when they are not already available.
# An unconditional install.packages() call re-installs the packages on every
# render and errors in a non-interactive session that has no CRAN mirror set.
required_pkgs <- c("sentimentr", "syuzhet")
is_installed <- vapply(
  required_pkgs,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_pkgs[!is_installed])
}
# Uncomment any of these if the package is missing from your library:
# install.packages("dplyr")
# install.packages("tidyr")
# install.packages("readr")
# install.packages("ggplot2")
# install.packages("RColorBrewer")
# install.packages("stringr")
18+
```
19+
20+
```{r}
21+
# Load all packages
22+
library(sentimentr)
23+
library(syuzhet)
24+
library(dplyr)
25+
library(tidyr)
26+
library(readr)
27+
library(ggplot2)
28+
library(RColorBrewer)
29+
library(stringr)
30+
```
31+
32+
### Polarity Analysis
33+
34+
```{r}
35+
# Read the preprocessed comments
comments <- readr::read_csv("./data/comments_preprocessed.csv")

# Compute sentiment per row/case
# NOTE(review): nothing in this chunk creates `sentiment_scores`, yet the
# mutate() below reads `sentiment_scores$ave_sentiment`; it has to be defined
# here before the following lines can run.

# Attach the score first, then bucket it into three polarity labels
polarity <- comments %>%
  mutate(score = sentiment_scores$ave_sentiment) %>%
  mutate(
    sentiment_label = case_when(
      score > 0.1 ~ "positive",
      score < -0.1 ~ "negative",
      TRUE ~ "neutral"
    )
  )
48+
49+
# Check first rows with results
50+
51+
52+
# Scores per label
53+
54+
```
55+
56+
#### Plotting
57+
58+
```{r}
59+
# Histogram of the sentiment scores over all comments
polarity %>%
  ggplot(aes(x = score)) +
  geom_histogram(binwidth = 0.1, fill = "skyblue", color = "white") +
  labs(
    title = "Sentiment Score Distribution",
    x = "Average Sentiment",
    y = "Count"
  ) +
  theme_minimal()

# Pull the season tag (first "s" followed by digits) out of the id column
polarity_seasons <- polarity %>%
  mutate(season = str_extract(id, "s\\d+"))

# Same histogram split by season; density on the y axis makes seasons with
# different numbers of comments comparable
polarity_seasons %>%
  ggplot(aes(x = score, fill = season)) +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 0.1,
    position = "dodge",
    color = "white"
  ) +
  scale_fill_brewer(palette = "Set1") +
  labs(
    title = "Sentiment Score Distribution by Season (Normalized)",
    x = "Average Sentiment Score (Polarity)",
    y = "Density (Proportion of Comments)"
  ) +
  theme_minimal()
80+
81+
# Save results
# write_csv() does not create missing folders, so make sure "output/" exists
# first (this is a no-op when the folder is already there).
dir.create("output", showWarnings = FALSE, recursive = TRUE)
write_csv(polarity, "output/polarity_results.csv")
83+
```
84+
85+
### Emotion Detection with Syuzhet's NRC Lexicon
86+
87+
```{r}
88+
# Detect emotions per comment/sentence: store the split text as `sentences` (the regroup step below reads it)
89+
90+
91+
# Compute Emotion Scores per Sentence
92+
# Assign NRC emotion scores (anger, joy, etc.) + positive/negative and store them as `emotion_score` (bound to the comments below)
93+
94+
95+
# Review Summary of Emotion Scores
96+
97+
98+
# Regroup with original comments/ids
# NOTE(review): `sentences` and `emotion_score` are not created by any code in
# this chunk as it stands; both must exist, with one entry per row of
# `comments`, before these lines run.
comments[["comments"]] <- sentences
emotion_data <- bind_cols(comments, emotion_score)

# Total count per emotion, largest first
emotion_summary <- emotion_data %>%
  select(anger:trust) %>% # emotion columns only
  summarise(across(everything(), sum)) %>%
  pivot_longer(everything(), names_to = "emotion", values_to = "count") %>%
  arrange(desc(count))
108+
```
109+
110+
### Plotting Emotion Results
111+
112+
```{r}
113+
# Horizontal bar chart of the total count for every emotion
emotion_summary %>%
  ggplot(aes(x = emotion, y = count, fill = emotion)) +
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = count), size = 2, hjust = -0.2) +
  coord_flip() +
  scale_fill_manual(values = brewer.pal(10, "Paired")) +
  labs(
    title = "Overall Emotion Distribution",
    x = "Emotion",
    y = "Total Count"
  ) +
  theme_minimal(base_size = 12)
121+
122+
# Tag every row with its season
# Ids starting with "s1_" or "s2_" get that season; anything else becomes NA
emotion_seasons <- mutate(
  emotion_data,
  season = ifelse(
    grepl("^s1_", id),
    "s1",
    ifelse(grepl("^s2_", id), "s2", NA)
  )
)
127+
128+
# Sum every score column (anger through positive) within each season
emotion_by_season <- summarise(
  group_by(emotion_seasons, season),
  across(anger:positive, function(col) sum(col, na.rm = TRUE))
)

# Reshape to one row per season/emotion pair for plotting
emotion_long <- pivot_longer(
  emotion_by_season,
  cols = anger:positive,
  names_to = "emotion",
  values_to = "count"
)
138+
139+
# Side-by-side bars comparing the emotion totals of the seasons.
# Bars and count labels share one position_dodge() so each label sits at the
# end of its own season's bar; without dodging the text, all labels of an
# emotion are drawn at the same position in the middle of the bar group.
season_dodge <- position_dodge(width = 0.9)

ggplot(
  emotion_long,
  aes(x = reorder(emotion, -count), y = count, fill = season)
) +
  geom_col(position = season_dodge) +
  geom_text(
    aes(label = count),
    position = season_dodge,
    hjust = -0.2,
    size = 2
  ) +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal(base_size = 12) +
  labs(
    title = "Emotion Distribution by Season",
    x = "Emotion",
    y = "Total Count",
    fill = "Season"
  ) +
  coord_flip()
146+
147+
# Emotion co-occurrence: pairwise Pearson correlations between the emotion
# columns (anger through trust)
emotion_matrix <- select(emotion_data, anger:trust)
co_occurrence <- cor(emotion_matrix, method = "pearson")

# Blank the diagonal so self-correlations are not plotted
diag(co_occurrence) <- NA

# One row per emotion pair, ready for geom_tile()
co_occurrence_long <- setNames(
  as.data.frame(as.table(co_occurrence)),
  c("emotion1", "emotion2", "correlation")
)
156+
157+
# Plot heatmap
# Pearson correlations lie in [-1, 1]. With limits of c(0, 1) every negative
# correlation is out of bounds and turned into NA, i.e. drawn in the same grey
# as the blanked diagonal. Use the full range with a diverging
# blue-white-red scale instead.
ggplot(
  co_occurrence_long,
  aes(x = emotion1, y = emotion2, fill = correlation)
) +
  geom_tile(color = "white") +
  scale_fill_gradient2(
    low = "blue",
    mid = "white",
    high = "red",
    midpoint = 0,
    limits = c(-1, 1),
    na.value = "grey95",
    name = "Correlation"
  ) +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Emotion Co-occurrence Heatmap", x = "Emotion", y = "Emotion")
165+
166+
# Save Results
# write_csv() does not create missing folders, so make sure "output/" exists
# first (this is a no-op when the folder is already there).
dir.create("output", showWarnings = FALSE, recursive = TRUE)
write_csv(emotion_data, "output/sentiment_emotion_results.csv")
168+
```

0 commit comments

Comments
 (0)