-
Notifications
You must be signed in to change notification settings - Fork 2
/
08_cumulative-acoustic-detections-birds.Rmd
279 lines (215 loc) · 14.1 KB
/
08_cumulative-acoustic-detections-birds.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
---
editor_options:
chunk_output_type: console
---
# Acoustic Detections
In this script, we will calculate:
a) Total Number of detections across sites (reported for varying time intervals 10s, 30s, 1-min, 2-min, 4-min).
b) Repeat the above calculations, but using species traits - If a species is a rainforest specialist or an open-country generalist.
## Install required libraries
```{r}
library(tidyverse)
library(dplyr)
library(stringr)
library(vegan)
library(ggplot2)
library(scico)
library(extrafont)
```
## Load data
We will use the data that was subset previously for further analysis.
```{r}
# Attach the annotated data
datSubset <- read.csv("results/datSubset.csv")
# Load the species-trait-data
trait_dat <- read.csv("data/species-trait-dat.csv")
```
## Overall number of detections
Given the data annotated so far, we will calculate the overall number of detections across different temporal periods, starting from 10s to 30-s, 1-min, 2-min and 4-min. We will first calculate the overall number of detections for the shortest possible temporal duration which could be annotated confidently - 10s.
Other durations are chosen to confirm if the number of detections vary as a function of the temporal duration.
```{r}
# Calculate the overall number of detections for each site where each temporal duration chosen is a 10s clip
nDetections_10s <- datSubset %>%
group_by(Site, Restoration.type) %>%
transform() %>% replace(is.na(.), 0) %>%
summarise_at(.vars = vars(c("IP":"HSWP")),.funs = sum)
# Calculate the overall number of detections for each site where each temporal duration chosen is a 30s clip (In this case, every third row is chosen after grouping by Site, Date and Time)
nDetections_30s <- datSubset %>%
mutate(Splits = case_when((Splits == "01-10" | Splits=="10-20" | Splits =="20-30") ~ "1",(Splits == "30-40" | Splits=="40-50" | Splits =="50-60") ~ "2",(Splits == "60-70" | Splits=="70-80" | Splits =="80-90") ~ "3", (Splits == "90-100" | Splits=="100-110" | Splits =="110-120") ~ "4",(Splits == "120-130" | Splits=="130-140" | Splits =="140-150") ~ "5",(Splits == "150-160" | Splits=="160-170" | Splits =="170-180") ~ "6",(Splits == "180-190" | Splits=="190-200" | Splits =="200-210") ~ "7", (Splits == "210-220" | Splits=="220-230" | Splits =="230-240") ~"8")) %>% group_by(Site, Date, Time, Splits, Restoration.type) %>%
summarise_at(.vars = vars(c("IP":"HSWP")),.funs = sum)
# Convert nDetections >1 within a 30s period to 1 (since your temporal unit here is 30s)
nDetections_30s <- nDetections_30s %>%
group_by(Site, Restoration.type) %>%
transform() %>% replace(is.na(.), 0) %>%
mutate_at(vars(c("IP":"HSWP")),~ replace(., . > 0, 1)) %>%
summarise_at(.vars = vars(c("IP":"HSWP")),.funs = sum)
# Calculate the overall number of detections for each site where each temporal duration chosen is a 60s clip (In this case, every sixth row is chosen after grouping by Site, Date and Time)
nDetections_1min <- datSubset %>%
mutate(Splits = case_when((Splits == "01-10" | Splits=="10-20" | Splits =="20-30" |
Splits == "30-40" | Splits=="40-50" | Splits =="50-60") ~ "1", (Splits == "60-70" | Splits=="70-80" | Splits =="80-90" |
Splits == "90-100" | Splits=="100-110" | Splits =="110-120") ~ "2",
(Splits == "120-130" | Splits=="130-140" | Splits =="140-150" |
Splits == "150-160" | Splits=="160-170" | Splits =="170-180") ~ "3",
(Splits == "180-190" | Splits=="190-200" | Splits =="200-210" |
Splits == "210-220" | Splits=="220-230" | Splits =="230-240") ~"4")) %>% group_by(Site, Date, Time, Splits, Restoration.type) %>%
summarise_at(.vars = vars(c("IP":"HSWP")),.funs = sum)
# Convert nDetections>1 within a 1-min period to 1 (since your temporal unit here is 1-min)
nDetections_1min <- nDetections_1min %>%
group_by(Site, Restoration.type) %>%
transform() %>% replace(is.na(.), 0) %>%
mutate_at(vars(c("IP":"HSWP")),~ replace(., . > 0, 1)) %>%
summarise_at(.vars = vars(c("IP":"HSWP")),.funs = sum)
# Calculate the overall number of detections for each site where each temporal duration chosen is a 120s clip (In this case, every twelfth row is chosen after grouping by Site, Date and Time)
nDetections_2min <- datSubset %>%
mutate(Splits = case_when((Splits == "01-10" | Splits=="10-20" | Splits =="20-30" |
Splits == "30-40" | Splits=="40-50" | Splits =="50-60" |
Splits == "60-70" | Splits=="70-80" | Splits =="80-90" |
Splits == "90-100" | Splits=="100-110" | Splits =="110-120") ~ "1",
(Splits == "120-130" | Splits=="130-140" | Splits =="140-150" |
Splits == "150-160" | Splits=="160-170" | Splits =="170-180" |
Splits == "180-190" | Splits=="190-200" | Splits =="200-210" |
Splits == "210-220" | Splits=="220-230" | Splits =="230-240") ~"2")) %>% group_by(Site, Date, Time, Splits, Restoration.type) %>%
summarise_at(.vars = vars(c("IP":"HSWP")),.funs = sum)
# Convert nDetections>1 within a 2-min period to 1 (since your temporal unit here is 2-min)
nDetections_2min <- nDetections_2min %>%
group_by(Site, Restoration.type) %>%
transform() %>% replace(is.na(.), 0) %>%
mutate_at(vars(c("IP":"HSWP")),~ replace(., . > 0, 1)) %>%
summarise_at(.vars = vars(c("IP":"HSWP")),.funs = sum)
# Calculate the overall number of detections for each site where each temporal duration chosen is a 240s clip (In this case, every twentyfourth row is chosen after grouping by Site, Date and Time)
nDetections_4min <- datSubset %>%
group_by(Site, Date, Time, Restoration.type) %>%
summarise_at(.vars = vars(c("IP":"HSWP")),.funs = sum)
# Convert nDetections>1 within a 4-min period to 1 (since your temporal unit here is 4-min)
nDetections_4min <- nDetections_4min %>%
group_by(Site, Restoration.type) %>%
transform() %>% replace(is.na(.), 0) %>%
mutate_at(vars(c("IP":"HSWP")),~ replace(., . > 0, 1)) %>%
summarise_at(.vars = vars(c("IP":"HSWP")),.funs = sum)
```
## Detections by treatment type
How does the number of detections vary as a function of restoration type? (tested across different temporal durations)?
```{r}
# Testing if there is significant differences in overall number of detections across treatment types (this has been done only for the smallest temporal duration ~10s, while total number of detections have been estimated for different temporal durations)
sum_Detections10s <- nDetections_10s %>%
rowwise() %>%
mutate(sumDetections = sum(c_across(IP:HSWP))) %>%
dplyr::select(Site, Restoration.type, sumDetections)
# Test if there are significant differences in detections across treatment types
anovaAllDetect <- aov(sumDetections~Restoration.type, data = sum_Detections10s)
# Tukey test to study each pair of treatment - reveals no signficant difference across treatment types
tukeyAllDetect <- TukeyHSD(anovaAllDetect)
# Estimating total number of detections for different temporal durations
sum_Detections30s <- nDetections_30s %>%
rowwise() %>%
mutate(sumDetections = sum(c_across(IP:HSWP))) %>%
dplyr::select(Site, Restoration.type, sumDetections)
sum_Detections1min <- nDetections_1min %>%
rowwise() %>%
mutate(sumDetections = sum(c_across(IP:HSWP))) %>%
dplyr::select(Site, Restoration.type, sumDetections)
sum_Detections2min <- nDetections_2min %>%
rowwise() %>%
mutate(sumDetections = sum(c_across(IP:HSWP))) %>%
dplyr::select(Site, Restoration.type, sumDetections)
sum_Detections4min <- nDetections_4min %>%
rowwise() %>%
mutate(sumDetections = sum(c_across(IP:HSWP))) %>%
dplyr::select(Site, Restoration.type, sumDetections)
# Plotting the above (multiple plots for each temporal duration)
# Note: the cumulative number of detections across all species was obtained by summing every 16-min to 48-min set of detections across each site, including all species.
# reordering factors for plotting
sum_Detections10s$Restoration.type <- factor(sum_Detections10s$Restoration.type, levels = c("Benchmark", "Active", "Passive"))
# Add a custom set of colors
mycolors <- c(brewer.pal(name="Dark2", n = 3), brewer.pal(name="Paired", n = 3))
fig_sumDetections10s <- ggplot(sum_Detections10s, aes(x=Restoration.type, y=sumDetections, fill=Restoration.type)) +
geom_boxplot(alpha=0.7) +
scale_fill_manual("Treatment type",values=mycolors, labels=c("BM","AR","NR")) +
theme_bw() +
labs(x="\nTreatment Type",
y="Cumulative number of detections\n") +
scale_x_discrete(labels = c('BM','AR','NR')) +
theme(axis.title = element_text(family = "Century Gothic",
size = 14, face = "bold"),
axis.text = element_text(family="Century Gothic",size = 14),
legend.position = "none")
ggsave(fig_sumDetections10s, filename = "figs/fig_sumDetections10s.png", width=12, height=7, device = png(), units="in", dpi = 300); dev.off()
```
![No significant difference in overall number of detections across treatment types.](figs/fig_sumDetections10s.png)
## Detections by species traits
How does the cumulative number of detections vary by treatment type, as a function of whether a species is a rainforest specialist or an open country generalist? (These calculations are repeated for different temporal durations to assess differences, if any)
```{r}
# First we merge the species trait dataset with the nDetections dataframe (across different temporal durations)
detections_trait10s <- nDetections_10s %>%
pivot_longer(cols=IP:HSWP, names_to="Species_Code", values_to="count") %>%
left_join(.,trait_dat, by=c("Species_Code"="species_annotation_codes"))
detections_trait30s <- nDetections_30s %>%
pivot_longer(cols=IP:HSWP, names_to="Species_Code", values_to="count") %>%
left_join(.,trait_dat, by=c("Species_Code"="species_annotation_codes"))
detections_trait1min <- nDetections_1min %>%
pivot_longer(cols=IP:HSWP, names_to="Species_Code", values_to="count") %>%
left_join(.,trait_dat, by=c("Species_Code"="species_annotation_codes"))
detections_trait2min <- nDetections_2min %>%
pivot_longer(cols=IP:HSWP, names_to="Species_Code", values_to="count") %>%
left_join(.,trait_dat, by=c("Species_Code"="species_annotation_codes"))
detections_trait4min <- nDetections_4min %>%
pivot_longer(cols=IP:HSWP, names_to="Species_Code", values_to="count") %>%
left_join(.,trait_dat, by=c("Species_Code"="species_annotation_codes"))
# Calculate overall number of detections for each site as a function of rainforest species and open-country species and test for differences across treatment types (Calculated only for the smallest temporal duration - 10s)
detections_trait10s <- detections_trait10s %>%
dplyr::select(Site, Restoration.type, Species_Code, habitat, count) %>%
group_by(Site, Restoration.type, habitat) %>%
summarise(sumDetections = sum(count)) %>%
drop_na()
# Split the above data into rainforest species and open country
detections_10s_rainforest <- detections_trait10s %>%
filter(habitat=="RF")
detections_10s_openCountry <- detections_trait10s %>%
filter(habitat=="OC")
# Test if there are significant differences in detections across treatment types as a function of species trait
anova_rainforestDet <- aov(sumDetections~Restoration.type, data = detections_10s_rainforest)
anova_opencountryDet <- aov(sumDetections~Restoration.type, data = detections_10s_openCountry)
# Tukey test to study each pair of treatment - reveals no signficant difference across treatment types
tukey_rainforestDet <- TukeyHSD(anova_rainforestDet)
tukey_opencountryDet <- TukeyHSD(anova_opencountryDet)
# The above results for rainforest birds reveal a significant difference in rainforest bird detections between benchmark sites and active sites and benchmark sites and passive sites.
# For open-country birds, there is a significant difference between every pair of treatment type.
# Calculating overall number of detections for other temporal durations as a function of species trait
detections_trait30s <- detections_trait30s %>%
dplyr::select(Site, Restoration.type, Species_Code, habitat, count) %>%
group_by(Site, Restoration.type, habitat) %>%
summarise(sumDetections = sum(count))
detections_trait1min <- detections_trait1min %>%
dplyr::select(Site, Restoration.type, Species_Code, habitat, count) %>%
group_by(Site, Restoration.type, habitat) %>%
summarise(sumDetections = sum(count))
detections_trait2min <- detections_trait2min %>%
dplyr::select(Site, Restoration.type, Species_Code, habitat, count) %>%
group_by(Site, Restoration.type, habitat) %>%
summarise(sumDetections = sum(count))
detections_trait4min <- detections_trait4min %>%
dplyr::select(Site, Restoration.type, Species_Code, habitat, count) %>%
group_by(Site, Restoration.type, habitat) %>%
summarise(sumDetections = sum(count))
# Plot the figures
# reordering factors for plotting
detections_trait10s$Restoration.type <- factor(detections_trait10s$Restoration.type, levels = c("Benchmark", "Active", "Passive"))
fig_detections_trait10s <- ggplot(detections_trait10s, aes(x=Restoration.type, y=sumDetections, fill=habitat)) +
geom_boxplot(alpha=0.7) +
scale_fill_scico_d(palette = "roma",
labels=c("Open country","Rainforest")) +
theme_bw() +
labs(x="\nTreatment Type",
y="Cumulative number of detections\n") +
scale_x_discrete(labels = c('BM','AR','NR')) +
theme(axis.title = element_text(family="Century Gothic",
size = 14, face = "bold"),
axis.text = element_text(family="Century Gothic",size = 14),
legend.title = element_text(family="Century Gothic",
size = 14, face = "bold"),
legend.key.size = unit(1,"cm"),
legend.text = element_text(family="Century Gothic",size = 14))
# Save the above plot
ggsave(fig_detections_trait10s, filename = "figs/fig_detections_trait10s.png", width=12, height=7,device = png(), units="in", dpi = 300); dev.off()
```
![Overall number of rainforest bird species detections differed between BM and AR and BM and NR sites. For open-country bird species detections, we observed significant differences across every pair of treatment types.](figs/fig_detections_trait10s.png)