WoRking with time seRies data in R

Denis Arnold

Date and Time

xkcd cc-by-nc 2.5

Standards

xkcd cc-by-nc 2.5

Dates and time in base R

# Parse ISO-style date-time strings; no format argument is needed for this layout.
bsp1=as.POSIXct("2025-02-13 11:00:00")
str(bsp1)
 POSIXct[1:1], format: "2025-02-13 11:00:00"
bsp2=as.POSIXct("2025-02-13 12:30:00")
print(bsp2)
[1] "2025-02-13 12:30:00 UTC"
# Subtracting two POSIXct values gives a time difference (negative here: bsp1 is the earlier one).
print(bsp1-bsp2)
Time difference of -1.5 hours
# grep() matches against the text form of the timestamps and returns the index of the hit.
grep("12:30",c(bsp1,bsp2))
[1] 2

What about non-ISO dates?

# A format string tells as.POSIXct() how to read day.month.year input.
bsp3=as.POSIXct("13.02.2025 12:30", format="%d.%m.%Y %H:%M")
print(bsp3)
[1] "2025-02-13 12:30:00 UTC"
# The same layout read as month.day.year: "12.02." becomes 2 December, not 12 February.
# The string alone cannot tell which reading is intended.
bsp4=as.POSIXct("12.02.2025 12:30", format="%m.%d.%Y %H:%M")
print(bsp4)
[1] "2025-12-02 12:30:00 UTC"

Further reading

Further reading II

Data

The University and City Library of Cologne keeps track of how many visitors are inside the public part of the building. The provided CSV file contains all data for January 2025.

Download

Download data

Download data

Data

# Inspect the CSV from the shell: size, file type, and the first five lines.
ls -lah data/USB_occupancy_2025-01.csv
# NOTE(review): the "file" utility was not available where this was rendered (see the error below).
file data/USB_occupancy_2025-01.csv
head -n 5 data/USB_occupancy_2025-01.csv
-rw-rw-rw-. 1 root root 1.5M Dec 15 10:39 data/USB_occupancy_2025-01.csv
bash: line 2: file: command not found
"tstamp","num_entries","num_exits","num_occupancy"
"2025-01-01 00:00:00",0,0,0
"2025-01-01 00:01:00",0,0,0
"2025-01-01 00:02:00",0,0,0
"2025-01-01 00:03:00",0,0,0

Data

# Read the raw CSV; tstamp is still plain text at this point (see the str() output).
jan=read.csv("data/USB_occupancy_2025-01.csv")
str(jan)
'data.frame':   44609 obs. of  4 variables:
 $ tstamp       : chr  "2025-01-01 00:00:00" "2025-01-01 00:01:00" "2025-01-01 00:02:00" "2025-01-01 00:03:00" ...
 $ num_entries  : int  0 0 0 0 0 0 0 0 0 0 ...
 $ num_exits    : int  0 0 0 0 0 0 0 0 0 0 ...
 $ num_occupancy: int  0 0 0 0 0 0 0 0 0 0 ...
# How many data points do we expect? (31 days x 24 hours x 60 minutes, one row per minute)
31*24*60
[1] 44640
# Fraction of data missing (a proportion, not a percentage: roughly 0.07 %)
((31*24*60)-dim(jan)[1])/(31*24*60)
[1] 0.0006944444

Converting to dates

# Convert the text timestamps to POSIXct and confirm the class change.
jan=read.csv("data/USB_occupancy_2025-01.csv")
class(jan$tstamp)
[1] "character"
# The ISO-style strings parse without an explicit format argument.
jan$tstamp=as.POSIXct(jan$tstamp)
class(jan$tstamp)
[1] "POSIXct" "POSIXt" 

A first plot of the data

library(ggplot2)

# Load the January 2025 occupancy log and parse its timestamp column.
jan <- read.csv(file = "data/USB_occupancy_2025-01.csv")
jan[["tstamp"]] <- as.POSIXct(jan[["tstamp"]])

# Occupancy over the whole month, one pixel-sized dot per record.
ggplot(data = jan, mapping = aes(x = tstamp, y = num_occupancy)) +
  geom_point(shape = ".")

A first plot of the data

Plotting data from 2025-01-15

Code
library(ggplot2)

# Load the January 2025 occupancy log and parse its timestamp column.
jan <- read.csv("data/USB_occupancy_2025-01.csv")
jan$tstamp <- as.POSIXct(jan$tstamp)

# Keep only the rows of 2025-01-15. The calendar date is formatted explicitly
# and compared as a whole, instead of regex-matching timestamps that grep()
# first has to convert to text behind the scenes. %in% never yields NA, so
# rows whose timestamp could not be parsed are simply left out.
ggplot(
  jan[format(jan$tstamp, "%Y-%m-%d") %in% "2025-01-15", ],
  aes(x = tstamp, y = num_occupancy)
) +
  geom_point()

Weeks

# Load the January 2025 occupancy log and parse its timestamp column.
jan <- read.csv(file = "data/USB_occupancy_2025-01.csv")
jan[["tstamp"]] <- as.POSIXct(jan[["tstamp"]])

# "%W" is the week of the year as text, counting weeks from Monday;
# days before the first Monday of the year fall into week "00".
jan[["week"]] <- strftime(jan[["tstamp"]], format = "%W")
str(jan[["week"]])
 chr [1:44609] "00" "00" "00" "00" "00" "00" "00" "00" "00" "00" "00" "00" ...

Weekdays

# Load the January 2025 occupancy log and parse its timestamp column.
jan <- read.csv("data/USB_occupancy_2025-01.csv")
jan$tstamp <- as.POSIXct(jan$tstamp)
# weekdays() returns day names in the current LC_TIME locale, so English
# factor levels only match when switching the locale actually succeeds
# (Sys.setlocale() returns "" when it does not). "%u" is the weekday number
# (1 = Monday ... 7 = Sunday) and is identical in every locale, so it is
# mapped to English labels here without touching the process locale.
jan$weekday <- factor(
  x = strftime(jan$tstamp, format = "%u"),
  levels = as.character(1:7),
  labels = c("Monday", "Tuesday", "Wednesday",
             "Thursday", "Friday", "Saturday",
             "Sunday")
)
str(jan$weekday)
 Factor w/ 7 levels "Monday","Tuesday",..: 3 3 3 3 3 3 3 3 3 3 ...

Plotting all data by weekday and week

library(ggplot2)

# Load the January 2025 occupancy log and parse its timestamp column.
jan <- read.csv("data/USB_occupancy_2025-01.csv")
jan$tstamp <- as.POSIXct(jan$tstamp)
# Build the weekday factor from "%u" (1 = Monday ... 7 = Sunday), which is the
# same in every locale. weekdays() would return locale-dependent names and
# silently produce NA levels whenever the English locale cannot be set.
jan$weekday <- factor(
  x = strftime(jan$tstamp, format = "%u"),
  levels = as.character(1:7),
  labels = c("Monday", "Tuesday", "Wednesday",
             "Thursday", "Friday", "Saturday",
             "Sunday")
)
# "%W" is the week of the year as text, counting weeks from Monday.
jan$week <- strftime(jan$tstamp, format = "%W")

# One panel per week (rows) and weekday (columns).
ggplot(jan, aes(x = tstamp, y = num_occupancy)) +
  geom_point(shape = ".") +
  facet_grid(week ~ weekday)

Plotting all data by weekday and week

Exploiting the double nature of POSIXct

library(ggplot2)

# Load the January 2025 occupancy log and parse its timestamp column.
jan <- read.csv("data/USB_occupancy_2025-01.csv")
jan$tstamp <- as.POSIXct(jan$tstamp)
# Build the weekday factor from "%u" (1 = Monday ... 7 = Sunday), which is the
# same in every locale. weekdays() would return locale-dependent names and
# silently produce NA levels whenever the English locale cannot be set.
jan$weekday <- factor(
  x = strftime(jan$tstamp, format = "%u"),
  levels = as.character(1:7),
  labels = c("Monday", "Tuesday", "Wednesday",
             "Thursday", "Friday", "Saturday",
             "Sunday")
)
# "%W" is the week of the year as text, counting weeks from Monday.
jan$week <- strftime(jan$tstamp, format = "%W")
# Time of day as text ("HH:MM:SS"): the same value recurs on every date, so
# all panels share one x axis regardless of the calendar day.
jan$time_day <- format(jan$tstamp, format = "%H:%M:%S")

# One panel per week (rows) and weekday (columns), plotted against time of day.
ggplot(jan, aes(x = time_day, y = num_occupancy)) +
  geom_point(shape = ".") +
  facet_grid(week ~ weekday)

Exploiting the double nature

THANK YOU!

Contact


 


Denis Arnold

denis.arnold@uni-koeln.de