The data for New York, Moscow and the world were extracted from the temperatures database (the database was recreated locally from the downloaded .csv files in order to include an executable query in R Markdown).
SELECT sub.year, world, new_york, c2.avg_temp AS moscow
FROM (SELECT g.year, c1.avg_temp AS new_york, g.avg_temp AS world
FROM city_data AS c1
JOIN global_data AS g USING(year)
WHERE c1.city = 'New York') sub
JOIN city_data as c2 USING(year)
WHERE c2.city = 'Moscow'
ORDER BY 1;
year | world | new_york | moscow |
---|---|---|---|
1750 | 8.72 | 10.07 | 4.84 |
1751 | 7.98 | 10.79 | 5.07 |
1752 | 5.78 | 2.81 | -2.08 |
1753 | 8.39 | 9.52 | 3.87 |
1754 | 8.47 | 9.88 | 4.07 |
1755 | 8.36 | 6.61 | 3.99 |
1756 | 8.85 | 9.94 | 4.51 |
1757 | 9.02 | 8.89 | 4.66 |
1758 | 6.74 | 8.15 | 2.22 |
1759 | 7.99 | 9.01 | 3.45 |
The data type was converted from character to numeric for calculations. The missing data for New York in 1780 were filled in using linear interpolation, as recommended for time series data with trends and without seasonality [1].
## 'data.frame': 264 obs. of 4 variables:
## $ year : num 1750 1751 1752 1753 1754 ...
## $ world : num 8.72 7.98 5.78 8.39 8.47 8.36 8.85 9.02 6.74 7.99 ...
## $ new_york: num 10.07 10.79 2.81 9.52 9.88 ...
## $ moscow : num 4.84 5.07 -2.08 3.87 4.07 3.99 4.51 4.66 2.22 3.45 ...
## year world new_york moscow
## Min. :1750 Min. :5.780 Min. : 0.250 Min. :-2.080
## 1st Qu.:1816 1st Qu.:8.078 1st Qu.: 9.068 1st Qu.: 3.310
## Median :1882 Median :8.365 Median : 9.545 Median : 4.005
## Mean :1882 Mean :8.359 Mean : 9.470 Mean : 4.001
## 3rd Qu.:1947 3rd Qu.:8.700 3rd Qu.:10.023 3rd Qu.: 4.630
## Max. :2013 Max. :9.730 Max. :12.160 Max. : 6.800
The raw data were plotted as a line chart.
The simple moving averages were calculated for all three temprature variables to minimize the fluctuations [2]. The new data were againg plotted as line charts. Two different windows - of 5 and of 10 years - were used to see the trends in shorter and longer time perspective.
World - New York: 0.51
World - Moscow: 0.66
New York - Moscow: 0.42
World - New York: 0.65
World - Moscow: 0.83
New York - Moscow: 0.58
World - New York: 0.76
World - Moscow: 0.89
New York - Moscow: 0.7
To estimate the local temperatures in New York or Moscow depending on the global temperature level, we can use the differences obtained earlier from the line charts and summary statistics and add ~1ÂșC for New York or subtract ~4ÂșC for Moscow. Another option is scatterplots with trendlines. It should be kept in mind that for New York the estimates based on global temperatures in the range of 7.7-8.6 ÂșC may be less accurate.
### Step 1
library(DBI)
db <- dbConnect(RSQLite::SQLite(), dbname = 'avg_temp.db')
SELECT sub.year, world, new_york, c2.avg_temp AS moscow
FROM (SELECT g.year, c1.avg_temp AS new_york, g.avg_temp AS world
FROM city_data AS c1
JOIN global_data AS g USING(year)
WHERE c1.city = 'New York') sub
JOIN city_data as c2 USING(year)
WHERE c2.city = 'Moscow'
ORDER BY 1;
### Step 2
avg_temp$year = as.numeric(avg_temp$year)
avg_temp$world = as.numeric(avg_temp$world)
avg_temp$new_york = as.numeric(avg_temp$new_york)
avg_temp$moscow = as.numeric(avg_temp$moscow)
library(imputeTS)
avg_temp <- na.interpolation(avg_temp)
str(avg_temp)
summary(avg_temp)
###Step 3
plot_temp <- function(year, world, new_york, moscow, title) {
plot(year,world, type='l', col='black', main=title, ylim = c(-5, 15), ylab = 'Average Temperature, ÂșC', xlim = c(1750, 2020), xlab = 'Years', xaxp = c(1750, 2020, 27), las=2)
lines(year, moscow, col='blue')
lines(year, new_york, col='red')
grid(nx = 28, ny=22, col = 'lightgray', lty = 'dotted')
legend('bottomright', legend = c('World', 'New York', 'Moscow'), col = c('black', 'red', 'blue'), lty=1, bty = "n", pt.cex = 2, cex = 0.8, text.col = "black")
}
plot_temp(avg_temp$year, avg_temp$world, avg_temp$new_york, avg_temp$moscow, 'Temperature Trends')
### Step 4
window = 5
avgtype = "s" # for testing other moving average types
library(pracma)
avg_temp$world_1 <- movavg(avg_temp$world, window, avgtype)
avg_temp$new_york_1 <- movavg(avg_temp$new_york, window, avgtype)
avg_temp$moscow_1 <- movavg(avg_temp$moscow, window, avgtype)
plot_temp(avg_temp$year, avg_temp$world_1, avg_temp$new_york_1, avg_temp$moscow_1, 'Temperature Trends\n(SMA for 5 years)')
window = 10
avg_temp$world_2 <- movavg(avg_temp$world, window, avgtype)
avg_temp$new_york_2 <- movavg(avg_temp$new_york, window, avgtype)
avg_temp$moscow_2 <- movavg(avg_temp$moscow, window, avgtype)
plot_temp(avg_temp$year, avg_temp$world_2, avg_temp$new_york_2, avg_temp$moscow_2, 'Temperature Trends\n(SMA for 10 years)')
### Step 5
city_scatter <- function(world, city, city_name) {
plot(world, city, main = paste("Average yearly temperature: World vs", city_name), xlab = "World", ylab = city_name)
grid(col = "lightgray", lty = "dotted")
abline(lm(city~world), col="red")
}
city_scatter(avg_temp$world_2, avg_temp$new_york_2, "New York")
city_scatter(avg_temp$world_2, avg_temp$moscow_2, "Moscow")