Chapter 3 Spatial Lattice Data Analysis
3.1 Prerequisites
library(tmap)
library(spdep)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(maptools)
## Checking rgeos availability: TRUE
## Please note that 'maptools' will be retired by the end of 2023,
## plan transition at your earliest convenience;
## some functionality will be moved to 'sp'.
3.2 Columbus Dataset
example(columbus)
##
## colmbs> columbus <- st_read(system.file("shapes/columbus.shp", package="spData")[1], quiet=TRUE)
##
## colmbs> col.gal.nb <- read.gal(system.file("weights/columbus.gal", package="spData")[1])
plot(columbus$geometry)
library(tmap)
tm_shape(columbus) + tm_polygons()
## Warning: Currect projection of shape columbus unknown. Long-lat (WGS84) is
## assumed.
3.3 Obtain the Centroids
#columbus <- st_read(system.file("shapes/columbus.shp", package="spData")[1], quiet=TRUE)
#col.gal.nb <- read.gal(system.file("weights/columbus.gal", package="spData")[1])
<- st_coordinates(st_centroid(st_geometry(columbus))) coords
3.4 Obtain the Neighbourhood Relationship
3.4.1 Contiguity Based Neighbours
3.4.1.1 Rook style NB
<- poly2nb(as(columbus, "Spatial"), queen = FALSE)
nbRook class(nbRook)
## [1] "nb"
nbRook
## Neighbour list object:
## Number of regions: 49
## Number of nonzero links: 200
## Percentage nonzero weights: 8.329863
## Average number of links: 4.081633
As you can see this is an “nb” class object and for each polygon (in the Columbus dataset we have 49 polygons) in our polygon object, nbRook lists all neighboring polygons. For example, to see the neighbors for the first polygon in the object, type:
1]] nbRook[[
## [1] 2 3
Polygon 1 has 2 neighbours according to Queen Style Contiguity Based Neighbour definition. These 2 neighbours are Poly2 and Poly3.
plot(st_geometry(columbus))
plot(nbRook, coords, add=TRUE, col="blue")
3.4.1.2 Queen style NB
<- poly2nb(as(columbus, "Spatial"), queen = TRUE)
nbQueen plot(st_geometry(columbus))
plot(nbQueen, coords, add=TRUE, col="red")
3.4.1.3 Difference between Rook and Queen
<- diffnb(nbQueen, nbRook)
dQueenRook plot(st_geometry(columbus))
plot(nbRook, coords, add = TRUE, col = "blue")
plot(dQueenRook, coords, add = TRUE, col = "red")
title(main=paste("Differences (red) in Rook",
"and polygon generated queen weights", sep="\n"), cex.main=0.6)
# poly2nb with sf sfc_MULTIPOLYGON objects
3.4.2 Distance Based Neighbours
3.4.2.1 1st Nearest Neighbour
=knearneigh(coords,k=1,longlat=TRUE)
whoisthefirstnear=knn2nb(whoisthefirstnear)
knn1columbusplot(st_geometry(columbus))
plot(knn1columbus, coords, add=TRUE, col="blue")
3.4.2.2 Two Nearest Neighbour
=knearneigh(coords,k=2,longlat=TRUE)
whois2near=knn2nb(whois2near)
knn2columbusplot(st_geometry(columbus))
plot(knn2columbus, coords, add=TRUE, col="blue")
3.4.2.3 Critical Cut-off
=nbdists(knn1columbus,coords,longlat=TRUE)
distBetwNeigh1=max(unlist(distBetwNeigh1))
all.linkedTresh all.linkedTresh
## [1] 67.50447
=dnearneigh(coords,0,68,longlat=TRUE)
dnbTresh1summary(dnbTresh1)
## Neighbour list object:
## Number of regions: 49
## Number of nonzero links: 252
## Percentage nonzero weights: 10.49563
## Average number of links: 5.142857
## Link number distribution:
##
## 1 2 3 4 5 6 7 8 9 10 11
## 4 8 6 2 5 8 6 2 6 1 1
## 4 least connected regions:
## 6 10 21 47 with 1 link
## 1 most connected region:
## 28 with 11 links
plot(st_geometry(columbus))
plot(dnbTresh1, coords, add=TRUE, col="blue")
If you examine this neighbourhood list object a bit more closely you will see that, Polygon1 is neighbours with Poly2 and Poly3; Polygon2 is neighbours with Poly1 and Poly4; Polygon3 is neighbours with Poly1, Poly4 and Poly5 and so on:
1:3] dnbTresh1[
## [[1]]
## [1] 2 3
##
## [[2]]
## [1] 1 4
##
## [[3]]
## [1] 1 4 5
In calculation of the weights that will be used to create the spatially lagged variables, only the neighbours values are going to be used. For example for the 1st Polygon, the values of neighbouring Poly2 and Poly3 will be used. Before doing this the nb values need to be standardized, so that the row
3.5 Neighbours to Weights Matrix
Here we will work with the Critical Cut-off nearest neighbours
=nb2listw(dnbTresh1,style="W",zero.policy=FALSE)
dnbTresh1.listwclass(dnbTresh1.listw)
## [1] "listw" "nb"
$weights[1:5] dnbTresh1.listw
## [[1]]
## [1] 0.5 0.5
##
## [[2]]
## [1] 0.5 0.5
##
## [[3]]
## [1] 0.3333333 0.3333333 0.3333333
##
## [[4]]
## [1] 0.25 0.25 0.25 0.25
##
## [[5]]
## [1] 0.2 0.2 0.2 0.2 0.2
In matrix format rather than a list:
listw2mat(dnbTresh1.listw)[1:5,1:5]
## [,1] [,2] [,3] [,4] [,5]
## 1 0.0000000 0.50 0.50 0.0000000 0.0000000
## 2 0.5000000 0.00 0.00 0.5000000 0.0000000
## 3 0.3333333 0.00 0.00 0.3333333 0.3333333
## 4 0.0000000 0.25 0.25 0.0000000 0.0000000
## 5 0.0000000 0.00 0.20 0.0000000 0.0000000
3.6 Moran’s I
3.6.1 Manual Calculation of Moran’s I Statistic with Matrix Operations
We can manually calculate the Moran’s I statistic using the lag operation with our chosen weights matrix, nearest neighbours within 68kms of radius.
= listw2mat(dnbTresh1.listw)
weightsmatrix_s = columbus$CRIME - mean(columbus$CRIME)
Y_s
t(Y_s)%*%weightsmatrix_s%*%Y_s/(t(Y_s)%*%Y_s)
## [,1]
## [1,] 0.5518257
The spatially lagged values of Y variable are obtained with W*Y as in your Moran’s I test statistic. Moran’s I is measuring the dependency of your Y values to the neighbouring Y values after all.
3.6.2 Moran’s I Test with moran.test() function
The same value can be simply obtained using the moran.test() function.
moran.test(columbus$CRIME,dnbTresh1.listw)
##
## Moran I test under randomisation
##
## data: columbus$CRIME
## weights: dnbTresh1.listw
##
## Moran I statistic standard deviate = 5.6206, p-value = 9.514e-09
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.55182569 -0.02083333 0.01038068
Examine the following:
= weightsmatrix_s%*%Y_s
lagY_s head(lagY_s)
## [,1]
## 1 -10.414556
## 2 -11.071954
## 3 -2.180407
## 4 -13.120658
## 5 12.195025
## 6 -4.612907
This is the spatialy lagged value for the Y variable. Technically it is the average Y values of the neighbouring polygons around each and every single polygon. For example take the 1st polygon, the lagged value of the 1st polygon is the average of the values for 2nd and 3rd polygons:
mean(c(Y_s[2], Y_s[3]))
## [1] -10.41456
# We can automatically create spatially lagged values of Y variable, this corresponds to W*Y in your Moran's I test statistic:
<- lag.listw(dnbTresh1.listw, Y_s) lagY_s
<- function(df, y, x){
lm_eqn <- lm(y ~ x, df);
m <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,
eq list(a = format(unname(coef(m)[1]), digits = 2),
b = format(unname(coef(m)[2]), digits = 2),
r2 = format(summary(m)$r.squared, digits = 3)))
as.character(as.expression(eq));
}
= data.frame(Y_s, lagY_s)
dataMoransI
library(ggplot2)
ggplot(dataMoransI, aes(x = Y_s, y = lagY_s)) + geom_point() + geom_smooth(method = "lm", se=FALSE)+geom_text(x = -10, y = 20, label = lm_eqn(dataMoransI,lagY_s,Y_s), parse = TRUE)+geom_vline(xintercept=0, linetype="dashed", color = "red")+geom_hline(yintercept=0.78, linetype="dashed", color = "red")
## `geom_smooth()` using formula = 'y ~ x'
Here the slope of the lm is the Moran’s I value. The same plot can be obtained using the moran.plot() function as follows:
moran.plot(columbus$CRIME, dnbTresh1.listw)
3.7 Local Moran’s I
= localmoran(Y_s,dnbTresh1.listw)
localmoranstatistics head(localmoranstatistics)
## Ii E.Ii Var.Ii Z.Ii Pr(z != E(Ii))
## 1 0.73681849 -0.0285985420 0.666144891 0.9378077 0.34834327
## 2 0.65915397 -0.0202502140 0.475741297 0.9850149 0.32461676
## 3 0.03579329 -0.0015396867 0.024041007 0.2407777 0.80972741
## 4 0.13113818 -0.0005707572 0.006541757 1.6284260 0.10343458
## 5 0.69380337 -0.0184931910 0.162742823 1.7656716 0.07745096
## 6 0.15242670 -0.0062384562 0.303777355 0.2878749 0.77344247
$localmoran = localmoranstatistics[,1] columbus
Plot of the Local Moran Statistics
tm_shape(columbus) + tm_polygons(style="quantile", col = "localmoran") +
tm_legend(outside = TRUE, text.size = .8)
## Warning: Currect projection of shape columbus unknown. Long-lat (WGS84) is
## assumed.
## Variable(s) "localmoran" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
3.9 OLS
<- lm(CRIME ~ INC + HOVAL, data=columbus)
OLScolumbus <- predict(OLScolumbus)
yhat = resid(OLScolumbus)
resids plot(OLScolumbus, which=1, col=c("blue")) # Residuals vs Fitted Plot
plot(OLScolumbus, which=2, col=c("red")) # Q-Q Plot
3.10 Moran’s I Test of Residual
<- lm.morantest(OLScolumbus, dnbTresh1.listw)
ols.resid.moran summary(ols.resid.moran)
## Length Class Mode
## statistic 1 -none- numeric
## p.value 1 -none- numeric
## estimate 3 -none- numeric
## method 1 -none- character
## alternative 1 -none- character
## data.name 1 -none- character
ols.resid.moran
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = CRIME ~ INC + HOVAL, data = columbus)
## weights: dnbTresh1.listw
##
## Moran I statistic standard deviate = 3.0562, p-value = 0.001121
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.266348450 -0.032344498 0.009551988