Chapter 3 Spatial Lattice Data Analysis

3.1 Prerequisites

library(tmap)
library(spdep)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(maptools)
## Checking rgeos availability: TRUE
## Please note that 'maptools' will be retired by the end of 2023,
## plan transition at your earliest convenience;
## some functionality will be moved to 'sp'.

3.2 Columbus Dataset

example(columbus)
## 
## colmbs> columbus <- st_read(system.file("shapes/columbus.shp", package="spData")[1], quiet=TRUE)
## 
## colmbs> col.gal.nb <- read.gal(system.file("weights/columbus.gal", package="spData")[1])
plot(columbus$geometry)

library(tmap)
tm_shape(columbus) + tm_polygons()
## Warning: Currect projection of shape columbus unknown. Long-lat (WGS84) is
## assumed.

3.2.1 Plot of Crime Variable

tm_shape(columbus) + tm_polygons(style="quantile", col = "CRIME") +
  tm_legend(outside = TRUE, text.size = .8)
## Warning: Currect projection of shape columbus unknown. Long-lat (WGS84) is
## assumed.

3.3 Obtain the Centroids

#columbus <- st_read(system.file("shapes/columbus.shp", package="spData")[1], quiet=TRUE)
#col.gal.nb <- read.gal(system.file("weights/columbus.gal", package="spData")[1])
coords <- st_coordinates(st_centroid(st_geometry(columbus)))

3.4 Obtain the Neighbourhood Relationship

3.4.1 Contiguity Based Neighbours

3.4.1.1 Rook style NB

nbRook <- poly2nb(as(columbus, "Spatial"), queen = FALSE)
class(nbRook)
## [1] "nb"
nbRook
## Neighbour list object:
## Number of regions: 49 
## Number of nonzero links: 200 
## Percentage nonzero weights: 8.329863 
## Average number of links: 4.081633

As you can see this is an “nb” class object and for each polygon (in the Columbus dataset we have 49 polygons) in our polygon object, nbRook lists all neighboring polygons. For example, to see the neighbors for the first polygon in the object, type:

nbRook[[1]]
## [1] 2 3

Polygon 1 has 2 neighbours according to Queen Style Contiguity Based Neighbour definition. These 2 neighbours are Poly2 and Poly3.

plot(st_geometry(columbus))
plot(nbRook, coords, add=TRUE, col="blue")

3.4.1.2 Queen style NB

nbQueen <- poly2nb(as(columbus, "Spatial"), queen = TRUE)
plot(st_geometry(columbus))
plot(nbQueen, coords, add=TRUE, col="red")

3.4.1.3 Difference between Rook and Queen

dQueenRook <- diffnb(nbQueen, nbRook)
plot(st_geometry(columbus))
plot(nbRook, coords, add = TRUE, col = "blue")
plot(dQueenRook, coords, add = TRUE, col = "red")
title(main=paste("Differences (red) in Rook",
 "and polygon generated queen weights", sep="\n"), cex.main=0.6)

# poly2nb with sf sfc_MULTIPOLYGON objects

3.4.2 Distance Based Neighbours

3.4.2.1 1st Nearest Neighbour

whoisthefirstnear=knearneigh(coords,k=1,longlat=TRUE)
knn1columbus=knn2nb(whoisthefirstnear)
plot(st_geometry(columbus))
plot(knn1columbus, coords, add=TRUE, col="blue")

3.4.2.2 Two Nearest Neighbour

whois2near=knearneigh(coords,k=2,longlat=TRUE)
knn2columbus=knn2nb(whois2near)
plot(st_geometry(columbus))
plot(knn2columbus, coords, add=TRUE, col="blue")

3.4.2.3 Critical Cut-off

distBetwNeigh1=nbdists(knn1columbus,coords,longlat=TRUE)
all.linkedTresh=max(unlist(distBetwNeigh1))
all.linkedTresh
## [1] 67.50447
dnbTresh1=dnearneigh(coords,0,68,longlat=TRUE)
summary(dnbTresh1)
## Neighbour list object:
## Number of regions: 49 
## Number of nonzero links: 252 
## Percentage nonzero weights: 10.49563 
## Average number of links: 5.142857 
## Link number distribution:
## 
##  1  2  3  4  5  6  7  8  9 10 11 
##  4  8  6  2  5  8  6  2  6  1  1 
## 4 least connected regions:
## 6 10 21 47 with 1 link
## 1 most connected region:
## 28 with 11 links
plot(st_geometry(columbus))
plot(dnbTresh1, coords, add=TRUE, col="blue")

If you examine this neighbourhood list object a bit more closely you will see that, Polygon1 is neighbours with Poly2 and Poly3; Polygon2 is neighbours with Poly1 and Poly4; Polygon3 is neighbours with Poly1, Poly4 and Poly5 and so on:

dnbTresh1[1:3]
## [[1]]
## [1] 2 3
## 
## [[2]]
## [1] 1 4
## 
## [[3]]
## [1] 1 4 5

In calculation of the weights that will be used to create the spatially lagged variables, only the neighbours values are going to be used. For example for the 1st Polygon, the values of neighbouring Poly2 and Poly3 will be used. Before doing this the nb values need to be standardized, so that the row

3.5 Neighbours to Weights Matrix

Here we will work with the Critical Cut-off nearest neighbours

dnbTresh1.listw=nb2listw(dnbTresh1,style="W",zero.policy=FALSE)
class(dnbTresh1.listw)
## [1] "listw" "nb"
dnbTresh1.listw$weights[1:5]
## [[1]]
## [1] 0.5 0.5
## 
## [[2]]
## [1] 0.5 0.5
## 
## [[3]]
## [1] 0.3333333 0.3333333 0.3333333
## 
## [[4]]
## [1] 0.25 0.25 0.25 0.25
## 
## [[5]]
## [1] 0.2 0.2 0.2 0.2 0.2

In matrix format rather than a list:

listw2mat(dnbTresh1.listw)[1:5,1:5]
##        [,1] [,2] [,3]      [,4]      [,5]
## 1 0.0000000 0.50 0.50 0.0000000 0.0000000
## 2 0.5000000 0.00 0.00 0.5000000 0.0000000
## 3 0.3333333 0.00 0.00 0.3333333 0.3333333
## 4 0.0000000 0.25 0.25 0.0000000 0.0000000
## 5 0.0000000 0.00 0.20 0.0000000 0.0000000

3.6 Moran’s I

3.6.1 Manual Calculation of Moran’s I Statistic with Matrix Operations

We can manually calculate the Moran’s I statistic using the lag operation with our chosen weights matrix, nearest neighbours within 68kms of radius.

weightsmatrix_s = listw2mat(dnbTresh1.listw)
Y_s = columbus$CRIME - mean(columbus$CRIME)

t(Y_s)%*%weightsmatrix_s%*%Y_s/(t(Y_s)%*%Y_s)
##           [,1]
## [1,] 0.5518257

The spatially lagged values of Y variable are obtained with W*Y as in your Moran’s I test statistic. Moran’s I is measuring the dependency of your Y values to the neighbouring Y values after all.

3.6.2 Moran’s I Test with moran.test() function

The same value can be simply obtained using the moran.test() function.

moran.test(columbus$CRIME,dnbTresh1.listw)
## 
##  Moran I test under randomisation
## 
## data:  columbus$CRIME  
## weights: dnbTresh1.listw    
## 
## Moran I statistic standard deviate = 5.6206, p-value = 9.514e-09
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##        0.55182569       -0.02083333        0.01038068

Examine the following:

lagY_s = weightsmatrix_s%*%Y_s
head(lagY_s)
##         [,1]
## 1 -10.414556
## 2 -11.071954
## 3  -2.180407
## 4 -13.120658
## 5  12.195025
## 6  -4.612907

This is the spatialy lagged value for the Y variable. Technically it is the average Y values of the neighbouring polygons around each and every single polygon. For example take the 1st polygon, the lagged value of the 1st polygon is the average of the values for 2nd and 3rd polygons:

mean(c(Y_s[2], Y_s[3]))
## [1] -10.41456
# We can automatically create spatially lagged values of Y variable, this corresponds to W*Y in your Moran's I test statistic:
lagY_s <- lag.listw(dnbTresh1.listw, Y_s)
lm_eqn <- function(df, y, x){
    m <- lm(y ~ x, df);
    eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2, 
         list(a = format(unname(coef(m)[1]), digits = 2),
              b = format(unname(coef(m)[2]), digits = 2),
             r2 = format(summary(m)$r.squared, digits = 3)))
    as.character(as.expression(eq));
}

dataMoransI = data.frame(Y_s, lagY_s)

library(ggplot2)

ggplot(dataMoransI, aes(x = Y_s, y = lagY_s)) + geom_point() + geom_smooth(method = "lm", se=FALSE)+geom_text(x = -10, y = 20, label = lm_eqn(dataMoransI,lagY_s,Y_s), parse = TRUE)+geom_vline(xintercept=0, linetype="dashed", color = "red")+geom_hline(yintercept=0.78, linetype="dashed", color = "red")
## `geom_smooth()` using formula = 'y ~ x'

Here the slope of the lm is the Moran’s I value. The same plot can be obtained using the moran.plot() function as follows:

moran.plot(columbus$CRIME, dnbTresh1.listw)

3.7 Local Moran’s I

localmoranstatistics = localmoran(Y_s,dnbTresh1.listw)
head(localmoranstatistics)
##           Ii          E.Ii      Var.Ii      Z.Ii Pr(z != E(Ii))
## 1 0.73681849 -0.0285985420 0.666144891 0.9378077     0.34834327
## 2 0.65915397 -0.0202502140 0.475741297 0.9850149     0.32461676
## 3 0.03579329 -0.0015396867 0.024041007 0.2407777     0.80972741
## 4 0.13113818 -0.0005707572 0.006541757 1.6284260     0.10343458
## 5 0.69380337 -0.0184931910 0.162742823 1.7656716     0.07745096
## 6 0.15242670 -0.0062384562 0.303777355 0.2878749     0.77344247
columbus$localmoran = localmoranstatistics[,1]

Plot of the Local Moran Statistics

tm_shape(columbus) + tm_polygons(style="quantile", col = "localmoran") +
     tm_legend(outside = TRUE, text.size = .8)
## Warning: Currect projection of shape columbus unknown. Long-lat (WGS84) is
## assumed.
## Variable(s) "localmoran" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

3.8 Week 2 of Lattice

3.9 OLS

OLScolumbus <- lm(CRIME ~ INC + HOVAL, data=columbus)
yhat <- predict(OLScolumbus)
resids = resid(OLScolumbus)
plot(OLScolumbus, which=1, col=c("blue")) # Residuals vs Fitted Plot

plot(OLScolumbus, which=2, col=c("red"))  # Q-Q Plot

3.10 Moran’s I Test of Residual

ols.resid.moran <- lm.morantest(OLScolumbus, dnbTresh1.listw)
summary(ols.resid.moran)
##             Length Class  Mode     
## statistic   1      -none- numeric  
## p.value     1      -none- numeric  
## estimate    3      -none- numeric  
## method      1      -none- character
## alternative 1      -none- character
## data.name   1      -none- character
ols.resid.moran
## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = CRIME ~ INC + HOVAL, data = columbus)
## weights: dnbTresh1.listw
## 
## Moran I statistic standard deviate = 3.0562, p-value = 0.001121
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##      0.266348450     -0.032344498      0.009551988