# Goal: Experiment with fitting nonlinear functional forms in
# OLS, using orthogonal polynomials to avoid difficulties with
# near-singular design matrices that occur with ordinary polynomials.
# Shriya Anand, Gabor Grothendieck, Ajay Shah, March 2006.

# We will deal with noisy data from the d.g.p. y = sin(x) + e
x <- seq(0, 2*pi, length.out=50)
set.seed(101)
y <- sin(x) + 0.3*rnorm(50)
basicplot <- function(x, y, minx=0, maxx=3*pi, title="") {
  plot(x, y, xlim=c(minx,maxx), ylim=c(-2,2), main=title)
  lines(x, sin(x), col="blue", lty=2, lwd=2)
  abline(h=0, v=0)
}
x.outsample <- seq(0, 3*pi, length.out=100)

# Severe multicollinearity with ordinary polynomials
x2 <- x*x
x3 <- x2*x
x4 <- x3*x
cor(cbind(x, x2, x3, x4))
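# One way to see the problem numerically: the condition number of the
# raw design matrix (computed here with base R's kappa()) is huge,
# signalling a near-singular matrix.
kappa(cbind(1, x, x2, x3, x4), exact=TRUE)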
# and a perfectly conditioned design matrix using orthogonal polynomials
m <- poly(x, 4)
all.equal(cor(m), diag(4)) # Correlation matrix is I.
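# A complementary check: the columns produced by poly() are orthonormal,
# so the cross-product matrix t(m) %*% m is the identity and the
# condition number is 1 (zapsmall() rounds away floating-point fuzz).
zapsmall(crossprod(m))
kappa(m, exact=TRUE)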

par(mfrow=c(2,2))
# Ordinary polynomial regression --
p <- lm(y ~ x + I(x^2) + I(x^3) + I(x^4))
summary(p)
basicplot(x, y, title="Polynomial, insample") # Data
lines(x, fitted(p), col="red", lwd=3) # In-sample
basicplot(x, y, title="Polynomial, out-of-sample")
predictions.p <- predict(p, list(x = x.outsample)) # Out-of-sample
lines(x.outsample, predictions.p, type="l", col="red", lwd=3)
lines(x.outsample, sin(x.outsample), type="l", col="blue", lwd=2, lty=2)
# As expected, polynomial fitting gives terrible results out of sample.
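# A rough way to quantify "terrible": RMSE against the true sin() curve,
# in-sample versus out-of-sample.
sqrt(mean((fitted(p) - sin(x))^2))               # in-sample RMSE
sqrt(mean((predictions.p - sin(x.outsample))^2)) # out-of-sample RMSE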

# The IDENTICAL steps, now using orthogonal polynomials
d <- lm(y ~ poly(x, 4))
summary(d)
basicplot(x, y, title="Orth. poly., insample") # Data
lines(x, fitted(d), col="red", lwd=3) # In-sample
basicplot(x, y, title="Orth. poly., out-of-sample")
predictions.op <- predict(d, list(x = x.outsample)) # Out-of-sample
lines(x.outsample, predictions.op, type="l", col="red", lwd=3)
lines(x.outsample, sin(x.outsample), type="l", col="blue", lwd=2, lty=2)

# predict(d) is magical! See ?SafePrediction
# The story runs at two levels. First, when you do an OLS model,
# predict()ion requires applying coefficients to an appropriate
# X matrix. But one level deeper, the polynomial or orthogonal-polynomial
# basis needs to be re-evaluated to compute that X matrix from the
# newly supplied x.outsample data.
# If you say m <- poly(x, n),
# then you can say predict(m, new), where predict.poly() gets invoked.
# And when you say predict(lm()), the full steps are worked out for
# you automatically: predict.poly() is used to make an X matrix, and
# then prediction based on the regression results is done.
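# A sketch of those two steps done by hand (X.new and manual are just
# illustrative names): predict() on the poly object m rebuilds the basis
# at the new x values, and the lm coefficients are then applied to it.
X.new <- cbind(1, predict(m, x.outsample)) # predict.poly() makes the X matrix
manual <- drop(X.new %*% coef(d))          # apply the regression coefficients
all.equal(manual, unname(predictions.op))  # same answer as predict(d, ...)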

all.equal(predictions.p, predictions.op) # Both paths are identical for this
# (tame) problem.