Adding R programming language
This commit is contained in:
parent 599b63599b
commit 026079a47d
@ -0,0 +1,31 @@
# Goals: A first look at R objects - vectors, lists, matrices, data frames.

# To make vectors "x" "y" "year" and "names"
x <- c(2,3,7,9)
y <- c(9,7,3,2)
year <- 1990:1993
names <- c("payal", "shraddha", "kritika", "itida")   # (note: this shadows the built-in names() function)
# Accessing the 1st and last elements of y --
y[1]
y[length(y)]

# To make a list "person" --
person <- list(name="payal", x=2, y=9, year=1990)
person
# Accessing things inside a list --
person$name
person$x

# To make a matrix, pasting together the columns "year" "x" and "y"
# The verb cbind() stands for "column bind"
cbind(year, x, y)

# To make a "data frame", which is a list of vectors of the same length --
D <- data.frame(names, year, x, y)
nrow(D)
# Accessing one of these vectors
D$names
# Accessing the last element of this vector
D$names[nrow(D)]
# Or equally,
D$names[length(D$names)]
11
r/A histogram with tails shown in red.r
Normal file
@ -0,0 +1,11 @@
# Goal: A histogram with tails shown in red.

# This happened on the R mailing list on 7 May 2004.
# This is by Martin Maechler <maechler@stat.math.ethz.ch>, who was
# responding to a slightly imperfect version of this by
# "Guazzetti Stefano" <Stefano.Guazzetti@ausl.re.it>

x <- rnorm(1000)
hx <- hist(x, breaks=100, plot=FALSE)
plot(hx, col=ifelse(abs(hx$breaks) < 1.669, 4, 2))
# What is cool is that "col" is supplied a vector.
67
r/ARMA modeling - estimation, diagnostics, forecasting.r
Normal file
@ -0,0 +1,67 @@
# Goals: ARMA modeling - estimation, diagnostics, forecasting.


# 0. SETUP DATA
rawdata <- c(-0.21,-2.28,-2.71,2.26,-1.11,1.71,2.63,-0.45,-0.11,4.79,5.07,-2.24,6.46,3.82,4.29,-1.47,2.69,7.95,4.46,7.28,3.43,-3.19,-3.14,-1.25,-0.50,2.25,2.77,6.72,9.17,3.73,6.72,6.04,10.62,9.89,8.23,5.37,-0.10,1.40,1.60,3.40,3.80,3.60,4.90,9.60,18.20,20.60,15.20,27.00,15.42,13.31,11.22,12.77,12.43,15.83,11.44,12.32,12.10,12.02,14.41,13.54,11.36,12.97,10.00,7.20,8.74,3.92,8.73,2.19,3.85,1.48,2.28,2.98,4.21,3.85,6.52,8.16,5.36,8.58,7.00,10.57,7.12,7.95,7.05,3.84,4.93,4.30,5.44,3.77,4.71,3.18,0.00,5.25,4.27,5.14,3.53,4.54,4.70,7.40,4.80,6.20,7.29,7.30,8.38,3.83,8.07,4.88,8.17,8.25,6.46,5.96,5.88,5.03,4.99,5.87,6.78,7.43,3.61,4.29,2.97,2.35,2.49,1.56,2.65,2.49,2.85,1.89,3.05,2.27,2.91,3.94,2.34,3.14,4.11,4.12,4.53,7.11,6.17,6.25,7.03,4.13,6.15,6.73,6.99,5.86,4.19,6.38,6.68,6.58,5.75,7.51,6.22,8.22,7.45,8.00,8.29,8.05,8.91,6.83,7.33,8.52,8.62,9.80,10.63,7.70,8.91,7.50,5.88,9.82,8.44,10.92,11.67)

# Make an R time-series out of the rawdata: specify frequency & start date
gIIP <- ts(rawdata, frequency=12, start=c(1991,4))
print(gIIP)
plot.ts(gIIP, type="l", col="blue", ylab="IIP Growth (%)", lwd=2,
        main="Full data")
grid()

# Based on this, I decide that 4/1995 is the start of the sensible period.
gIIP <- window(gIIP, start=c(1995,4))
print(gIIP)
plot.ts(gIIP, type="l", col="blue", ylab="IIP Growth (%)", lwd=2,
        main="Estimation subset")
grid()

# Descriptive statistics about gIIP
mean(gIIP); sd(gIIP); summary(gIIP)
plot(density(gIIP), col="blue", main="(Unconditional) Density of IIP growth")
acf(gIIP)


# 1. ARMA ESTIMATION
m.ar2 <- arima(gIIP, order=c(2,0,0))
print(m.ar2)                            # Print it out


# 2. ARMA DIAGNOSTICS
tsdiag(m.ar2)                           # His pretty picture of diagnostics
## Time-series structure in errors
print(Box.test(m.ar2$residuals, lag=12, type="Ljung-Box"))
## Sniff for ARCH
print(Box.test(m.ar2$residuals^2, lag=12, type="Ljung-Box"))
## Eyeball distribution of residuals
plot(density(m.ar2$residuals), col="blue", xlim=c(-8,8),
     main="Residuals of AR(2)")


# 3. FORECASTING
## Make a picture of the residuals
plot.ts(m.ar2$residuals, ylab="Innovations", col="blue", lwd=2)
s <- sqrt(m.ar2$sigma2)
abline(h=c(-s,s), lwd=2, col="lightGray")

p <- predict(m.ar2, n.ahead=12)         # Make 12 predictions.
print(p)

## Watch the forecastability decay away to 0 as the horizon grows.
## sd(gIIP) is the naive sigma; p$se is the prediction s.e.
gain <- 100*(1 - p$se/sd(gIIP))
plot.ts(gain, main="Gain in forecast s.d.", ylab="Per cent",
        col="blue", lwd=2)

## Make a pretty picture that puts it all together
ts.plot(gIIP, p$pred, p$pred-1.96*p$se, p$pred+1.96*p$se,
        gpars=list(lty=c(1,1,2,2), lwd=c(2,2,1,1),
                   ylab="IIP growth (%)", col=c("blue","red","red","red")))
grid()
abline(h=mean(gIIP), lty=2, lwd=2, col="lightGray")
legend(x="bottomleft", cex=0.8, bty="n",
       lty=c(1,1,2,2), lwd=c(2,1,1,2),
       col=c("blue", "red", "red", "lightGray"),
       legend=c("IIP", "AR(2) forecasts", "95% C.I.", "Mean IIP growth"))
16
r/Add Two Vectors.r
Normal file
@ -0,0 +1,16 @@
> x
[1] 3 6 8
> y
[1] 2 9 0

> x + y
[1] 5 15 8

> x + 1          # 1 is recycled to (1,1,1)
[1] 4 7 9

> x + c(1,4)     # (1,4) is recycled to (1,4,1) but a warning is issued
[1] 4 10 9
Warning message:
In x + c(1, 4) :
  longer object length is not a multiple of shorter object length
33
r/All manner of import and export of datasets.r
Normal file
@ -0,0 +1,33 @@
# Goal: All manner of import and export of datasets.

# Invent a dataset --
A <- data.frame(
  name=c("a","b","c"),
  ownership=c("Case 1","Case 1","Case 2"),
  listed.at=c("NSE",NA,"BSE"),
  # Firm "b" is unlisted.
  is.listed=c(TRUE,FALSE,TRUE),
  # R convention - boolean variables are named "is.something"
  x=c(2.2,3.3,4.4),
  date=as.Date(c("2004-04-04","2005-05-05","2006-06-06"))
)

# To a spreadsheet through a CSV file --
write.table(A, file="demo.csv", sep=",", col.names=NA, qmethod="double")
B <- read.table("demo.csv", header=TRUE, sep=",", row.names=1)

# To R as a binary file --
save(A, file="demo.rda")
load("demo.rda")

# To the Open XML standard for the transport of statistical data --
library(StatDataML)
writeSDML(A, "/tmp/demo.sdml")
B <- readSDML("/tmp/demo.sdml")

# To Stata --
library(foreign)
write.dta(A, "/tmp/demo.dta")
B <- read.dta("/tmp/demo.dta")

# foreign::write.foreign() also has a pathway to SAS and SPSS.
33
r/An example of simulation-based inference.r
Normal file
@ -0,0 +1,33 @@
# Goal: An example of simulation-based inference.
# This is in the context of testing for time-series dependence in
# stock market returns data.
# The code here does the idea of Kim, Nelson, Startz (1991).
# We want to use the distribution of real-world returns data, without
# needing assumptions about normality.
# The null is lack of dependence (i.e. an efficient market).
# So repeatedly, the data is permuted, and the sample ACF is computed.
# This gives us the distribution of the ACF under H0: independence, but
# while using the empirical distribution of the returns data.

# Weekly returns on Nifty, 1/1/2002 to 31/12/2003, 104 weeks of data.
r <- c(-0.70031182197603, 0.421690133064168, -1.20098072984689, 0.143402360644984, 3.81836537549516, 3.17055939373247, 0.305580301919228, 1.23853814691852, 0.81584795095706, -1.51865139747764, -2.71223626421522, -0.784836480094242, 1.09180041170998, 0.397649587762761, -4.11309534220923, -0.263912425099111, -0.0410144239805454, 1.75756212770972, -2.3335373897992, -2.19228764624217, -3.64578978183987, 1.92535789661354, 3.45782867883164, -2.15532607229374, -0.448039988298987, 1.50124793565896, -1.45871585874362, -2.13459863369767, -6.2128068251802, -1.94482987066289, 0.751294815735637, 1.78244982829590, 1.61567494389745, 1.53557708728931, -1.53557708728931, -0.322061470004265, -2.28394919698225, 0.70399304137414, -2.93580952607737, 2.38125098034425, 0.0617697039252185, -4.14482733720716, 2.04397528093754, 0.576400673606603, 3.43072725191913, 2.96465382864843, 2.89833358015583, 1.85387040058336, 1.52136515035952, -0.637268376944444, 1.75418926224609, -0.804391905851354, -0.861816058320475, 0.576902488444109, -2.84259880663331, -1.35375536139417, 1.49096529042234, -2.05404881010045, 2.86868849528146, -0.258270670200478, -4.4515881438687, -1.73055019137092, 3.04427015714648, -2.94928202352018, 1.62081315773994, -6.83117945164824, -0.962715713711582, -1.75875847071740, 1.50330330252721, -0.0479705789653728, 3.68968303215933, -0.535807567290103, 3.94034871061182, 3.85787174417738, 0.932185956989873, 4.08598654183674, 2.27343783689715, 1.13958830440017, 2.01737201171230, -1.88131458327554, 1.97596267156648, 2.79857144562001, 2.22470306481695, 2.03212951411427, 4.95626853448883, 3.40400972901396, 3.03840139165246, -1.89863129741417, -3.70832135042951, 4.78478922155396, 4.3973589590097, 4.9667050392987, 2.99775078737081, -4.12349101552438, 3.25638269809945, 2.29683376253966, -2.64772825878214, -0.630835277076258, 4.72528848505451, 1.87368447333380, 3.17543946162564, 4.58174427843208, 3.23625985632168, 2.29777651227296)

# The 1st autocorrelation from the sample:
acf(r, 1, plot=FALSE)$acf[2]

# Obtain 1000 draws from the distribution of the 1st autocorrelation
# under the null of independence:
set.seed(101)
simulated <- replicate(1000, acf(r[sample(1:104, replace=FALSE)], 1, plot=FALSE)$acf[2])
# At 95% --
quantile(simulated, probs=c(.025,.975))
# At 99% --
quantile(simulated, probs=c(.005,.995))

# So we can reject the null at 95% but not at 99%.

# A pretty picture.
plot(density(simulated), col="blue")
abline(v=0)
abline(v=quantile(simulated, probs=c(.025,.975)), lwd=2, col="purple")
abline(v=acf(r, 1, plot=FALSE)$acf[2], lty=2, lwd=4, col="yellow")
52
r/Associative arrays (as in awk) or hashes (as in perl).r
Normal file
@ -0,0 +1,52 @@
# Goal: Associative arrays (as in awk) or hashes (as in perl).
# Or, more generally, adventures in R addressing.

# Here's a plain R vector:
x <- c(2,3,7,9)
# But now I tag every element with labels:
names(x) <- c("kal","sho","sad","aja")
# Associative array operations:
x["kal"] <- 12
# Pretty printing the entire associative array:
x

# This works for matrices too:
m <- matrix(runif(10), nrow=5)
rownames(m) <- c("violet","indigo","blue","green","yellow")
colnames(m) <- c("Asia","Africa")
# The full matrix --
m
# Or even better --
library(xtable)
xtable(m)

# Now address symbolically --
m[,"Africa"]
m["indigo",]
m["indigo","Africa"]

# The "in" operator, as in awk --
for (colour in c("yellow", "orange", "red")) {
  if (colour %in% rownames(m)) {
    cat("For Africa and ", colour, " we have ", m[colour, "Africa"], "\n")
  } else {
    cat("Colour ", colour, " does not exist in the hash.\n")
  }
}

# This works for data frames also --
D <- data.frame(m)
D
# Look closely at what happened --
str(D)   # The colours are the rownames(D).

# Operations --
D$Africa
D[,"Africa"]
D["yellow",]
# or
subset(D, rownames(D)=="yellow")

colnames(D) <- c("Antarctica","America")
D
D$America
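
# Aside, a sketch going beyond the file above: base R environments behave as
# true hash tables, with O(1) insert and lookup -- handy when the named
# vectors above get large. All calls here are plain base R.
h <- new.env(hash=TRUE)
assign("kal", 12, envir=h)
assign("sho", 3, envir=h)
get("kal", envir=h)        # lookup
exists("aja", envir=h)     # membership test, like awk's "in"
ls(h)                      # all keys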
16
r/Binary to Decimal.r
Normal file
@ -0,0 +1,16 @@
Decimal to Binary

# Program to convert a decimal
# number into a binary number
# using a recursive function

convert_to_binary <- function(n) {
  if(n > 1) {
    convert_to_binary(as.integer(n/2))
  }
  cat(n %% 2)
}

Output

> convert_to_binary(52)
110100
25
r/Check Armstrong number.r
Normal file
@ -0,0 +1,25 @@
Check Armstrong number

# take input from the user
num = as.integer(readline(prompt="Enter a number: "))

# initialize sum
sum = 0

# find the sum of the cube of each digit
# (this uses the three-digit definition: a number that equals
# the sum of the cubes of its digits)
temp = num
while(temp > 0) {
  digit = temp %% 10
  sum = sum + (digit ^ 3)
  temp = floor(temp / 10)
}

# display the result
if(num == sum) {
  print(paste(num, "is an Armstrong number"))
} else {
  print(paste(num, "is not an Armstrong number"))
}

Output 1

Enter a number: 23
[1] "23 is not an Armstrong number"
27
r/Check Leap Year.r
Normal file
@ -0,0 +1,27 @@
Check Leap Year

# Program to check if
# the input year is
# a leap year or not

year = as.integer(readline(prompt="Enter a year: "))
if((year %% 4) == 0) {
  if((year %% 100) == 0) {
    if((year %% 400) == 0) {
      print(paste(year,"is a leap year"))
    } else {
      print(paste(year,"is not a leap year"))
    }
  } else {
    print(paste(year,"is a leap year"))
  }
} else {
  print(paste(year,"is not a leap year"))
}

Output 1

Enter a year: 1900
[1] "1900 is not a leap year"

Output 2

Enter a year: 2000
[1] "2000 is a leap year"
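
# The same rule as a single vectorised expression (a sketch) --
is.leap <- function(y) (y %% 4 == 0 & y %% 100 != 0) | (y %% 400 == 0)
is.leap(c(1900, 2000, 2024))   # FALSE TRUE TRUE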
21
r/Check Odd and Even Number.r
Normal file
@ -0,0 +1,21 @@
Check Odd and Even Number

# Program to check if
# the input number is odd or even.
# A number is even if division
# by 2 gives a remainder of 0.
# If the remainder is 1, it is odd.

num = as.integer(readline(prompt="Enter a number: "))
if((num %% 2) == 0) {
  print(paste(num,"is Even"))
} else {
  print(paste(num,"is Odd"))
}

Output 1

Enter a number: 89
[1] "89 is Odd"

Output 2

Enter a number: 0
[1] "0 is Even"
24
r/Check Positive, Negative or Zero.r
Normal file
@ -0,0 +1,24 @@
Check Positive, Negative or Zero

# In this program, we input a number,
# check if the number is positive,
# negative or zero, and display
# an appropriate message

num = as.double(readline(prompt="Enter a number: "))
if(num > 0) {
  print("Positive number")
} else {
  if(num == 0) {
    print("Zero")
  } else {
    print("Negative number")
  }
}

Output 1

Enter a number: -9.6
[1] "Negative number"

Output 2

Enter a number: 2
[1] "Positive number"
27
r/Check Prime Number.r
Normal file
@ -0,0 +1,27 @@
Check Prime Number

# Program to check if
# the input number is
# prime or not

# take input from the user
num = as.integer(readline(prompt="Enter a number: "))

flag = 0
# prime numbers are greater than 1
if(num > 1) {
  # check for factors
  flag = 1
  for(i in 2:(num-1)) {
    if ((num %% i) == 0) {
      flag = 0
      break
    }
  }
}

# special-case 2: for num = 2 the loop above runs over 2:1 (backwards)
# and wrongly clears the flag, so restore it here
if(num == 2) flag = 1
if(flag == 1) {
  print(paste(num,"is a prime number"))
} else {
  print(paste(num,"is not a prime number"))
}
30
r/Compute LCM in R.r
Normal file
@ -0,0 +1,30 @@
Compute LCM in R

# Program to find the L.C.M. of two input numbers
lcm <- function(x, y) {
  # choose the greater number
  if(x > y) {
    greater = x
  } else {
    greater = y
  }

  while(TRUE) {
    if((greater %% x == 0) && (greater %% y == 0)) {
      lcm = greater
      break
    }
    greater = greater + 1
  }
  return(lcm)
}

# take input from the user
num1 = as.integer(readline(prompt = "Enter first number: "))
num2 = as.integer(readline(prompt = "Enter second number: "))

print(paste("The L.C.M. of", num1,"and", num2,"is", lcm(num1, num2)))

Output

Enter first number: 24
Enter second number: 25
[1] "The L.C.M. of 24 and 25 is 600"
@ -0,0 +1,15 @@
# Goals: Do bootstrap inference, as an example, for a sample median.

library(boot)

samplemedian <- function(x, d) {   # d is a vector of integer indexes
  return(median(x[d]))             # The genius is in the x[d] notation
}

data <- rnorm(50)                       # Generate a dataset with 50 obs
b <- boot(data, samplemedian, R=2000)   # 2000 bootstrap replications
cat("Sample median has a sigma of ", sd(b$t[,1]), "\n")
plot(b)

# Make a 99% confidence interval
boot.ci(b, conf=0.99, type="basic")
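
# The same boot object can yield other interval flavours too (a sketch;
# "stud" would need bootstrap variance estimates, so it is left out) --
boot.ci(b, conf=0.99, type=c("perc", "bca"))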
44
r/Doing OLS.r
Normal file
@ -0,0 +1,44 @@
# Goal: Simulate a dataset from the OLS model and
# obtain OLS estimates for it.

x <- runif(100, 0, 10)       # 100 draws from U(0,10)
y <- 2 + 3*x + rnorm(100)    # beta = [2, 3] and sigma = 1

# You want to just look at OLS results?
summary(lm(y ~ x))

# Suppose x and y were packed together in a data frame --
D <- data.frame(x,y)
summary(lm(y ~ x, D))

# Full and elaborate steps --
d <- lm(y ~ x)
# Learn about this object by saying ?lm and str(d)
# Compact model results --
print(d)
# Pretty graphics for regression diagnostics --
par(mfrow=c(2,2))
plot(d)

d <- summary(d)
# Detailed model results --
print(d)
# Learn about this object by saying ?summary.lm and by saying str(d)
cat("OLS gave a slope of ", d$coefficients[2,1],
    "and an error sigma of ", d$sigma, "\n")


## I need to drop down to a smaller dataset now --
x <- runif(10)
y <- 2 + 3*x + rnorm(10)
m <- lm(y ~ x)

# Now R supplies a wide range of generic functions which extract
# useful things out of the result of estimation of many kinds of models.

residuals(m)
fitted(m)
AIC(m)
AIC(m, k=log(10))   # SBC
vcov(m)
logLik(m)
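
# A couple more generic extractors worth knowing, on the same m (a sketch) --
confint(m)                                       # CIs for the coefficients
predict(m, newdata=data.frame(x=c(0.25, 0.75)),
        interval="confidence")                   # fitted values with CIs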
55
r/Dummy variables in regression.r
Normal file
@ -0,0 +1,55 @@
# Goal: "Dummy variables" in regression.

# Suppose you have this data:
people = data.frame(
  age = c(21,62,54,49,52,38),
  education = c("college", "school", "none", "school", "college", "none"),
  education.code = c( 2, 1, 0, 1, 2, 0 )
)
# Here people$education is a string categorical variable and
# people$education.code is the same thing, with a numerical coding system.
people

# Note the structure of the dataset --
str(people)
# The strings supplied for `education' have been treated (correctly) as
# a factor, but education.code is being treated as an integer and not as
# a factor.


# We want to do a dummy variable regression. Normally you would have:
#   1. Chosen college as the omitted category
#   2. Made a dummy for "none" named educationnone
#   3. Made a dummy for "school" named educationschool
#   4. Run a regression like lm(age ~ educationnone + educationschool, people)
# But this is R. Things are cool:
lm(age ~ education, people)

# ! :-)
# When you feed him an explanatory variable like education, he does all
# these steps automatically. (He chose college as the omitted category).

# If you use an integer coding, then the obvious thing goes wrong --
lm(age ~ education.code, people)
# because he's thinking that education.code is an integer explanatory
# variable. So you need to:

lm(age ~ factor(education.code), people)
# (he chooses a different omitted category)

# Alternatively, fix up the dataset --
people$education.code <- factor(people$education.code)
lm(age ~ education.code, people)

#
# Bottom line:
# Once the dataset has categorical variables correctly represented as
# factors, i.e. as shown by
str(people)
# doing OLS in R induces automatic generation of dummy variables while
# leaving one out:
lm(age ~ education, people)
lm(age ~ education.code, people)

# But what if you want the X matrix?
m <- lm(age ~ education, people)
model.matrix(m)
# This is the design matrix that went into the regression m.
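
# To pick the omitted category yourself, relevel the factor (a sketch;
# the factor() wrap is only insurance in case education is still character) --
people$education <- relevel(factor(people$education), ref="none")
lm(age ~ education, people)   # now "none" is the omitted category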
38
r/Fibonacci Sequence in R.r
Normal file
@ -0,0 +1,38 @@
Fibonacci Sequence in R

# Program to display the Fibonacci
# sequence up to the n-th term using
# recursive functions

recurse_fibonacci <- function(n) {
  if(n <= 1) {
    return(n)
  } else {
    return(recurse_fibonacci(n-1) + recurse_fibonacci(n-2))
  }
}

# take input from the user
nterms = as.integer(readline(prompt="How many terms? "))

# check if the number of terms is valid
if(nterms <= 0) {
  print("Please enter a positive integer")
} else {
  print("Fibonacci sequence:")
  for(i in 0:(nterms-1)) {
    print(recurse_fibonacci(i))
  }
}

Output

How many terms? 9
[1] "Fibonacci sequence:"
[1] 0
[1] 1
[1] 1
[1] 2
[1] 3
[1] 5
[1] 8
[1] 13
[1] 21
14
r/Find Factorial of a number using recursion.r
Normal file
@ -0,0 +1,14 @@
Find Factorial of a number using recursion

recur_factorial <- function(n) {
  if(n <= 1) {
    return(1)
  } else {
    return(n * recur_factorial(n-1))
  }
}


Output

> recur_factorial(5)
[1] 120
33
r/Find Minimum and Maximum.r
Normal file
@ -0,0 +1,33 @@
Find Minimum and Maximum

> x
[1] 5 8 3 9 2 7 4 6 10

> # find the minimum
> min(x)
[1] 2

> # find the maximum
> max(x)
[1] 10

> # find the range
> range(x)
[1] 2 10

If we want to find where the minimum or maximum is located, i.e. the index instead of the actual value, then we can use the which.min() and which.max() functions.

Note that these functions will return the index of the first minimum or maximum in case multiple of them exist.

> x
[1] 5 8 3 9 2 7 4 6 10

> # find index of the minimum
> which.min(x)
[1] 5

> # find index of the maximum
> which.max(x)
[1] 9

> # alternate way to find the minimum
> x[which.min(x)]
[1] 2
29
r/Find factors of a number.r
Normal file
@ -0,0 +1,29 @@
Find factors of a number

print_factors <- function(x) {
  print(paste("The factors of",x,"are:"))
  for(i in 1:x) {
    if((x %% i) == 0) {
      print(i)
    }
  }
}

Output

> print_factors(120)
[1] "The factors of 120 are:"
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 8
[1] 10
[1] 12
[1] 15
[1] 20
[1] 24
[1] 30
[1] 40
[1] 60
[1] 120
21
r/Find sum of natural numbers without formula.r
Normal file
@ -0,0 +1,21 @@
Find sum of natural numbers without formula

# take input from the user
num = as.integer(readline(prompt = "Enter a number: "))

if(num < 0) {
  print("Enter a positive number")
} else {
  sum = 0

  # use while loop to iterate until zero
  while(num > 0) {
    sum = sum + num
    num = num - 1
  }

  print(paste("The sum is", sum))
}

Output

Enter a number: 10
[1] "The sum is 55"
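
# Cross-check against the closed form n*(n+1)/2 (a quick sketch) --
n <- 10
n * (n + 1) / 2   # 55, matching the loop above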
20
r/Find the factorial of a number.r
Normal file
@ -0,0 +1,20 @@
# Find the factorial of a number

# take input from the user
num = as.integer(readline(prompt="Enter a number: "))
factorial = 1

# check if the number is negative, positive or zero
if(num < 0) {
  print("Sorry, factorial does not exist for negative numbers")
} else if(num == 0) {
  print("The factorial of 0 is 1")
} else {
  for(i in 1:num) {
    factorial = factorial * i
  }
  print(paste("The factorial of", num ,"is",factorial))
}

Output

Enter a number: 8
[1] "The factorial of 8 is 40320"
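
# Base R also ships a vectorised built-in for this --
factorial(8)   # 40320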
8
r/From Uniform Distribution.r
Normal file
@ -0,0 +1,8 @@
> runif(1)                  # generates 1 random number
[1] 0.3984754

> runif(3)                  # generates 3 random numbers
[1] 0.8090284 0.1797232 0.6803607

> runif(3, min=5, max=10)   # define the range between 5 and 10
[1] 7.099781 8.355461 5.173133
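
> # set.seed() makes the draws reproducible (a sketch; any integer seed works)
> set.seed(42)
> runif(3)   # the same three numbers each time the two lines above are rerun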
22
r/Handling missing data.r
Normal file
@ -0,0 +1,22 @@
# Goal:
# A stock is traded on 2 exchanges.
# Price data is missing at random on both exchanges owing to non-trading.
# We want to make a single price time-series utilising information
# from both exchanges. I.e., missing data for exchange 1 will
# be replaced by information for exchange 2 (if observed).

# Let's create some example data for the problem.
e1 <- runif(15)             # Prices on exchange 1
e2 <- e1 + 0.05*rnorm(15)   # Prices on exchange 2.
cbind(e1, e2)
# Blow away 5 points from each at random.
e1[sample(1:15, 5)] <- NA
e2[sample(1:15, 5)] <- NA
cbind(e1, e2)

# Now how do we reconstruct a time-series that tries to utilise both?
combined <- e1                     # Do use the more liquid exchange here.
missing <- is.na(combined)
combined[missing] <- e2[missing]   # if it's also missing, I don't care.
cbind(e1, e2, combined)
# There you are.
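
# The same reconstruction as a one-liner, using the vectorised ifelse() --
combined2 <- ifelse(is.na(e1), e2, e1)
identical(combined, combined2)   # TRUE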
@ -0,0 +1,43 @@
# Goal: Joint distributions, marginal distributions, useful tables.

# First let me invent some fake data
set.seed(102)   # This yields a good illustration.
x <- sample(1:3, 15, replace=TRUE)
education <- factor(x, labels=c("None", "School", "College"))
x <- sample(1:2, 15, replace=TRUE)
gender <- factor(x, labels=c("Male", "Female"))
age <- runif(15, min=20, max=60)

D <- data.frame(age, gender, education)
rm(x, age, gender, education)
print(D)

# Table about education
table(D$education)

# Table about education and gender --
table(D$gender, D$education)
# Joint distribution of education and gender --
table(D$gender, D$education)/nrow(D)

# Add in the marginal distributions also
addmargins(table(D$gender, D$education))
addmargins(table(D$gender, D$education))/nrow(D)

# Generate a good LaTeX table out of it --
library(xtable)
xtable(addmargins(table(D$gender, D$education))/nrow(D),
       digits=c(0,2,2,2,2))   # You have to do | and \hline manually.

# Study age by gender --
by(D$age, D$gender, mean)
by(D$age, D$gender, sd)
by(D$age, D$gender, summary)

# Two-way table showing average age depending on education & gender
a <- matrix(by(D$age, list(D$gender, D$education), mean), nrow=2)
rownames(a) <- levels(D$gender)
colnames(a) <- levels(D$education)
print(a)
# or, of course,
print(xtable(a))
@ -0,0 +1,23 @@
# Goals: Simulate a dataset from a "fixed effects" model, and
# obtain "least squares dummy variable" (LSDV) estimates.
#
# We do this in the context of a familiar "earnings function" -
# log earnings is quadratic in log experience, with parallel shifts by
# education category.

# Create an education factor with 4 levels --
education <- factor(sample(1:4, 1000, replace=TRUE),
                    labels=c("none", "school", "college", "beyond"))
# Simulate an experience variable with a plausible range --
experience <- 30*runif(1000)   # experience from 0 to 30 years
# Make the intercept vary by education category between 4 given values --
intercept <- c(0.5,1,1.5,2)[education]

# Simulate the log earnings --
log.earnings <- intercept +
  2*experience - 0.05*experience*experience + rnorm(1000)
A <- data.frame(education, experience, e2=experience*experience, log.earnings)
summary(A)

# The OLS path to LSDV --
summary(lm(log.earnings ~ -1 + education + experience + e2, A))
31
r/Make a time-series object using the zoo package.r
Normal file
@ -0,0 +1,31 @@
# Goal: Make a time-series object using the "zoo" package

A <- data.frame(date=c("1995-01-01", "1995-01-02", "1995-01-03", "1995-01-06"),
                x=runif(4),
                y=runif(4))
A$date <- as.Date(A$date)   # yyyy-mm-dd is the default format
# So far there's nothing new - it's just a data frame. I have hand-
# constructed A but you could equally have obtained it using read.table().

# I want to make a zoo matrix out of the numerical columns of A
library(zoo)
B <- A
B$date <- NULL
z <- zoo(as.matrix(B), order.by=A$date)
rm(A, B)

# So now you are holding "z", a "zoo" object. You can do many cool
# things with it.
# See http://www.google.com/search?hl=en&q=zoo+quickref+achim&btnI=I%27m+Feeling+Lucky

# To drop down to a plain data matrix, say
C <- coredata(z)
rownames(C) <- as.character(time(z))
# Compare --
str(C)
str(z)

# The above is a tedious way of doing these things, designed to give you
# an insight into what is going on. If you just want to read a file
# into a zoo object, a very short path is something like:
# z <- read.zoo(filename, format="%d %b %Y")
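
# A few of those cool things, sketched on the same z (all plain zoo idioms) --
diff(z)                                  # first differences, date-aware
lag(z, -1)                               # the previous observation
window(z, start=as.Date("1995-01-02"))   # subset by date
merge(z, lag(z, -1))                     # align several series on their dates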
20
r/Make pictures in PDF files that can be put into a paper.r
Normal file
@ -0,0 +1,20 @@
# Goal: Make pictures in PDF files that can be put into a paper.

xpts <- seq(-3,3,.05)

# Here is my suggested setup for a two-column picture --
pdf("demo2.pdf", width=5.6, height=2.8, bg="cadetblue1", pointsize=8)
par(mai=c(.6,.6,.2,.2))
plot(xpts, sin(xpts*xpts), type="l", lwd=2, col="cadetblue4",
     xlab="x", ylab="sin(x*x)")
grid(col="white", lty=1, lwd=.2)
abline(h=0, v=0)
dev.off()   # close the device so demo2.pdf is finalised

# My suggested setup for a square one-column picture --
pdf("demo1.pdf", width=2.8, height=2.8, bg="cadetblue1", pointsize=8)
par(mai=c(.6,.6,.2,.2))
plot(xpts, sin(xpts*xpts), type="l", lwd=2, col="cadetblue4",
     xlab="x", ylab="sin(x*x)")
grid(col="white", lty=1, lwd=.2)
abline(h=0, v=0)
dev.off()   # close the device so demo1.pdf is finalised
r/Multiplication Table.r
Normal file
25
r/Multiplication Table.r
Normal file
@ -0,0 +1,25 @@
|
Multiplication Table

# Program to find the multiplication
# table (from 1 to 10)
# of a number input by the user

# take input from the user
num = as.integer(readline(prompt = "Enter a number: "))

# use for loop to iterate 10 times
for(i in 1:10) {
  print(paste(num,'x', i, '=', num*i))
}

Output

Enter a number: 7
[1] "7 x 1 = 7"
[1] "7 x 2 = 14"
[1] "7 x 3 = 21"
[1] "7 x 4 = 28"
[1] "7 x 5 = 35"
[1] "7 x 6 = 42"
[1] "7 x 7 = 49"
[1] "7 x 8 = 56"
[1] "7 x 9 = 63"
[1] "7 x 10 = 70"
@ -0,0 +1,13 @@
# Goal: Display two series on one plot, one with a left y axis
# and another with a right y axis.

y1 <- cumsum(rnorm(100))
y2 <- cumsum(rnorm(100, mean=0.2))

par(mai=c(.8, .8, .2, .8))
plot(1:100, y1, type="l", col="blue", xlab="X axis label", ylab="Left legend")
par(new=TRUE)
plot(1:100, y2, type="l", ann=FALSE, yaxt="n")
axis(4)
legend(x="topleft", bty="n", lty=c(1,1), col=c("blue","black"),
       legend=c("String 1 (left scale)", "String 2 (right scale)"))
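
# Optionally, give the right axis its own label (a sketch) --
mtext("Right legend", side=4, line=2)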
99
r/Prices and returns.r
Normal file
@ -0,0 +1,99 @@
# Goal: Prices and returns

# I like to multiply returns by 100 so as to have "units in percent".
# In other words, I like it for 5% to be a value like 5 rather than 0.05.

###################################################################
# I. Simulate random-walk prices, switch between prices & returns.
###################################################################
# Simulate a time-series of PRICES drawn from a random walk
# where one-period returns are i.i.d. N(mu, sigma^2).
ranrw <- function(mu, sigma, p0=100, T=100) {
  cumprod(c(p0, 1 + (rnorm(n=T, mean=mu, sd=sigma)/100)))
}
prices2returns <- function(x) {
  100*diff(log(x))
}
returns2prices <- function(r, p0=100) {
  c(p0, p0 * exp(cumsum(r/100)))
}

cat("Simulate 25 points from a random walk starting at 1500 --\n")
p <- ranrw(0.05, 1.4, p0=1500, T=25)
# gives you a series of 26 prices -- the start of 1500 plus 25 steps --
# where one-period returns are N(0.05, 1.4^2) percent.
print(p)

cat("Convert to returns --\n")
r <- prices2returns(p)
print(r)

cat("Go back from returns to prices --\n")
goback <- returns2prices(r, 1500)
print(goback)

###################################################################
# II. Plenty of powerful things you can do with returns....
###################################################################
summary(r); sd(r)                  # summary statistics
plot(density(r))                   # kernel density plot
acf(r)                             # Autocorrelation function
ar(r)                              # Estimate an AIC-minimising AR model
Box.test(r, lag=2, type="Ljung")   # Box-Ljung test
library(tseries)
runs.test(factor(sign(r)))         # Runs test
bds.test(r)                        # BDS test.

###################################################################
# III. Visualisation and the random walk
###################################################################
# I want to obtain intuition into what kinds of price series can happen,
# given a starting price, a mean return, and a given standard deviation.
# This function simulates out 10000 days of a price time-series at a time,
# and waits for you to click in the graph window, after which a second
# series is painted, and so on. Make the graph window very big and
# sit back and admire.
# The point is to eyeball many series and thus obtain some intuition
# into what the random walk does.
visualisation <- function(p0, s, mu, labelstring) {
  N <- 10000
  x <- (1:(N+1))/250   # Unit of years
  while (1) {
    plot(x, ranrw(mu, s, p0, N), ylab="Level", log="y",
         type="l", col="red", xlab="Time (years)",
         main=paste("40 years of a process much like", labelstring))
    grid()
    z <- locator(1)
  }
}

# Nifty -- assuming a sigma of 1.4% a day and E(returns) of 13% a year
visualisation(2600, 1.4, 13/250, "Nifty")

# The numerical values here are used to think about what the INR/USD
# exchange rate would have looked like if it started from 31.37, had
# a mean depreciation of 5% per year, and had the daily vol of a floating
# exchange rate like EUR/USD.
visualisation(31.37, 0.7, 5/365, "INR/USD (NOT!) with daily sigma=0.7")
# This is of course not like the INR/USD series in the real world -
# which is neither a random walk nor does it have a vol of 0.7% a day.

# The numerical values here are used to think about what the USD/EUR
# exchange rate would do, starting with 1, having no drift, and having
# the observed daily vol of 0.7. (This is about right.)
visualisation(1, 0.7, 0, "USD/EUR with no drift")

###################################################################
# IV. A monte carlo experiment about the runs test
###################################################################
# Measure the effectiveness of the runs test when faced with an
# AR(1) process of length 100 with a coeff of 0.1
set.seed(101)
one.ts <- function() {arima.sim(list(order = c(1,0,0), ar = 0.1), n=100)}
table(replicate(1000, runs.test(factor(sign(one.ts())))$p.value < 0.05))
# We find that the runs test throws up a prob value of below 0.05
# for 91 out of 1000 experiments.
# Wow! :-)
# To understand this, you need to look up the man pages of:
# set.seed, arima.sim, sign, factor, runs.test, replicate, table.
# e.g. say ?replicate
41
r/Print Fibonacci Sequence.r
Normal file
@ -0,0 +1,41 @@
Print Fibonacci Sequence

# take input from the user
nterms = as.integer(readline(prompt="How many terms? "))

# first two terms
n1 = 0
n2 = 1
count = 2

# check if the number of terms is valid
if(nterms <= 0) {
  print("Please enter a positive integer")
} else {
  if(nterms == 1) {
    print("Fibonacci sequence:")
    print(n1)
  } else {
    print("Fibonacci sequence:")
    print(n1)
    print(n2)
    while(count < nterms) {
      nth = n1 + n2
      print(nth)
      # update values
      n1 = n2
      n2 = nth
      count = count + 1
    }
  }
}

Output

How many terms? 7
[1] "Fibonacci sequence:"
[1] 0
[1] 1
[1] 1
[1] 2
[1] 3
[1] 5
[1] 8
30
r/Program to Find GCD.r
Normal file
@ -0,0 +1,30 @@
Program to Find GCD

# Program to find the
# H.C.F. of two input numbers

# define a function
hcf <- function(x, y) {
  # choose the smaller number
  if(x > y) {
    smaller = y
  } else {
    smaller = x
  }
  for(i in 1:smaller) {
    if((x %% i == 0) && (y %% i == 0)) {
      hcf = i
    }
  }
  return(hcf)
}

# take input from the user
num1 = as.integer(readline(prompt = "Enter first number: "))
num2 = as.integer(readline(prompt = "Enter second number: "))

print(paste("The H.C.F. of", num1,"and", num2,"is", hcf(num1, num2)))

Output

Enter first number: 72
Enter second number: 120
[1] "The H.C.F. of 72 and 120 is 24"
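
# A classic, much faster alternative: Euclid's algorithm (a sketch) --
gcd <- function(x, y) if(y == 0) x else gcd(y, x %% y)
gcd(72, 120)   # 24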
52
r/Quartiles deciles tables graphs.r
Normal file
@ -0,0 +1,52 @@
# Get the data in place --
load(file="demo.rda")
summary(firms)

# Look at it --
plot(density(log(firms$mktcap)))
plot(firms$mktcap, firms$spread, type="p", cex=.2, col="blue", log="xy",
     xlab="Market cap (Mln USD)", ylab="Bid/offer spread (bps)")
m <- lm(log(spread) ~ log(mktcap), firms)
summary(m)

# Making deciles --
library(gtools)
library(gdata)
# quantcut() makes quantile-based categories (default=quartiles);
# q=seq(0, 1, 0.1) asks for deciles
size.category <- quantcut(firms$mktcap, q=seq(0, 1, 0.1), labels=F)
table(size.category)
means <- aggregate(firms, list(size.category), mean)
print(data.frame(means$mktcap, means$spread))

# Make a picture combining the sample mean of spread (in each decile)
# with the weighted average sample mean of the spread (in each decile),
# where weights are proportional to size.
wtd.means <- by(firms, size.category,
                function(piece) (sum(piece$mktcap*piece$spread)/sum(piece$mktcap)))
lines(means$mktcap, means$spread, type="b", lwd=2, col="green", pch=19)
lines(means$mktcap, wtd.means, type="b", lwd=2, col="red", pch=19)
legend(x=0.25, y=0.5, bty="n",
       col=c("blue", "green", "red"),
       lty=c(0, 1, 1), lwd=c(0,2,2),
       pch=c(0,19,19),
       legend=c("firm", "Mean spread in size deciles",
                "Size weighted mean spread in size deciles"))

# Within group standard deviations --
aggregate(firms, list(size.category), sd)

# Now I do quartiles by BOTH mktcap and spread.
size.quartiles <- quantcut(firms$mktcap, labels=F)
spread.quartiles <- quantcut(firms$spread, labels=F)
table(size.quartiles, spread.quartiles)
# Re-express everything as joint probabilities
table(size.quartiles, spread.quartiles)/nrow(firms)
# Compute cell means at every point in the joint table:
aggregate(firms, list(size.quartiles, spread.quartiles), mean)

# Make pretty two-way tables
aggregate.table(firms$mktcap, size.quartiles, spread.quartiles, nobs)
aggregate.table(firms$mktcap, size.quartiles, spread.quartiles, mean)
aggregate.table(firms$mktcap, size.quartiles, spread.quartiles, sd)
aggregate.table(firms$spread, size.quartiles, spread.quartiles, mean)
aggregate.table(firms$spread, size.quartiles, spread.quartiles, sd)
11
r/R Hello World Program.r
Normal file
@ -0,0 +1,11 @@
> # We can use the print() function
> print("Hello World!")
[1] "Hello World!"

> # Quotes can be suppressed in the output
> print("Hello World!", quote = FALSE)
[1] Hello World!

> # If there is more than one item, we can concatenate using paste()
> print(paste("How","are","you?"))
[1] "How are you?"
@ -0,0 +1,12 @@
# Goal: R syntax where the model specification is an argument to a function.

# Invent a dataset
x <- runif(100); y <- runif(100); z <- 2 + 3*x + 4*y + rnorm(100)
D <- data.frame(x=x, y=y, z=z)

amodel <- function(modelstring) {
  # (what arrives here is actually a formula object, which lm() takes directly)
  summary(lm(modelstring, D))
}

amodel(z ~ x)
amodel(z ~ y)
3
r/README.md
Normal file
@ -0,0 +1,3 @@
# R programming language

R is a programming language designed for statistical computing and graphics. This directory contains code that can be executed within the R software environment.
@ -0,0 +1,43 @@
# Goal: Reading and writing ascii files, reading and writing binary files.
# And, to measure how much faster it is working with binary files.

# First manufacture a tall data frame:
# FYI -- runif(10) yields 10 U(0,1) random numbers.
B <- data.frame(x1=runif(100000), x2=runif(100000), x3=runif(100000))
summary(B)

# Write out ascii file:
write.table(B, file = "/tmp/foo.csv", sep = ",", col.names = NA)
# Read in this resulting ascii file:
C <- read.table("/tmp/foo.csv", header = TRUE, sep = ",", row.names=1)
# Write a binary file out of dataset C:
save(C, file="/tmp/foo.binary")
# Delete the dataset C:
rm(C)
# Restore from foo.binary:
load("/tmp/foo.binary")
summary(C)   # should yield the same results as summary(B) above.


# Now we time all these operations --
cat("Time creation of dataset:\n")
system.time({
  B <- data.frame(x1=runif(100000), x2=runif(100000), x3=runif(100000))
})

cat("Time writing an ascii file out of dataset B:\n")
system.time(
  write.table(B, file = "/tmp/foo.csv", sep = ",", col.names = NA)
)

cat("Time reading an ascii file into dataset C:\n")
system.time({
  C <- read.table("/tmp/foo.csv", header = TRUE, sep=",", row.names=1)
})

cat("Time writing a binary file out of dataset C:\n")
system.time(save(C, file="/tmp/foo.binary"))

cat("Time reading a binary file + variablenames from /tmp/foo.binary:\n")
system.time(load("/tmp/foo.binary"))   # and then read it in from binary file
15
r/Reading in a Microsoft .r
Normal file
@ -0,0 +1,15 @@
# Goal: Reading in a Microsoft .xls file directly

library(gdata)
a <- read.xls("file.xls", sheet=2)   # This reads in the 2nd sheet

# Look at what the cat dragged in
str(a)

# If you have a date column, you'll want to fix it up like this:
a$date <- as.Date(as.character(a$X), format="%d-%b-%y")
a$X <- NULL


# Also see http://tolstoy.newcastle.edu.au/R/help/06/04/25674.html for
# another path.
17
r/Reading in a file made by CMIE's Business Beacon program.r
Normal file
@ -0,0 +1,17 @@
# Goal: To read in files produced by CMIE's "Business Beacon".
# This assumes you have made a file of MONTHLY data using CMIE's
# Business Beacon program. This contains 2 columns: M3 and M0.

A <- read.table(
  # Generic to all BB files --
  sep="|",                      # CMIE's .txt file is pipe delimited
  skip=3,                       # Skip the 1st 3 lines
  na.strings=c("N.A.","Err"),   # The ways they encode missing data
  # Specific to your immediate situation --
  file="bb_data.text",
  col.names=c("junk", "date", "M3", "M0")
)
A$junk <- NULL   # Blow away this column

# Parse the CMIE-style "Mmm yy" date string that's used on monthly data
# (if the year in your file really is two-digit, use format="%d %b %y" instead)
A$date <- as.Date(paste("1", as.character(A$date)), format="%d %b %Y")
16
r/Sample From a Population.r
Normal file
@ -0,0 +1,16 @@
> # sample with replacement
> sample(x, replace = TRUE)
[1] 15 17 13 9 5 15 11 15 1

> # if we simply pass in a positive number n, it will sample
> # from 1:n without replacement
> sample(10)
[1] 2 4 7 9 1 3 10 5 8 6


An example to simulate a coin toss 10 times.

> sample(c("H","T"), 10, replace = TRUE)
[1] "H" "H" "H" "T" "H" "T" "H" "H" "H" "T"
@ -0,0 +1,40 @@
# Goals: Scare the hell out of children with the Cauchy distribution.

# A function which simulates N draws from one of two distributions,
# and returns the mean obtained thusly.
one.simulation <- function(N=100, distribution="normal") {
  if (distribution == "normal") {
    x <- rnorm(N)
  } else {
    x <- rcauchy(N)
  }
  mean(x)
}

k1 <- density(replicate(1000, one.simulation(20)))
k2 <- density(replicate(1000, one.simulation(20, distribution="cauchy")))

xrange <- range(k1$x, k2$x)
plot(k1$x, k1$y, xlim=xrange, type="l", xlab="Estimated value", ylab="")
grid()
lines(k2$x, k2$y, col="red")
abline(v=0)   # the true location of both distributions
legend(x="topleft", bty="n",
       lty=c(1,1),
       col=c("black", "red"),
       legend=c("Mean of Normal", "Mean of Cauchy"))
# The distribution of the mean of normals collapses into a point;
# that of the cauchy does not.

# Here's more scary stuff --
for (i in 1:10) {
  cat("Sigma of distribution of 1000 draws from mean of normal - ",
      sd(replicate(1000, one.simulation(20))), "\n")
}
for (i in 1:10) {
  cat("Sigma of distribution of 1000 draws from mean of cauchy - ",
      sd(replicate(1000, one.simulation(20, distribution="cauchy"))), "\n")
}

# Exercise for the reader: Compare the distribution of the median of
# the Normal against the distribution of the median of the Cauchy.
@ -0,0 +1,33 @@
# Goals: Lots of times, you need to give an R object to a friend,
# or embed data into an email.

# First I invent a little dataset --
set.seed(101)   # To make sure you get the same random numbers as me
# FYI -- runif(10) yields 10 U(0,1) random numbers.
A <- data.frame(x1=runif(10), x2=runif(10), x3=runif(10))
# Look at it --
print(A)

# Writing to a binary file that can be transported
save(A, file="/tmp/my_data_file.rda")   # You can give this file to a friend
load("/tmp/my_data_file.rda")

# Plan B - you want pure ascii, which can be put into an email --
dput(A)
# This gives you a block of R code. Let me utilise that generated code
# to create a dataset named "B".
B <- structure(list(x1 = c(0.372198376338929, 0.0438248154241592,
0.709684018278494, 0.657690396532416, 0.249855723232031, 0.300054833060130,
0.584866625955328, 0.333467143354937, 0.622011963743716, 0.54582855431363
), x2 = c(0.879795730113983, 0.706874740775675, 0.731972594512627,
0.931634427979589, 0.455120594473556, 0.590319729177281, 0.820436094887555,
0.224118480458856, 0.411666829371825, 0.0386105608195066), x3 = c(0.700711545301601,
0.956837461562827, 0.213352001970634, 0.661061500199139, 0.923318882007152,
0.795719761401415, 0.0712125543504953, 0.389407767681405, 0.406451216200367,
0.659355078125373)), .Names = c("x1", "x2", "x3"), row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10"), class = "data.frame")

# Verify that A and B are near-identical --
A-B
# or,
all.equal(A,B)
@ -0,0 +1,18 @@
# Goal: Display of a macroeconomic time-series, with a filled colour
# bar showing a recession.

years <- 1950:2000
timeseries <- cumsum(c(100, runif(50)*5))
hilo <- range(timeseries)
plot(years, timeseries, type="l", lwd=3)
# A recession from 1960 to 1965 --
polygon(x=c(1960,1960, 1965,1965),
        y=c(hilo, rev(hilo)),
        density=NA, col="orange", border=NA)
lines(years, timeseries, type="l", lwd=3)   # paint again so the line comes out on top

# alternative method -- though not as good looking --
# library(plotrix)
# gradient.rect(1960, hilo[1], 1965, hilo[2],
#               reds=c(0,1), greens=c(0,0), blues=c(0,0),
#               gradient="y")
@ -0,0 +1,26 @@
# Goal: Show the efficiency of the mean when compared with the median
# using a large simulation where both estimators are applied on
# a sample of U(0,1) uniformly distributed random numbers.

one.simulation <- function(N=100) {   # N defaults to 100 if not supplied
  x <- runif(N)
  return(c(mean(x), median(x)))
}

# Simulation --
results <- replicate(100000, one.simulation(20))   # Gives back a 2x100000 matrix

# Two kernel densities --
k1 <- density(results[1,])   # results[1,] is the 1st row
k2 <- density(results[2,])

# A pretty picture --
xrange <- range(k1$x, k2$x)
plot(k1$x, k1$y, xlim=xrange, type="l", xlab="Estimated value", ylab="")
grid()
lines(k2$x, k2$y, col="red")
abline(v=.5)   # the true value: mean and median of U(0,1) are both 0.5
legend(x="topleft", bty="n",
       lty=c(1,1),
       col=c("black", "red"),
       legend=c("Mean", "Median"))
48
r/Simple Calculator in R.r
Normal file
@ -0,0 +1,48 @@
Simple Calculator in R

# Program to make a simple calculator
# that can add, subtract, multiply
# and divide using functions

add <- function(x, y) {
  return(x + y)
}

subtract <- function(x, y) {
  return(x - y)
}

multiply <- function(x, y) {
  return(x * y)
}

divide <- function(x, y) {
  return(x / y)
}

# take input from the user
print("Select operation.")
print("1.Add")
print("2.Subtract")
print("3.Multiply")
print("4.Divide")

choice = as.integer(readline(prompt="Enter choice[1/2/3/4]: "))

num1 = as.integer(readline(prompt="Enter first number: "))
num2 = as.integer(readline(prompt="Enter second number: "))

operator <- switch(choice, "+", "-", "*", "/")
result <- switch(choice, add(num1, num2), subtract(num1, num2),
                 multiply(num1, num2), divide(num1, num2))

print(paste(num1, operator, num2, "=", result))

Output

[1] "Select operation."
[1] "1.Add"
[1] "2.Subtract"
[1] "3.Multiply"
[1] "4.Divide"
Enter choice[1/2/3/4]: 4
Enter first number: 20
Enter second number: 4
[1] "20 / 4 = 5"
53
r/Simulation to study size and power in a simple problem.r
Normal file
@ -0,0 +1,53 @@
# Goal: Simulation to study size and power in a simple problem.

set.seed(101)

# The data generating process: a simple uniform distribution with stated mean
dgp <- function(N, mu) {runif(N)-0.5+mu}

# Simulate one FIXED hypothesis test for H0:mu=0, given a true mu for a sample size N
one.test <- function(N, truemu) {
  x <- dgp(N, truemu)
  muhat <- mean(x)
  s <- sd(x)/sqrt(N)
  # Under the null, the distribution of the mean has standard error s
  threshold <- 1.96*s
  (muhat < -threshold) || (muhat > threshold)
} # Return of TRUE means reject the null

# Do one experiment, where the fixed H0:mu=0 is run Nexperiments times with a sample size N.
# We return only one number: the fraction of the time that H0 is rejected.
experiment <- function(Nexperiments, N, truemu) {
  sum(replicate(Nexperiments, one.test(N, truemu)))/Nexperiments
}

# Measure the size of the test, i.e. Pr(rejection) when H0 is true --
# here with a sample size of 50 and a true mu of 0.
experiment(10000, 50, 0)

# Power study: i.e. Pr(rejection) when H0 is false
# (one special case in here is when the H0 is actually true)

muvalues <- seq(-.15, .15, .01)
# When true mu < -0.15 and when true mu > 0.15,
# the Pr(rejection) veers to 1 (full power) and it's not interesting.

# First do this with sample size of 50
results <- NULL
for (truth in muvalues) {
  results <- c(results, experiment(10000, 50, truth))
}
par(mai=c(.8,.8,.2,.2))
plot(muvalues, results, type="l", lwd=2, ylim=c(0,1),
     xlab="True mu", ylab="Pr(Rejection of H0:mu=0)")
abline(h=0.05, lty=2)

# Now repeat this with sample size of 100 (should yield a higher power)
results <- NULL
for (truth in muvalues) {
  results <- c(results, experiment(10000, 100, truth))
}
lines(muvalues, results, lwd=2, col="blue")
legend(x=-0.15, y=.2, lwd=c(2,1,2), lty=c(1,2,1), cex=.8,
       col=c("black","black","blue"), bty="n",
       legend=c("N=50", "Size, 0.05", "N=100"))
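# Addendum (a sketch, not in the original): the grow-by-concatenation loop
# above can be written in one vectorised line with sapply(), which should
# give identical numbers for a given seed --
results <- sapply(muvalues, function(truth) experiment(10000, 50, truth))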
25
r/Sort a Vector.r
Normal file
@ -0,0 +1,25 @@
Sort a Vector

> x
[1] 7 1 8 3 2 6 5 2 2 4

> # sort in ascending order
> sort(x)
[1] 1 2 2 2 3 4 5 6 7 8

> # sort in descending order
> sort(x, decreasing=TRUE)
[1] 8 7 6 5 4 3 2 2 2 1

> # vector x remains unaffected
> x
[1] 7 1 8 3 2 6 5 2 2 4

Sometimes we want the indices of the sorted vector instead of the values. In that case we can use the function order().

> order(x)
[1] 2 5 8 9 4 10 7 6 1 3

> order(x, decreasing=TRUE)
[1] 3 1 6 7 10 4 5 8 9 2

> x[order(x)] # this also yields the sorted values; x itself is unchanged
[1] 1 2 2 2 3 4 5 6 7 8
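Addendum (a sketch, not part of the original transcript): sort() can hand back
the permutation as well, via its index.return argument; the $ix component
should match what order(x) reported above.

> s <- sort(x, index.return=TRUE)
> s$ix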
22
r/Special cases in reading files.r
Normal file
@ -0,0 +1,22 @@
# Goal: Special cases in reading files

# Reading in a .bz2 file --
read.table(bzfile("file.text.bz2"))      # Requires you have ./file.text.bz2

# Reading in a .gz file --
read.table(gzfile("file.text.gz"))       # Requires you have ./file.text.gz

# Reading from a pipe --
mydata <- read.table(pipe("awk -f filter.awk input.txt"))

# Reading from a URL --
read.table(url("http://www.mayin.org/ajayshah/A/demo.text"))

# This also works --
read.table("http://www.mayin.org/ajayshah/A/demo.text")

# Hmm, I couldn't think of how to read a .bz2 file from a URL. How about:
read.table(pipe("links -source http://www.mayin.org/ajayshah/A/demo.text.bz2 | bunzip2"))

# Reading binary files from a URL --
load(url("http://www.mayin.org/ajayshah/A/nifty_weekly_returns.rda"))
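# Addendum (a sketch, not in the original): a pipe-free route to the .bz2 file
# is to download it to a temporary file first, then open it with bzfile() --
tmp <- tempfile(fileext=".bz2")
download.file("http://www.mayin.org/ajayshah/A/demo.text.bz2", tmp)
read.table(bzfile(tmp))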
30
r/Standard computations with well-studied distributions.r
Normal file
@ -0,0 +1,30 @@
# Goal: Standard computations with well-studied distributions.

# The normal distribution is named "norm". With this, we have:

# Normal density
dnorm(c(-1.96,0,1.96))

# Cumulative normal distribution function
pnorm(c(-1.96,0,1.96))

# Inverse of this
qnorm(c(0.025,.5,.975))
pnorm(qnorm(c(0.025,.5,.975)))

# 1000 random numbers from the normal distribution
summary(rnorm(1000))


# Here are the same ideas, for the chi-squared distribution with 10 degrees
# of freedom.
dchisq(c(0,5,10), df=10)

# Cumulative chi-squared distribution function
pchisq(c(0,5,10), df=10)

# Inverse of this
qchisq(c(0.025,.5,.975), df=10)

# 1000 random numbers from the chi-squared distribution
summary(rchisq(1000, df=10))
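# Addendum (a sketch, not in the original): the same d/p/q/r naming convention
# holds across R's distributions; e.g. for the Student t with 10 d.f. --
dt(0, df=10)                 # density
pt(1.96, df=10)              # distribution function
qt(0.975, df=10)             # quantile function
summary(rt(1000, df=10))     # random draws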
24
r/Standard tests.r
Normal file
@ -0,0 +1,24 @@
# Goal: Some of the standard tests

# A classical setting --
x <- runif(100, 0, 10)          # 100 draws from U(0,10)
y <- 2 + 3*x + rnorm(100)       # beta = [2, 3] and sigma is 1
d <- lm(y ~ x)
# OLS results --
summary(d)

library(sandwich)
library(lmtest)
# Durbin-Watson test --
dwtest(d, alternative="two.sided")
# Breusch-Pagan test --
bptest(d)
# Heteroscedasticity and autocorrelation consistent (HAC) tests
coeftest(d, vcov=kernHAC)

# Transplant the HAC values back in --
library(xtable)
sum.d <- summary(d)
xtable(sum.d)
sum.d$coefficients[1:2,1:4] <- coeftest(d, vcov=kernHAC)[1:2,1:4]
xtable(sum.d)
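# Addendum (a sketch, not in the original): the sandwich package also ships
# the Newey-West HAC estimator, which drops into coeftest() the same way --
coeftest(d, vcov=NeweyWest(d))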
13
r/Take input from user.r
Normal file
@ -0,0 +1,13 @@
my.name <- readline(prompt="Enter name: ")
my.age <- readline(prompt="Enter age: ")

# convert the character string into an integer
my.age <- as.integer(my.age)

print(paste("Hi,", my.name, "next year you will be", my.age+1, "years old."))

Output

Enter name: Mary
Enter age: 17
[1] "Hi, Mary next year you will be 18 years old."
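Addendum (a sketch, not in the original): as.integer() returns NA (with a
warning) when the text cannot be parsed, so defensive input handling can be:

my.age <- suppressWarnings(as.integer(my.age))
if (is.na(my.age)) print("Please enter a whole number for age.")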
116
r/The amazing R vector notation.r
Normal file
@ -0,0 +1,116 @@
# Goal: The amazing R vector notation.

cat("EXAMPLE 1: sin(x) for a vector --\n")
# Suppose you have a vector x --
x = c(0.1,0.6,1.0,1.5)

# The bad way --
n = length(x)
r = numeric(n)
for (i in 1:n) {
  r[i] = sin(x[i])
}
print(r)

# The good way -- don't use loops --
print(sin(x))


cat("\n\nEXAMPLE 2: Compute the mean of every column of a matrix --\n")
# Here's another example. It isn't really about R; it's about thinking in
# matrix notation. But still.
# Let me setup a matrix --
N=4; M=100;
r = matrix(runif(N*M), N, M)

# So I face an N x M matrix
#       [r11 r12 ... r1M]
#       [r21 r22 ... r2M]
#       [r31 r32 ... r3M]
# My goal: each column needs to be reduced to a mean.

# Method 1 uses loops:
mean1 = numeric(M)
for (i in 1:M) {
  mean1[i] = mean(r[,i])
}

# Alternatively, just say:
mean2 = rep(1/N, N) %*% r     # Pretty!

# The two answers are the same --
all.equal(mean1,mean2[,])
#
# As an aside, I should say that you can do this directly by using
# the colMeans() function. But the above is more about pedagogy rather
# than showing you how to get column means.


cat("\n\nEXAMPLE 3: Nelson-Siegel yield curve\n")
# Write this as if you're dealing with scalars --
# Nelson-Siegel function
nsz <- function(b0, b1, b2, tau, t) {
  tmp = t/tau
  tmp2 = exp(-tmp)
  return(b0 + ((b1+b2)*(1-tmp2)/(tmp)) - (b2*tmp2))
}

timepoints <- c(0.01,1:5)

# The bad way:
z <- numeric(length(timepoints))
for (i in 1:length(timepoints)) {
  z[i] <- nsz(14.084,-3.4107,0.0015,1.8832,timepoints[i])
}
print(z)

# The R way --
print(z <- nsz(14.084,-3.4107,0.0015,1.8832,timepoints))


cat("\n\nEXAMPLE 4: Making the NPV of a bond --\n")
# You know the bad way - sum over all cashflows, NPVing each.
# Now look at the R way.
C = rep(100, 6)
nsz(14.084,-3.4107,0.0015,1.8832,timepoints)  # Print interest rates
C/((1.05)^timepoints)                         # Print cashflows discounted @ 5%
C/((1 + 0.01*nsz(14.084,-3.4107,0.0015,1.8832,timepoints))^timepoints) # Using NS instead of 5%
# NPV in two different ways --
C %*% (1 + (0.01*nsz(14.084,-3.4107,0.0015,1.8832,timepoints)))^-timepoints
sum(C * (1 + (0.01*nsz(14.084,-3.4107,0.0015,1.8832,timepoints)))^-timepoints)
# You can drop back to a flat yield curve at 5% easily --
sum(C * 1.05^-timepoints)

# Make a function for NPV --
npv <- function(C, timepoints, r) {
  return(sum(C * (1 + (0.01*r))^-timepoints))
}
npv(C, timepoints, 5)

# Bottom line: Here's how you make the NPV of a bond with cashflows C
# at timepoints timepoints when the zero curve is a Nelson-Siegel curve --
npv(C, timepoints, nsz(14.084,-3.4107,0.0015,1.8832,timepoints))
# Wow!

# ---------------------------------------------------------------------------
# Elegant vector notation is amazingly fast (in addition to being beautiful)
N <- 1e5
x <- runif(N, -3,3)
y <- runif(N)

method1 <- function(x,y) {
  tmp <- NULL
  for (i in 1:N) {
    if (x[i] < 0) tmp <- c(tmp, y[i])
  }
  tmp
}

method2 <- function(x,y) {
  y[x < 0]
}

s1 <- system.time(ans1 <- method1(x,y))
s2 <- system.time(ans2 <- method2(x,y))
all.equal(ans1,ans2)
s1/s2     # On my machine it's 2000x faster
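# Addendum (a sketch, not in the original): for EXAMPLE 2, the built-in
# colMeans() should agree with both hand-rolled methods above --
all.equal(mean1, colMeans(r))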
66
r/To do OLS by MLE.r
Normal file
@ -0,0 +1,66 @@
# Goal: To do OLS by MLE.

# OLS likelihood function
# Note: I am going to write the LF using sigma2=sigma^2 and not sigma.
ols.lf1 <- function(theta, y, X) {
  beta <- theta[-1]
  sigma2 <- theta[1]
  if (sigma2 <= 0) return(NA)
  n <- nrow(X)
  e <- y - X%*%beta
  logl <- ((-n/2)*log(2*pi)) - ((n/2)*log(sigma2)) - ((t(e)%*%e)/(2*sigma2)) # t() = matrix transpose
  return(-logl) # since optim() does minimisation by default.
}

# Analytical derivatives
ols.gradient <- function(theta, y, X) {
  beta <- theta[-1]
  sigma2 <- theta[1]
  e <- y - X%*%beta
  n <- nrow(X)

  g <- numeric(length(theta))
  g[1] <- (-n/(2*sigma2)) + (t(e)%*%e)/(2*sigma2*sigma2)  # d logl / d sigma2
  g[-1] <- (t(X) %*% e)/sigma2                            # d logl / d beta

  return(-g)
}

X <- cbind(1, runif(1000))
theta.true <- c(2,4,6)  # error variance = 2, intercept = 4, slope = 6.
y <- X %*% theta.true[-1] + sqrt(theta.true[1]) * rnorm(1000)

# Estimation by OLS --
d <- summary(lm(y ~ X[,2]))
theta.ols <- c(sigma2 = d$sigma^2, d$coefficients[,1])
cat("OLS theta = ", theta.ols, "\n\n")

cat("\nGradient-free (constrained optimisation) --\n")
optim(c(1,1,1), method="L-BFGS-B", fn=ols.lf1,
      lower=c(1e-6,-Inf,-Inf), upper=rep(Inf,3), y=y, X=X)

cat("\nUsing the gradient (constrained optimisation) --\n")
optim(c(1,1,1), method="L-BFGS-B", fn=ols.lf1, gr=ols.gradient,
      lower=c(1e-6,-Inf,-Inf), upper=rep(Inf,3), y=y, X=X)

cat("\n\nYou say you want a covariance matrix?\n")
p <- optim(c(1,1,1), method="L-BFGS-B", fn=ols.lf1, gr=ols.gradient,
           lower=c(1e-6,-Inf,-Inf), upper=rep(Inf,3), hessian=TRUE,
           y=y, X=X)
inverted <- solve(p$hessian)
results <- cbind(p$par, sqrt(diag(inverted)), p$par/sqrt(diag(inverted)))
colnames(results) <- c("Coefficient", "Std. Err.", "t")
rownames(results) <- c("Sigma2", "Intercept", "X")
cat("MLE results --\n")
print(results)
cat("Compare with the OLS results --\n")
d$coefficients

# Picture of how the loglikelihood changes if you perturb the sigma
theta <- theta.ols
delta.values <- seq(-1.5, 1.5, .01)
logl.values <- as.numeric(lapply(delta.values,
                                 function(x) {-ols.lf1(theta+c(x,0,0),y,X)}))
plot(sqrt(theta[1]+delta.values), logl.values, type="l", lwd=3, col="blue",
     xlab="Sigma", ylab="Log likelihood")
grid()
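# Addendum (a sketch, not in the original; assumes the numDeriv package is
# installed): the analytical gradient can be checked against a numerical one --
library(numDeriv)
max(abs(grad(ols.lf1, theta.ols, y=y, X=X) - ols.gradient(theta.ols, y=y, X=X)))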
28
r/To do `moving window volatility' of returns.r
Normal file
@ -0,0 +1,28 @@
# Goal: To do `moving window volatility' of returns.

library(zoo)

# Some data to play with (Nifty on all Fridays for calendar 2004) --
p <- structure(c(1946.05, 1971.9, 1900.65, 1847.55, 1809.75, 1833.65, 1913.6, 1852.65, 1800.3, 1867.7, 1812.2, 1725.1, 1747.5, 1841.1, 1853.55, 1868.95, 1892.45, 1796.1, 1804.45, 1582.4, 1560.2, 1508.75, 1521.1, 1508.45, 1491.2, 1488.5, 1537.5, 1553.2, 1558.8, 1601.6, 1632.3, 1633.4, 1607.2, 1590.35, 1609, 1634.1, 1668.75, 1733.65, 1722.5, 1775.15, 1820.2, 1795, 1779.75, 1786.9, 1852.3, 1872.95, 1872.35, 1901.05, 1996.2, 1969, 2012.1, 2062.7, 2080.5), index = structure(c(12419, 12426, 12433, 12440, 12447, 12454, 12461, 12468, 12475, 12482, 12489, 12496, 12503, 12510, 12517, 12524, 12531, 12538, 12545, 12552, 12559, 12566, 12573, 12580, 12587, 12594, 12601, 12608, 12615, 12622, 12629, 12636, 12643, 12650, 12657, 12664, 12671, 12678, 12685, 12692, 12699, 12706, 12713, 12720, 12727, 12734, 12741, 12748, 12755, 12762, 12769, 12776, 12783), class = "Date"), frequency = 0.142857142857143, class = c("zooreg", "zoo"))

# Shift to returns --
r <- 100*diff(log(p))
head(r)
summary(r)
sd(r)

# Compute the moving window vol --
vol <- sqrt(52) * rollapply(r, 20, sd, align = "right")  # 52 weeks/year, since r is weekly

# A pretty plot --
plot(vol, type="l", ylim=c(0,max(vol,na.rm=TRUE)),
     lwd=2, col="purple", xlab="2004",
     ylab="Annualised sigma, 20-week window")
grid()
legend(x="bottomleft", col=c("purple", "darkgreen"),
       lwd=c(2,2), bty="n", cex=0.8,
       legend=c("Annualised 20-week vol (left scale)", "Nifty (right scale)"))
par(new=TRUE)
plot(p, type="l", lwd=2, col="darkgreen",
     xaxt="n", yaxt="n", xlab="", ylab="")
axis(4)
58
r/To do nonlinear regression.r
Normal file
@ -0,0 +1,58 @@
# Goal: To do nonlinear regression, in three ways
#   By just supplying the function to be fit,
#   By also supplying the analytical derivatives, and
#   By having him analytically differentiate the function to be fit.
#
# John Fox has a book "An R and S+ companion to applied regression"
# (abbreviated CAR).
# An appendix associated with this book, titled
# "Nonlinear regression and NLS"
# is up on the web, and I strongly recommend that you go read it.
#
# This file is essentially from there (I have made slight changes).

# First take some data - from the CAR book --
library(car)
data(US.pop)
attach(US.pop)
plot(year, population, type="l", col="blue")

# So you see, we have a time-series of the US population. We want to
# fit a nonlinear model to it.

library(stats)  # Contains nonlinear regression
time <- 0:20
pop.mod <- nls(population ~ beta1/(1 + exp(beta2 + beta3*time)),
               start=list(beta1=350, beta2=4.5, beta3=-0.3), trace=TRUE)
# You just write in the formula that you want to fit, and supply
# starting values. "trace=TRUE" makes him show iterations go by.

summary(pop.mod)
# Add in predicted values into the plot
lines(year, fitted.values(pop.mod), lwd=3, col="red")

# Look at residuals
plot(year, residuals(pop.mod), type="b")
abline(h=0, lty=2)

# Using analytical derivatives:
model <- function(beta1, beta2, beta3, time) {
  m <- beta1/(1+exp(beta2+beta3*time))
  term <- exp(beta2 + beta3*time)
  gradient <- cbind((1+term)^-1,
                    -beta1*(1+term)^-2 * term,
                    -beta1*(1+term)^-2 * term * time)
  attr(m, 'gradient') <- gradient
  return(m)
}

summary(nls(population ~ model(beta1, beta2, beta3, time),
            start=list(beta1=350, beta2=4.5, beta3=-0.3)))

# Using analytical derivatives, using automatic differentiation (!!!):
model <- deriv(~ beta1/(1 + exp(beta2+beta3*time)),    # rhs of model
               c('beta1', 'beta2', 'beta3'),           # parameter names
               function(beta1, beta2, beta3, time){}   # arguments for result
               )
summary(nls(population ~ model(beta1, beta2, beta3, time),
            start=list(beta1=350, beta2=4.5, beta3=-0.3)))
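# Addendum (a sketch, not in the original): predict() also works for nls fits;
# with time measured in decades, time=21 and 22 extrapolate one and two
# decades past the end of the sample --
predict(pop.mod, newdata=list(time=c(21, 22)))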
18
r/To do sorting.r
Normal file
@ -0,0 +1,18 @@
# Goal: To do sorting.
#
# The approach here needs to be explained. If `i' is a vector of
# integers, then the data frame D[i,] picks up rows from D based
# on the values found in `i'.
#
# The order() function makes an integer vector which is a correct
# ordering for the purpose of sorting.

D <- data.frame(x=c(1,2,3,1), y=c(7,19,2,2))
D

# Sort on x
indexes <- order(D$x)
D[indexes,]

# Print out sorted dataset, sorted in reverse by y
D[rev(order(D$y)),]
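# Addendum (a sketch, not in the original): order() accepts several keys, so
# ties on x can be broken by descending y in one call --
D[order(D$x, -D$y),]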
35
r/To make a latex table with results of an OLS regression.r
Normal file
@ -0,0 +1,35 @@
# Goal: To make a latex table with results of an OLS regression.

# Get an OLS --
x1 = runif(100)
x2 = runif(100, 0, 2)
y = 2 + 3*x1 + 4*x2 + rnorm(100)
m = lm(y ~ x1 + x2)

# and print it out prettily --
library(xtable)
# Bare --
xtable(m)
xtable(anova(m))

# Better --
print.xtable(xtable(m, caption="My regression",
                    label="t:mymodel",
                    digits=c(0,3,2,2,3)),
             type="latex",
             file="xtable_demo_ols.tex",
             table.placement = "tp",
             latex.environments=c("center", "footnotesize"))

print.xtable(xtable(anova(m),
                    caption="ANOVA of my regression",
                    label="t:anova_mymodel"),
             type="latex",
             file="xtable_demo_anova.tex",
             table.placement = "tp",
             latex.environments=c("center", "footnotesize"))

# Read the documentation of xtable. It actually knows how to generate
# pretty latex tables for a lot more R objects than just OLS results.
# It can be a workhorse for making tabular out of matrices, and
# can also generate HTML.
33
r/To make a panel of pictures.r
Normal file
@ -0,0 +1,33 @@
# Goal: To make a panel of pictures.

par(mfrow=c(3,2))  # 3 rows, 2 columns.

# Now the next 6 pictures will be placed on these 6 regions. :-)

# Let me take some pains on the 1st
plot(density(runif(100)), lwd=2)
text(x=0, y=0.2, "100 uniforms")  # Showing you how to place text at will
abline(h=0, v=0)
# All these statements affect the 1st plot.

x=seq(0.01,1,0.01)
par(col="blue")  # set the default colour to blue.

# 2 --
plot(x, sin(x), type="l")
lines(x, cos(x), type="l", col="red")

# 3 --
plot(x, exp(x), type="l", col="green")
lines(x, log(x), type="l", col="orange")

# 4 --
plot(x, tan(x), type="l", lwd=3, col="yellow")

# 5 --
plot(x, exp(-x), lwd=2)
lines(x, exp(x), col="green", lwd=3)

# 6 --
plot(x, sin(x*x), type="l")
lines(x, sin(1/x), col="pink")
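# Addendum (a sketch, not in the original): layout() is a more flexible
# alternative to par(mfrow=...); e.g. one wide panel above two small ones --
layout(matrix(c(1,1,2,3), nrow=2, byrow=TRUE))
plot(density(runif(100)), main="wide panel")
plot(1:10)
plot(rnorm(10))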
41
r/To make latex tabular out of an R matrix.r
Normal file
@ -0,0 +1,41 @@
# Goal: To make latex tabular out of an R matrix

# Setup a nice R object:
m <- matrix(rnorm(8), nrow=2)
rownames(m) <- c("Age", "Weight")
colnames(m) <- c("Person1", "Person2", "Person3", "Person4")
m

# Translate it into a latex tabular:
library(xtable)
xtable(m, digits=rep(3,5))

# Production latex code that goes into a paper or a book --
print(xtable(m,
             caption="String",
             label="t:"),
      type="latex",
      file="blah.gen",
      table.placement="tp",
      latex.environments=c("center", "footnotesize"))
# Now you do \input{blah.gen} in your latex file.



# You're lazy, and want to use R to generate latex tables for you?
data <- cbind(
              c(7,9,11,2),
              c(2,4,19,21)
             )
colnames(data) <- c("a","b")
rownames(data) <- c("x","y","z","a")
xtable(data)

# or you could do
data <- rbind(
              c(7,2),
              c(9,4),
              c(11,19),
              c(2,21)
             )
# and the rest goes through identically.
24
r/To read in a simple data file where date data is present.r
Normal file
@ -0,0 +1,24 @@
# Goal: To read in a simple data file where date data is present.

# Suppose you have a file "x.data" which looks like this:
# 1997-07-04,3.1,4
# 1997-07-05,7.2,19
# 1997-07-07,1.7,2
# 1997-07-08,1.1,13

A <- read.table("x.data", sep=",",
                col.names=c("date", "my1", "my2"))
A$date <- as.Date(A$date, format="%Y-%m-%d")
# Say ?strptime to learn how to use "%" to specify
# other date formats. Two examples --
#   "15/12/2002" needs "%d/%m/%Y"
#   "03 Jun 1997" needs "%d %b %Y"

# Actually, if you're using the ISO 8601 date format, i.e.
# "%Y-%m-%d", that's the default setting and you don't need to
# specify the format.

A$newcol <- A$my1 + A$my2  # Makes a new column in A
newvar <- A$my1 - A$my2    # Makes a new R object "newvar"
A$my1 <- NULL              # Delete the `my1' column
summary(A)                 # Makes summary statistics
@ -0,0 +1,26 @@
# Goal: To read in a simple data file, and look around its contents.

# Suppose you have a file "x.data" which looks like this:
# 1997,3.1,4
# 1998,7.2,19
# 1999,1.7,2
# 2000,1.1,13
# To read it in --

A <- read.table("x.data", sep=",",
                col.names=c("year", "my1", "my2"))
nrow(A)          # Count the rows in A

summary(A$year)  # The column "year" in data frame A
                 # is accessed as A$year

A$newcol <- A$my1 + A$my2  # Makes a new column in A
newvar <- A$my1 - A$my2    # Makes a new R object "newvar"
A$my1 <- NULL              # Removes the column "my1"

# You might find these useful, to "look around" a dataset --
str(A)
summary(A)
library(Hmisc)   # This requires that you've installed the Hmisc package
contents(A)
describe(A)
43
r/To show amazing R indexing notation, and the use of is.r
Normal file
@ -0,0 +1,43 @@
# Goal: To show amazing R indexing notation, and the use of is.na()

x <- c(2,7,9,2,NA,5)  # An example vector to play with.

# Give me elems 1 to 3 --
x[1:3]

# Give me all but elem 1 --
x[-1]

# Odd numbered elements --
indexes <- seq(1,6,2)
x[indexes]
# or, more compactly,
x[seq(1,6,2)]

# Access elements by specifying "on" / "off" through booleans --
require <- c(TRUE,TRUE,FALSE,FALSE,FALSE,FALSE)
x[require]
# Short vectors get recycled! So, to get odd numbered elems --
x[c(TRUE,FALSE)]

# Locate missing data --
is.na(x)

# Replace missing data by 0 --
x[is.na(x)] <- 0
x

# Similar ideas work for matrices --
y <- matrix(c(2,7,9,2,NA,5), nrow=2)
y

# Make a matrix containing columns 1 and 3 --
y[,c(1,3)]

# Let us see what is.na(y) does --
is.na(y)
str(is.na(y))
# So is.na(y) gives back a matrix with the identical structure as that of y.
# Hence I can say
y[is.na(y)] <- -1
y
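# Addendum (a sketch, not in the original): which() turns a boolean mask into
# integer positions, which combines naturally with the indexing above --
z <- c(2,7,9,2,NA,5)
which(is.na(z))  # position of the missing element
which(z > 4)     # positions where z exceeds 4 (the NA is dropped)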
42
r/To write functions.r
Normal file
@ -0,0 +1,42 @@
# Goals: To write functions
#        To write functions that send back multiple objects.

# FIRST LEARN ABOUT LISTS --
X = list(height=5.4, weight=54)
print("Use default printing --")
print(X)
print("Accessing individual elements --")
cat("Your height is ", X$height, " and your weight is ", X$weight, "\n")

# FUNCTIONS --
square <- function(x) {
  return(x*x)
}
cat("The square of 3 is ", square(3), "\n")

# default value of the arg is set to 5.
cube <- function(x=5) {
  return(x*x*x);
}
cat("Calling cube with 2 : ", cube(2), "\n")  # will give 2^3
cat("Calling cube : ", cube(), "\n")          # will default to 5^3.

# LEARN ABOUT FUNCTIONS THAT RETURN MULTIPLE OBJECTS --
powers <- function(x) {
  parcel = list(x2=x*x, x3=x*x*x, x4=x*x*x*x);
  return(parcel);
}

X = powers(3);
print("Showing powers of 3 --"); print(X);

# WRITING THIS COMPACTLY (4 lines instead of 7)

powerful <- function(x) {
  return(list(x2=x*x, x3=x*x*x, x4=x*x*x*x));
}
print("Showing powers of 3 --"); print(powerful(3));

# In R, the last expression in a function is, by default, what is
# returned. So you could equally just say:
powerful <- function(x) {list(x2=x*x, x3=x*x*x, x4=x*x*x*x)}
44
r/Two CDFs and a two-sample Kolmogorov-Smirnoff test.r
Normal file
@ -0,0 +1,44 @@
# Goal: Given two vectors of data,
#       superpose their CDFs
#       and show the results of the two-sample Kolmogorov-Smirnov test

# The function consumes two vectors x1 and x2.
# You have to provide a pair of labels as `legendstrings'.
# If you supply an xlab, it's used
# If you specify log - e.g. log="x" - this is passed on to plot.
# The remaining args that you specify are sent on into ks.test()
two.cdfs.plot <- function(x1, x2, legendstrings, xlab="", log="", ...) {
  stopifnot(length(x1)>0,
            length(x2)>0,
            length(legendstrings)==2)
  hilo <- range(c(x1,x2))

  par(mai=c(.8,.8,.2,.2))
  plot(ecdf(x1), xlim=hilo, verticals=TRUE, cex=0,
       xlab=xlab, log=log, ylab="Cum. distribution", main="")
  grid()
  plot(ecdf(x2), add=TRUE, verticals=TRUE, cex=0, lwd=3)
  legend(x="bottomright", lwd=c(1,3), lty=1, bty="n",
         legend=legendstrings)

  k <- ks.test(x1,x2, ...)
  text(x=hilo[1], y=c(.9,.85), pos=4, cex=.8,
       labels=c(
         paste("KS test statistic: ", sprintf("%.3g", k$statistic)),
         paste("Prob value: ", sprintf("%.3g", k$p.value))
       )
      )
  k
}

x1 <- rnorm(100, mean=7, sd=1)
x2 <- rnorm(100, mean=9, sd=1)

# Check error detection: this call should fail, since no legendstrings
# are supplied --
two.cdfs.plot(x1,x2)

# Typical use --
two.cdfs.plot(x1, x2, c("X1","X2"), xlab="Height (metres)", log="x")

# Send args into ks.test() --
two.cdfs.plot(x1, x2, c("X1","X2"), alternative="less")
61
r/Using orthogonal polynomials.r
Normal file
@ -0,0 +1,61 @@
# Goal: Experiment with fitting nonlinear functional forms in
# OLS, using orthogonal polynomials to avoid difficulties with
# near-singular design matrices that occur with ordinary polynomials.
# Shriya Anand, Gabor Grothendieck, Ajay Shah, March 2006.

# We will deal with noisy data from the d.g.p. y = sin(x) + e
x <- seq(0, 2*pi, length.out=50)
set.seed(101)
y <- sin(x) + 0.3*rnorm(50)
basicplot <- function(x, y, minx=0, maxx=3*pi, title="") {
  plot(x, y, xlim=c(minx,maxx), ylim=c(-2,2), main=title)
  lines(x, sin(x), col="blue", lty=2, lwd=2)
  abline(h=0, v=0)
}
x.outsample <- seq(0, 3*pi, length.out=100)

# Severe multicollinearity with ordinary polynomials
x2 <- x*x
x3 <- x2*x
x4 <- x3*x
cor(cbind(x, x2, x3, x4))
# and a perfect design matrix using orthogonal polynomials
m <- poly(x, 4)
all.equal(cor(m), diag(4))  # Correlation matrix is I.

par(mfrow=c(2,2))
# Ordinary polynomial regression --
p <- lm(y ~ x + I(x^2) + I(x^3) + I(x^4))
summary(p)
basicplot(x, y, title="Polynomial, insample")       # Data
lines(x, fitted(p), col="red", lwd=3)               # In-sample
basicplot(x, y, title="Polynomial, out-of-sample")
predictions.p <- predict(p, list(x = x.outsample))  # Out-of-sample
lines(x.outsample, predictions.p, type="l", col="red", lwd=3)
lines(x.outsample, sin(x.outsample), type="l", col="blue", lwd=2, lty=2)
# As expected, polynomial fitting gives terrible results out of sample.

# The IDENTICAL things using orthogonal polynomials
d <- lm(y ~ poly(x, 4))
summary(d)
basicplot(x, y, title="Orth. poly., insample")       # Data
lines(x, fitted(d), col="red", lwd=3)                # In-sample
basicplot(x, y, title="Orth. poly., out-of-sample")
predictions.op <- predict(d, list(x = x.outsample))  # Out-of-sample
lines(x.outsample, predictions.op, type="l", col="red", lwd=3)
lines(x.outsample, sin(x.outsample), type="l", col="blue", lwd=2, lty=2)

# predict(d) is magical! See ?SafePrediction
# The story runs at two levels. First, when you do an OLS model,
# predict()ion requires applying coefficients to an appropriate
# X matrix. But one level deeper, the polynomial or orthogonal-polynomial
# needs to be utilised for computing the X matrix based on the
# supplied x.outsample data.
# If you say p <- poly(x, n)
# then you can say predict(p, new) where predict.poly() gets invoked.
# And when you say predict(lm()), the full steps are worked out for
# you automatically: predict.poly() is used to make an X matrix and
# then prediction based on the regression results is done.

all.equal(predictions.p, predictions.op)  # Both paths are identical for this
                                          # (tame) problem.
26
r/Utilise matrix notation.r
Normal file
@ -0,0 +1,26 @@
# Goal: Utilise matrix notation
# We use the problems of portfolio analysis as an example.

# Prices of 4 firms to play with, at weekly frequency (for calendar 2004) --
p <- structure(c(300.403, 294.604, 291.038, 283.805, 270.773, 275.506, 292.271, 292.837, 284.872, 295.037, 280.939, 259.574, 250.608, 268.84, 266.507, 263.94, 273.173, 238.609, 230.677, 192.847, 219.078, 201.846, 210.279, 193.281, 186.748, 197.314, 202.813, 204.08, 226.044, 242.442, 261.274, 269.173, 256.05, 259.75, 243, 250.3, 263.45, 279.5, 289.55, 291.95, 302.1, 284.4, 283.5, 287.8, 298.3, 307.6, 307.65, 311.9, 327.7, 318.1, 333.6, 358.9, 385.1, 53.6, 51.95, 47.65, 44.8, 44.85, 44.3, 47.1, 44.2, 41.8, 41.9, 41, 35.3, 33.35, 35.6, 34.55, 35.55, 40.05, 35, 34.85, 28.95, 31, 29.25, 29.05, 28.95, 24.95, 26.15, 28.35, 29.4, 32.55, 37.2, 39.85, 40.8, 38.2, 40.35, 37.55, 39.4, 39.8, 43.25, 44.75, 47.25, 49.6, 47.6, 46.35, 49.4, 49.5, 50.05, 50.5, 51.85, 56.35, 54.15, 58, 60.7, 62.7, 293.687, 292.746, 283.222, 286.63, 259.774, 259.257, 270.898, 250.625, 242.401, 248.1, 244.942, 239.384, 237.926, 224.886, 243.959, 270.998, 265.557, 257.508, 258.266, 257.574, 251.917, 250.583, 250.783, 246.6, 252.475, 266.625, 263.85, 249.925, 262.9, 264.975, 273.425, 275.575, 267.2, 282.25, 284.25, 290.75, 295.625, 296.25, 291.375, 302.225, 318.95, 324.825, 320.55, 328.75, 344.05, 345.925, 356.5, 368.275, 374.825, 373.525, 378.325, 378.6, 374.4, 1416.7, 1455.15, 1380.97, 1365.31, 1303.2, 1389.64, 1344.05, 1266.29, 1265.61, 1312.17, 1259.25, 1297.3, 1327.38, 1250, 1328.03, 1347.46, 1326.79, 1286.54, 1304.84, 1272.44, 1227.53, 1264.44, 1304.34, 1277.65, 1316.12, 1370.97, 1423.35, 1382.5, 1477.75, 1455.15, 1553.5, 1526.8, 1479.85, 1546.8, 1565.3, 1606.6, 1654.05, 1689.7, 1613.95, 1703.25, 1708.05, 1786.75, 1779.75, 1906.35, 1976.6, 2027.2, 2057.85, 2029.6, 2051.35, 2033.4, 2089.1, 2065.2, 2091.7), .Dim = c(53, 4), .Dimnames = list(NULL, c("TISCO", "SAIL", "Wipro", "Infosys")))

# Shift from prices to returns --
r <- 100*diff(log(p))

# Historical expected returns --
colMeans(r)
# Historical correlation matrix --
cor(r)
# Historical covariance matrix --
S <- cov(r)
S

# Historical portfolio variance for a stated portfolio of 20%,20%,30%,30% --
w <- c(.2, .2, .3, .3)
t(w) %*% S %*% w

# The portfolio optimisation function in tseries --
library(tseries)
optimised <- portfolio.optim(r)  # This uses the historical facts from r
optimised$pw  # Weights
optimised$pm  # Expected return using these weights
optimised$ps  # Standard deviation of optimised port.
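# Addendum (a sketch, not in the original): the portfolio variance above is a
# weekly number; with 52 weeks in the year, the annualised standard deviation
# of this portfolio is
sqrt(52 * t(w) %*% S %*% w)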
17
r/Vector Elements Arithmetic.r
Normal file
@ -0,0 +1,17 @@
> sum(2,7,5)
[1] 14

> x
[1] 2 NA 3 1 4

> sum(x)              # if any element is NA or NaN, result is NA or NaN
[1] NA

> sum(x, na.rm=TRUE)  # this way we can ignore NA and NaN values
[1] 10

> mean(x, na.rm=TRUE)
[1] 2.5

> prod(x, na.rm=TRUE)
[1] 24
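Addendum (a sketch, not part of the original transcript): the cumulative
variants have no na.rm argument, so NA values propagate; drop them first if
needed.

> cumsum(x)
[1]  2 NA NA NA NA
> cumsum(na.omit(x))
[1]  2  5  6 10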
41
r/z=f(x,y) using contour lines and colours.r
Normal file
File diff suppressed because one or more lines are too long