Adding R programming language
This commit is contained in:
parent
599b63599b
commit
026079a47d
@ -0,0 +1,31 @@
|
||||
# Goals: A first look at R objects - vectors, lists, matrices, data frames.
|
||||
|
||||
# To make vectors "x" "y" "year" and "names"
|
||||
x <- c(2,3,7,9)
|
||||
y <- c(9,7,3,2)
|
||||
year <- 1990:1993
|
||||
names <- c("payal", "shraddha", "kritika", "itida")
|
||||
# Accessing the 1st and last elements of y --
|
||||
y[1]
|
||||
y[length(y)]
|
||||
|
||||
# To make a list "person" --
|
||||
person <- list(name="payal", x=2, y=9, year=1990)
|
||||
person
|
||||
# Accessing things inside a list --
|
||||
person$name
|
||||
person$x
|
||||
|
||||
# To make a matrix, pasting together the columns "year" "x" and "y"
|
||||
# The verb cbind() stands for "column bind"
|
||||
cbind(year, x, y)
|
||||
|
||||
# To make a "data frame", which is a list of vectors of the same length --
|
||||
D <- data.frame(names, year, x, y)
|
||||
nrow(D)
|
||||
# Accessing one of these vectors
|
||||
D$names
|
||||
# Accessing the last element of this vector
|
||||
D$names[nrow(D)]
|
||||
# Or equally,
|
||||
D$names[length(D$names)]
|
11
r/A histogram with tails shown in red.r
Normal file
11
r/A histogram with tails shown in red.r
Normal file
@ -0,0 +1,11 @@
|
||||
# Goal: A histogram with tails shown in red.
|
||||
|
||||
# This happened on the R mailing list on 7 May 2004.
|
||||
# This is by Martin Maechler <maechler@stat.math.ethz.ch>, who was
|
||||
# responding to a slightly imperfect version of this by
|
||||
# "Guazzetti Stefano" <Stefano.Guazzetti@ausl.re.it>
|
||||
|
||||
x <- rnorm(1000)
|
||||
hx <- hist(x, breaks=100, plot=FALSE)
|
||||
plot(hx, col=ifelse(abs(hx$breaks) < 1.669, 4, 2))
|
||||
# What is cool is that "col" is supplied a vector.
|
67
r/ARMA modeling - estimation, diagnostics, forecasting.r
Normal file
67
r/ARMA modeling - estimation, diagnostics, forecasting.r
Normal file
@ -0,0 +1,67 @@
|
||||
# Goals: ARMA modeling - estimation, diagnostics, forecasting.
|
||||
|
||||
|
||||
# 0. SETUP DATA
|
||||
rawdata <- c(-0.21,-2.28,-2.71,2.26,-1.11,1.71,2.63,-0.45,-0.11,4.79,5.07,-2.24,6.46,3.82,4.29,-1.47,2.69,7.95,4.46,7.28,3.43,-3.19,-3.14,-1.25,-0.50,2.25,2.77,6.72,9.17,3.73,6.72,6.04,10.62,9.89,8.23,5.37,-0.10,1.40,1.60,3.40,3.80,3.60,4.90,9.60,18.20,20.60,15.20,27.00,15.42,13.31,11.22,12.77,12.43,15.83,11.44,12.32,12.10,12.02,14.41,13.54,11.36,12.97,10.00,7.20,8.74,3.92,8.73,2.19,3.85,1.48,2.28,2.98,4.21,3.85,6.52,8.16,5.36,8.58,7.00,10.57,7.12,7.95,7.05,3.84,4.93,4.30,5.44,3.77,4.71,3.18,0.00,5.25,4.27,5.14,3.53,4.54,4.70,7.40,4.80,6.20,7.29,7.30,8.38,3.83,8.07,4.88,8.17,8.25,6.46,5.96,5.88,5.03,4.99,5.87,6.78,7.43,3.61,4.29,2.97,2.35,2.49,1.56,2.65,2.49,2.85,1.89,3.05,2.27,2.91,3.94,2.34,3.14,4.11,4.12,4.53,7.11,6.17,6.25,7.03,4.13,6.15,6.73,6.99,5.86,4.19,6.38,6.68,6.58,5.75,7.51,6.22,8.22,7.45,8.00,8.29,8.05,8.91,6.83,7.33,8.52,8.62,9.80,10.63,7.70,8.91,7.50,5.88,9.82,8.44,10.92,11.67)
|
||||
|
||||
# Make an R time series out of the rawdata: specify frequency & start date
|
||||
gIIP <- ts(rawdata, frequency=12, start=c(1991,4))
|
||||
print(gIIP)
|
||||
plot.ts(gIIP, type="l", col="blue", ylab="IIP Growth (%)", lwd=2,
|
||||
main="Full data")
|
||||
grid()
|
||||
|
||||
# Based on this, I decide that 4/1995 is the start of the sensible period.
|
||||
gIIP <- window(gIIP, start=c(1995,4))
|
||||
print(gIIP)
|
||||
plot.ts(gIIP, type="l", col="blue", ylab="IIP Growth (%)", lwd=2,
|
||||
main="Estimation subset")
|
||||
grid()
|
||||
|
||||
# Descriptive statistics about gIIP
|
||||
mean(gIIP); sd(gIIP); summary(gIIP);
|
||||
plot(density(gIIP), col="blue", main="(Unconditional) Density of IIP growth")
|
||||
acf(gIIP)
|
||||
|
||||
|
||||
# 1. ARMA ESTIMATION
|
||||
m.ar2 <- arima(gIIP, order = c(2,0,0))
|
||||
print(m.ar2) # Print it out
|
||||
|
||||
|
||||
# 2. ARMA DIAGNOSTICS
|
||||
tsdiag(m.ar2) # His pretty picture of diagnostics
|
||||
## Time series structure in errors
|
||||
print(Box.test(m.ar2$residuals, lag=12, type="Ljung-Box"));
|
||||
## Sniff for ARCH
|
||||
print(Box.test(m.ar2$residuals^2, lag=12, type="Ljung-Box"));
|
||||
## Eyeball distribution of residuals
|
||||
plot(density(m.ar2$residuals), col="blue", xlim=c(-8,8),
|
||||
main=paste("Residuals of AR(2)"))
|
||||
|
||||
|
||||
# 3. FORECASTING
|
||||
## Make a picture of the residuals
|
||||
# Plot the model innovations. Spell out `residuals` in full (as the
# diagnostics above do) instead of relying on partial matching by `$`.
plot.ts(m.ar2$residuals, ylab="Innovations", col="blue", lwd=2)
|
||||
s <- sqrt(m.ar2$sigma2)
|
||||
abline(h=c(-s,s), lwd=2, col="lightGray")
|
||||
|
||||
p <- predict(m.ar2, n.ahead = 12) # Make 12 predictions.
|
||||
print(p)
|
||||
|
||||
## Watch the forecastability decay away from fat values to 0.
|
||||
## sd(x) is the naive sigma. p$se is the prediction se.
|
||||
gain <- 100*(1-p$se/sd(gIIP))
|
||||
plot.ts(gain, main="Gain in forecast s.d.", ylab="Per cent",
|
||||
col="blue", lwd=2)
|
||||
|
||||
## Make a pretty picture that puts it all together
|
||||
ts.plot(gIIP, p$pred, p$pred-1.96*p$se, p$pred+1.96*p$se,
|
||||
gpars=list(lty=c(1,1,2,2), lwd=c(2,2,1,1),
|
||||
ylab="IIP growth (%)", col=c("blue","red", "red", "red")))
|
||||
grid()
|
||||
abline(h=mean(gIIP), lty=2, lwd=2, col="lightGray")
|
||||
# Legend entries mirror what was drawn: the forecast line is plotted with
# lwd=2 by ts.plot(), so its legend key uses lwd=2 as well.
legend(x="bottomleft", cex=0.8, bty="n",
       lty=c(1,1,2,2), lwd=c(2,2,1,2),
       col=c("blue", "red", "red", "lightGray"),
       legend=c("IIP", "AR(2) forecasts", "95% C.I.", "Mean IIP growth"))
|
16
r/Add Two Vectors.r
Normal file
16
r/Add Two Vectors.r
Normal file
@ -0,0 +1,16 @@
|
||||
> x
|
||||
[1] 3 6 8
|
||||
> y
|
||||
[1] 2 9 0
|
||||
|
||||
> x + y
|
||||
[1] 5 15 8
|
||||
|
||||
> x + 1 # 1 is recycled to (1,1,1)
|
||||
[1] 4 7 9
|
||||
|
||||
> x + c(1,4) # (1,4) is recycled to (1,4,1) but warning issued
|
||||
[1] 4 10 9
|
||||
Warning message:
|
||||
In x + c(1, 4) :
|
||||
longer object length is not a multiple of shorter object length
|
33
r/All manner of import and export of datasets.r
Normal file
33
r/All manner of import and export of datasets.r
Normal file
@ -0,0 +1,33 @@
|
||||
# Goal: All manner of import and export of datasets.
|
||||
|
||||
# Invent a dataset --
|
||||
A <- data.frame(
|
||||
name=c("a","b","c"),
|
||||
ownership=c("Case 1","Case 1","Case 2"),
|
||||
listed.at=c("NSE",NA,"BSE"),
|
||||
# Firm "b" is unlisted.
|
||||
is.listed=c(TRUE,FALSE,TRUE),
|
||||
# R convention - boolean variables are named "is.something"
|
||||
x=c(2.2,3.3,4.4),
|
||||
date=as.Date(c("2004-04-04","2005-05-05","2006-06-06"))
|
||||
)
|
||||
|
||||
# To a spreadsheet through a CSV file --
|
||||
write.table(A,file="demo.csv",sep = ",",col.names = NA,qmethod = "double")
|
||||
B <- read.table("demo.csv", header = TRUE, sep = ",", row.names = 1)
|
||||
|
||||
# To R as a binary file --
|
||||
save(A, file="demo.rda")
|
||||
load("demo.rda")
|
||||
|
||||
# To the Open XML standard for transport for statistical data --
|
||||
library(StatDataML)
|
||||
writeSDML(A, "/tmp/demo.sdml")
|
||||
B <- readSDML("/tmp/demo.sdml")
|
||||
|
||||
# To Stata --
|
||||
library(foreign)
|
||||
write.dta(A, "/tmp/demo.dta")
|
||||
B <- read.dta("/tmp/demo.dta")
|
||||
|
||||
# foreign::write.foreign() also has a pathway to SAS and SPSS.
|
33
r/An example of simulation-based inference.r
Normal file
33
r/An example of simulation-based inference.r
Normal file
@ -0,0 +1,33 @@
|
||||
# Goal: An example of simulation-based inference.
|
||||
# This is in the context of testing for time-series dependence in
|
||||
# stock market returns data.
|
||||
# The code here does the idea of Kim, Nelson, Startz (1991).
|
||||
# We want to use the distribution of realworld returns data, without
|
||||
# needing assumptions about normality.
|
||||
# The null is lack of dependence (i.e. an efficient market).
|
||||
# So repeatedly, the data is permuted, and the sample ACF is computed.
|
||||
# This gives us the distribution of the ACF under H0: independence, but
|
||||
# while using the empirical distribution of the returns data.
|
||||
|
||||
# Weekly returns on Nifty, 1/1/2002 to 31/12/2003, 104 weeks of data.
|
||||
r <- c(-0.70031182197603, 0.421690133064168, -1.20098072984689, 0.143402360644984, 3.81836537549516, 3.17055939373247, 0.305580301919228, 1.23853814691852, 0.81584795095706, -1.51865139747764, -2.71223626421522, -0.784836480094242, 1.09180041170998, 0.397649587762761, -4.11309534220923, -0.263912425099111, -0.0410144239805454, 1.75756212770972, -2.3335373897992, -2.19228764624217, -3.64578978183987, 1.92535789661354, 3.45782867883164, -2.15532607229374, -0.448039988298987, 1.50124793565896, -1.45871585874362, -2.13459863369767, -6.2128068251802, -1.94482987066289, 0.751294815735637, 1.78244982829590, 1.61567494389745, 1.53557708728931, -1.53557708728931, -0.322061470004265, -2.28394919698225, 0.70399304137414, -2.93580952607737, 2.38125098034425, 0.0617697039252185, -4.14482733720716, 2.04397528093754, 0.576400673606603, 3.43072725191913, 2.96465382864843, 2.89833358015583, 1.85387040058336, 1.52136515035952, -0.637268376944444, 1.75418926224609, -0.804391905851354, -0.861816058320475, 0.576902488444109, -2.84259880663331, -1.35375536139417, 1.49096529042234, -2.05404881010045, 2.86868849528146, -0.258270670200478, -4.4515881438687, -1.73055019137092, 3.04427015714648, -2.94928202352018, 1.62081315773994, -6.83117945164824, -0.962715713711582, -1.75875847071740, 1.50330330252721, -0.0479705789653728, 3.68968303215933, -0.535807567290103, 3.94034871061182, 3.85787174417738, 0.932185956989873, 4.08598654183674, 2.27343783689715, 1.13958830440017, 2.01737201171230, -1.88131458327554, 1.97596267156648, 2.79857144562001, 2.22470306481695, 2.03212951411427, 4.95626853448883, 3.40400972901396, 3.03840139165246, -1.89863129741417, -3.70832135042951, 4.78478922155396, 4.3973589590097, 4.9667050392987, 2.99775078737081, -4.12349101552438, 3.25638269809945, 2.29683376253966, -2.64772825878214, -0.630835277076258, 4.72528848505451, 1.87368447333380, 3.17543946162564, 4.58174427843208, 3.23625985632168, 2.29777651227296)
|
||||
|
||||
# The 1st autocorrelation from the sample:
|
||||
acf(r, 1, plot=FALSE)$acf[2]
|
||||
|
||||
# Obtain 1000 draws from the distribution of the 1st autocorrelation
|
||||
# under the null of independence:
|
||||
# Seed the random number generator so the permutation draws are
# reproducible. (Assigning `set.seed <- 101` would only create a variable
# and leave the generator unseeded.)
set.seed(101)
|
||||
simulated <- replicate(1000, acf(r[sample(1:104, replace=FALSE)], 1, plot=FALSE)$acf[2])
|
||||
# At 95% --
|
||||
quantile(simulated, probs=c(.025,.975))
|
||||
# At 99% --
|
||||
quantile(simulated, probs=c(.005,.995))
|
||||
|
||||
# So we can reject the null at 95% but not at 99%.
|
||||
|
||||
# A pretty picture.
|
||||
plot(density(simulated), col="blue")
|
||||
abline(v=0)
|
||||
abline(v=quantile(simulated, probs=c(.025,.975)), lwd=2, col="purple")
|
||||
abline(v=acf(r, 1, plot=FALSE)$acf[2], lty=2, lwd=4, col="yellow")
|
52
r/Associative arrays (as in awk) or hashes (as in perl).r
Normal file
52
r/Associative arrays (as in awk) or hashes (as in perl).r
Normal file
@ -0,0 +1,52 @@
|
||||
# Goal: Associative arrays (as in awk) or hashes (as in perl).
|
||||
# Or, more generally, adventures in R addressing.
|
||||
|
||||
# Here's a plain R vector:
|
||||
x <- c(2,3,7,9)
|
||||
# But now I tag every elem with labels:
|
||||
names(x) <- c("kal","sho","sad","aja")
|
||||
# Associative array operations:
|
||||
x["kal"] <- 12
|
||||
# Pretty printing the entire associative array:
|
||||
x
|
||||
|
||||
# This works for matrices too:
|
||||
m <- matrix(runif(10), nrow=5)
|
||||
rownames(m) <- c("violet","indigo","blue","green","yellow")
|
||||
colnames(m) <- c("Asia","Africa")
|
||||
# The full matrix --
|
||||
m
|
||||
# Or even better --
|
||||
library(xtable)
|
||||
xtable(m)
|
||||
|
||||
# Now address symbolically --
|
||||
m[,"Africa"]
|
||||
m["indigo",]
|
||||
m["indigo","Africa"]
|
||||
|
||||
# The "in" operator, as in awk --
|
||||
for (colour in c("yellow", "orange", "red")) {
|
||||
if (colour %in% rownames(m)) {
|
||||
cat("For Africa and ", colour, " we have ", m[colour, "Africa"], "\n")
|
||||
} else {
|
||||
cat("Colour ", colour, " does not exist in the hash.\n")
|
||||
}
|
||||
}
|
||||
|
||||
# This works for data frames also --
|
||||
D <- data.frame(m)
|
||||
D
|
||||
# Look closely at what happened --
|
||||
str(D) # The colours are the rownames(D).
|
||||
|
||||
# Operations --
|
||||
D$Africa
|
||||
D[,"Africa"]
|
||||
D["yellow",]
|
||||
# or
|
||||
subset(D, rownames(D)=="yellow")
|
||||
|
||||
colnames(D) <- c("Antarctica","America")
|
||||
D
|
||||
D$America
|
16
r/Binary to Decimal.r
Normal file
16
r/Binary to Decimal.r
Normal file
@ -0,0 +1,16 @@
|
||||
Decimal to Binary
|
||||
# Program to convert decimal
|
||||
# number into binary number
|
||||
# using recursive function
|
||||
|
||||
convert_to_binary <- function(n) {
  # Print the binary digits of n with cat(), most significant digit first.
  # The digits for the integer half of n are emitted by the recursive call
  # before this call writes its own remainder.
  has_higher_digits <- n > 1
  if (has_higher_digits) {
    quotient <- as.integer(n / 2)
    convert_to_binary(quotient)
  }
  remainder <- n %% 2
  cat(remainder)
}
|
||||
Output
|
||||
|
||||
> convert_to_binary(52)
|
||||
110100
|
25
r/Check Armstrong number.r
Normal file
25
r/Check Armstrong number.r
Normal file
@ -0,0 +1,25 @@
|
||||
Check Armstrong number
|
||||
# take input from the user
|
||||
num = as.integer(readline(prompt="Enter a number: "))
|
||||
|
||||
# initialize sum
|
||||
sum = 0
|
||||
|
||||
# find the sum of the cube of each digit
|
||||
temp = num
|
||||
while(temp > 0) {
|
||||
digit = temp %% 10
|
||||
sum = sum + (digit ^ 3)
|
||||
temp = floor(temp / 10)
|
||||
}
|
||||
|
||||
# display the result
|
||||
if(num == sum) {
|
||||
print(paste(num, "is an Armstrong number"))
|
||||
} else {
|
||||
print(paste(num, "is not an Armstrong number"))
|
||||
}
|
||||
Output 1
|
||||
|
||||
Enter a number: 23
|
||||
[1] "23 is not an Armstrong number"
|
27
r/Check Leap Year.r
Normal file
27
r/Check Leap Year.r
Normal file
@ -0,0 +1,27 @@
|
||||
Check Leap Year
|
||||
# Program to check if
|
||||
# the input year is
|
||||
# a leap year or not
|
||||
|
||||
year = as.integer(readline(prompt="Enter a year: "))
|
||||
if((year %% 4) == 0) {
|
||||
if((year %% 100) == 0) {
|
||||
if((year %% 400) == 0) {
|
||||
print(paste(year,"is a leap year"))
|
||||
} else {
|
||||
print(paste(year,"is not a leap year"))
|
||||
}
|
||||
} else {
|
||||
print(paste(year,"is a leap year"))
|
||||
}
|
||||
} else {
|
||||
print(paste(year,"is not a leap year"))
|
||||
}
|
||||
Output 1
|
||||
|
||||
Enter a year: 1900
|
||||
[1] "1900 is not a leap year"
|
||||
Output 2
|
||||
|
||||
Enter a year: 2000
|
||||
[1] "2000 is a leap year"
|
21
r/Check Odd and Even Number.r
Normal file
21
r/Check Odd and Even Number.r
Normal file
@ -0,0 +1,21 @@
|
||||
Check Odd and Even Number
|
||||
# Program to check if
|
||||
# the input number is odd or even.
|
||||
# A number is even if division
|
||||
# by 2 gives a remainder of 0.
|
||||
# If remainder is 1, it is odd.
|
||||
|
||||
num = as.integer(readline(prompt="Enter a number: "))
|
||||
if((num %% 2) == 0) {
|
||||
print(paste(num,"is Even"))
|
||||
} else {
|
||||
print(paste(num,"is Odd"))
|
||||
}
|
||||
Output 1
|
||||
|
||||
Enter a number: 89
|
||||
[1] "89 is Odd"
|
||||
Output 2
|
||||
|
||||
Enter a number: 0
|
||||
[1] "0 is Even"
|
24
r/Check Positive, Negative or Zero.r
Normal file
24
r/Check Positive, Negative or Zero.r
Normal file
@ -0,0 +1,24 @@
|
||||
Check Positive, Negative or Zero
|
||||
# In this program, we input a number
|
||||
# check if the number is positive or
|
||||
# negative or zero and display
|
||||
# an appropriate message
|
||||
|
||||
num = as.double(readline(prompt="Enter a number: "))
|
||||
if(num > 0) {
|
||||
print("Positive number")
|
||||
} else {
|
||||
if(num == 0) {
|
||||
print("Zero")
|
||||
} else {
|
||||
print("Negative number")
|
||||
}
|
||||
}
|
||||
Output 1
|
||||
|
||||
Enter a number: -9.6
|
||||
[1] "Negative number"
|
||||
Output 2
|
||||
|
||||
Enter a number: 2
|
||||
[1] "Positive number"
|
27
r/Check Prime Number.r
Normal file
27
r/Check Prime Number.r
Normal file
@ -0,0 +1,27 @@
|
||||
Check Prime Number
|
||||
# Program to check if
|
||||
# the input number is
|
||||
# prime or not
|
||||
|
||||
# take input from the user
|
||||
num = as.integer(readline(prompt="Enter a number: "))
|
||||
|
||||
flag = 0
|
||||
# prime numbers are greater than 1
|
||||
if(num > 1) {
|
||||
# check for factors
|
||||
flag = 1
|
||||
for(i in 2:(num-1)) {
|
||||
if ((num %% i) == 0) {
|
||||
flag = 0
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(num == 2) flag = 1
|
||||
if(flag == 1) {
|
||||
print(paste(num,"is a prime number"))
|
||||
} else {
|
||||
print(paste(num,"is not a prime number"))
|
||||
}
|
30
r/Compute LCM in R.r
Normal file
30
r/Compute LCM in R.r
Normal file
@ -0,0 +1,30 @@
|
||||
Compute LCM in R
|
||||
# Program to find the L.C.M. of two input numbers
|
||||
lcm <- function(x, y) {
  # Least common multiple of two numbers.
  #
  # Args:
  #   x, y: the two numbers whose L.C.M. is wanted.
  # Returns:
  #   The smallest positive common multiple of x and y, or 0 when either
  #   argument is 0.
  #
  # Uses lcm(x, y) = |x| * |y| / gcd(x, y) instead of counting upwards from
  # the larger argument: that search needed as many steps as the answer is
  # large and failed on a zero argument, because `greater %% 0` is NaN.
  if (x == 0 || y == 0) {
    return(0)
  }
  a <- abs(x)
  b <- abs(y)
  # Euclid's algorithm: when the loop ends, `a` holds gcd(|x|, |y|).
  while (b != 0) {
    remainder <- a %% b
    a <- b
    b <- remainder
  }
  # Divide before multiplying to keep the intermediate value small.
  abs(x) / a * abs(y)
}
|
||||
|
||||
# take input from the user
|
||||
num1 = as.integer(readline(prompt = "Enter first number: "))
|
||||
num2 = as.integer(readline(prompt = "Enter second number: "))
|
||||
|
||||
print(paste("The L.C.M. of", num1,"and", num2,"is", lcm(num1, num2)))
|
||||
Output
|
||||
|
||||
Enter first number: 24
|
||||
Enter second number: 25
|
||||
[1] "The L.C.M. of 24 and 25 is 600"
|
@ -0,0 +1,15 @@
|
||||
# Goals: Do bootstrap inference, as an example, for a sample median.
|
||||
|
||||
library(boot)
|
||||
|
||||
samplemedian <- function(x, d) {
  # Statistic function handed to boot(): x is the data and d is a vector of
  # integer indexes selecting the resampled observations.
  resampled <- x[d]
  median(resampled)
}
|
||||
|
||||
data <- rnorm(50) # Generate a dataset with 50 obs
|
||||
b <- boot(data, samplemedian, R=2000) # 2000 bootstrap replications
|
||||
cat("Sample median has a sigma of ", sd(b$t[,1]), "\n")
|
||||
plot(b)
|
||||
|
||||
# Make a 99% confidence interval
|
||||
boot.ci(b, conf=0.99, type="basic")
|
44
r/Doing OLS.r
Normal file
44
r/Doing OLS.r
Normal file
@ -0,0 +1,44 @@
|
||||
# Goal: Simulate a dataset from the OLS model and obtain
|
||||
# OLS estimates for it.
|
||||
|
||||
x <- runif(100, 0, 10) # 100 draws from U(0,10)
|
||||
y <- 2 + 3*x + rnorm(100) # beta = [2, 3] and sigma = 1
|
||||
|
||||
# You want to just look at OLS results?
|
||||
summary(lm(y ~ x))
|
||||
|
||||
# Suppose x and y were packed together in a data frame --
|
||||
D <- data.frame(x,y)
|
||||
summary(lm(y ~ x, D))
|
||||
|
||||
# Full and elaborate steps --
|
||||
d <- lm(y ~ x)
|
||||
# Learn about this object by saying ?lm and str(d)
|
||||
# Compact model results --
|
||||
print(d)
|
||||
# Pretty graphics for regression diagnostics --
|
||||
par(mfrow=c(2,2))
|
||||
plot(d)
|
||||
|
||||
d <- summary(d)
|
||||
# Detailed model results --
|
||||
print(d)
|
||||
# Learn about this object by saying ?summary.lm and by saying str(d)
|
||||
cat("OLS gave slope of ", d$coefficients[2,1],
|
||||
"and a error sigma of ", d$sigma, "\n")
|
||||
|
||||
|
||||
## I need to drop down to a smaller dataset now --
|
||||
x <- runif(10)
|
||||
y <- 2 + 3*x + rnorm(10)
|
||||
m <- lm(y ~ x)
|
||||
|
||||
# Now R supplies a wide range of generic functions which extract
|
||||
# useful things out of the result of estimation of many kinds of models.
|
||||
|
||||
residuals(m)
|
||||
fitted(m)
|
||||
AIC(m)
|
||||
AIC(m, k=log(10)) # SBC
|
||||
vcov(m)
|
||||
logLik(m)
|
55
r/Dummy variables in regression.r
Normal file
55
r/Dummy variables in regression.r
Normal file
@ -0,0 +1,55 @@
|
||||
# Goal: "Dummy variables" in regression.
|
||||
|
||||
# Suppose you have this data:
|
||||
people <- data.frame(
  age = c(21, 62, 54, 49, 52, 38),
  education = c("college", "school", "none", "school", "college", "none"),
  education.code = c(2, 1, 0, 1, 2, 0),
  # Since R 4.0.0 data.frame() keeps strings as character by default. Ask
  # for factors explicitly so that `education` really is the factor that
  # the rest of this script describes.
  stringsAsFactors = TRUE
)
|
||||
# Here people$education is a string categorical variable and
|
||||
# people$education.code is the same thing, with a numerical coding system.
|
||||
people
|
||||
|
||||
# Note the structure of the dataset --
|
||||
str(people)
|
||||
# The strings supplied for `education' have been treated (correctly) as
|
||||
# a factor, but education.code is being treated as an integer and not as
|
||||
# a factor.
|
||||
|
||||
|
||||
# We want to do a dummy variable regression. Normally you would have:
|
||||
# 1 Chosen college as the omitted category
|
||||
# 2 Made a dummy for "none" named educationnone
|
||||
# 3 Made a dummy for "school" named educationschool
|
||||
# 4 Ran a regression like lm(age ~ educationnone + educationschool, people)
|
||||
# But this is R. Things are cool:
|
||||
lm(age ~ education, people)
|
||||
|
||||
# ! :-)
|
||||
# When you feed him an explanatory variable like education, he does all
|
||||
# these steps automatically. (He chose college as the omitted category).
|
||||
|
||||
# If you use an integer coding, then the obvious thing goes wrong --
|
||||
lm(age ~ education.code, people)
|
||||
# because he's thinking that education.code is an integer explanatory
|
||||
# variable. So you need to:
|
||||
|
||||
lm(age ~ factor(education.code), people)
|
||||
# (he chose a different omitted category)
|
||||
|
||||
# Alternatively, fix up the dataset --
|
||||
people$education.code <- factor(people$education.code)
|
||||
lm(age ~ education.code, people)
|
||||
|
||||
#
|
||||
# Bottom line:
|
||||
# Once the dataset has categorical variables correctly represented as factors, i.e. as
|
||||
str(people)
|
||||
# doing OLS in R induces automatic generation of dummy variables while leaving one out:
|
||||
lm(age ~ education, people)
|
||||
lm(age ~ education.code, people)
|
||||
|
||||
# But what if you want the X matrix?
|
||||
m <- lm(age ~ education, people)
|
||||
model.matrix(m)
|
||||
# This is the design matrix that went into the regression m.
|
38
r/Fibonacci Sequence in R.r
Normal file
38
r/Fibonacci Sequence in R.r
Normal file
@ -0,0 +1,38 @@
|
||||
Fibonacci Sequence in R
|
||||
# Program to display the Fibonacci
|
||||
# sequence up to n-th term using
|
||||
# recursive functions
|
||||
|
||||
recurse_fibonacci <- function(n) {
  # n-th Fibonacci number, counting from recurse_fibonacci(0) == 0.
  # Values of n up to 1 are returned unchanged; larger ones are the sum of
  # the two preceding terms, each computed recursively.
  if (n > 1) {
    one_back <- recurse_fibonacci(n - 1)
    two_back <- recurse_fibonacci(n - 2)
    return(one_back + two_back)
  }
  n
}
|
||||
|
||||
# take input from the user
|
||||
nterms = as.integer(readline(prompt="How many terms? "))
|
||||
|
||||
# check if the number of terms is valid
|
||||
# Reject anything that is not a positive count. as.integer() yields NA for
# non-numeric input, which would otherwise make the comparison fail.
if (is.na(nterms) || nterms <= 0) {
  print("Please enter a positive integer")
} else {
  print("Fibonacci sequence:")
  # nterms is at least 1 here, so 0:(nterms - 1) always counts upwards.
  for (i in 0:(nterms - 1)) {
    print(recurse_fibonacci(i))
  }
}
|
||||
Output
|
||||
|
||||
How many terms? 9
|
||||
[1] "Fibonacci sequence:"
|
||||
[1] 0
|
||||
[1] 1
|
||||
[1] 1
|
||||
[1] 2
|
||||
[1] 3
|
||||
[1] 5
|
||||
[1] 8
|
||||
[1] 13
|
||||
[1] 21
|
14
r/Find Factorial of a number using recursion.r
Normal file
14
r/Find Factorial of a number using recursion.r
Normal file
@ -0,0 +1,14 @@
|
||||
Find Factorial of a number using recursion
|
||||
recur_factorial <- function(n) {
  # Factorial of n by recursion: n times the factorial of n - 1, with every
  # n of 1 or less giving 1.
  if (n > 1) {
    return(n * recur_factorial(n - 1))
  }
  1
}
|
||||
|
||||
|
||||
Output
|
||||
|
||||
> recur_factorial(5)
|
||||
[1] 120
|
33
r/Find Minimum and Maximum.r
Normal file
33
r/Find Minimum and Maximum.r
Normal file
@ -0,0 +1,33 @@
|
||||
Find Minimum and Maximum
|
||||
> x
|
||||
[1] 5 8 3 9 2 7 4 6 10
|
||||
|
||||
> # find the minimum
|
||||
> min(x)
|
||||
[1] 2
|
||||
|
||||
> # find the maximum
|
||||
> max(x)
|
||||
[1] 10
|
||||
|
||||
> # find the range
|
||||
> range(x)
|
||||
[1] 2 10
|
||||
If we want to find where the minimum or maximum is located, i.e. the index instead of the actual value, then we can use which.min() and which.max() functions.
|
||||
|
||||
Note that these functions will return the index of the first minimum or maximum in case several of them exist.
|
||||
|
||||
> x
|
||||
[1] 5 8 3 9 2 7 4 6 10
|
||||
|
||||
> # find index of the minimum
|
||||
> which.min(x)
|
||||
[1] 5
|
||||
|
||||
> # find index of the maximum
|
||||
> which.max(x)
|
||||
[1] 9
|
||||
|
||||
> # alternate way to find the minimum
|
||||
> x[which.min(x)]
|
||||
[1] 2
|
29
r/Find factors of a number.r
Normal file
29
r/Find factors of a number.r
Normal file
@ -0,0 +1,29 @@
|
||||
Find factors of a number
|
||||
print_factors <- function(x) {
  # Print a header line followed by every positive factor of x, one per
  # line, in increasing order.
  #
  # Args:
  #   x: the number to factorise.
  #
  # Candidates come from seq_len() rather than 1:x, because 1:x counts
  # down to 0 (or below) when x is not positive and `x %% 0` is NaN, which
  # cannot be used as an `if` condition. For x == 0 only the header is
  # printed; for negative x the factors of abs(x) are printed.
  print(paste("The factors of", x, "are:"))
  candidates <- seq_len(abs(x))
  for (i in candidates[x %% candidates == 0]) {
    print(i)
  }
}
|
||||
Output
|
||||
|
||||
> print_factors(120)
|
||||
[1] "The factors of 120 are:"
|
||||
[1] 1
|
||||
[1] 2
|
||||
[1] 3
|
||||
[1] 4
|
||||
[1] 5
|
||||
[1] 6
|
||||
[1] 8
|
||||
[1] 10
|
||||
[1] 12
|
||||
[1] 15
|
||||
[1] 20
|
||||
[1] 24
|
||||
[1] 30
|
||||
[1] 40
|
||||
[1] 60
|
||||
[1] 120
|
21
r/Find sum of natural numbers without formula.r
Normal file
21
r/Find sum of natural numbers without formula.r
Normal file
@ -0,0 +1,21 @@
|
||||
Find sum of natural numbers without formula
|
||||
# take input from the user
|
||||
num = as.integer(readline(prompt = "Enter a number: "))
|
||||
|
||||
if(num < 0) {
|
||||
print("Enter a positive number")
|
||||
} else {
|
||||
sum = 0
|
||||
|
||||
# use while loop to iterate until zero
|
||||
while(num > 0) {
|
||||
sum = sum + num
|
||||
num = num - 1
|
||||
}
|
||||
|
||||
print(paste("The sum is", sum))
|
||||
}
|
||||
Output
|
||||
|
||||
Enter a number: 10
|
||||
[1] "The sum is 55"
|
20
r/Find the factorial of a number.r
Normal file
20
r/Find the factorial of a number.r
Normal file
@ -0,0 +1,20 @@
|
||||
#Find the factorial of a number
|
||||
# take input from the user
|
||||
num = as.integer(readline(prompt="Enter a number: "))
|
||||
factorial = 1
|
||||
|
||||
# check if the number is negative, positive or zero
|
||||
if(num < 0) {
|
||||
print("Sorry, factorial does not exist for negative numbers")
|
||||
} else if(num == 0) {
|
||||
print("The factorial of 0 is 1")
|
||||
} else {
|
||||
for(i in 1:num) {
|
||||
factorial = factorial * i
|
||||
}
|
||||
print(paste("The factorial of", num ,"is",factorial))
|
||||
}
|
||||
Output
|
||||
|
||||
Enter a number: 8
|
||||
[1] "The factorial of 8 is 40320"
|
8
r/From Uniform Distribution.r
Normal file
8
r/From Uniform Distribution.r
Normal file
@ -0,0 +1,8 @@
|
||||
> runif(1) # generates 1 random number
|
||||
[1] 0.3984754
|
||||
|
||||
> runif(3) # generates 3 random numbers
|
||||
[1] 0.8090284 0.1797232 0.6803607
|
||||
|
||||
> runif(3, min=5, max=10) # define the range between 5 and 10
|
||||
[1] 7.099781 8.355461 5.173133
|
22
r/Handling missing data.r
Normal file
22
r/Handling missing data.r
Normal file
@ -0,0 +1,22 @@
|
||||
# Goal:
|
||||
# A stock is traded on 2 exchanges.
|
||||
# Price data is missing at random on both exchanges owing to non-trading.
|
||||
# We want to make a single price time-series utilising information
|
||||
# from both exchanges. I.e., missing data for exchange 1 will
|
||||
# be replaced by information for exchange 2 (if observed).
|
||||
|
||||
# Let's create some example data for the problem.
|
||||
e1 <- runif(15) # Prices on exchange 1
|
||||
e2 <- e1 + 0.05*rnorm(15) # Prices on exchange 2.
|
||||
cbind(e1, e2)
|
||||
# Blow away 5 points from each at random.
|
||||
e1[sample(1:15, 5)] <- NA
|
||||
e2[sample(1:15, 5)] <- NA
|
||||
cbind(e1, e2)
|
||||
|
||||
# Now how do we reconstruct a time-series that tries to utilise both?
|
||||
combined <- e1 # Do use the more liquid exchange here.
|
||||
missing <- is.na(combined)
|
||||
combined[missing] <- e2[missing] # if it's also missing, I don't care.
|
||||
cbind(e1, e2, combined)
|
||||
# There you are.
|
@ -0,0 +1,43 @@
|
||||
# Goal: Joint distributions, marginal distributions, useful tables.
|
||||
|
||||
# First let me invent some fake data
|
||||
set.seed(102) # This yields a good illustration.
|
||||
x <- sample(1:3, 15, replace=TRUE)
|
||||
education <- factor(x, labels=c("None", "School", "College"))
|
||||
x <- sample(1:2, 15, replace=TRUE)
|
||||
gender <- factor(x, labels=c("Male", "Female"))
|
||||
age <- runif(15, min=20,max=60)
|
||||
|
||||
D <- data.frame(age, gender, education)
|
||||
rm(x,age,gender,education)
|
||||
print(D)
|
||||
|
||||
# Table about education
|
||||
table(D$education)
|
||||
|
||||
# Table about education and gender --
|
||||
table(D$gender, D$education)
|
||||
# Joint distribution of education and gender --
|
||||
table(D$gender, D$education)/nrow(D)
|
||||
|
||||
# Add in the marginal distributions also
|
||||
addmargins(table(D$gender, D$education))
|
||||
addmargins(table(D$gender, D$education))/nrow(D)
|
||||
|
||||
# Generate a good LaTeX table out of it --
|
||||
library(xtable)
|
||||
xtable(addmargins(table(D$gender, D$education))/nrow(D),
|
||||
digits=c(0,2,2,2,2)) # You have to do | and \hline manually.
|
||||
|
||||
# Study age by gender
|
||||
by(D$age, D$gender, mean)
|
||||
by(D$age, D$gender, sd)
|
||||
by(D$age, D$gender, summary)
|
||||
|
||||
# Two-way table showing average age depending on education & gender
|
||||
a <- matrix(by(D$age, list(D$gender, D$education), mean), nrow=2)
|
||||
rownames(a) <- levels(D$gender)
|
||||
colnames(a) <- levels(D$education)
|
||||
print(a)
|
||||
# or, of course,
|
||||
print(xtable(a))
|
@ -0,0 +1,23 @@
|
||||
# Goals: Simulate a dataset from a "fixed effects" model, and
|
||||
# obtain "least squares dummy variable" (LSDV) estimates.
|
||||
#
|
||||
# We do this in the context of a familiar "earnings function" -
|
||||
# log earnings is quadratic in log experience, with parallel shifts by
|
||||
# education category.
|
||||
|
||||
# Create an education factor with 4 levels --
|
||||
education <- factor(sample(1:4,1000, replace=TRUE),
|
||||
labels=c("none", "school", "college", "beyond"))
|
||||
# Simulate an experience variable with a plausible range --
|
||||
experience <- 30*runif(1000) # experience from 0 to 30 years
|
||||
# Make the intercept vary by education category between 4 given values --
|
||||
intercept <- c(0.5,1,1.5,2)[education]
|
||||
|
||||
# Simulate the log earnings --
|
||||
log.earnings <- intercept +
|
||||
2*experience - 0.05*experience*experience + rnorm(1000)
|
||||
A <- data.frame(education, experience, e2=experience*experience, log.earnings)
|
||||
summary(A)
|
||||
|
||||
# The OLS path to LSDV --
|
||||
summary(lm(log.earnings ~ -1 + education + experience + e2, A))
|
31
r/Make a time-series object using the zoo package.r
Normal file
31
r/Make a time-series object using the zoo package.r
Normal file
@ -0,0 +1,31 @@
|
||||
# Goal: Make a time-series object using the "zoo" package
|
||||
|
||||
A <- data.frame(date=c("1995-01-01", "1995-01-02", "1995-01-03", "1995-01-06"),
|
||||
x=runif(4),
|
||||
y=runif(4))
|
||||
A$date <- as.Date(A$date) # yyyy-mm-dd is the default format
|
||||
# So far there's nothing new - it's just a data frame. I have hand-
|
||||
# constructed A but you could equally have obtained it using read.table().
|
||||
|
||||
# I want to make a zoo matrix out of the numerical columns of A
|
||||
library(zoo)
|
||||
B <- A
|
||||
B$date <- NULL
|
||||
z <- zoo(as.matrix(B), order.by=A$date)
|
||||
rm(A, B)
|
||||
|
||||
# So now you are holding "z", a "zoo" object. You can do many cool
|
||||
# things with it.
|
||||
# See http://www.google.com/search?hl=en&q=zoo+quickref+achim&btnI=I%27m+Feeling+Lucky
|
||||
|
||||
# To drop down to a plain data matrix, say
|
||||
C <- coredata(z)
|
||||
rownames(C) <- as.character(time(z))
|
||||
# Compare --
|
||||
str(C)
|
||||
str(z)
|
||||
|
||||
# The above is a tedious way of doing these things, designed to give you
|
||||
# an insight into what is going on. If you just want to read a file
|
||||
# into a zoo object, a very short path is something like:
|
||||
# z <- read.zoo(filename, format="%d %b %Y")
|
20
r/Make pictures in PDF files that can be put into a paper.r
Normal file
20
r/Make pictures in PDF files that can be put into a paper.r
Normal file
@ -0,0 +1,20 @@
|
||||
# Goal: Make pictures in PDF files that can be put into a paper.
|
||||
|
||||
xpts <- seq(-3,3,.05)
|
||||
|
||||
# Here is my suggested setup for a two-column picture --
|
||||
pdf("demo2.pdf", width=5.6, height=2.8, bg="cadetblue1", pointsize=8)
|
||||
par(mai=c(.6,.6,.2,.2))
|
||||
plot(xpts, sin(xpts*xpts), type="l", lwd=2, col="cadetblue4",
|
||||
xlab="x", ylab="sin(x*x)")
|
||||
grid(col="white", lty=1, lwd=.2)
|
||||
abline(h=0, v=0)
|
||||
|
||||
# My suggested setup for a square one-column picture --
|
||||
pdf("demo1.pdf", width=2.8, height=2.8, bg="cadetblue1", pointsize=8)
|
||||
par(mai=c(.6,.6,.2,.2))
|
||||
plot(xpts, sin(xpts*xpts), type="l", lwd=2, col="cadetblue4",
|
||||
xlab="x", ylab="sin(x*x)")
|
||||
grid(col="white", lty=1, lwd=.2)
|
||||
abline(h=0, v=0)
|
||||
|
25
r/Multiplication Table.r
Normal file
25
r/Multiplication Table.r
Normal file
@ -0,0 +1,25 @@
|
||||
Multiplication Table
|
||||
# Program to find the multiplication
|
||||
# table (from 1 to 10)
|
||||
# of a number input by the user
|
||||
|
||||
# take input from the user
|
||||
# Ask the user which number's multiplication table to print.
num <- as.integer(readline(prompt = "Enter a number: "))

# Print one "num x i = product" line for each multiplier from 1 to 10.
for (i in seq_len(10)) {
  product <- num * i
  print(paste(num, "x", i, "=", product))
}
|
||||
Output
|
||||
|
||||
Enter a number: 7
|
||||
[1] "7 x 1 = 7"
|
||||
[1] "7 x 2 = 14"
|
||||
[1] "7 x 3 = 21"
|
||||
[1] "7 x 4 = 28"
|
||||
[1] "7 x 5 = 35"
|
||||
[1] "7 x 6 = 42"
|
||||
[1] "7 x 7 = 49"
|
||||
[1] "7 x 8 = 56"
|
||||
[1] "7 x 9 = 63"
|
||||
[1] "7 x 10 = 70"
|
@ -0,0 +1,13 @@
|
||||
# Goal: Display two series on one plot, one with a left y axis
|
||||
# and another with a right y axis.
|
||||
|
||||
y1 <- cumsum(rnorm(100))
|
||||
y2 <- cumsum(rnorm(100, mean=0.2))
|
||||
|
||||
par(mai=c(.8, .8, .2, .8))
|
||||
plot(1:100, y1, type="l", col="blue", xlab="X axis label", ylab="Left legend")
|
||||
par(new=TRUE)
|
||||
plot(1:100, y2, type="l", ann=FALSE, yaxt="n")
|
||||
axis(4)
|
||||
legend(x="topleft", bty="n", lty=c(1,1), col=c("blue","black"),
|
||||
legend=c("String 1 (left scale)", "String 2 (right scale)"))
|
99
r/Prices and returns.r
Normal file
99
r/Prices and returns.r
Normal file
@ -0,0 +1,99 @@
|
||||
# Goal: Prices and returns
|
||||
|
||||
# I like to multiply returns by 100 so as to have "units in percent".
|
||||
# In other words, I like it for 5% to be a value like 5 rather than 0.05.
|
||||
|
||||
###################################################################
|
||||
# I. Simulate random-walk prices, switch between prices & returns.
|
||||
###################################################################
|
||||
# Simulate a time-series of PRICES drawn from a random walk
|
||||
# where one-period returns are i.i.d. N(mu, sigma^2).
|
||||
# Simulate a price path whose one-period percentage returns are i.i.d.
# N(mu, sigma^2).
#
# Args:
#   mu:    mean one-period return, in percent.
#   sigma: standard deviation of the one-period return, in percent.
#   p0:    starting price (first element of the result).
#   T:     number of simulated periods.
# Returns:
#   A numeric vector of length T + 1: p0 followed by T simulated prices.
ranrw <- function(mu, sigma, p0=100, T=100) {
  # Percent returns become gross growth factors: 1 + r/100.
  pct_returns <- rnorm(n = T, mean = mu, sd = sigma)
  growth_factors <- 1 + pct_returns / 100
  # Compounding the factors from p0 onwards yields the price levels.
  cumprod(c(p0, growth_factors))
}
|
||||
# Convert a series of prices into continuously compounded returns,
# expressed in percent (so 5% is 5, not 0.05).
#
# Args:
#   x: vector of prices.
# Returns:
#   100 times the first difference of log(x).
prices2returns <- function(x) {
  log_prices <- log(x)
  100 * diff(log_prices)
}
|
||||
# Rebuild a price series from percent log returns.
#
# Args:
#   r:  vector of log returns in percent.
#   p0: starting price (first element of the result).
# Returns:
#   A vector with p0 followed by p0 compounded by the cumulative returns.
returns2prices <- function(r, p0=100) {
  # Cumulative log return (as a fraction) up to each period, exponentiated
  # into a growth multiple relative to p0.
  cumulative_growth <- exp(cumsum(r / 100))
  c(p0, p0 * cumulative_growth)
}
|
||||
|
||||
cat("Simulate 25 points from a random walk starting at 1500 --\n")
|
||||
p <- ranrw(0.05, 1.4, p0=1500, T=25)
|
||||
# gives you a 26-long series (1500 followed by 25 simulated prices), where
|
||||
# one-period returns are N(0.05,1.4^2) percent.
|
||||
print(p)
|
||||
|
||||
cat("Convert to returns--\n")
|
||||
r <- prices2returns(p)
|
||||
print(r)
|
||||
|
||||
cat("Go back from returns to prices --\n")
|
||||
goback <- returns2prices(r, 1500)
|
||||
print(goback)
|
||||
|
||||
###################################################################
|
||||
# II. Plenty of powerful things you can do with returns....
|
||||
###################################################################
|
||||
summary(r); sd(r) # summary statistics
|
||||
plot(density(r)) # kernel density plot
|
||||
acf(r) # Autocorrelation function
|
||||
ar(r) # Estimate an AIC-minimising AR model
|
||||
Box.test(r, lag=2, type="Ljung") # Box-Ljung test
|
||||
library(tseries)
|
||||
runs.test(factor(sign(r))) # Runs test
|
||||
bds.test(r) # BDS test.
|
||||
|
||||
###################################################################
|
||||
# III. Visualisation and the random walk
|
||||
###################################################################
|
||||
# I want to obtain intuition into what kinds of price series can happen,
|
||||
# given a starting price, a mean return, and a given standard deviation.
|
||||
# This function simulates out 10000 days of a price time-series at a time,
|
||||
# and waits for you to click in the graph window, after which a second
|
||||
# series is painted, and so on. Make the graph window very big and
|
||||
# sit back and admire.
|
||||
# The point is to eyeball many series and thus obtain some intuition
|
||||
# into what the random walk does.
|
||||
# Repeatedly simulate and plot a random-walk price path so that many
# realisations can be eyeballed one after another.
#
# Each pass draws N = 10000 simulated periods with ranrw() and plots them on a
# log y-axis against time in years (250 periods per year), then waits for
# a mouse click in the graph window before drawing the next path.
#
# Args:
#   p0:          starting price.
#   s:           per-period standard deviation of returns, in percent.
#   mu:          per-period mean return, in percent.
#   labelstring: text appended to the plot title.
# Returns:
#   NULL, invisibly, once the user ends the session.
visualisation <- function(p0, s, mu, labelstring) {
  N <- 10000
  periods_per_year <- 250
  x <- (1:(N+1))/periods_per_year # Unit of years
  years <- N / periods_per_year   # 40 with the values above
  repeat {
    plot(x, ranrw(mu, s, p0, N), ylab="Level", log="y",
         type="l", col="red", xlab="Time (years)",
         main=paste(years, "years of a process much like", labelstring))
    grid()
    # locator() returns NULL when the user terminates point selection
    # without clicking (and on devices that do not support it). Treat that
    # as "stop" -- previously the loop had no exit, so this function never
    # returned and any code after the call could not run.
    if (is.null(locator(1))) {
      break
    }
  }
  invisible(NULL)
}
|
||||
|
||||
# Nifty -- assuming sigma of 1.4% a day and E(returns) of 13% a year
|
||||
visualisation(2600, 1.4, 13/250, "Nifty")
|
||||
|
||||
# The numerical values here are used to think about what the INR/USD
|
||||
# exchange rate would have looked like if it started from 31.37, had
|
||||
# a mean depreciation of 5% per year, and had the daily vol of a floating
|
||||
# exchange rate like EUR/USD.
|
||||
visualisation(31.37, 0.7, 5/365, "INR/USD (NOT!) with daily sigma=0.7")
|
||||
# This is of course not like the INR/USD series in the real world -
|
||||
# which is neither a random walk nor does it have a vol of 0.7% a day.
|
||||
|
||||
# The numerical values here are used to think about what the USD/EUR
# exchange rate would look like if it started at 1, had no drift, and had
# the observed daily vol of 0.7. (This is about right).
|
||||
visualisation(1, 0.7, 0, "USD/EUR with no drift")
|
||||
|
||||
###################################################################
|
||||
# IV. A monte carlo experiment about the runs test
|
||||
###################################################################
|
||||
# Measure the effectiveness of the runs test when faced with an
|
||||
# AR(1) process of length 100 with a coeff of 0.1
|
||||
set.seed(101)
|
||||
# Draw one simulated AR(1) series of length 100 with coefficient 0.1.
one.ts <- function() {
  ar1_model <- list(order = c(1, 0, 0), ar = 0.1)
  arima.sim(ar1_model, n = 100)
}
|
||||
table(replicate(1000, runs.test(factor(sign(one.ts())))$p.value < 0.05))
|
||||
# We find that the runs test throws up a prob value of below 0.05
|
||||
# for 91 out of 1000 experiments.
|
||||
# Wow! :-)
|
||||
# To understand this, you need to look up the man pages of:
|
||||
# set.seed, arima.sim, sign, factor, runs.test, replicate, table.
|
||||
# e.g. say ?replicate
|
41
r/Print Fibonacci Sequence.r
Normal file
41
r/Print Fibonacci Sequence.r
Normal file
@ -0,0 +1,41 @@
|
||||
Print Fibonacci Sequence
|
||||
# take input from the user
|
||||
# Read how many Fibonacci terms to print. Non-numeric input makes
# as.integer() return NA (with a warning).
nterms <- as.integer(readline(prompt = "How many terms? "))

# first two terms
n1 <- 0
n2 <- 1
count <- 2

# check if the number of terms is valid. NA is tested first so that bad
# input produces the friendly message instead of an error from `if`.
if (is.na(nterms) || nterms <= 0) {
  print("Please enter a positive integer")
} else {
  print("Fibonacci sequence:")
  print(n1)
  if (nterms > 1) {
    print(n2)
    # Each further term is the sum of the previous two.
    while (count < nterms) {
      nth <- n1 + n2
      print(nth)
      # update values
      n1 <- n2
      n2 <- nth
      count <- count + 1
    }
  }
}
|
||||
Output
|
||||
|
||||
How many terms? 7
|
||||
[1] "Fibonacci sequence:"
|
||||
[1] 0
|
||||
[1] 1
|
||||
[1] 1
|
||||
[1] 2
|
||||
[1] 3
|
||||
[1] 5
|
||||
[1] 8
|
30
r/Program to Find GCD.r
Normal file
30
r/Program to Find GCD.r
Normal file
@ -0,0 +1,30 @@
|
||||
Program to Find GCD
|
||||
# Program to find the
|
||||
# H.C.F of two input number
|
||||
|
||||
# define a function
|
||||
# Compute the highest common factor (greatest common divisor) of two
# whole numbers using Euclid's algorithm.
#
# Args:
#   x, y: single whole numbers; the sign is ignored.
# Returns:
#   The largest non-negative whole number dividing both x and y.
#   hcf(0, n) is abs(n), and hcf(0, 0) is 0.
# Raises:
#   An error if either argument is missing (NA), not of length one, or
#   not a whole number.
hcf <- function(x, y) {
  stopifnot(
    length(x) == 1, length(y) == 1,
    !is.na(x), !is.na(y),
    x == trunc(x), y == trunc(y)
  )
  # Common factors do not depend on sign, so work with magnitudes.
  x <- abs(x)
  y <- abs(y)
  # Euclid: gcd(x, y) == gcd(y, x mod y); stop when the remainder is 0.
  # Unlike trial division over 1:min(x, y), this also handles a zero
  # argument (where 1:0 would count down and divide by zero).
  while (y != 0) {
    remainder <- x %% y
    x <- y
    y <- remainder
  }
  x
}
|
||||
|
||||
# take input from the user
|
||||
num1 = as.integer(readline(prompt = "Enter first number: "))
|
||||
num2 = as.integer(readline(prompt = "Enter second number: "))
|
||||
|
||||
print(paste("The H.C.F. of", num1,"and", num2,"is", hcf(num1, num2)))
|
||||
Output
|
||||
|
||||
Enter first number: 72
|
||||
Enter second number: 120
|
||||
[1] "The H.C.F. of 72 and 120 is 24"
|
52
r/Quartiles deciles tables graphs.r
Normal file
52
r/Quartiles deciles tables graphs.r
Normal file
@ -0,0 +1,52 @@
|
||||
# Get the data in place --
|
||||
load(file="demo.rda")
|
||||
summary(firms)
|
||||
|
||||
# Look at it --
|
||||
plot(density(log(firms$mktcap)))
|
||||
plot(firms$mktcap, firms$spread, type="p", cex=.2, col="blue", log="xy",
|
||||
xlab="Market cap (Mln USD)", ylab="Bid/offer spread (bps)")
|
||||
m=lm(log(spread) ~ log(mktcap), firms)
|
||||
summary(m)
|
||||
|
||||
# Making deciles --
|
||||
library(gtools)
|
||||
library(gdata)
|
||||
# for deciles (default=quartiles)
|
||||
size.category = quantcut(firms$mktcap, q=seq(0, 1, 0.1), labels=F)
|
||||
table(size.category)
|
||||
means = aggregate(firms, list(size.category), mean)
|
||||
print(data.frame(means$mktcap,means$spread))
|
||||
|
||||
# Make a picture combining the sample mean of spread (in each decile)
|
||||
# with the weighted average sample mean of the spread (in each decile),
|
||||
# where weights are proportional to size.
|
||||
wtd.means = by(firms, size.category,
|
||||
function(piece) (sum(piece$mktcap*piece$spread)/sum(piece$mktcap)))
|
||||
lines(means$mktcap, means$spread, type="b", lwd=2, col="green", pch=19)
|
||||
lines(means$mktcap, wtd.means, type="b", lwd=2, col="red", pch=19)
|
||||
legend(x=0.25, y=0.5, bty="n",
|
||||
col=c("blue", "green", "red"),
|
||||
lty=c(0, 1, 1), lwd=c(0,2,2),
|
||||
pch=c(0,19,19),
|
||||
legend=c("firm", "Mean spread in size deciles",
|
||||
"Size weighted mean spread in size deciles"))
|
||||
|
||||
# Within group standard deviations --
|
||||
aggregate(firms, list(size.category), sd)
|
||||
|
||||
# Now I do quartiles by BOTH mktcap and spread.
|
||||
size.quartiles = quantcut(firms$mktcap, labels=F)
|
||||
spread.quartiles = quantcut(firms$spread, labels=F)
|
||||
table(size.quartiles, spread.quartiles)
|
||||
# Re-express everything as joint probabilities
|
||||
table(size.quartiles, spread.quartiles)/nrow(firms)
|
||||
# Compute cell means at every point in the joint table:
|
||||
aggregate(firms, list(size.quartiles, spread.quartiles), mean)
|
||||
|
||||
# Make pretty two-way tables
|
||||
aggregate.table(firms$mktcap, size.quartiles, spread.quartiles, nobs)
|
||||
aggregate.table(firms$mktcap, size.quartiles, spread.quartiles, mean)
|
||||
aggregate.table(firms$mktcap, size.quartiles, spread.quartiles, sd)
|
||||
aggregate.table(firms$spread, size.quartiles, spread.quartiles, mean)
|
||||
aggregate.table(firms$spread, size.quartiles, spread.quartiles, sd)
|
11
r/R Hello World Program.r
Normal file
11
r/R Hello World Program.r
Normal file
@ -0,0 +1,11 @@
|
||||
> # We can use the print() function
|
||||
> print("Hello World!")
|
||||
[1] "Hello World!"
|
||||
|
||||
> # Quotes can be suppressed in the output
|
||||
> print("Hello World!", quote = FALSE)
|
||||
[1] Hello World!
|
||||
|
||||
> # If there are more than 1 item, we can concatenate using paste()
|
||||
> print(paste("How","are","you?"))
|
||||
[1] "How are you?"
|
@ -0,0 +1,12 @@
|
||||
# Goal: R syntax where model specification is an argument to a function.
|
||||
|
||||
# Invent a dataset
|
||||
x <- runif(100); y <- runif(100); z <- 2 + 3*x + 4*y + rnorm(100)
|
||||
D <- data.frame(x=x, y=y, z=z)
|
||||
|
||||
# Fit a linear model to the data frame D (looked up from the enclosing
# environment) and return its summary.
#
# Args:
#   modelstring: the model specification passed to lm(), e.g. z ~ x.
# Returns:
#   The summary of the fitted lm object.
amodel <- function(modelstring) {
  fit <- lm(modelstring, D)
  summary(fit)
}
|
||||
|
||||
amodel(z ~ x)
|
||||
amodel(z ~ y)
|
3
r/README.md
Normal file
3
r/README.md
Normal file
@ -0,0 +1,3 @@
|
||||
# R programming language
|
||||
|
||||
R is a programming language designed for statistical computing and graphics. This directory contains code that can be executed within the R software environment.
|
@ -0,0 +1,43 @@
|
||||
# Goal: Reading and writing ascii files, reading and writing binary files.
|
||||
# And, to measure how much faster it is working with binary files.
|
||||
|
||||
# First manufacture a tall data frame:
|
||||
# FYI -- runif(10) yields 10 U(0,1) random numbers.
|
||||
B = data.frame(x1=runif(100000), x2=runif(100000), x3=runif(100000))
|
||||
summary(B)
|
||||
|
||||
# Write out ascii file:
|
||||
write.table(B, file = "/tmp/foo.csv", sep = ",", col.names = NA)
|
||||
# Read in this resulting ascii file:
|
||||
C=read.table("/tmp/foo.csv", header = TRUE, sep = ",", row.names=1)
|
||||
# Write a binary file out of dataset C:
|
||||
save(C, file="/tmp/foo.binary")
|
||||
# Delete the dataset C:
|
||||
rm(C)
|
||||
# Restore from foo.binary:
|
||||
load("/tmp/foo.binary")
|
||||
summary(C) # should yield the same results
|
||||
# as summary(B) above.
|
||||
|
||||
|
||||
# Now we time all these operations --
|
||||
cat("Time creation of dataset:\n")
|
||||
system.time({
|
||||
B = data.frame(x1=runif(100000), x2=runif(100000), x3=runif(100000))
|
||||
})
|
||||
|
||||
cat("Time writing an ascii file out of dataset B:\n")
|
||||
system.time(
|
||||
write.table(B, file = "/tmp/foo.csv", sep = ",", col.names = NA)
|
||||
)
|
||||
|
||||
cat("Time reading an ascii file into dataset C:\n")
|
||||
system.time(
|
||||
{C=read.table("/tmp/foo.csv", header = TRUE, sep=",", row.names=1)
|
||||
})
|
||||
|
||||
cat("Time writing a binary file out of dataset C:\n")
|
||||
system.time(save(C, file="/tmp/foo.binary"))
|
||||
|
||||
cat("Time reading a binary file + variablenames from /tmp/foo.binary:\n")
|
||||
system.time(load("/tmp/foo.binary")) # and then read it in from binary file
|
15
r/Reading in a Microsoft .r
Normal file
15
r/Reading in a Microsoft .r
Normal file
@ -0,0 +1,15 @@
|
||||
# Goal: Reading in a Microsoft .xls file directly
|
||||
|
||||
library(gdata)
|
||||
a <- read.xls("file.xls", sheet=2) # This reads in the 2nd sheet
|
||||
|
||||
# Look at what the cat dragged in
|
||||
str(a)
|
||||
|
||||
# If you have a date column, you'll want to fix it up like this:
|
||||
a$date <- as.Date(as.character(a$X), format="%d-%b-%y")
|
||||
a$X <- NULL
|
||||
|
||||
|
||||
# Also see http://tolstoy.newcastle.edu.au/R/help/06/04/25674.html for
|
||||
# another path.
|
17
r/Reading in a file made by CMIE's Business Beacon program.r
Normal file
17
r/Reading in a file made by CMIE's Business Beacon program.r
Normal file
@ -0,0 +1,17 @@
|
||||
# Goal: To read in files produced by CMIE's "Business Beacon".
|
||||
# This assumes you have made a file of MONTHLY data using CMIE's
|
||||
# Business Beacon program. This contains 2 columns: M3 and M0.
|
||||
|
||||
A <- read.table(
|
||||
# Generic to all BB files --
|
||||
sep="|", # CMIE's .txt file is pipe delimited
|
||||
skip=3, # Skip the 1st 3 lines
|
||||
na.strings=c("N.A.","Err"), # The ways they encode missing data
|
||||
# Specific to your immediate situation --
|
||||
file="bb_data.text",
|
||||
col.names=c("junk", "date", "M3", "M0")
|
||||
)
|
||||
A$junk <- NULL # Blow away this column
|
||||
|
||||
# Parse the CMIE-style "Mmm yy" date string that's used on monthly data
|
||||
A$date <- as.Date(paste("1", as.character(A$date)), format="%d %b %Y")
|
16
r/Sample From a Population.r
Normal file
16
r/Sample From a Population.r
Normal file
@ -0,0 +1,16 @@
|
||||
> # sample with replacement
|
||||
> sample(x, replace = TRUE)
|
||||
[1] 15 17 13 9 5 15 11 15 1
|
||||
|
||||
> # if we simply pass in a positive number n, it will sample
|
||||
> # from 1:n without replacement
|
||||
> sample(10)
|
||||
[1] 2 4 7 9 1 3 10 5 8 6
|
||||
|
||||
|
||||
|
||||
|
||||
An example that simulates tossing a coin 10 times.
|
||||
|
||||
> sample(c("H","T"),10, replace = TRUE)
|
||||
[1] "H" "H" "H" "T" "H" "T" "H" "H" "H" "T"
|
@ -0,0 +1,40 @@
|
||||
# Goals: Scare the hell out of children with the Cauchy distribution.
|
||||
|
||||
# A function which simulates N draws from one of two distributions,
|
||||
# and returns the mean obtained thusly.
|
||||
# Draw N values from one of two distributions and return their sample mean.
#
# Args:
#   N:            number of draws.
#   distribution: "normal" draws with rnorm(); any other value draws with
#                 rcauchy().
# Returns:
#   The mean of the N simulated values.
one.simulation <- function(N=100, distribution="normal") {
  # Pick the random generator first, then draw and average in one step.
  draw <- if (distribution == "normal") rnorm else rcauchy
  mean(draw(N))
}
|
||||
|
||||
k1 <- density(replicate(1000, one.simulation(20)))
|
||||
k2 <- density(replicate(1000, one.simulation(20, distribution="cauchy")))
|
||||
|
||||
xrange <- range(k1$x, k2$x)
|
||||
plot(k1$x, k1$y, xlim=xrange, type="l", xlab="Estimated value", ylab="")
|
||||
grid()
|
||||
lines(k2$x, k2$y, col="red")
|
||||
abline(v=.5)
|
||||
legend(x="topleft", bty="n",
|
||||
lty=c(1,1),
|
||||
col=c("black", "red"),
|
||||
legend=c("Mean of Normal", "Mean of Cauchy"))
|
||||
# The distribution of the mean of normals collapses into a point;
|
||||
# that of the cauchy does not.
|
||||
|
||||
# Here's more scary stuff --
|
||||
for (i in 1:10) {
|
||||
cat("Sigma of distribution of 1000 draws from mean of normal - ",
|
||||
sd(replicate(1000, one.simulation(20))), "\n")
|
||||
}
|
||||
for (i in 1:10) {
|
||||
cat("Sigma of distribution of 1000 draws from mean of cauchy - ",
|
||||
sd(replicate(1000, one.simulation(20, distribution="cauchy"))), "\n")
|
||||
}
|
||||
|
||||
# Exercise for the reader: Compare the distribution of the median of
|
||||
# the Normal against the distribution of the median of the Cauchy.
|
@ -0,0 +1,33 @@
|
||||
# Goals: Lots of times, you need to give an R object to a friend,
|
||||
# or embed data into an email.
|
||||
|
||||
# First I invent a little dataset --
|
||||
set.seed(101) # To make sure you get the same random numbers as me
|
||||
# FYI -- runif(10) yields 10 U(0,1) random numbers.
|
||||
A = data.frame(x1=runif(10), x2=runif(10), x3=runif(10))
|
||||
# Look at it --
|
||||
print(A)
|
||||
|
||||
# Writing to a binary file that can be transported
|
||||
save(A, file="/tmp/my_data_file.rda") # You can give this file to a friend
|
||||
load("/tmp/my_data_file.rda")
|
||||
|
||||
# Plan B - you want pure ascii, which can be put into an email --
|
||||
dput(A)
|
||||
# This gives you a block of R code. Let me utilise that generated code
|
||||
# to create a dataset named "B".
|
||||
B <- structure(list(x1 = c(0.372198376338929, 0.0438248154241592,
|
||||