parent
599b63599b

commit
026079a47d

**70 changed files**with

**2257 additions**and

**0 deletions**

`@ -0,0 +1,31 @@` |
||||

`# Goals: A first look at R objects - vectors, lists, matrices, data frames.` |
||||

```
``` |
||||

`# To make vectors "x" "y" "year" and "names"` |
||||

`x <- c(2,3,7,9)` |
||||

`y <- c(9,7,3,2)` |
||||

`year <- 1990:1993` |
||||

`names <- c("payal", "shraddha", "kritika", "itida")` |
||||

`# Accessing the 1st and last elements of y --` |
||||

`y[1]` |
||||

`y[length(y)]` |
||||

```
``` |
||||

`# To make a list "person" --` |
||||

`person <- list(name="payal", x=2, y=9, year=1990)` |
||||

`person` |
||||

`# Accessing things inside a list --` |
||||

`person$name` |
||||

`person$x` |
||||

```
``` |
||||

`# To make a matrix, pasting together the columns "year" "x" and "y"` |
||||

`# The verb cbind() stands for "column bind"` |
||||

`cbind(year, x, y)` |
||||

```
``` |
||||

`# To make a "data frame", which is a list of vectors of the same length --` |
||||

`D <- data.frame(names, year, x, y)` |
||||

`nrow(D)` |
||||

`# Accessing one of these vectors` |
||||

`D$names` |
||||

`# Accessing the last element of this vector` |
||||

`D$names[nrow(D)]` |
||||

`# Or equally,` |
||||

`D$names[length(D$names)]` |

`@ -0,0 +1,11 @@` |
||||

`# Goal: A histogram with tails shown in red.` |
||||

```
``` |
||||

`# This happened on the R mailing list on 7 May 2004.` |
||||

`# This is by Martin Maechler <maechler@stat.math.ethz.ch>, who was` |
||||

`# responding to a slightly imperfect version of this by` |
||||

`# "Guazzetti Stefano" <Stefano.Guazzetti@ausl.re.it>` |
||||

```
``` |
||||

`x <- rnorm(1000)` |
||||

`hx <- hist(x, breaks=100, plot=FALSE)` |
||||

`plot(hx, col=ifelse(abs(hx$breaks) < 1.669, 4, 2))` |
||||

` # What is cool is that "col" is supplied a vector.` |

`@ -0,0 +1,67 @@` |
||||

`# Goals: ARMA modeling - estimation, diagnostics, forecasting.` |
||||

```
``` |
||||

```
``` |
||||

`# 0. SETUP DATA` |
||||

`rawdata <- c(-0.21,-2.28,-2.71,2.26,-1.11,1.71,2.63,-0.45,-0.11,4.79,5.07,-2.24,6.46,3.82,4.29,-1.47,2.69,7.95,4.46,7.28,3.43,-3.19,-3.14,-1.25,-0.50,2.25,2.77,6.72,9.17,3.73,6.72,6.04,10.62,9.89,8.23,5.37,-0.10,1.40,1.60,3.40,3.80,3.60,4.90,9.60,18.20,20.60,15.20,27.00,15.42,13.31,11.22,12.77,12.43,15.83,11.44,12.32,12.10,12.02,14.41,13.54,11.36,12.97,10.00,7.20,8.74,3.92,8.73,2.19,3.85,1.48,2.28,2.98,4.21,3.85,6.52,8.16,5.36,8.58,7.00,10.57,7.12,7.95,7.05,3.84,4.93,4.30,5.44,3.77,4.71,3.18,0.00,5.25,4.27,5.14,3.53,4.54,4.70,7.40,4.80,6.20,7.29,7.30,8.38,3.83,8.07,4.88,8.17,8.25,6.46,5.96,5.88,5.03,4.99,5.87,6.78,7.43,3.61,4.29,2.97,2.35,2.49,1.56,2.65,2.49,2.85,1.89,3.05,2.27,2.91,3.94,2.34,3.14,4.11,4.12,4.53,7.11,6.17,6.25,7.03,4.13,6.15,6.73,6.99,5.86,4.19,6.38,6.68,6.58,5.75,7.51,6.22,8.22,7.45,8.00,8.29,8.05,8.91,6.83,7.33,8.52,8.62,9.80,10.63,7.70,8.91,7.50,5.88,9.82,8.44,10.92,11.67)` |
||||

```
``` |
||||

`# Make a R timeseries out of the rawdata: specify frequency & startdate` |
||||

`gIIP <- ts(rawdata, frequency=12, start=c(1991,4))` |
||||

`print(gIIP)` |
||||

`plot.ts(gIIP, type="l", col="blue", ylab="IIP Growth (%)", lwd=2,` |
||||

` main="Full data")` |
||||

`grid()` |
||||

```
``` |
||||

`# Based on this, I decide that 4/1995 is the start of the sensible period.` |
||||

`gIIP <- window(gIIP, start=c(1995,4))` |
||||

`print(gIIP)` |
||||

`plot.ts(gIIP, type="l", col="blue", ylab="IIP Growth (%)", lwd=2,` |
||||

` main="Estimation subset")` |
||||

`grid()` |
||||

```
``` |
||||

`# Descriptive statistics about gIIP` |
||||

`mean(gIIP); sd(gIIP); summary(gIIP);` |
||||

`plot(density(gIIP), col="blue", main="(Unconditional) Density of IIP growth")` |
||||

`acf(gIIP)` |
||||

```
``` |
||||

```
``` |
||||

`# 1. ARMA ESTIMATION` |
||||

`m.ar2 <- arima(gIIP, order = c(2,0,0))` |
||||

`print(m.ar2) # Print it out` |
||||

```
``` |
||||

```
``` |
||||

`# 2. ARMA DIAGNOSTICS` |
||||

`tsdiag(m.ar2) # His pretty picture of diagnostics` |
||||

`## Time series structure in errors` |
||||

`print(Box.test(m.ar2$residuals, lag=12, type="Ljung-Box"));` |
||||

`## Sniff for ARCH` |
||||

`print(Box.test(m.ar2$residuals^2, lag=12, type="Ljung-Box"));` |
||||

`## Eyeball distribution of residuals` |
||||

`plot(density(m.ar2$residuals), col="blue", xlim=c(-8,8),` |
||||

` main=paste("Residuals of AR(2)"))` |
||||

```
``` |
||||

```
``` |
||||

`# 3. FORECASTING` |
||||

`## Make a picture of the residuals` |
||||

`plot.ts(m.ar2$residual, ylab="Innovations", col="blue", lwd=2)` |
||||

`s <- sqrt(m.ar2$sigma2)` |
||||

`abline(h=c(-s,s), lwd=2, col="lightGray")` |
||||

```
``` |
||||

`p <- predict(m.ar2, n.ahead = 12) # Make 12 predictions.` |
||||

`print(p)` |
||||

```
``` |
||||

`## Watch the forecastability decay away from fat values to 0.` |
||||

`## sd(x) is the naive sigma. p$se is the prediction se.` |
||||

`gain <- 100*(1-p$se/sd(gIIP))` |
||||

`plot.ts(gain, main="Gain in forecast s.d.", ylab="Per cent",` |
||||

` col="blue", lwd=2)` |
||||

```
``` |
||||

`## Make a pretty picture that puts it all together` |
||||

`ts.plot(gIIP, p$pred, p$pred-1.96*p$se, p$pred+1.96*p$se,` |
||||

` gpars=list(lty=c(1,1,2,2), lwd=c(2,2,1,1),` |
||||

` ylab="IIP growth (%)", col=c("blue","red", "red", "red")))` |
||||

`grid()` |
||||

`abline(h=mean(gIIP), lty=2, lwd=2, col="lightGray")` |
||||

`legend(x="bottomleft", cex=0.8, bty="n",` |
||||

` lty=c(1,1,2,2), lwd=c(2,1,1,2),` |
||||

` col=c("blue", "red", "red", "lightGray"),` |
||||

` legend=c("IIP", "AR(2) forecasts", "95% C.I.", "Mean IIP growth"))` |

`@ -0,0 +1,16 @@` |
||||

`> x` |
||||

`[1] 3 6 8` |
||||

`> y` |
||||

`[1] 2 9 0` |
||||

```
``` |
||||

`> x + y` |
||||

`[1] 5 15 8` |
||||

```
``` |
||||

`> x + 1 # 1 is recycled to (1,1,1)` |
||||

`[1] 4 7 9` |
||||

```
``` |
||||

`> x + c(1,4) # (1,4) is recycled to (1,4,1) but warning issued` |
||||

`[1] 4 10 9` |
||||

`Warning message:` |
||||

`In x + c(1, 4) :` |
||||

` longer object length is not a multiple of shorter object length` |

`@ -0,0 +1,33 @@` |
||||

`# Goal: All manner of import and export of datasets.` |
||||

```
``` |
||||

`# Invent a dataset --` |
||||

`A <- data.frame(` |
||||

` name=c("a","b","c"),` |
||||

` ownership=c("Case 1","Case 1","Case 2"),` |
||||

` listed.at=c("NSE",NA,"BSE"),` |
||||

` # Firm "b" is unlisted.` |
||||

` is.listed=c(TRUE,FALSE,TRUE),` |
||||

` # R convention - boolean variables are named "is.something"` |
||||

` x=c(2.2,3.3,4.4),` |
||||

` date=as.Date(c("2004-04-04","2005-05-05","2006-06-06"))` |
||||

` )` |
||||

```
``` |
||||

`# To a spreadsheet through a CSV file --` |
||||

`write.table(A,file="demo.csv",sep = ",",col.names = NA,qmethod = "double")` |
||||

`B <- read.table("demo.csv", header = TRUE, sep = ",", row.names = 1)` |
||||

```
``` |
||||

`# To R as a binary file --` |
||||

`save(A, file="demo.rda")` |
||||

`load("demo.rda")` |
||||

```
``` |
||||

`# To the Open XML standard for transport for statistical data --` |
||||

`library(StatDataML)` |
||||

`writeSDML(A, "/tmp/demo.sdml")` |
||||

`B <- readSDML("/tmp/demo.sdml")` |
||||

```
``` |
||||

`# To Stata --` |
||||

`library(foreign)` |
||||

`write.dta(A, "/tmp/demo.dta")` |
||||

`B <- read.dta("/tmp/demo.dta")` |
||||

```
``` |
||||

`# foreign::write.foreign() also has a pathway to SAS and SPSS.` |

`@ -0,0 +1,33 @@` |
||||

`# Goal: An example of simulation-based inference.` |
||||

`# This is in the context of testing for time-series dependence in` |
||||

`# stock market returns data.` |
||||

`# The code here does the idea of Kim, Nelson, Startz (1991).` |
||||

`# We want to use the distribution of realworld returns data, without` |
||||

`# needing assumptions about normality.` |
||||

`# The null is lack of dependence (i.e. an efficient market).` |
||||

`# So repeatedly, the data is permuted, and the sample ACF is computed.` |
||||

`# This gives us the distribution of the ACF under H0: independence, but` |
||||

`# while using the empirical distribution of the returns data.` |
||||

```
``` |
||||

`# Weekly returns on Nifty, 1/1/2002 to 31/12/2003, 104 weeks of data.` |
||||

`r <- c(-0.70031182197603, 0.421690133064168, -1.20098072984689, 0.143402360644984, 3.81836537549516, 3.17055939373247, 0.305580301919228, 1.23853814691852, 0.81584795095706, -1.51865139747764, -2.71223626421522, -0.784836480094242, 1.09180041170998, 0.397649587762761, -4.11309534220923, -0.263912425099111, -0.0410144239805454, 1.75756212770972, -2.3335373897992, -2.19228764624217, -3.64578978183987, 1.92535789661354, 3.45782867883164, -2.15532607229374, -0.448039988298987, 1.50124793565896, -1.45871585874362, -2.13459863369767, -6.2128068251802, -1.94482987066289, 0.751294815735637, 1.78244982829590, 1.61567494389745, 1.53557708728931, -1.53557708728931, -0.322061470004265, -2.28394919698225, 0.70399304137414, -2.93580952607737, 2.38125098034425, 0.0617697039252185, -4.14482733720716, 2.04397528093754, 0.576400673606603, 3.43072725191913, 2.96465382864843, 2.89833358015583, 1.85387040058336, 1.52136515035952, -0.637268376944444, 1.75418926224609, -0.804391905851354, -0.861816058320475, 0.576902488444109, -2.84259880663331, -1.35375536139417, 1.49096529042234, -2.05404881010045, 2.86868849528146, -0.258270670200478, -4.4515881438687, -1.73055019137092, 3.04427015714648, -2.94928202352018, 1.62081315773994, -6.83117945164824, -0.962715713711582, -1.75875847071740, 1.50330330252721, -0.0479705789653728, 3.68968303215933, -0.535807567290103, 3.94034871061182, 3.85787174417738, 0.932185956989873, 4.08598654183674, 2.27343783689715, 1.13958830440017, 2.01737201171230, -1.88131458327554, 1.97596267156648, 2.79857144562001, 2.22470306481695, 2.03212951411427, 4.95626853448883, 3.40400972901396, 3.03840139165246, -1.89863129741417, -3.70832135042951, 4.78478922155396, 4.3973589590097, 4.9667050392987, 2.99775078737081, -4.12349101552438, 3.25638269809945, 2.29683376253966, -2.64772825878214, -0.630835277076258, 4.72528848505451, 1.87368447333380, 3.17543946162564, 4.58174427843208, 3.23625985632168, 2.29777651227296)` |
||||

```
``` |
||||

`# The 1st autocorrelation from the sample:` |
||||

`acf(r, 1, plot=FALSE)$acf[2]` |
||||

```
``` |
||||

`# Obtain 1000 draws from the distribution of the 1st autocorrelation` |
||||

`# under the null of independence:` |
||||

`set.seed <- 101` |
||||

`simulated <- replicate(1000, acf(r[sample(1:104, replace=FALSE)], 1, plot=FALSE)$acf[2])` |
||||

`# At 95% --` |
||||

`quantile(simulated, probs=c(.025,.975))` |
||||

`# At 99% --` |
||||

`quantile(simulated, probs=c(.005,.995))` |
||||

```
``` |
||||

`# So we can reject the null at 95% but not at 99%.` |
||||

```
``` |
||||

`# A pretty picture.` |
||||

`plot(density(simulated), col="blue")` |
||||

`abline(v=0)` |
||||

`abline(v=quantile(simulated, probs=c(.025,.975)), lwd=2, col="purple")` |
||||

`abline(v=acf(r, 1, plot=FALSE)$acf[2], lty=2, lwd=4, col="yellow")` |

`@ -0,0 +1,52 @@` |
||||

`# Goal: Associative arrays (as in awk) or hashes (as in perl).` |
||||

`# Or, more generally, adventures in R addressing.` |
||||

```
``` |
||||

`# Here's a plain R vector:` |
||||

`x <- c(2,3,7,9)` |
||||

`# But now I tag every elem with labels:` |
||||

`names(x) <- c("kal","sho","sad","aja")` |
||||

`# Associative array operations:` |
||||

`x["kal"] <- 12` |
||||

`# Pretty printing the entire associative array:` |
||||

`x` |
||||

```
``` |
||||

`# This works for matrices too:` |
||||

`m <- matrix(runif(10), nrow=5)` |
||||

`rownames(m) <- c("violet","indigo","blue","green","yellow")` |
||||

`colnames(m) <- c("Asia","Africa")` |
||||

`# The full matrix --` |
||||

`m` |
||||

`# Or even better --` |
||||

`library(xtable)` |
||||

`xtable(m)` |
||||

```
``` |
||||

`# Now address symbolically --` |
||||

`m[,"Africa"]` |
||||

`m["indigo",]` |
||||

`m["indigo","Africa"]` |
||||

```
``` |
||||

`# The "in" operator, as in awk --` |
||||

`for (colour in c("yellow", "orange", "red")) {` |
||||

` if (colour %in% rownames(m)) {` |
||||

` cat("For Africa and ", colour, " we have ", m[colour, "Africa"], "\n")` |
||||

` } else {` |
||||

` cat("Colour ", colour, " does not exist in the hash.\n")` |
||||

` }` |
||||

`}` |
||||

```
``` |
||||

`# This works for data frames also --` |
||||

`D <- data.frame(m)` |
||||

`D` |
||||

`# Look closely at what happened --` |
||||

`str(D) # The colours are the rownames(D).` |
||||

```
``` |
||||

`# Operations --` |
||||

`D$Africa` |
||||

`D[,"Africa"]` |
||||

`D["yellow",]` |
||||

`# or` |
||||

`subset(D, rownames(D)=="yellow")` |
||||

```
``` |
||||

`colnames(D) <- c("Antarctica","America")` |
||||

`D` |
||||

`D$America` |

`@ -0,0 +1,16 @@` |
||||

`Binary to Decimal` |
||||

`# Program to convert decimal` |
||||

`# number into binary number` |
||||

`# using recursive function` |
||||

```
``` |
||||

`convert_to_binary <- function(n) {` |
||||

```
``` |
||||

` if(n > 1) {` |
||||

` convert_to_binary(as.integer(n/2))` |
||||

` }` |
||||

` cat(n %% 2)` |
||||

`}` |
||||

`Output` |
||||

```
``` |
||||

`> convert_to_binary(52)` |
||||

`110100` |

`@ -0,0 +1,25 @@` |
||||

` Check Armstrong number` |
||||

`# take input from the user` |
||||

`num = as.integer(readline(prompt="Enter a number: "))` |
||||

```
``` |
||||

`# initialize sum` |
||||

`sum = 0` |
||||

```
``` |
||||

`# find the sum of the cube of each digit` |
||||

`temp = num` |
||||

`while(temp > 0) {` |
||||

` digit = temp %% 10` |
||||

` sum = sum + (digit ^ 3)` |
||||

` temp = floor(temp / 10)` |
||||

`}` |
||||

```
``` |
||||

`# display the result` |
||||

`if(num == sum) {` |
||||

` print(paste(num, "is an Armstrong number"))` |
||||

`} else {` |
||||

` print(paste(num, "is not an Armstrong number"))` |
||||

`}` |
||||

`Output 1` |
||||

```
``` |
||||

`Enter a number: 23` |
||||

`[1] "23 is not an Armstrong number"` |

`@ -0,0 +1,27 @@` |
||||

`Check Leap Year` |
||||

`# Program to check if` |
||||

`# the input year is` |
||||

`# a leap year or not` |
||||

```
``` |
||||

`year = as.integer(readline(prompt="Enter a year: "))` |
||||

`if((year %% 4) == 0) {` |
||||

` if((year %% 100) == 0) {` |
||||

` if((year %% 400) == 0) {` |
||||

` print(paste(year,"is a leap year"))` |
||||

` } else {` |
||||

` print(paste(year,"is not a leap year"))` |
||||

` }` |
||||

` } else {` |
||||

` print(paste(year,"is a leap year"))` |
||||

` }` |
||||

`} else {` |
||||

` print(paste(year,"is not a leap year"))` |
||||

`}` |
||||

`Output 1` |
||||

```
``` |
||||

`Enter a year: 1900` |
||||

`[1] "1900 is not a leap year"` |
||||

`Output 2` |
||||

```
``` |
||||

`Enter a year: 2000` |
||||

`[1] "2000 is a leap year"` |

`@ -0,0 +1,21 @@` |
||||

`Check Odd and Even Number` |
||||

`# Program to check if` |
||||

`# the input number is odd or even.` |
||||

`# A number is even if division` |
||||

`# by 2 give a remainder of 0.` |
||||

`# If remainder is 1, it is odd.` |
||||

```
``` |
||||

`num = as.integer(readline(prompt="Enter a number: "))` |
||||

`if((num %% 2) == 0) {` |
||||

` print(paste(num,"is Even"))` |
||||

`} else {` |
||||

` print(paste(num,"is Odd"))` |
||||

`}` |
||||

`Output 1` |
||||

```
``` |
||||

`Enter a number: 89` |
||||

`[1] "89 is Odd"` |
||||

`Output 2` |
||||

```
``` |
||||

`Enter a number: 0` |
||||

`[1] "0 is Even"` |

`@ -0,0 +1,24 @@` |
||||

`Check Positive, Negative or Zero` |
||||

`# In this program, we input a number` |
||||

`# check if the number is positive or` |
||||

`# negative or zero and display` |
||||

`# an appropriate message` |
||||

```
``` |
||||

`num = as.double(readline(prompt="Enter a number: "))` |
||||

`if(num > 0) {` |
||||

` print("Positive number")` |
||||

`} else {` |
||||

` if(num == 0) {` |
||||

` print("Zero")` |
||||

` } else {` |
||||

` print("Negative number")` |
||||

` }` |
||||

`}` |
||||

`Output 1` |
||||

```
``` |
||||

`Enter a number: -9.6` |
||||

`[1] "Negative number"` |
||||

`Output 2` |
||||

```
``` |
||||

`Enter a number: 2` |
||||

`[1] "Positive number"` |

`@ -0,0 +1,27 @@` |
||||

`Check Prime Number` |
||||

`# Program to check if` |
||||

`# the input number is` |
||||

`# prime or not` |
||||

```
``` |
||||

`# take input from the user` |
||||

`num = as.integer(readline(prompt="Enter a number: "))` |
||||

```
``` |
||||

`flag = 0` |
||||

`# prime numbers are greater than 1` |
||||

`if(num > 1) {` |
||||

` # check for factors` |
||||

` flag = 1` |
||||

` for(i in 2:(num-1)) {` |
||||

` if ((num %% i) == 0) {` |
||||

` flag = 0` |
||||

` break` |
||||

` }` |
||||

` }` |
||||

`} ` |
||||

```
``` |
||||

`if(num == 2) flag = 1` |
||||

`if(flag == 1) {` |
||||

` print(paste(num,"is a prime number"))` |
||||

`} else {` |
||||

` print(paste(num,"is not a prime number"))` |
||||

`}` |

`@ -0,0 +1,30 @@` |
||||

`Compute LCM in R` |
||||

`# Program to find the L.C.M. of two input number` |
||||

`lcm <- function(x, y) {` |
||||

` # choose the greater number` |
||||

` if(x > y) {` |
||||

` greater = x` |
||||

` } else {` |
||||

` greater = y` |
||||

` }` |
||||

```
``` |
||||

` while(TRUE) {` |
||||

` if((greater %% x == 0) && (greater %% y == 0)) {` |
||||

` lcm = greater` |
||||

` break` |
||||

` }` |
||||

` greater = greater + 1` |
||||

` }` |
||||

` return(lcm)` |
||||

`}` |
||||

```
``` |
||||

`# take input from the user` |
||||

`num1 = as.integer(readline(prompt = "Enter first number: "))` |
||||

`num2 = as.integer(readline(prompt = "Enter second number: "))` |
||||

```
``` |
||||

`print(paste("The L.C.M. of", num1,"and", num2,"is", lcm(num1, num2)))` |
||||

`Output` |
||||

```
``` |
||||

`Enter first number: 24` |
||||

`Enter second number: 25` |
||||

`[1] "The L.C.M. of 24 and 25 is 600"` |

`@ -0,0 +1,15 @@` |
||||

`# Goals: Do bootstrap inference, as an example, for a sample median.` |
||||

```
``` |
||||

`library(boot)` |
||||

```
``` |
||||

`samplemedian <- function(x, d) { # d is a vector of integer indexes` |
||||

` return(median(x[d])) # The genius is in the x[d] notation` |
||||

`}` |
||||

```
``` |
||||

`data <- rnorm(50) # Generate a dataset with 50 obs` |
||||

`b <- boot(data, samplemedian, R=2000) # 2000 bootstrap replications` |
||||

`cat("Sample median has a sigma of ", sd(b$t[,1]), "\n")` |
||||

`plot(b)` |
||||

```
``` |
||||

`# Make a 99% confidence interval` |
||||

`boot.ci(b, conf=0.99, type="basic")` |

`@ -0,0 +1,44 @@` |
||||

`# Goal: Simulate a dataset from the OLS model and obtain` |
||||

`# obtain OLS estimates for it.` |
||||

```
``` |
||||

`x <- runif(100, 0, 10) # 100 draws from U(0,10)` |
||||

`y <- 2 + 3*x + rnorm(100) # beta = [2, 3] and sigma = 1` |
||||

```
``` |
||||

`# You want to just look at OLS results?` |
||||

`summary(lm(y ~ x))` |
||||

```
``` |
||||

`# Suppose x and y were packed together in a data frame --` |
||||

`D <- data.frame(x,y)` |
||||

`summary(lm(y ~ x, D))` |
||||

```
``` |
||||

`# Full and elaborate steps --` |
||||

`d <- lm(y ~ x)` |
||||

`# Learn about this object by saying ?lm and str(d)` |
||||

`# Compact model results --` |
||||

`print(d)` |
||||

`# Pretty graphics for regression diagnostics --` |
||||

`par(mfrow=c(2,2))` |
||||

`plot(d)` |
||||

```
``` |
||||

`d <- summary(d)` |
||||

`# Detailed model results --` |
||||

`print(d)` |
||||

`# Learn about this object by saying ?summary.lm and by saying str(d)` |
||||

`cat("OLS gave slope of ", d$coefficients[2,1],` |
||||

` "and a error sigma of ", d$sigma, "\n")` |
||||

```
``` |
||||

```
``` |
||||

`## I need to drop down to a smaller dataset now --` |
||||

`x <- runif(10)` |
||||

`y <- 2 + 3*x + rnorm(10)` |
||||

`m <- lm(y ~ x)` |
||||

```
``` |
||||

`# Now R supplies a wide range of generic functions which extract` |
||||

`# useful things out of the result of estimation of many kinds of models.` |
||||

```
``` |
||||

`residuals(m)` |
||||

`fitted(m)` |
||||

`AIC(m)` |
||||

`AIC(m, k=log(10)) # SBC` |
||||

`vcov(m)` |
||||

`logLik(m)` |

`@ -0,0 +1,55 @@` |
||||

`# Goal: "Dummy variables" in regression.` |
||||

```
``` |
||||

`# Suppose you have this data:` |
||||

`people = data.frame(` |
||||

` age = c(21,62,54,49,52,38),` |
||||

` education = c("college", "school", "none", "school", "college", "none"),` |
||||

` education.code = c( 2, 1, 0, 1, 2, 0 )` |
||||

` )` |
||||

`# Here people$education is a string categorical variable and` |
||||

`# people$education.code is the same thing, with a numerical coding system.` |
||||

`people` |
||||

```
``` |
||||

`# Note the structure of the dataset --` |
||||

`str(people)` |
||||

`# The strings supplied for `education' have been treated (correctly) as` |
||||

`# a factor, but education.code is being treated as an integer and not as` |
||||

`# a factor.` |
||||

```
``` |
||||

```
``` |
||||

`# We want to do a dummy variable regression. Normally you would have:` |
||||

`# 1 Chosen college as the omitted category` |
||||

`# 2 Made a dummy for "none" named educationnone` |
||||

`# 3 Made a dummy for "school" named educationschool` |
||||

`# 4 Ran a regression like lm(age ~ educationnone + educationschool, people)` |
||||

`# But this is R. Things are cool:` |
||||

`lm(age ~ education, people)` |
||||

```
``` |
||||

`# ! :-)` |
||||

`# When you feed him an explanatory variable like education, he does all` |
||||

`# these steps automatically. (He chose college as the omitted category).` |
||||

```
``` |
||||

`# If you use an integer coding, then the obvious thing goes wrong --` |
||||

`lm(age ~ education.code, people)` |
||||

`# because he's thinking that education.code is an integer explanatory` |
||||

`# variable. So you need to:` |
||||

```
``` |
||||

`lm(age ~ factor(education.code), people)` |
||||

`# (he choose a different omitted category)` |
||||

```
``` |
||||

`# Alternatively, fix up the dataset --` |
||||

`people$education.code <- factor(people$education.code)` |
||||

`lm(age ~ education.code, people)` |
||||

```
``` |
||||

`#` |
||||

`# Bottom line:` |
||||

`# Once the dataset has categorical variables correctly represented as factors, i.e. as` |
||||

`str(people)` |
||||

`# doing OLS in R induces automatic generation of dummy variables while leaving one out:` |
||||

`lm(age ~ education, people)` |
||||

`lm(age ~ education.code, people)` |
||||

```
``` |
||||

`# But what if you want the X matrix?` |
||||

`m <- lm(age ~ education, people)` |
||||

`model.matrix(m)` |
||||

`# This is the design matrix that went into the regression m.` |

`@ -0,0 +1,38 @@` |
||||

`Fibonacci Sequence in R` |
||||

`# Program to diplay the Fibonacci` |
||||

`# sequence up to n-th term using` |
||||

`# recursive functions` |
||||

```
``` |
||||

`recurse_fibonacci <- function(n) {` |
||||

` if(n <= 1) {` |
||||

` return(n)` |
||||

` } else {` |
||||

` return(recurse_fibonacci(n-1) + recurse_fibonacci(n-2))` |
||||

` }` |
||||

`}` |
||||

```
``` |
||||

`# take input from the user` |
||||

`nterms = as.integer(readline(prompt="How many terms? "))` |
||||

```
``` |
||||

`# check if the number of terms is valid` |
||||

`if(nterms <= 0) {` |
||||

` print("Plese enter a positive integer")` |
||||

`} else {` |
||||

` print("Fibonacci sequence:")` |
||||

` for(i in 0:(nterms-1)) {` |
||||

` print(recurse_fibonacci(i))` |
||||

` }` |
||||

`}` |
||||

`Output` |
||||

```
``` |
||||

`How many terms? 9` |
||||

`[1] "Fibonacci sequence:"` |
||||

`[1] 0` |
||||

`[1] 1` |
||||

`[1] 1` |
||||

`[1] 2` |
||||

`[1] 3` |
||||

`[1] 5` |
||||

`[1] 8` |
||||

`[1] 13` |
||||

`[1] 21` |

`@ -0,0 +1,14 @@` |
||||

`Find Factorial of a number using recursion` |
||||

`recur_factorial <- function(n) {` |
||||

` if(n <= 1) {` |
||||

` return(1)` |
||||

` } else { ` |
||||

` return(n * recur_factorial(n-1))` |
||||

` }` |
||||

`}` |
||||

```
``` |
||||

```
``` |
||||

`Output` |
||||

```
``` |
||||

`> recur_factorial(5)` |
||||

`[1] 120` |

`@ -0,0 +1,33 @@` |
||||

`Find Minimum and Maximum` |
||||

`> x` |
||||

`[1] 5 8 3 9 2 7 4 6 10` |
||||

```
``` |
||||

`> # find the minimum` |
||||

`> min(x)` |
||||

`[1] 2` |
||||

```
``` |
||||

`> # find the maximum` |
||||

`> max(x)` |
||||

`[1] 10` |
||||

```
``` |
||||

`> # find the range` |
||||

`> range(x)` |
||||

`[1] 2 10` |
||||

`If we want to find where the minimum or maximum is located, i.e. the index instead of the actual value, then we can use which.min() and which.max() functions.` |
||||

```
``` |
||||

`Note that these functions will return the index of the first minimum or maximum in case multiple of them exists.` |
||||

```
``` |
||||

`> x` |
||||

`[1] 5 8 3 9 2 7 4 6 10` |
||||

```
``` |
||||

`> # find index of the minimum` |
||||

`> which.min(x)` |
||||

`[1] 5` |
||||

```
``` |
||||

`> # find index of the minimum` |
||||

`> which.max(x)` |
||||

`[1] 9` |
||||

```
``` |
||||

`> # alternate way to find the minimum` |
||||

`> x[which.min(x)]` |
||||

`[1] 2` |

`@ -0,0 +1,29 @@` |
||||

`Find factors of a number` |
||||

`print_factors <- function(x) {` |
||||

` print(paste("The factors of",x,"are:"))` |
||||

` for(i in 1:x) {` |
||||

` if((x %% i) == 0) {` |
||||

` print(i)` |
||||

` }` |
||||

` }` |
||||

`}` |
||||

`Output` |
||||

```
``` |
||||

`> print_factors(120)` |
||||

`[1] "The factors of 120 are:"` |
||||

`[1] 1` |
||||

`[1] 2` |
||||

`[1] 3` |
||||

`[1] 4` |
||||

`[1] 5` |
||||

`[1] 6` |
||||

`[1] 8` |
||||

`[1] 10` |
||||

`[1] 12` |
||||

`[1] 15` |
||||

`[1] 20` |
||||

`[1] 24` |
||||

`[1] 30` |
||||

`[1] 40` |
||||

`[1] 60` |
||||

`[1] 120` |

`@ -0,0 +1,21 @@` |
||||

`Find sum of natural numbers without formula` |
||||

`# take input from the user` |
||||

`num = as.integer(readline(prompt = "Enter a number: "))` |
||||

```
``` |
||||

`if(num < 0) {` |
||||

` print("Enter a positive number")` |
||||

`} else {` |
||||

` sum = 0` |
||||

```
``` |
||||

` # use while loop to iterate until zero` |
||||

` while(num > 0) {` |
||||

` sum = sum + num` |
||||

` num = num - 1` |
||||

` }` |
||||

```
``` |
||||

` print(paste("The sum is", sum))` |
||||

`}` |
||||

`Output` |
||||

```
``` |
||||

`Enter a number: 10` |
||||

`[1] "The sum is 55"` |

`@ -0,0 +1,20 @@` |
||||

`#Find the factorial of a number` |
||||

`# take input from the user` |
||||

`num = as.integer(readline(prompt="Enter a number: "))` |
||||

`factorial = 1` |
||||

```
``` |
||||

`# check is the number is negative, positive or zero` |
||||

`if(num < 0) {` |
||||

` print("Sorry, factorial does not exist for negative numbers")` |
||||

`} else if(num == 0) {` |
||||

` print("The factorial of 0 is 1")` |
||||

`} else {` |
||||

` for(i in 1:num) {` |
||||

` factorial = factorial * i` |
||||

` }` |
||||

` print(paste("The factorial of", num ,"is",factorial))` |
||||

`}` |
||||

`Output` |
||||

```
``` |
||||

`Enter a number: 8` |
||||

`[1] "The factorial of 8 is 40320"` |

`@ -0,0 +1,8 @@` |
||||

`> runif(1) # generates 1 random number` |
||||

`[1] 0.3984754` |
||||

```
``` |
||||

`> runif(3) # generates 3 random number` |
||||

`[1] 0.8090284 0.1797232 0.6803607` |
||||

```
``` |
||||

`> runif(3, min=5, max=10) # define the range between 5 and 10` |
||||

`[1] 7.099781 8.355461 5.173133` |

`@ -0,0 +1,22 @@` |
||||

`# Goal:` |
||||

`# A stock is traded on 2 exchanges.` |
||||

`# Price data is missing at random on both exchanges owing to non-trading.` |
||||

`# We want to make a single price time-series utilising information` |
||||

`# from both exchanges. I.e., missing data for exchange 1 will` |
||||

`# be replaced by information for exchange 2 (if observed).` |
||||

```
``` |
||||

`# Let's create some example data for the problem.` |
||||

`e1 <- runif(15) # Prices on exchange 1` |
||||

`e2 <- e1 + 0.05*rnorm(15) # Prices on exchange 2.` |
||||

`cbind(e1, e2)` |
||||

`# Blow away 5 points from each at random.` |
||||

`e1[sample(1:15, 5)] <- NA` |
||||

`e2[sample(1:15, 5)] <- NA` |
||||

`cbind(e1, e2)` |
||||

```
``` |
||||

`# Now how do we reconstruct a time-series that tries to utilise both?` |
||||

`combined <- e1 # Do use the more liquid exchange here.` |
||||

`missing <- is.na(combined)` |
||||

`combined[missing] <- e2[missing] # if it's also missing, I don't care.` |
||||

`cbind(e1, e2, combined)` |
||||

`# There you are.` |

`@ -0,0 +1,43 @@` |
||||

`# Goal: Joint distributions, marginal distributions, useful tables.` |
||||

```
``` |
||||

`# First let me invent some fake data` |
||||

`set.seed(102) # This yields a good illustration.` |
||||

`x <- sample(1:3, 15, replace=TRUE)` |
||||

`education <- factor(x, labels=c("None", "School", "College"))` |
||||

`x <- sample(1:2, 15, replace=TRUE)` |
||||

`gender <- factor(x, labels=c("Male", "Female"))` |
||||

`age <- runif(15, min=20,max=60)` |
||||

```
``` |
||||

`D <- data.frame(age, gender, education)` |
||||

`rm(x,age,gender,education)` |
||||

`print(D)` |
||||

```
``` |
||||

`# Table about education` |
||||

`table(D$education)` |
||||

```
``` |
||||

`# Table about education and gender --` |
||||

`table(D$gender, D$education)` |
||||

`# Joint distribution of education and gender --` |
||||

`table(D$gender, D$education)/nrow(D)` |
||||

```
``` |
||||

`# Add in the marginal distributions also` |
||||

`addmargins(table(D$gender, D$education))` |
||||

`addmargins(table(D$gender, D$education))/nrow(D)` |
||||

```
``` |
||||

`# Generate a good LaTeX table out of it --` |
||||

`library(xtable)` |
||||

`xtable(addmargins(table(D$gender, D$education))/nrow(D),` |
||||

` digits=c(0,2,2,2,2)) # You have to do | and \hline manually.` |
||||

```
``` |
||||

`# Study age by education category` |
||||

`by(D$age, D$gender, mean)` |
||||

`by(D$age, D$gender, sd)` |
||||

`by(D$age, D$gender, summary)` |
||||

```
``` |
||||

`# Two-way table showing average age depending on education & gender` |
||||

`a <- matrix(by(D$age, list(D$gender, D$education), mean), nrow=2)` |
||||

`rownames(a) <- levels(D$gender)` |
||||

`colnames(a) <- levels(D$education)` |
||||

`print(a)` |
||||

`# or, of course,` |
||||

`print(xtable(a))` |

`@ -0,0 +1,23 @@` |
||||

`# Goals: Simulate a dataset from a "fixed effects" model, and` |
||||

`# obtain "least squares dummy variable" (LSDV) estimates.` |
||||

`#` |
||||

`# We do this in the context of a familiar "earnings function" -` |
||||

`# log earnings is quadratic in log experience, with parallel shifts by` |
||||

`# education category.` |
||||

```
``` |
||||

`# Create an education factor with 4 levels --` |
||||

`education <- factor(sample(1:4,1000, replace=TRUE),` |
||||

` labels=c("none", "school", "college", "beyond"))` |
||||

`# Simulate an experience variable with a plausible range --` |
||||

`experience <- 30*runif(1000) # experience from 0 to 20 years` |
||||

`# Make the intercept vary by education category between 4 given values --` |
||||

`intercept <- c(0.5,1,1.5,2)[education]` |
||||

```
``` |
||||

`# Simulate the log earnings --` |
||||

`log.earnings <- intercept +` |
||||

` 2*experience - 0.05*experience*experience + rnorm(1000)` |
||||

`A <- data.frame(education, experience, e2=experience*experience, log.earnings)` |
||||

`summary(A)` |
||||

```
``` |
||||

`# The OLS path to LSDV --` |
||||

`summary(lm(log.earnings ~ -1 + education + experience + e2, A))` |

`@ -0,0 +1,31 @@` |
||||

`# Goal: Make a time-series object using the "zoo" package` |
||||

```
``` |
||||

`A <- data.frame(date=c("1995-01-01", "1995-01-02", "1995-01-03", "1995-01-06"),` |
||||

` x=runif(4),` |
||||

` y=runif(4))` |
||||

`A$date <- as.Date(A$date) # yyyy-mm-dd is the default format` |
||||

`# So far there's nothing new - it's just a data frame. I have hand-` |
||||

`# constructed A but you could equally have obtained it using read.table().` |
||||

```
``` |
||||

`# I want to make a zoo matrix out of the numerical columns of A` |
||||

`library(zoo)` |
||||

`B <- A` |
||||

`B$date <- NULL` |
||||

`z <- zoo(as.matrix(B), order.by=A$date)` |
||||

`rm(A, B)` |
||||

```
``` |
||||

`# So now you are holding "z", a "zoo" object. You can do many cool` |
||||

`# things with it.` |
||||

`# See http://www.google.com/search?hl=en&q=zoo+quickref+achim&btnI=I%27m+Feeling+Lucky` |
||||

```
``` |
||||

`# To drop down to a plain data matrix, say` |
||||

`C <- coredata(z)` |
||||

`rownames(C) <- as.character(time(z))` |
||||

`# Compare --` |
||||

`str(C)` |
||||

`str(z)` |
||||

```
``` |
||||

`# The above is a tedious way of doing these things, designed to give you` |
||||

`# an insight into what is going on. If you just want to read a file` |
||||

`# into a zoo object, a very short path is something like:` |
||||

`# z <- read.zoo(filename, format="%d %b %Y")` |

`@ -0,0 +1,20 @@` |
||||

`# Goal: Make pictures in PDF files that can be put into a paper.` |
||||

```
``` |
||||

`xpts <- seq(-3,3,.05)` |
||||

```
``` |
||||

`# Here is my suggested setup for a two-column picture --` |
||||

`pdf("demo2.pdf", width=5.6, height=2.8, bg="cadetblue1", pointsize=8)` |
||||

`par(mai=c(.6,.6,.2,.2))` |
||||

`plot(xpts, sin(xpts*xpts), type="l", lwd=2, col="cadetblue4",` |
||||

` xlab="x", ylab="sin(x*x)")` |
||||

`grid(col="white", lty=1, lwd=.2)` |
||||

`abline(h=0, v=0)` |
||||

```
``` |
||||

`# My suggested setup for a square one-column picture --` |
||||

`pdf("demo1.pdf", width=2.8, height=2.8, bg="cadetblue1", pointsize=8)` |
||||

`par(mai=c(.6,.6,.2,.2))` |
||||

`plot(xpts, sin(xpts*xpts), type="l", lwd=2, col="cadetblue4",` |
||||

` xlab="x", ylab="sin(x*x)")` |
||||

`grid(col="white", lty=1, lwd=.2)` |
||||

`abline(h=0, v=0)` |
||||

```
``` |

`@ -0,0 +1,25 @@` |
||||

`Multiplication Table` |
||||

`# Program to find the multiplication` |
||||

`# table (from 1 to 10)` |
||||

`# of a number input by the user` |
||||

```
``` |
||||

`# take input from the user` |
||||

`num = as.integer(readline(prompt = "Enter a number: "))` |
||||

```
``` |
||||

`# use for loop to iterate 10 times` |
||||

`for(i in 1:10) {` |
||||

` print(paste(num,'x', i, '=', num*i))` |
||||

`}` |
||||

`Output` |
||||

```
``` |
||||

`Enter a number: 7` |
||||

`[1] "7 x 1 = 7"` |
||||

`[1] "7 x 2 = 14"` |
||||

`[1] "7 x 3 = 21"` |
||||

`[1] "7 x 4 = 28"` |
||||

`[1] "7 x 5 = 35"` |
||||

`[1] "7 x 6 = 42"` |
||||

`[1] "7 x 7 = 49"` |
||||

`[1] "7 x 8 = 56"` |
||||

`[1] "7 x 9 = 63"` |
||||

`[1] "7 x 10 = 70"` |

`@ -0,0 +1,13 @@` |
||||

`# Goal: Display two series on one plot, one with a left y axis` |
||||

`# and another with a right y axis.` |
||||

```
``` |
||||

`y1 <- cumsum(rnorm(100))` |
||||

`y2 <- cumsum(rnorm(100, mean=0.2))` |
||||

```
``` |
||||

`par(mai=c(.8, .8, .2, .8))` |
||||

`plot(1:100, y1, type="l", col="blue", xlab="X axis label", ylab="Left legend")` |
||||

`par(new=TRUE)` |
||||

`plot(1:100, y2, type="l", ann=FALSE, yaxt="n")` |
||||

`axis(4)` |
||||

`legend(x="topleft", bty="n", lty=c(1,1), col=c("blue","black"),` |
||||

` legend=c("String 1 (left scale)", "String 2 (right scale)"))` |

`@ -0,0 +1,99 @@` |
||||

`# Goal: Prices and returns` |
||||

```
``` |
||||

`# I like to multiply returns by 100 so as to have "units in percent".` |
||||

`# In other words, I like it for 5% to be a value like 5 rather than 0.05.` |
||||

```
``` |
||||

`###################################################################` |
||||

`# I. Simulate random-walk prices, switch between prices & returns.` |
||||

`###################################################################` |
||||

`# Simulate a time-series of PRICES drawn from a random walk` |
||||

`# where one-period returns are i.i.d. N(mu, sigma^2).` |
||||

`ranrw <- function(mu, sigma, p0=100, T=100) {` |
||||

` cumprod(c(p0, 1 + (rnorm(n=T, mean=mu, sd=sigma)/100)))` |
||||

`}` |
||||

`prices2returns <- function(x) {` |
||||

` 100*diff(log(x))` |
||||

`}` |
||||

`returns2prices <- function(r, p0=100) {` |
||||

` c(p0, p0 * exp(cumsum(r/100)))` |
||||

`}` |
||||

```
``` |
||||

`cat("Simulate 25 points from a random walk starting at 1500 --\n")` |
||||

`p <- ranrw(0.05, 1.4, p0=1500, T=25)` |
||||

` # gives you a 25-long series, starting with a price of 1500, where` |
||||

` # one-period returns are N(0.05,1.4^2) percent.` |
||||

`print(p)` |
||||

```
``` |
||||

`cat("Convert to returns--\n")` |
||||

`r <- prices2returns(p)` |
||||

`print(r)` |
||||

```
``` |
||||

`cat("Go back from returns to prices --\n")` |
||||

`goback <- returns2prices(r, 1500)` |
||||

`print(goback)` |
||||

```
``` |
||||

`###################################################################` |
||||

`# II. Plenty of powerful things you can do with returns....` |
||||

`###################################################################` |
||||

`summary(r); sd(r) # summary statistics` |
||||

`plot(density(r)) # kernel density plot` |
||||

`acf(r) # Autocorrelation function` |
||||

`ar(r) # Estimate a AIC-minimising AR model` |
||||

`Box.test(r, lag=2, type="Ljung") # Box-Ljung test` |
||||

`library(tseries)` |
||||

`runs.test(factor(sign(r))) # Runs test` |
||||

`bds.test(r) # BDS test.` |
||||

```
``` |
||||

`###################################################################` |
||||

`# III. Visualisation and the random walk` |
||||

`###################################################################` |
||||

`# I want to obtain intuition into what kinds of price series can happen,` |
||||

`# given a starting price, a mean return, and a given standard deviation.` |
||||

`# This function simulates out 10000 days of a price time-series at a time,` |
||||

`# and waits for you to click in the graph window, after which a second` |
||||

`# series is painted, and so on. Make the graph window very big and` |
||||

`# sit back and admire.` |
||||

`# The point is to eyeball many series and thus obtain some intuition` |
||||

`# into what the random walk does.` |
||||

`visualisation <- function(p0, s, mu, labelstring) {` |
||||

` N <- 10000` |
||||

` x <- (1:(N+1))/250 # Unit of years` |
||||

` while (1) {` |
||||

` plot(x, ranrw(mu, s, p0, N), ylab="Level", log="y",` |
||||

` type="l", col="red", xlab="Time (years)",` |
||||

` main=paste("40 years of a process much like", labelstring))` |
||||

` grid()` |
||||

` z=locator(1)` |
||||

` }` |
||||

`}` |
||||

```
``` |
||||

`# Nifty -- assuming sigma of 1.4% a day and E(returns) of 13% a year` |
||||

`visualisation(2600, 1.4, 13/250, "Nifty")` |
||||

```
``` |
||||

`# The numerical values here are used to think about what the INR/USD` |
||||

`# exchange rate would have looked like if it started from 31.37, had` |
||||

`# a mean depreciation of 5% per year, and had the daily vol of a floating` |
||||

`# exchange rate like EUR/USD.` |
||||

`visualisation(31.37, 0.7, 5/365, "INR/USD (NOT!) with daily sigma=0.7")` |
||||

`# This is of course not like the INR/USD series in the real world -` |
||||

`# which is neither a random walk nor does it have a vol of 0.7% a day.` |
||||

```
``` |
||||

`# The numerical values here are used to think about what the USD/EUR` |
||||

`# exchange rate, starting with 1, having no drift, and having the observed` |
||||

`# daily vol of 0.7. (This is about right).` |
||||

`visualisation(1, 0.7, 0, "USD/EUR with no drift")` |
||||

```
``` |
||||

`###################################################################` |
||||

`# IV. A monte carlo experiment about the runs test` |
||||

`###################################################################` |
||||

`# Measure the effectiveness of the runs test when faced with an` |
||||

`# AR(1) process of length 100 with a coeff of 0.1` |
||||

`set.seed(101)` |
||||

`one.ts <- function() {arima.sim(list(order = c(1,0,0), ar = 0.1), n=100)}` |
||||

`table(replicate(1000, runs.test(factor(sign(one.ts())))$p.value < 0.05))` |
||||

`# We find that the runs test throws up a prob value of below 0.05` |
||||

`# for 91 out of 1000 experiments.` |
||||

`# Wow! :-)` |
||||

`# To understand this, you need to look up the man pages of:` |
||||

`# set.seed, arima.sim, sign, factor, runs.test, replicate, table.` |
||||

`# e.g. say ?replicate` |

`@ -0,0 +1,41 @@` |
||||

`Print Fibonacci Sequence` |
||||

`# take input from the user` |
||||

`nterms = as.integer(readline(prompt="How many terms? "))` |
||||

```
``` |
||||

`# first two terms` |
||||

`n1 = 0` |
||||

`n2 = 1` |
||||

`count = 2` |
||||

```
``` |
||||

`# check if the number of terms is valid` |
||||

`if(nterms <= 0) {` |
||||

` print("Plese enter a positive integer")` |
||||

`} else {` |
||||

` if(nterms == 1) {` |
||||

` print("Fibonacci sequence:")` |
||||

` print(n1)` |
||||

` } else {` |
||||

` print("Fibonacci sequence:")` |
||||

` print(n1)` |
||||

` print(n2)` |
||||

` while(count < nterms) {` |
||||

` nth = n1 + n2` |
||||

` print(nth)` |
||||

` # update values` |
||||

` n1 = n2` |
||||

` n2 = nth` |
||||

` count = count + 1` |
||||

` }` |
||||

` }` |
||||

`}` |
||||

`Output` |
||||

```
``` |
||||

`How many terms? 7` |
||||

`[1] "Fibonacci sequence:"` |
||||

`[1] 0` |
||||

`[1] 1` |
||||

`[1] 1` |
||||

`[1] 2` |
||||

`[1] 3` |
||||

`[1] 5` |
||||

`[1] 8 ` |

`@ -0,0 +1,30 @@` |
||||

`Program to Find GCD` |
||||

`# Program to find the` |
||||

`# H.C.F of two input number` |
||||

```
``` |
||||

`# define a function` |
||||

`hcf <- function(x, y) {` |
||||

` # choose the smaller number` |
||||

` if(x > y) {` |
||||

` smaller = y` |
||||

` } else {` |
||||

` smaller = x` |
||||

` }` |
||||

` for(i in 1:smaller) {` |
||||

` if((x %% i == 0) && (y %% i == 0)) {` |
||||

` hcf = i` |
||||

` }` |
||||

` }` |
||||

` return(hcf)` |
||||

`}` |
||||

```
``` |
||||

`# take input from the user` |
||||

`num1 = as.integer(readline(prompt = "Enter first number: "))` |
||||

`num2 = as.integer(readline(prompt = "Enter second number: "))` |
||||

```
``` |
||||

`print(paste("The H.C.F. of", num1,"and", num2,"is", hcf(num1, num2)))` |
||||

`Output` |
||||

```
``` |
||||

`Enter first number: 72` |
||||

`Enter second number: 120` |
||||

`[1] "The H.C.F. of 72 and 120 is 24"` |

`@ -0,0 +1,52 @@` |
||||

`# Get the data in place --` |
||||

`load(file="demo.rda")` |
||||

`summary(firms)` |
||||

```
``` |
||||

`# Look at it --` |
||||

`plot(density(log(firms$mktcap)))` |
||||

`plot(firms$mktcap, firms$spread, type="p", cex=.2, col="blue", log="xy",` |
||||

` xlab="Market cap (Mln USD)", ylab="Bid/offer spread (bps)")` |
||||

`m=lm(log(spread) ~ log(mktcap), firms)` |
||||

`summary(m)` |
||||

```
``` |
||||

`# Making deciles --` |
||||

`library(gtools)` |
||||

`library(gdata)` |
||||

` # for deciles (default=quartiles)` |
||||

`size.category = quantcut(firms$mktcap, q=seq(0, 1, 0.1), labels=F)` |
||||

`table(size.category)` |
||||

`means = aggregate(firms, list(size.category), mean)` |
||||

`print(data.frame(means$mktcap,means$spread))` |
||||

```
``` |
||||

`# Make a picture combining the sample mean of spread (in each decile)` |
||||

`# with the weighted average sample mean of the spread (in each decile),` |
||||

`# where weights are proportional to size.` |
||||

`wtd.means = by(firms, size.category,` |
||||

` function(piece) (sum(piece$mktcap*piece$spread)/sum(piece$mktcap)))` |
||||

`lines(means$mktcap, means$spread, type="b", lwd=2, col="green", pch=19)` |
||||

`lines(means$mktcap, wtd.means, type="b", lwd=2, col="red", pch=19)` |
||||

`legend(x=0.25, y=0.5, bty="n",` |
||||

` col=c("blue", "green", "red"),` |
||||

` lty=c(0, 1, 1), lwd=c(0,2,2),` |
||||

` pch=c(0,19,19),` |
||||

` legend=c("firm", "Mean spread in size deciles",` |
||||

` "Size weighted mean spread in size deciles"))` |
||||

```
``` |
||||

`# Within group standard deviations --` |
||||

`aggregate(firms, list(size.category), sd)` |
||||

```
``` |
||||

`# Now I do quartiles by BOTH mktcap and spread.` |
||||

`size.quartiles = quantcut(firms$mktcap, labels=F)` |
||||

`spread.quartiles = quantcut(firms$spread, labels=F)` |
||||

`table(size.quartiles, spread.quartiles)` |
||||

`# Re-express everything as joint probabilities` |
||||

`table(size.quartiles, spread.quartiles)/nrow(firms)` |
||||

`# Compute cell means at every point in the joint table:` |
||||

`aggregate(firms, list(size.quartiles, spread.quartiles), mean)` |
||||

```
``` |
||||

`# Make pretty two-way tables` |
||||

`aggregate.table(firms$mktcap, size.quartiles, spread.quartiles, nobs)` |
||||

`aggregate.table(firms$mktcap, size.quartiles, spread.quartiles, mean)` |
||||

`aggregate.table(firms$mktcap, size.quartiles, spread.quartiles, sd)` |
||||

`aggregate.table(firms$spread, size.quartiles, spread.quartiles, mean)` |
||||

`aggregate.table(firms$spread, size.quartiles, spread.quartiles, sd)` |

`@ -0,0 +1,11 @@` |
||||

`> # We can use the print() function` |
||||

`> print("Hello World!")` |
||||

`[1] "Hello World!"` |
||||

```
``` |
||||

`> # Quotes can be suppressed in the output` |
||||

`> print("Hello World!", quote = FALSE)` |
||||

`[1] Hello World!` |
||||

```
``` |
||||

`> # If there are more than 1 item, we can concatenate using paste()` |
||||

`> print(paste("How","are","you?"))` |
||||

`[1] "How are you?"` |

`@ -0,0 +1,12 @@` |
||||

`# Goal: R syntax where model specification is an argument to a function.` |
||||

```
``` |
||||

`# Invent a dataset` |
||||

`x <- runif(100); y <- runif(100); z <- 2 + 3*x + 4*y + rnorm(100)` |
||||

`D <- data.frame(x=x, y=y, z=z)` |
||||

```
``` |
||||

`amodel <- function(modelstring) {` |
||||

` summary(lm(modelstring, D))` |
||||

`}` |
||||

```
``` |
||||

`amodel(z ~ x)` |
||||

`amodel(z ~ y)` |

`@ -0,0 +1,3 @@` |
||||

`# R programming language` |
||||

```
``` |
||||

`R is a programming language designed for statistical computing and graphics purposes. Contains code that can be executed within the R software environment.` |

`@ -0,0 +1,43 @@` |
||||

`# Goal: Reading and writing ascii files, reading and writing binary files.` |
||||

`# And, to measure how much faster it is working with binary files.` |
||||

```
``` |
||||

`# First manufacture a tall data frame:` |
||||

` # FYI -- runif(10) yields 10 U(0,1) random numbers.` |
||||

`B = data.frame(x1=runif(100000), x2=runif(100000), x3=runif(100000))` |
||||

`summary(B)` |
||||

```
``` |
||||

`# Write out ascii file:` |
||||

`write.table(B, file = "/tmp/foo.csv", sep = ",", col.names = NA)` |
||||

`# Read in this resulting ascii file:` |
||||

`C=read.table("/tmp/foo.csv", header = TRUE, sep = ",", row.names=1)` |
||||

`# Write a binary file out of dataset C:` |
||||

`save(C, file="/tmp/foo.binary")` |
||||

`# Delete the dataset C:` |
||||

`rm(C)` |
||||

`# Restore from foo.binary:` |
||||

`load("/tmp/foo.binary")` |
||||

`summary(C) # should yield the same results` |
||||

` # as summary(B) above.` |
||||

```
``` |
||||

```
``` |
||||

`# Now we time all these operations --` |
||||

`cat("Time creation of dataset:\n")` |
||||

`system.time({` |
||||

` B = data.frame(x1=runif(100000), x2=runif(100000), x3=runif(100000))` |
||||

`})` |
||||

```
``` |
||||

`cat("Time writing an ascii file out of dataset B:\n")` |
||||

`system.time(` |
||||

` write.table(B, file = "/tmp/foo.csv", sep = ",", col.names = NA)` |
||||

` )` |
||||

```
``` |
||||

`cat("Time reading an ascii file into dataset C:\n")` |
||||

`system.time(` |
||||

` {C=read.table("/tmp/foo.csv", header = TRUE, sep=",", row.names=1)` |
||||

` })` |
||||

```
``` |
||||

`cat("Time writing a binary file out of dataset C:\n")` |
||||

`system.time(save(C, file="/tmp/foo.binary"))` |
||||

```
``` |
||||

`cat("Time reading a binary file + variablenames from /tmp/foo.binary:\n")` |
||||

`system.time(load("/tmp/foo.binary")) # and then read it in from binary file` |

`@ -0,0 +1,15 @@` |
||||

`# Goal: Reading in a Microsoft .xls file directly` |
||||

```
``` |
||||

`library(gdata)` |
||||

`a <- read.xls("file.xls", sheet=2) # This reads in the 2nd sheet` |
||||

```
``` |
||||

`# Look at what the cat dragged in` |
||||

`str(a)` |
||||

```
``` |
||||

`# If you have a date column, you'll want to fix it up like this:` |
||||

`a$date <- as.Date(as.character(a$X), format="%d-%b-%y")` |
||||

`a$X <- NULL` |
||||

```
``` |
||||

```
``` |
||||

`# Also see http://tolstoy.newcastle.edu.au/R/help/06/04/25674.html for` |
||||

`# another path.` |

`@ -0,0 +1,17 @@` |
||||

`# Goal: To read in files produced by CMIE's "Business Beacon".` |
||||

`# This assumes you have made a file of MONTHLY data using CMIE's` |
||||

`# Business Beacon program. This contains 2 columns: M3 and M0.` |
||||

```
``` |
||||

`A <- read.table(` |
||||

` # Generic to all BB files --` |
||||

` sep="|", # CMIE's .txt file is pipe delimited` |
||||

` skip=3, # Skip the 1st 3 lines` |
||||

` na.strings=c("N.A.","Err"), # The ways they encode missing data` |
||||

` # Specific to your immediate situation --` |
||||

` file="bb_data.text",` |
||||

` col.names=c("junk", "date", "M3", "M0")` |
||||

` )` |
||||

`A$junk <- NULL # Blow away this column` |
||||

```
``` |
||||

`# Parse the CMIE-style "Mmm yy" date string that's used on monthly data` |
||||

`A$date <- as.Date(paste("1", as.character(A$date)), format="%d %b %Y")` |

`@ -0,0 +1,16 @@` |
||||

`> # sample with replacement` |
||||

`> sample(x, replace = TRUE)` |
||||

`[1] 15 17 13 9 5 15 11 15 1` |
||||

```
``` |
||||

`> # if we simply pass in a positive number n, it will sample` |
||||

`> # from 1:n without replacement` |
||||

`> sample(10)` |
||||

` [1] 2 4 7 9 1 3 10 5 8 6` |
||||

```
``` |
||||

```
``` |
||||

```
``` |
||||

```
``` |
||||

` An example to simulate a coin toss for 10 times.` |
||||

```
``` |
||||

`> sample(c("H","T"),10, replace = TRUE)` |
||||

` [1] "H" "H" "H" "T" "H" "T" "H" "H" "H" "T"` |

`@ -0,0 +1,40 @@` |
||||

`# Goals: Scare the hell out of children with the Cauchy distribution.` |
||||

```
``` |
||||

`# A function which simulates N draws from one of two distributions,` |
||||

`# and returns the mean obtained thusly.` |
||||

`one.simulation <- function(N=100, distribution="normal") {` |
||||

` if (distribution == "normal") {` |
||||

` x <- rnorm(N)` |
||||

` } else {` |
||||

` x <- rcauchy(N)` |
||||

` }` |
||||

` mean(x)` |
||||

`}` |
||||

```
``` |
||||

`k1 <- density(replicate(1000, one.simulation(20)))` |
||||

`k2 <- density(replicate(1000, one.simulation(20, distribution="cauchy")))` |
||||

```
``` |
||||

`xrange <- range(k1$x, k2$x)` |
||||

`plot(k1$x, k1$y, xlim=xrange, type="l", xlab="Estimated value", ylab="")` |
||||

`grid()` |
||||

`lines(k2$x, k2$y, col="red")` |
||||

`abline(v=.5)` |
||||

`legend(x="topleft", bty="n",` |
||||

` lty=c(1,1),` |
||||

` col=c("black", "red"),` |
||||

` legend=c("Mean of Normal", "Mean of Cauchy"))` |
||||

`# The distribution of the mean of normals collapses into a point;` |
||||

`# that of the cauchy does not.` |
||||