####################################################################### ####################################################################### ###### Analysis of NW Pacific tropical storm record, 1959-2000. ###### ###### ###### ###### This script illustrates the following features of R: ###### ###### 1) Data input from an ASCII file. ###### ###### 2) Merging two datasets. ###### ###### 3) Graphical capabilities including generation of JPEG ###### ###### and PostScript files. ###### ###### 4) User-defined functions. ###### ###### 5) Generalised Linear Modelling capabilities. ###### ####################################################################### ####################################################################### # First: read storm data from file, interpreting first line of # line as a list of variable names ####################################################################### storm.data <- read.table(file="nstorms.dat",header=T) attach(storm.data) ####################################################################### # Plot the data, and make the plot into a JPEG image and a # PostScript file. The "text.width" argument to legend() is # used to make the legend appear correctly in the JPEG image. ####################################################################### par(ask=T) # Pause between plots ylim <- c(0,max(Storms)) plot(Year,Storms,type="l",lwd=2,col=2,xlab="Year", ylab="Number of events",ylim=ylim) lines(Year,Typhoons,lwd=2,lty=2,col=3) lines(Year,Intense,lwd=2,lty=3,col=4) legend(1958,10,lwd=c(2,2,2),lty=c(1,2,3),col=c(2,3,4), legend=c("Tropical Storms","Typhoons","Intense typhoons"), text.width=10) title("Tropical storm numbers in North-West Pacific, 1959-2000") dev.copy(device=jpeg,file="nstorms.jpg",quality=90) dev.off() dev.copy(device=postscript,file="nstorms.ps",paper="a4") dev.off() ####################################################################### # Now let's look at the distributions of cyclone numbers, using # histograms. The par(mfrow=c(3,1) puts 3 plots on a page. Since # we'll be repeating essentially the same procedure 3 times, we # have written our own R function (custom.hist) to do this. The # argument statloc() is the LOCation for summary STATistics on # the plot. ####################################################################### custom.hist <- function(x,breaks,colour,name,statloc) { # # Customized histograms, including summary statistics # hist(x,breaks=breaks,freq=F,col=colour, main=paste("Distribution of",name,"numbers"), xlab = "Number of events",ylab="Relative frequency") text(statloc[1],statloc[2], paste("Mean =",round(mean(x,na.rm=T),2), "\n\nVariance =",round(var(x,na.rm=T),2)), cex=1.2,adj=0) } par(mfrow=c(3,1)) custom.hist(Storms,seq(0,40,2),2,"storm",c(5,0.05)) custom.hist(Typhoons,seq(0,40,2),3,"typhoon",c(30,0.05)) custom.hist(Intense,seq(0,40,2),4,"intense typhoon",c(20,0.05)) dev.copy(device=postscript,file="histogram.ps",paper="a4",horizontal=F) dev.off() ####################################################################### # OK, now let's have a look at monthly Nino3 data from the # previous year, and see if we can find relationships with # storm numbers. First read the Nino3 data and merge it with # the storm data. When R merges the frames, it notices that they # both have a column called 'Year', so it matches cases accordingly. # By adding 1 to the years in the Nino3 frame before merging, # we match the storm data to the Nino3 data in the *previous* year. # Having made changes to the storm.data frame, we need to re-attach # it, to force R to re-read the variable definitions. ####################################################################### nino3.data <- read.table("nino3.dat",header=T) nino3.data$Year <- nino3.data$Year + 1 storm.data <- merge(storm.data,nino3.data) detach(storm.data) attach(storm.data) ####################################################################### # Now let's plot some scatterplots showing relationship of # predictors to tropical storms ####################################################################### monthlabs <- c("January","February","March","April","May","June", "July","August","September","October","November","December") par(mfrow=c(4,3)) for (i in 1:12) { plot(storm.data[,4+i],Typhoons,col=3,pch=15,xlab="Nino3 (degrees C)", ylab="Number of typhoons") title(paste(monthlabs[i]," Nino3\n\nCorrelation = ", round(cor(storm.data[,4+i],Typhoons),3)," (p = ", round(cor.test(storm.data[,4+i],Typhoons)$p.value,3), ")",sep="")) } dev.copy(device=jpeg,file="typh_vs_nino3.jpg",quality=90,width=700,height=700) dev.off() dev.copy(device=postscript,file="typh_vs_nino3.ps",paper="a4",horizontal=F) dev.off() ####################################################################### # Now let's fit some Poisson Generalized Linear Models. An # explanation of what we're doing may be found in the lecture # notes. ####################################################################### # #sink("tc_anal.res") # Uncomment this line to send output to a file storm.model1 <- glm(Typhoons ~ N3.m09,family=poisson(link="log")) cat("RESULTS FOR MODEL 1:\n\n") print(summary(storm.model1)) print(anova(storm.model1,test="Chi")) cat("\n\nRESULTS FOR MODEL 2:\n\n") storm.model2 <- glm(Typhoons ~ N3.m09 + N3.m08,family=poisson(link="log")) print(summary(storm.model2)) print(anova(storm.model2,test="Chi")) cat("\n\nRESULTS FOR MODEL 3:\n\n") storm.model3 <- glm(Typhoons ~ N3.m08 + N3.m09 + N3.m10 + N3.m11 + N3.m12, family=poisson(link="log")) print(summary(storm.model3)) cat("\n\nCOMPARISON OF MODEL 1 AND MODEL 3:\n\n") print(anova(storm.model1,storm.model3,test="Chi")) ####################################################################### # Finally, some residual checks for model 1. NB R calculates # Pearson residuals using the variance from the *penultimate* # iteration in model fitting - this leads to minor discrepancies # compared with a manual calculation. This is something to watch # out for in general with GLMs in R and Splus. ####################################################################### par(mfrow=c(2,2)) plot(storm.model1) dev.copy(device=postscript,file="mdl1_check.ps",paper="a4") dev.off() pearson.model1 <- resid(storm.model1,type="pearson") par(mfrow=c(1,1)) plot(Year,pearson.model1,lwd=2,type="l",xlab="Year",ylab="Residual") abline(0,0) title("Pearson residuals from typhoon prediction model, by year") dev.copy(device=postscript,file="mdl1_prts.ps",paper="a4") dev.off() cat("\n\nPEARSON RESIDUALS FROM MODEL 1:\n\n") meanres <- mean(pearson.model1) varres <- sum((pearson.model1-mean(pearson.model1))^2) / # Can't use storm.model1$df.residual # var() here as # the degrees # of freedom # would be # incorrect cat("Mean Pearson residual: ",round(meanres,3), "\nVariance of Pearson residuals: ",round(varres,3),"\n") MSE <- sum(resid(storm.model1,type="response")^2)/storm.model1$df.residual Rsquared <- 1 - (MSE/var(storm.model1$y)) cat("Adjusted R squared for this model: ",round(Rsquared,3),"\n\n") ####################################################################### # Here are some observed and expected frequencies. We need to be # a bit clever here, to get the observed frequencies ... ####################################################################### obsvexp <- list(Nstorms="numeric",Observed="numeric", Expected="numeric") obsvexp$Nstorms <- 0:30 obsvexp$Observed <- c(tabulate(Typhoons+1,nbins=31)) obsvexp$Expected <- rep(0,length(obsvexp$Nstorms)) mu <- fitted(storm.model1) for (i in 1:length(obsvexp$Expected)) { obsvexp$Expected[i] <- sum(dpois(i,mu)) } obsvexp.table <- rbind(obsvexp$Observed,obsvexp$Expected) par(mfrow=c(1,1)) barplot(obsvexp.table,beside=T,names=obsvexp$Nstorms, xlab="Number of events in year",ylab="Number of years") legend(0,4,fill=heat.colors(2),cex=c(1.2,1.2), legend=c("Observed","Expected"),text.width=15) title(paste("Observed distribution of annual typhoon numbers", "\ncompared with that expected under Poisson GLM")) dev.copy(device=postscript,file="typh_obsvexpfreq.ps",paper="a4", horizontal=F) dev.off() #sink() # Uncomment this line to return output to screen