#######################################################################
#######################################################################
######	Analysis of NW Pacific tropical storm record, 1959-2000. ######
######	                                                         ######
######	This script illustrates the following features of R:     ######
######	1) Data input from an ASCII file.                        ######
######	2) Merging two datasets.                                 ######
######	3) Graphical capabilities including generation of JPEG   ######
######	   and PostScript files.                                 ######
######	4) User-defined functions.                               ######
######	5) Generalised Linear Modelling capabilities.            ######
#######################################################################
#######################################################################
#	First: read storm data from file, interpreting first line of
#	line as a list of variable names
#######################################################################

storm.data <- read.table(file="nstorms.dat",header=T)
attach(storm.data)

#######################################################################
#	Plot the data, and make the plot into a JPEG image and a
#	PostScript file. The "text.width" argument to legend() is
#	used to make the legend appear correctly in the JPEG image.
#######################################################################

par(ask=T)						# Pause between plots

ylim <- c(0,max(Storms))
plot(Year,Storms,type="l",lwd=2,col=2,xlab="Year",
			ylab="Number of events",ylim=ylim)
lines(Year,Typhoons,lwd=2,lty=2,col=3)
lines(Year,Intense,lwd=2,lty=3,col=4)
legend(1958,10,lwd=c(2,2,2),lty=c(1,2,3),col=c(2,3,4),
	legend=c("Tropical Storms","Typhoons","Intense typhoons"),
	text.width=10)
title("Tropical storm numbers in North-West Pacific, 1959-2000")
dev.copy(device=jpeg,file="nstorms.jpg",quality=90)
dev.off()
dev.copy(device=postscript,file="nstorms.ps",paper="a4")
dev.off()

#######################################################################
#	Now let's look at the distributions of cyclone numbers, using
#	histograms. The par(mfrow=c(3,1) puts 3 plots on a page. Since
#	we'll be repeating essentially the same procedure 3 times, we 
#	have written our own R function (custom.hist) to do this. The
#	argument statloc() is the LOCation for summary STATistics on
#	the plot.
#######################################################################

custom.hist <- function(x,breaks,colour,name,statloc) {
#
# Customized histograms, including summary statistics
#
	hist(x,breaks=breaks,freq=F,col=colour,
		main=paste("Distribution of",name,"numbers"),
		xlab = "Number of events",ylab="Relative frequency")
	text(statloc[1],statloc[2],
		paste("Mean =",round(mean(x,na.rm=T),2),
			"\n\nVariance =",round(var(x,na.rm=T),2)),
		cex=1.2,adj=0)
}

par(mfrow=c(3,1))
custom.hist(Storms,seq(0,40,2),2,"storm",c(5,0.05))
custom.hist(Typhoons,seq(0,40,2),3,"typhoon",c(30,0.05))
custom.hist(Intense,seq(0,40,2),4,"intense typhoon",c(20,0.05))
dev.copy(device=postscript,file="histogram.ps",paper="a4",horizontal=F)
dev.off()

#######################################################################
#	OK, now let's have a look at monthly Nino3 data from the 
#	previous year, and see if we can find relationships with
#	storm numbers. First read the Nino3 data and merge it with
#	the storm data. When R merges the frames, it notices that they 
#	both have a column called 'Year', so it matches cases accordingly. 
#	By adding 1 to the years in the Nino3 frame before merging,
# 	we match the storm data to the Nino3 data in the *previous* year.
#	Having made changes to the storm.data frame, we need to re-attach
#	it, to force R to re-read the variable definitions.
#######################################################################

nino3.data <- read.table("nino3.dat",header=T)
nino3.data$Year <- nino3.data$Year + 1
storm.data <- merge(storm.data,nino3.data)
detach(storm.data)
attach(storm.data)

#######################################################################
#	Now let's plot some scatterplots showing relationship of
#	predictors to tropical storms
#######################################################################

monthlabs <- c("January","February","March","April","May","June",
	"July","August","September","October","November","December")

par(mfrow=c(4,3))
for (i in 1:12) {
	plot(storm.data[,4+i],Typhoons,col=3,pch=15,xlab="Nino3 (degrees C)",
	ylab="Number of typhoons")
	title(paste(monthlabs[i]," Nino3\n\nCorrelation = ",
		round(cor(storm.data[,4+i],Typhoons),3)," (p = ",
		round(cor.test(storm.data[,4+i],Typhoons)$p.value,3),
		")",sep=""))
}
dev.copy(device=jpeg,file="typh_vs_nino3.jpg",quality=90,width=700,height=700)
dev.off()
dev.copy(device=postscript,file="typh_vs_nino3.ps",paper="a4",horizontal=F)
dev.off()

#######################################################################
#	Now let's fit some Poisson Generalized Linear Models. An 
#	explanation of what we're doing may be found in the lecture 
#	notes.
#######################################################################
#
#sink("tc_anal.res")	# Uncomment this line to send output to a file

storm.model1 <- glm(Typhoons ~ N3.m09,family=poisson(link="log"))
cat("RESULTS FOR MODEL 1:\n\n")
print(summary(storm.model1))
print(anova(storm.model1,test="Chi"))
cat("\n\nRESULTS FOR MODEL 2:\n\n")
storm.model2 <- glm(Typhoons ~ N3.m09 + N3.m08,family=poisson(link="log"))
print(summary(storm.model2))
print(anova(storm.model2,test="Chi"))
cat("\n\nRESULTS FOR MODEL 3:\n\n")
storm.model3 <- glm(Typhoons ~ N3.m08 + N3.m09 + N3.m10 + N3.m11 + N3.m12,
						family=poisson(link="log"))
print(summary(storm.model3))
cat("\n\nCOMPARISON OF MODEL 1 AND MODEL 3:\n\n")
print(anova(storm.model1,storm.model3,test="Chi"))

#######################################################################
#	Finally, some residual checks for model 1. NB R calculates
#	Pearson residuals using the variance from the *penultimate*
#	iteration in model fitting - this leads to minor discrepancies
#	compared with a manual calculation. This is something to watch
#	out for in general with GLMs in R and Splus.
#######################################################################

par(mfrow=c(2,2))
plot(storm.model1)
dev.copy(device=postscript,file="mdl1_check.ps",paper="a4")
dev.off()
pearson.model1 <- resid(storm.model1,type="pearson")
par(mfrow=c(1,1))
plot(Year,pearson.model1,lwd=2,type="l",xlab="Year",ylab="Residual")
abline(0,0)
title("Pearson residuals from typhoon prediction model, by year")

dev.copy(device=postscript,file="mdl1_prts.ps",paper="a4")
dev.off()
cat("\n\nPEARSON RESIDUALS FROM MODEL 1:\n\n")
meanres <- mean(pearson.model1)
varres <- sum((pearson.model1-mean(pearson.model1))^2) /	# Can't use 
				storm.model1$df.residual	# var() here as
								# the degrees
								# of freedom
								# would be 
								# incorrect
cat("Mean Pearson residual: ",round(meanres,3),
	"\nVariance of Pearson residuals: ",round(varres,3),"\n")
MSE <- sum(resid(storm.model1,type="response")^2)/storm.model1$df.residual
Rsquared <- 1 - (MSE/var(storm.model1$y))
cat("Adjusted R squared for this model: ",round(Rsquared,3),"\n\n")

#######################################################################
# 	Here are some observed and expected frequencies. We need to be  
# 	a bit clever here, to get the observed frequencies ...
#######################################################################

obsvexp <- list(Nstorms="numeric",Observed="numeric",
					Expected="numeric")
obsvexp$Nstorms <- 0:30
obsvexp$Observed <- c(tabulate(Typhoons+1,nbins=31))
obsvexp$Expected <- rep(0,length(obsvexp$Nstorms))
mu <- fitted(storm.model1)
for (i in 1:length(obsvexp$Expected)) {
	obsvexp$Expected[i] <- sum(dpois(i,mu))
}
obsvexp.table <- rbind(obsvexp$Observed,obsvexp$Expected)
par(mfrow=c(1,1))
barplot(obsvexp.table,beside=T,names=obsvexp$Nstorms,
	xlab="Number of events in year",ylab="Number of years")
legend(0,4,fill=heat.colors(2),cex=c(1.2,1.2),
		legend=c("Observed","Expected"),text.width=15)
title(paste("Observed distribution of annual typhoon numbers",
		"\ncompared with that expected under Poisson GLM"))
dev.copy(device=postscript,file="typh_obsvexpfreq.ps",paper="a4",
							horizontal=F)
dev.off()
 
#sink()	# Uncomment this line to return output to screen