# GLM Simple - Generalized Linear Models # this example is a toy problem # it illustrate the concepts # behind GLMs # #GLM is similar to LM # except GLM fits a curve to the data # instead of fitting a line to the data ## # Author: PatriciaHoffman ############################################################################### rm(list=ls()) #create a data set with the f(p) = p^2 p <- (1:999)/1000 squaredp <- p^2 plot(p,squaredp,xlab = "p", ylab = "squaredp",type = "l",pch = 1) mean(squaredp) sd(squaredp) squaredpTestSet <- data.frame(pLabel=p,targetLabel=squaredp) target <- c(squaredp[seq(from=4,to=100,by=10)], squaredp[seq(from=101,to=900,by=100)],squaredp[seq(from=901,to=999,by=10)] ) str(target) ptrain <- c(p[seq(from=4,to=100,by=10)], p[seq(from=101,to=900,by=100)],p[seq(from=901,to=999,by=10)] ) squareDataSet<-data.frame(pLabel=ptrain,targetLabel=target) squareDataSet plot(target ~ ptrain, data = squareDataSet, xlab = "p value", ylab = "target", type = "p",pch = 1) # xlim = c(.5,2.5), ylim = c(0,1), #how well does a linear model fit the data? linearModel <- lm(target~ptrain,data = squareDataSet) summary(linearModel) #Coefficients: # Estimate Std. Error t value Pr(>|t|) #(Intercept) -0.08763 0.02312 -3.789 0.000808 *** #ptrain 0.99502 0.03683 27.015 < 2e-16 *** # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 plot(ptrain,target) abline(a=-0.08763,b= 0.99502) #Calculate the error predtarget <- predict(linearModel,squareDataSet) sqerror = sqrt(sum((target-predtarget)^2)) print("As cross validation has NOT been done") print("this is just the error of these points") print("estimated by this line") sqerror # next use a glm to model the data # glm is the same as lm when the following # arguments are given linearGLM <- glm(target ~ ptrain, family = gaussian(link = "identity"), data = squareDataSet) linearGLM # #Coefficients: # (Intercept) ptrain # -0.08763 0.99502 #notice that this returns the same result at the # lm model str(squareDataSet) gaussLog <- glm(target ~ ptrain, family = poisson(link = "sqrt"), data = squareDataSet) gaussLog predtarget <-predict(gaussLog, squareDataSet, type = "response") sqerror = sqrt(sum((target-predtarget)^2)) print("As cross validation has NOT been done") print("this is just the error of these points") print("estimated by this model") sqerror # plot the model with the input data plot(ptrain, target) lines(ptrain, predict(gaussLog, squareDataSet, type = "response"))