Commit c97d2992 by Sanghoon

### Upload New File

parent ff61d903
 # Problem: can a naive Bayes classifier predict a lawmaker's party from
# their voting record?
#
# Data layout, as implied by how the columns are indexed below:
#   * VotingData.csv has a "Party" column holding "democrat"/"republican";
#     every column from the third onwards is one vote, "y" meaning yes.
#     Any value other than "y" is treated as "not yes".
#   * ArbitraryMember.csv holds a single row with only the vote columns.
#
# Run this in a fresh R session. The script deliberately avoids
# rm(list = ls()) and setwd(), so sourcing it never wipes the caller's
# workspace or moves their working directory.

# Row order of ProbMat. Fixed explicitly so that a row's label can never
# drift away from the data that was used to fill it.
PartyLabels <- c("democrat", "republican")

# Laplace-smoothed probability of a "y" in every column of `Votes`.
#
# Votes: data frame with one column per vote.
# m:     equivalent sample size of the Laplacian correction.
# p:     prior probability of a "y" vote.
# Returns a numeric vector with one probability per column of `Votes`.
YesProb_fn <- function(Votes, m, p) {
  vapply(
    Votes,
    function(col) (sum(col == "y", na.rm = TRUE) + m * p) / (length(col) + m),
    numeric(1)
  )
}

# Classify one member with the naive Bayes model stored in ProbMat.
#
# TestVec: the member's vote record, either a character vector or a
#          one-row data frame, with one entry per vote column of ProbMat.
# ProbMat: matrix with one row per party. Column j (for each vote) is
#          P(vote j = "y" | party); the last column is the party's
#          marginal probability. Row names are used as the party labels.
# Returns a list with
#   AssignedVec   - numeric vector, one unnormalised joint probability per
#                   row of ProbMat (prior times the vote likelihoods);
#   AssignedParty - label of the row with the highest probability.
VotingModel_fn <- function(TestVec, ProbMat) {
  n_votes <- ncol(ProbMat) - 1L

  if (is.data.frame(TestVec) && nrow(TestVec) != 1L) {
    stop("TestVec must hold exactly one member's votes", call. = FALSE)
  }
  if (length(TestVec) != n_votes) {
    stop(
      "TestVec must contain ", n_votes, " votes, not ", length(TestVec),
      call. = FALSE
    )
  }

  # Flatten to a plain character vector so that factors, one-row data
  # frames and character vectors are all compared the same way.
  votes <- vapply(
    TestVec,
    function(v) as.character(v),
    character(1),
    USE.NAMES = FALSE
  )
  is_yes <- !is.na(votes) & votes == "y"

  # P(observed vote | party): p for a "y", 1 - p for anything else.
  vote_probs <- ProbMat[, seq_len(n_votes), drop = FALSE]
  yes_mask <- matrix(
    is_yes,
    nrow = nrow(ProbMat), ncol = n_votes, byrow = TRUE
  )
  likelihood <- ifelse(yes_mask, vote_probs, 1 - vote_probs)

  # Naive Bayes: multiply the vote likelihoods with the party's prior.
  Probs <- unname(
    apply(cbind(likelihood, ProbMat[, n_votes + 1L]), 1, prod)
  )

  party_names <- rownames(ProbMat)
  if (is.null(party_names)) {
    party_names <- PartyLabels
  }

  list(
    AssignedVec = Probs,
    AssignedParty = party_names[which.max(Probs)]
  )
}

# ---- Locate and load the data ----

# The CSV files are expected next to this script. Inside RStudio that is
# the folder of the document open in the editor; elsewhere the current
# working directory is used.
data_dir <- getwd()
if (requireNamespace("rstudioapi", quietly = TRUE) &&
      rstudioapi::isAvailable()) {
  editor_path <- rstudioapi::getSourceEditorContext()$path
  if (length(editor_path) == 1L && nzchar(editor_path)) {
    data_dir <- dirname(editor_path)
  }
}

data <- read.csv(file.path(data_dir, "VotingData.csv"), header = TRUE)

# ---- Split into training and test sets ----

TrainingPct <- 0.6 # Share of the data the model is trained on
TrainingSample <- floor(TrainingPct * nrow(data)) # Training observations
TestSample <- nrow(data) - TrainingSample # Test observations

# The split is positional: the first rows train, the remaining rows test.
TrainingData <- head(data, TrainingSample)
TestData <- tail(data, TestSample)

VoteCols <- names(data)[-(1:2)] # Everything after the first two columns

# ---- Estimate the model ----

m <- 2 # Equivalent sample size for Laplacian correction
p <- 1 / 2 # Prior probability for Laplacian correction

# One row per party: P("y" | party) for every vote, then the marginal
# probability of the party in the last column.
ProbMat <- matrix(
  0,
  nrow = length(PartyLabels), ncol = length(VoteCols) + 1L,
  dimnames = list(PartyLabels, c(VoteCols, "MargProb"))
)
for (party in PartyLabels) {
  PartyData <- TrainingData[
    which(TrainingData$Party == party), VoteCols,
    drop = FALSE
  ]
  ProbMat[party, seq_along(VoteCols)] <- YesProb_fn(PartyData, m, p)
  ProbMat[party, "MargProb"] <- nrow(PartyData) / nrow(TrainingData)
}

# ---- Classify the test set ----

AssignedProbs <- matrix(
  0,
  nrow = nrow(TestData), ncol = length(PartyLabels),
  dimnames = list(NULL, PartyLabels)
)
AssignedParty <- character(nrow(TestData))
for (i in seq_len(nrow(TestData))) {
  result <- VotingModel_fn(TestData[i, VoteCols, drop = FALSE], ProbMat)
  AssignedProbs[i, ] <- result$AssignedVec
  AssignedParty[i] <- result$AssignedParty
}

# Columns 1-2: probability per party; column 3: the assigned party.
AssignedMat <- data.frame(AssignedProbs, AssignedParty = AssignedParty)

CheckMat <- data.frame(Actual = TestData$Party, Assigned = AssignedParty)

# Share of test members classified correctly (a fraction between 0 and 1).
Pct_Accuracy <- mean(CheckMat$Actual == CheckMat$Assigned)
print("Classifier Percent Accuracy")
print(Pct_Accuracy)

# ---- Predict the party of one arbitrary member ----

Example <- read.csv(file.path(data_dir, "ArbitraryMember.csv"))
result <- VotingModel_fn(Example, ProbMat)
print(result)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment