# Commit c97d2992 by Sanghoon
#
# Upload New File
#
# parent ff61d903
# Problem: Can these models be used to predict how lawmakers may vote?
#
# This section loads the voting records, splits them into a training and a
# test set, and estimates the naive Bayes probability table (ProbMat) from the
# training set.

# Prepare a clean R environment in the workspace.
rm(list = ls())

# Make the folder of this script the working directory so the relative CSV
# paths below resolve. The RStudio editor is the only source of that path, so
# when the script is not run from RStudio (or the file is unsaved) the current
# working directory is left untouched instead of aborting the script.
if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
  ScriptPath <- rstudioapi::getSourceEditorContext()$path
  if (length(ScriptPath) == 1 && nzchar(ScriptPath)) {
    setwd(dirname(ScriptPath))
  }
}

# Import the CSV data. The "Party" column holds the class label and every
# column from the third onward is treated as one vote.
data <- read.csv("VotingData.csv", header = TRUE)
if (!("Party" %in% names(data)) || ncol(data) < 3) {
  stop("VotingData.csv must have a 'Party' column and vote columns from column 3 onward",
       call. = FALSE)
}
VoteCols <- 3:ncol(data)

# Split: the first TrainingPct of the rows train the model, the rest test it.
TrainingPct <- 0.6 # Share of the data to train the model on
TrainingSample <- floor(TrainingPct * nrow(data)) # Number of training rows
TestSample <- nrow(data) - TrainingSample # Number of test rows
# seq_len() keeps the selection empty when TrainingSample is 0 (1:0 would
# select row 1).
TrainingData <- data[seq_len(TrainingSample), ]

m <- 2 # Equivalent sample size for the Laplacian correction
p <- 1 / 2 # Prior probability for the Laplacian correction

# Smoothed estimate of P(vote == "y") for one vote column of one party.
YesProb <- function(Votes) {
  (sum(Votes == "y") + m * p) / (length(Votes) + m)
}

# Store all probabilities in one matrix: row 1 = democrat, row 2 = republican,
# one column per vote plus a final column for the marginal party probability.
ProbMat <- matrix(0, 2, length(VoteCols) + 1)

# Probabilities associated with democrat voting
DemData <- subset(TrainingData, TrainingData$Party == "democrat")
ProbMat[1, seq_along(VoteCols)] <- vapply(DemData[VoteCols], YesProb, numeric(1))

# Probabilities associated with republican voting
GOPData <- subset(TrainingData, TrainingData$Party == "republican")
ProbMat[2, seq_along(VoteCols)] <- vapply(GOPData[VoteCols], YesProb, numeric(1))

# Tag on the marginal probabilities
FinalInd <- ncol(ProbMat)
ProbMat[1:2, FinalInd] <- c(
  sum(TrainingData$Party == "democrat") / nrow(TrainingData),
  sum(TrainingData$Party == "republican") / nrow(TrainingData)
)
colnames(ProbMat) <- c(names(data)[VoteCols], "MargProb")
# Label the rows in the order they were filled above. unique(Party) follows
# the order of first appearance in the data, which would swap the labels
# whenever the first training row is a republican.
rownames(ProbMat) <- c("democrat", "republican")

# Hold-out rows and the matrix that receives their classification results
# (two party scores and the assigned party per test row).
TestData <- data[seq(TrainingSample + 1, length.out = TestSample), ]
AssignedMat <- matrix(0, nrow(TestData), 3)
# Naive Bayes classifier: scores one member's voting record against each party.
#
# Arguments:
#   TestVec     - the member's votes, one element per vote column of ProbMat
#                 (a one-row data frame or a plain vector). A vote equal to
#                 "y" counts as yes; every other value, including a missing
#                 one, counts as not-yes.
#   ProbMat     - matrix with one row per party. Columns 1..length(TestVec)
#                 hold P(vote == "y" | party); the last column holds the
#                 marginal probability of the party.
#   PartyLabels - party name for each row of ProbMat, in row order. The
#                 default matches how this script fills ProbMat (row 1 from
#                 the democrat rows, row 2 from the republican rows), so the
#                 function no longer depends on a global TrainingData or on
#                 the order in which parties first appear in the data.
#
# Returns a list with
#   AssignedVec   - numeric vector, one unnormalised score per party: the
#                   product of the per-vote probabilities and the marginal.
#   AssignedParty - label of the party with the highest score.
VotingModel_fn <- function(TestVec, ProbMat,
                           PartyLabels = c("democrat", "republican")) {
  Votes <- as.character(unlist(TestVec, use.names = FALSE))
  NumVotes <- length(Votes)

  # A width mismatch would otherwise leave zero-filled columns in the product
  # and silently yield all-zero scores.
  if (ncol(ProbMat) != NumVotes + 1) {
    stop("TestVec must supply exactly one vote per vote column of ProbMat",
         call. = FALSE)
  }
  if (length(PartyLabels) != nrow(ProbMat)) {
    stop("PartyLabels must name every row of ProbMat", call. = FALSE)
  }

  # P(observed vote | party): p for a yes vote, 1 - p otherwise.
  IsYes <- !is.na(Votes) & Votes == "y"
  YesMask <- matrix(IsYes, nrow = nrow(ProbMat), ncol = NumVotes, byrow = TRUE)
  VoteProbs <- ProbMat[, seq_len(NumVotes), drop = FALSE]
  ProbTestMat <- cbind(ifelse(YesMask, VoteProbs, 1 - VoteProbs),
                       ProbMat[, NumVotes + 1])

  # Product of probabilities for the member being of each party
  Probs <- unname(apply(ProbTestMat, 1, prod))
  ind <- which.max(Probs) # Row with the higher score

  # Scores stay numeric; they are not pushed through a character vector.
  list(AssignedVec = Probs, AssignedParty = PartyLabels[ind])
}
# Classify every member of the test set. Each row of AssignedMat receives the
# two party scores followed by the assigned party label. One call per member
# is enough: the result does not depend on any per-column index.
for (i in seq_len(nrow(TestData))) {
  TestVec <- TestData[i, 3:ncol(TestData)]
  result <- VotingModel_fn(TestVec, ProbMat)
  AssignedMat[i, ] <- c(as.numeric(result$AssignedVec), result$AssignedParty)
}

# Compare the assigned party with the actual one. Both columns are forced to
# character so the comparison is between party names, never factor codes.
CheckMat <- data.frame(
  Actual = as.character(TestData$Party),
  Assigned = as.character(AssignedMat[, 3]),
  stringsAsFactors = FALSE
)

# Share of test members classified correctly (a fraction between 0 and 1).
Pct_Accuracy <- sum(CheckMat$Actual == CheckMat$Assigned) / nrow(TestData)
print("Classifier Percent Accuracy")
print(Pct_Accuracy)

# Run the classifier on a sample member's voting record to predict the party.
Example <- read.csv("ArbitraryMember.csv") # Load data
result <- VotingModel_fn(Example, ProbMat)
print(result)
# Markdown is supported
# 0% or
# You are about to add 0 people to the discussion. Proceed with caution.
# Finish editing this message first!
# Please register or sign in to comment