Commit b2293e72 by Sanghoon

Upload New File

parent c97d2992
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Can these models be used to predict how lawmakers may vote?\n",
"\n",
"## Using Naive Bayes Classifier"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#Clear all objects from the R workspace so the notebook starts from a clean environment.\n",
"rm(list=ls())"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"#Import our csv file data\n",
"data=read.csv(\"VotingData.csv\",header=TRUE) #Load data\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"#Construct training data set\n",
"TrainingPct=0.6 #Percent of data to train model on\n",
"TrainingSample=floor(TrainingPct*dim(data)[1]) #Number of observations to train the model on\n",
"TestSample=dim(data)[1]-TrainingSample #Number of observations to test the model on\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"TrainingData=data[1:TrainingSample,] #Get the training data\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"#Find probabilities associated with Democrat voting\n",
"DemData=subset(TrainingData,TrainingData$Party==\"democrat\")\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"#Store All Probabilities in a Matrix (2 rows, across all votes)\n",
"ProbMat=matrix(0,2,(dim(DemData)[2]-3+1+1))\n",
"\n",
"m=2 #Equivalent sample size for Laplacian correction\n",
"p=1/2 #Prior probability for Laplacian correction\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"for (j in 3:dim(DemData)[2])\n",
"{\n",
" ProbMat[1,j-2]=(sum(DemData[,j]==\"y\")+m*p)/(dim(DemData)[1]+m)\n",
" \n",
"}\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"#Find Probabilities Associated with Republican Voting\n",
"GOPData=subset(TrainingData,TrainingData$Party==\"republican\")\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"for (j in 3:dim(GOPData)[2])\n",
"{\n",
" ProbMat[2,j-2]=(sum(GOPData[,j]==\"y\")+m*p)/(dim(GOPData)[1]+m)\n",
" \n",
"}\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"#Tag on marginal (prior) probabilities of each party\n",
"FinalInd=dim(ProbMat)[2]\n",
"ProbMat[1:2,FinalInd]=c(sum(TrainingData$Party==\"democrat\")/dim(TrainingData)[1],sum(TrainingData$Party==\"republican\")/dim(TrainingData)[1])\n",
"colnames(ProbMat)=c(names(data)[3:dim(data)[2]],\"MargProb\")\n",
"rownames(ProbMat)=unique(TrainingData$Party)\n",
"\n",
"TestData=data[(TrainingSample+1):dim(data)[1],]\n",
"AssignedMat=matrix(0,dim(TestData)[1],3)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"\n",
"#Use the NB classifier on test Data\n",
"VotingModel_fn<-function(TestVec,ProbMat){\n",
" \n",
" \n",
" ProbTestMat=matrix(0,2,dim(ProbMat)[2])\n",
" #TestVec is the vote record of the member of interest\n",
" \n",
" for (j in 1:length(TestVec)){\n",
" for (k in 1:2){\n",
" #Use the stored yes-vote probability for a y vote, otherwise its complement (if/else)\n",
" \n",
" if (TestVec[j]==\"y\"){\n",
" ProbTestMat[k,j]=ProbMat[k,j]\n",
" } else {\n",
" ProbTestMat[k,j]=1-ProbMat[k,j]\n",
" }\n",
" }\n",
" }\n",
" \n",
" ProbTestMat[1:2,(length(TestVec)+1)]=ProbMat[1:2,(length(TestVec)+1)]\n",
" Probs=apply(ProbTestMat,1,prod) #Compute product of probabilities for the candidate being of either party\n",
" ind=which.max(Probs) #Find which probability is higher\n",
" AssignedVec=c(Probs,unique(TrainingData$Party)[ind]) #Probability of being a democrat, being a republican, and which one is assigned\n",
" \n",
" \n",
" return(list(AssignedVec=as.numeric(AssignedVec[1:2]),AssignedParty=AssignedVec[3])) #Elements returned as a list.\n",
"}\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for (i in 1:dim(TestData)[1]){\n",
" for (j in 3:dim(TestData)[2]){\n",
" \n",
" TestVec=TestData[i,3:dim(TestData)[2]]\n",
" result<-VotingModel_fn(TestVec,ProbMat)\n",
" AssignedMat[i,]=c(as.numeric(result$AssignedVec),result$AssignedParty)\n",
" }\n",
"}\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"CheckMat=data.frame(cbind(TestData$Party,AssignedMat[,3]))\n",
"colnames(CheckMat)=c(\"Actual\",\"Assigned\")\n",
"Pct_Accuracy=sum(CheckMat$Actual==CheckMat$Assigned)/dim(TestData)[1] #Fraction of test members whose assigned party matches the actual party\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"Classifier Percent Accuracy\") #Label for the accuracy printed next (a fraction between 0 and 1, not multiplied by 100).\n",
"print(Pct_Accuracy)\n",
"\n",
"Example=read.csv(\"ArbitraryMember.csv\")\n",
"result<-VotingModel_fn(Example,ProbMat)\n",
"print(result)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Run the classifier on a sample member's voting record to predict party affiliation\n",
"Example=read.csv(\"ArbitraryMember.csv\") #load data\n",
"\n",
"result<-VotingModel_fn(Example,ProbMat)\n",
"print(result)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "R",
"language": "R",
"name": "ir"
},
"language_info": {
"codemirror_mode": "r",
"file_extension": ".r",
"mimetype": "text/x-r-source",
"name": "R",
"pygments_lexer": "r",
"version": "3.4.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment