Solution to the Gradescope task: Machine Learning using Python
#!/usr/bin/env python
# coding: utf-8

# Essential Problem 1:
# a) At least one point is required in each grid cell, so the least number of data points is 10^2 = 100.
# b) The dimension has changed to 3, so the least number of data points is 10^3 = 1000.
# c) The dimension has changed to 10, so the least number of data points is 10^10.

# In[ ]:


# In[ ]:

# The plus problem:

# In[4]:

# Importing essential libraries
import numpy as np
import scipy.spatial
from collections import Counter
import pandas as pd

# In[5]:

Train_df = pd.read_csv("train (1).csv")

# In[6]:

# Extracting the required columns
cols = ['Position', 'Height', 'Weight']
Train_data = Train_df[cols]
Train_data.head()

# In[7]:

Test_df = pd.read_csv("test (1).csv")
Test_data = Test_df[cols]
Test_data.head()

# In[8]:

# Importing distance for calculating the Euclidean distance
from scipy.spatial import distance

# In[9]:

# Code for the 1-nearest-neighbour predictor
def pre_nn(TrainSet, TestSet):
    # Ans stores the predicted Position (the label of the nearest neighbour) for each test point
    Ans = []
    # The numeric columns used as the coordinates of each data point
    cols = ['Height', 'Weight']
    # Creating the numeric Train and Test matrices
    Train = TrainSet[cols]
    Test = TestSet[cols]
    for i in range(len(Test)):
        # Initializing the nearest-neighbour index to 0
        ans = 0
        # Initializing the nearest distance to a very large value
        dist = float('inf')
        for j in range(len(Train)):
            # If this training point is closer than the closest one seen so far, remember it
            d = distance.euclidean(Train.iloc[j], Test.iloc[i])
            if d < dist:
                dist = d
                ans = j
        Ans.append(TrainSet['Position'].iloc[ans])
    return Ans

# In[8]:

# a) Without standardizing the data

# In[10]:

# The predicted values
Predicted_test = pre_nn(Train_data, Test_data)

# In[11]:

# Calculating the accuracy (percentage of predictions that match the true Position)
def Accuracy(X1, X2):
    match = 0
    for i in range(len(X1)):
        if X1[i] == X2['Position'].iloc[i]:
            match = match + 1
    return (match * 100) / len(X1)

# In[32]:

Accuracy(Predicted_test, Test_data)

# In[ ]:

# b) With standardizing the data

# In[30]:

from sklearn import preprocessing

cols = ['Height', 'Weight']
Train = Train_data[cols]
Test = Test_data[cols]

standardized_train = preprocessing.scale(Train)
standardized_train_df = pd.DataFrame(standardized_train)
standardized_train_df['Position'] = Train_data['Position']
standardized_train_df.columns = ['Height', 'Weight', 'Position']

standardized_test = preprocessing.scale(Test)
standardized_test_df = pd.DataFrame(standardized_test)
standardized_test_df['Position'] = Test_data['Position']
standardized_test_df.columns = ['Height', 'Weight', 'Position']

# In[29]:

standardized_train_df.head()

# In[31]:

# The predicted values for the standardized variables
Predicted_test_standard = pre_nn(standardized_train_df, standardized_test_df)

# In[33]:

Accuracy(Predicted_test_standard, standardized_test_df)
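
# In[ ]:

# Optional cross-check (not part of the original submission): a vectorized
# 1-nearest-neighbour predictor using scipy.spatial.distance.cdist. This is a
# minimal sketch that assumes the DataFrames Train_data / Test_data and the
# column names 'Height', 'Weight', 'Position' defined above; it should return
# the same labels as pre_nn, just without the explicit double loop.

def pre_nn_vectorized(TrainSet, TestSet):
    cols = ['Height', 'Weight']
    # Pairwise Euclidean distances: rows index test points, columns index training points
    D = distance.cdist(TestSet[cols].to_numpy(), TrainSet[cols].to_numpy())
    # Index of the closest training point for every test point
    nearest = D.argmin(axis=1)
    return [TrainSet['Position'].iloc[j] for j in nearest]

# Example usage (assumes Train_data and Test_data are loaded as above):
# Predicted_test_fast = pre_nn_vectorized(Train_data, Test_data)
# Accuracy(Predicted_test_fast, Test_data)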
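
# In[ ]:

# Optional variant (not part of the original submission): standardize the test
# set with the training set's mean and standard deviation instead of scaling
# each set independently, as is commonly done to avoid using test statistics.
# This is a minimal sketch assuming the Train_data and Test_data DataFrames
# defined above; sklearn's StandardScaler is fit on the training columns only
# and then applied to both sets.

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
num_cols = ['Height', 'Weight']

train_scaled_df = pd.DataFrame(scaler.fit_transform(Train_data[num_cols]), columns=num_cols)
train_scaled_df['Position'] = Train_data['Position'].values

test_scaled_df = pd.DataFrame(scaler.transform(Test_data[num_cols]), columns=num_cols)
test_scaled_df['Position'] = Test_data['Position'].values

# Example usage:
# Predicted_scaled = pre_nn(train_scaled_df, test_scaled_df)
# Accuracy(Predicted_scaled, test_scaled_df)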