Python Assignment Help

Machine Learning using Python for Gradescope Task

Solution for Machine Learning using Python for Gradescope Task

#!/usr/bin/env python
 # coding: utf-8

# Essential Problem 1:
 # a). Here at least one point is required in each grid, thus the least number of data points are 100
 # b). Here the dimension has changed to 3. Thus the least number of data points are 10^3 = 1000.
# c). Here the dimension has changed to 10. Thus the least number of data points is 10^(10) = 10,000,000,000.



# In[ ]:

# In[ ]:
 #The plus problem:
 # In[4]:
 #Importing essential libraries
 import numpy as np
 import scipy.spatial
 from collections import Counter
 import pandas as pd
 # In[5]:
# Load the training set (CSV file expected next to this script).
Train_df = pd.read_csv("train (1).csv")
# In[6]:
# Keep only the label column ('Position') and the two numeric
# features ('Height', 'Weight') used as the data points.
cols = ['Position','Height','Weight']
Train_data = Train_df[cols]
Train_data.head()
# In[7]:
# Load the test set and restrict it to the same columns.
Test_df = pd.read_csv("test (1).csv")
Test_data = Test_df[cols]
Test_data.head()
# In[8]:
# Euclidean-distance helper used by the nearest-neighbour search below.
from scipy.spatial import distance
 # In[9]:
 #code for nearest neighbour
 def pre_nn(TrainSet, TestSet):

#Ans vector whic stores the Position of nearest neighbour
 Ans = []
 #Cols here extracts the columns which are numeric and will be used as data point
 cols = ['Height','Weight']
 #creating Train and Test numeric vectors
 Train=TrainSet[cols]
 Test = TestSet[cols]
 for i in range(len(Test)):
 #initializing the nearest neighbour index to be 0
 ans = 0
 #initializing the nearest distance to be very big value
 dist = 1000000000000
 for j in range(len(Train)):
 #If the distance
 if dist < distance.euclidean(Train[j:j+1],Test[i:i+1]):
 dist = distance.euclidean(Train[j:j+1],Test[i:i+1])
 ans = j
 Ans.append(TrainSet['Position'].iloc[ans])
 return(Ans)
# In[8]:
# a) Without standardizing the data.
# In[10]:
# Predicted 'Position' labels for every test row (1-NN on raw features).
Predicted_test = pre_nn(Train_data,Test_data)
# In[11]:
 #Calculating the accuracy
 def Accuracy(X1,X2):
 match = 0
 for i in range(len(X1)):
 if X1[i] == X2['Position'].iloc[i]:
 match=match+1
 return((match*100)/len(X1))
# In[32]:
# Overall accuracy (in percent) of the raw-feature 1-NN predictions.
Accuracy(Predicted_test,Test_data)
# In[ ]:
 #b) With standardizing the data
 # In[30]:
 from sklearn import preprocessing
 cols = ['Height','Weight']
 Train = Train_data[cols]
 Test = Test_data[cols]
 standardized_train = preprocessing.scale(Train)
 standardized_train_df = pd.DataFrame(standardized_train)
 standardized_train_df['Position']=Train_data['Position']
 standardized_train_df.columns = ['Height', 'Weight', 'Position']

standardized_test = preprocessing.scale(Test)
 standardized_test_df = pd.DataFrame(standardized_test)
 standardized_test_df['Position'] = Test_data['Position']
 standardized_test_df.columns = ['Height', 'Weight', 'Position']
 # In[29]:

standardized_train_df.head()
 # In[31]:
 #The predicted Values for standardized variables
 Predicted_test_standard = pre_nn(standardized_train_df, standardized_test_df)
 # In[33]:
 Accuracy(Predicted_test_standard,standardized_test_df)