 #!/usr/bin/env python
# coding: utf-8

# Essential Problem 1:
# a). At least one point is required in each grid cell, so the least number of data points is 10^2 = 100.
# b). Here the dimension has changed to 3, so the least number of data points is 10^3 = 1000.
# c). Here the dimension has changed to 10, so the least number of data points is 10^10.

# In[ ]:

# In[ ]:
#The plus problem:
# In:
#Importing essential libraries
import numpy as np
import scipy.spatial
from collections import Counter
import pandas as pd
# In:
# In:
# Extract the label column plus the two numeric columns from each data set.
# NOTE(review): Train_df and Test_df are not defined in the visible part of
# this file; presumably a data-loading cell precedes this one -- confirm.
# The column names must be quoted with plain ASCII quotes: typographic quotes
# are not valid string delimiters in Python.
cols = ['Position', 'Height', 'Weight']
Train_data = Train_df[cols]
# In:
Test_data = Test_df[cols]
# In:
#importing distance for calculating euclidean distance
from scipy.spatial import distance
# In:
#code for nearest neighbour
def pre_nn(TrainSet, TestSet):
    """Predict a label for every test row with a 1-nearest-neighbour search.

    Each row of ``TestSet`` is matched to the row of ``TrainSet`` that is
    closest in Euclidean distance over the ``Height`` and ``Weight`` columns,
    and is given that training row's ``Position`` value.

    Args:
        TrainSet: DataFrame with ``Height``, ``Weight`` and ``Position``
            columns.
        TestSet: DataFrame with ``Height`` and ``Weight`` columns.

    Returns:
        A list holding one ``Position`` value per row of ``TestSet``, in row
        order. When several training rows are equally close, the earliest
        one wins.

    Raises:
        ValueError: If ``TestSet`` has rows but ``TrainSet`` has none.
    """
    # Only the numeric columns take part in the distance computation.
    cols = ['Height', 'Weight']
    train_points = TrainSet[cols].to_numpy(dtype=float)
    test_points = TestSet[cols].to_numpy(dtype=float)

    if test_points.shape[0] == 0:
        return []
    if train_points.shape[0] == 0:
        raise ValueError("TrainSet must contain at least one row")

    # pairwise[i, j] is the distance from test row i to training row j.
    pairwise = distance.cdist(test_points, train_points, metric='euclidean')

    # The nearest neighbour is the training row with the SMALLEST distance.
    # (The previous loop compared with `<` against a huge initial value, so
    # the condition never held and row 0 was always returned.)  argmin
    # returns the first index on ties, i.e. the earliest training row.
    nearest = pairwise.argmin(axis=1)

    # Positional lookup, so the result does not depend on TrainSet's index.
    return TrainSet['Position'].iloc[nearest].tolist()
# In:
# a) Without standardizing the data
# In:
# Nearest-neighbour label for every row of the test set, using the raw
# (unscaled) Height and Weight values.
Predicted_test = pre_nn(TrainSet=Train_data, TestSet=Test_data)
# In:
#Calculating the accuracy
def Accuracy(X1, X2):
    """Return the percentage of predictions that match the true labels.

    Args:
        X1: Sequence of predicted labels.
        X2: DataFrame whose ``Position`` column holds the true labels; row
            ``i`` (by position, not by index label) is compared with
            ``X1[i]``.

    Returns:
        The share of matching entries as a percentage between 0 and 100.

    Raises:
        ValueError: If ``X1`` is empty, because the percentage is undefined.
        IndexError: If ``X2`` has fewer rows than ``X1`` has entries.
    """
    total = len(X1)
    if total == 0:
        raise ValueError("Accuracy is undefined for an empty prediction list")

    actual = X2['Position']
    # .iloc keeps the comparison positional, so X2's index labels are irrelevant.
    matches = sum(
        1 for i, predicted in enumerate(X1) if predicted == actual.iloc[i]
    )
    return (matches * 100) / total
# In:
# Accuracy (in percent) of the unscaled nearest-neighbour predictions.
# As a bare expression the value is shown by a notebook but discarded when
# the file runs as a plain script.
Accuracy(X1=Predicted_test, X2=Test_data)
# In[ ]:
#b) With standardizing the data
# In:
from sklearn import preprocessing
cols = ['Height', 'Weight']
Train = Train_data[cols]
Test = Test_data[cols]

# Learn the per-column mean and standard deviation from the training set only
# and apply that same transformation to both sets.  Scaling the test set with
# its own statistics would place the two sets in different coordinate systems
# and make each test row's features depend on the other test rows.
scaler = preprocessing.StandardScaler().fit(Train)

# Same values as preprocessing.scale(Train): zero mean, unit variance per column.
standardized_train = scaler.transform(Train)
# Reuse the source index so that the Position assignment below lines up row
# for row; assigning a Series aligns on index labels, and a fresh 0..n-1 index
# would produce NaN or mismatched labels whenever Train_data is not 0..n-1.
standardized_train_df = pd.DataFrame(
    standardized_train, columns=cols, index=Train.index
)
standardized_train_df['Position'] = Train_data['Position']

standardized_test = scaler.transform(Test)
standardized_test_df = pd.DataFrame(
    standardized_test, columns=cols, index=Test.index
)
standardized_test_df['Position'] = Test_data['Position']
# In: