If you find any issue, comment below.
Machine Learning Using scikit-learn - Fresco Play Hands-On Solutions (HackerRank):
LAB 1: Welcome to Machine Learning Using Scikit-Learn | 1 | Preprocessing
Solution 1: Preprocessing.
#Write your code here
from sklearn import datasets
from sklearn import preprocessing
from sklearn.impute import SimpleImputer
import numpy as np
# Task 1:
# Load Popular iris data set from sklearn.datasets module and assign it to variable 'iris'.
iris = datasets.load_iris()
# Perform Normalization on iris.data with l2 norm and save the transformed data in variable iris_normalized.
iris_normalized = preprocessing.Normalizer(norm='l2').fit_transform(iris.data)
# Print the mean of every column using the below command.
# print(iris_normalized.mean(axis=0))
print(iris_normalized.mean(axis=0))
# Task 2:
# Convert the categorical integer list iris.target into three binary attribute representation and store the result in variable 'iris_target_onehot'.
enc = preprocessing.OneHotEncoder()
iris_target_onehot = enc.fit_transform(iris.target.reshape(-1, 1))
# Execute the following print statement print(iris_target_onehot.toarray()[[0, 50, 100]])
print(iris_target_onehot.toarray()[[0, 50, 100]])
# Task 3:
# Set the first 50 rows of iris.data to null values. Use numpy.nan.
iris.data[:50, :] = np.nan
# Perform imputation on 'iris.data' and save the transformed data in variable 'iris_imputed'.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer = imputer.fit(iris.data)
iris_imputed = imputer.transform(iris.data)
"""Alternative for older scikit-learn versions (< 0.22), where the legacy Imputer class still exists:
from sklearn.preprocessing import Imputer
imputer = Imputer(missing_values='NaN', strategy='mean', axis=0)
imputer = imputer.fit(iris.data)
iris_imputed = imputer.transform(iris.data)"""
# Print the mean of every column using the below command.
# print(iris_imputed.mean(axis=0))
print(iris_imputed.mean(axis=0))
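A few optional sanity checks (not part of the graded output) can confirm the transforms worked; this assumes the variables defined in the solution above are still in scope.
# Optional sanity checks (not graded); assumes the variables from the solution above exist.
assert iris_normalized.shape == (150, 4)          # normalization keeps the original shape
assert iris_target_onehot.shape == (150, 3)       # three binary columns, one per class
assert not np.isnan(iris_imputed).any()           # imputation removed every NaN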
LAB 2: Welcome to Machine Learning Using Scikit-Learn | 2 | Nearest Neighbors
Solution 1: Hands-On KNN.
#Write your code here
from sklearn import datasets
from sklearn import model_selection
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
# Task 1:
# Import popular iris data set from the sklearn.datasets module and assign it to variable iris.
iris = datasets.load_iris()
# Split iris.data into two sets named X_train and X_test.
# Also, split iris.target into two sets named Y_train and Y_test. Set the random_state to 30.
X_train, X_test, Y_train, Y_test = train_test_split(iris.data, iris.target,
stratify=iris.target, random_state=30)
# Print the shape of the X_train dataset.
# Print the shape of the X_test dataset.
print(X_train.shape)
print(X_test.shape)
# Task 2
# Fit K nearest neighbors model on X_train data and Y_train labels, with default parameters.
# Name the model as knn_clf
knn_classifier = KNeighborsClassifier()
knn_clf = knn_classifier.fit(X_train, Y_train)
# Evaluate the model accuracy on the training data set and print its score.
# Evaluate the model accuracy on the testing data set and print its score.
print(knn_clf.score(X_train,Y_train))
print(knn_clf.score(X_test,Y_test))
# Task 3:
# Fit multiple K nearest neighbors models on X_train data and Y_train labels,
# with the n_neighbors parameter value changing from 3 to 10.
starting_index = 3
ending_index = 10
neighbors = np.arange(starting_index, ending_index + 1)
train_accuracy = np.empty(len(neighbors))
test_accuracy = np.empty(len(neighbors))
# Evaluate each model accuracy on testing data set. Hint: Use for loop.
for i, k in enumerate(neighbors):
    # Set up a knn classifier with k neighbors
    knn = KNeighborsClassifier(n_neighbors=k)
    # Fit the model
    knn.fit(X_train, Y_train)
    # Compute accuracy on the training set
    train_accuracy[i] = knn.score(X_train, Y_train)
    # Compute accuracy on the test set
    test_accuracy[i] = knn.score(X_test, Y_test)
# Print the n_neighbors value of the model with highest accuracy.
best_n_neighbors = neighbors[np.argmax(test_accuracy)]
print(best_n_neighbors)
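The train_accuracy values collected in the loop above are never printed by the hands-on, but they are handy for seeing how the model behaves as k grows. The optional snippet below (not part of the graded solution) plots both curves; it assumes matplotlib is available and that the neighbors, train_accuracy and test_accuracy arrays from the loop above are still in scope.
# Optional visual check (not graded): plot train/test accuracy against n_neighbors.
# Assumes matplotlib is installed and the arrays from the loop above exist.
import matplotlib.pyplot as plt
plt.plot(neighbors, train_accuracy, label='Train accuracy')
plt.plot(neighbors, test_accuracy, label='Test accuracy')
plt.xlabel('n_neighbors')
plt.ylabel('Accuracy')
plt.legend()
plt.show()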
LAB 3: Welcome to Machine Learning Using Scikit-Learn | 3 | Decision Trees.
Solution 1: Decision Tree - Hands-On.
#Write your code here
import numpy as np
from sklearn import datasets
from sklearn import model_selection
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
# Task 1:
# Import numpy and set random seed to 100.
np.random.seed(100)
# Load popular Boston dataset from sklearn.datasets module and assign it to variable boston.
boston = datasets.load_boston()
# Split boston.data into two sets named X_train and X_test. Also, split boston.target into two sets named Y_train and Y_test.
# Set random_state to 30.
X_train, X_test, Y_train, Y_test = train_test_split(boston.data,
boston.target, random_state=30)
# Print the shape of X_train dataset.
# Print the shape of X_test dataset.
print(X_train.shape)
print(X_test.shape)
# Task 2:
# Build a Decision tree Regressor model from X_train set and Y_train labels, with default parameters.
# Name the model as dt_reg.
dt_reg = DecisionTreeRegressor()
dt_reg = dt_reg.fit(X_train, Y_train)
# Evaluate the model accuracy on the training data set and print its score.
print( dt_reg.score(X_train, Y_train))
# Evaluate the model accuracy on the testing data set and print its score.
print( dt_reg.score(X_test, Y_test))
# Predict the housing price for the first two samples of X_test and print them. (Hint: Use the predict() function.)
print(dt_reg.predict(X_test[:2]))
# Task 3:
# Fit multiple Decision tree regressors on X_train data and Y_train labels,
# with the max_depth parameter value changing from 2 to 5.
starting_index = 2
ending_index = 5
# Evaluate each model accuracy on the testing data set. Hint: Make use of a for loop.
test_accuracy = []
for depth in range(starting_index, ending_index + 1):
    dt = DecisionTreeRegressor(max_depth=depth)
    # Fit the model
    dt.fit(X_train, Y_train)
    # Compute accuracy on the test set
    test_accuracy.append(dt.score(X_test, Y_test))
# Print the max_depth value of the model with highest accuracy.
print(test_accuracy.index(max(test_accuracy)) + starting_index)
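Note: load_boston() was removed from scikit-learn in version 1.2 because of ethical concerns about the dataset. The Fresco Play environment ships an older scikit-learn, so the code above runs there as-is. If you are practising locally on scikit-learn >= 1.2, the snippet below is the workaround suggested in the library's own deprecation notice (it assumes pandas is installed and the CMU StatLib mirror is reachable); boston_data and boston_target can then stand in for boston.data and boston.target.
# Local workaround for scikit-learn >= 1.2, where datasets.load_boston() no longer exists.
# Assumes pandas is installed and you have internet access.
import numpy as np
import pandas as pd
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
boston_data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
boston_target = raw_df.values[1::2, 2]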
LAB 4: Welcome to Machine Learning Using Scikit-Learn | 4 | Ensemble Methods.
Solution 1: Hands-On - Ensemble.
#Write your code here
import numpy as np
from sklearn import datasets
from sklearn import model_selection
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
# Task 1:
# Import numpy and set random seed to 100
np.random.seed(100)
# Load popular Boston Dataset from sklearn.datasets module and assign it to variable boston.
boston = datasets.load_boston()
# Split boston.data into two sets named X_train and X_test.
# Also, split boston.target into two sets named Y_train and Y_test.
# Set random_state to 30.
X_train, X_test, Y_train, Y_test = train_test_split(boston.data,
boston.target, random_state=30 )
# Print the shape of X_train dataset.
print(X_train.shape)
# Print the shape of X_test dataset.
print(X_test.shape)
# Task 2:
# Build a Random Forest Regressor model from X_train set and Y_train labels, with default parameters.
# Name the model as rf_reg
rfr = RandomForestRegressor()
rf_reg = rfr.fit(X_train, Y_train)
# Evaluate the model accuracy on the training data set and print its score.
print(rf_reg.score(X_train, Y_train))
# Evaluate the model accuracy on the testing data set and print its score.
print(rf_reg.score(X_test, Y_test))
# Predict the housing price for first two samples of X_test set and print them.
print( rf_reg.predict(X_test[:2] ))
# Task 3:
# Build multiple Random Forest Regressors on X_train set and Y_train labels, with the max_depth
# parameter value changing from 3 to 5, and also setting n_estimators to one of 50, 100, 200.
starting_index = 3
ending_index = 5
c_estimators = [50,100,200]
test_accuracy = []
# Evaluate each model accuracy on the testing data set. Hint: Use a for loop.
for i in range(starting_index, ending_index + 1):
    rfr = RandomForestRegressor(n_estimators=c_estimators[1], max_depth=i)
    # Fit the model
    rfr_fit = rfr.fit(X_train, Y_train)
    test_accuracy.append(rfr_fit.score(X_test, Y_test))
# Print the parameter values in the form of a tuple (a, b),
# where 'a' refers to the max_depth value and 'b' refers to n_estimators.
index = test_accuracy.index(max(test_accuracy)) + starting_index
print((index, c_estimators[1]))
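The loop above keeps n_estimators fixed at 100 (c_estimators[1]) and only varies max_depth, which is how this hands-on is usually accepted. If you want to search over both parameters as the task wording hints, a sketch of the nested loop is below (optional, not the graded answer; it assumes the X_train/X_test/Y_train/Y_test split from above is still in scope, and results can vary between runs because RandomForestRegressor is not seeded here).
# Optional: grid over both max_depth (3 to 5) and n_estimators (50, 100, 200).
# Assumes the train/test split variables from above are in scope.
best_score = float('-inf')
best_params = None
for depth in range(3, 6):
    for n_est in (50, 100, 200):
        reg = RandomForestRegressor(n_estimators=n_est, max_depth=depth)
        reg.fit(X_train, Y_train)
        score = reg.score(X_test, Y_test)
        if score > best_score:
            best_score, best_params = score, (depth, n_est)
print(best_params)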
LAB 5: Welcome to Machine Learning Using Scikit-Learn | 5 | SVM.
Solution 1: Hands-On - SVM.
#Write your code here
import numpy as np
from sklearn import datasets
from sklearn import model_selection
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import sklearn.preprocessing as preprocessing
# Task 1:
# Load popular digits dataset from sklearn.datasets module and assign it to variable digits.
digits = datasets.load_digits()
# Split digits.data into two sets named X_train and X_test.
# Also, split digits.target into two sets named Y_train and Y_test.
# Set random_state to 30.
X_train, X_test, Y_train, Y_test = train_test_split(digits.data,
digits.target, stratify= digits.target, random_state=30 )
# Print the shape of X_train dataset.
print(X_train.shape)
# Print the shape of X_test dataset.
print(X_test.shape)
# Task 2:
# Build an SVM classifier from X_train set and Y_train labels, with default parameters.
# Name the model as svm_clf
svm_classifier = SVC()
svm_clf = svm_classifier.fit(X_train, Y_train)
# Evaluate the model accuracy on the testing data set and print its score.
print( svm_clf.score(X_test, Y_test))
# Task 3:
# Perform standard scaling on digits.data using StandardScaler and store the transformed data in variable digits_standardized.
pss = preprocessing.StandardScaler()
digits_standardized = pss.fit_transform(digits.data)
# Again, split digits_standardized into two sets named X_train and X_test.
# Also, split digits.target into two sets named Y_train and Y_test.
# Set random_state to 30 and perform stratified sampling.
X_train, X_test, Y_train, Y_test = train_test_split(digits_standardized,
digits.target, stratify= digits.target, random_state=30 )
# Build another SVM classifier from X_train set and Y_train labels, with default parameters.
# Name the model as svm_clf2.
svm_clf2 = SVC().fit(X_train, Y_train)
# Evaluate the model accuracy on the testing data set and print its score.
print(svm_clf2.score(X_test, Y_test))
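As a side note, the scaling and the classifier can also be chained with a Pipeline, which keeps the scaler from being fit on anything outside the training split. This is only an alternative sketch, not the graded solution, and it scales slightly differently from the code above (the hands-on fits the scaler on all of digits.data).
# Optional sketch: StandardScaler + SVC chained in a single pipeline.
# Reuses the digits dataset and the imports from the solution above.
from sklearn.pipeline import make_pipeline
X_train, X_test, Y_train, Y_test = train_test_split(
    digits.data, digits.target, stratify=digits.target, random_state=30)
pipe = make_pipeline(preprocessing.StandardScaler(), SVC())
pipe.fit(X_train, Y_train)
print(pipe.score(X_test, Y_test))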
LAB 6: Welcome to Machine Learning Using Scikit-Learn | 6 | Clustering.
Solution 1: Hands-On - Clustering.
#Write your code here
from sklearn import metrics
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import AffinityPropagation
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.metrics import homogeneity_score
# Please try this one yourself; if you face any errors, comment below.
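Since the exact Lab 6 task wording is not reproduced here, the sketch below only shows the kind of code those imports point to: clustering iris with KMeans, AgglomerativeClustering and AffinityPropagation and scoring each with homogeneity_score. Treat the variable names and parameter values as assumptions, not the graded answer, and adjust them to the actual task text in your hands-on.
# Minimal clustering sketch on iris -- adapt to the exact task wording before submitting.
iris = datasets.load_iris()
# KMeans with 3 clusters (iris has 3 classes); n_clusters and random_state are assumptions.
km = KMeans(n_clusters=3, random_state=30)
print(metrics.homogeneity_score(iris.target, km.fit_predict(iris.data)))
# Agglomerative (hierarchical) clustering with 3 clusters.
agg = AgglomerativeClustering(n_clusters=3)
print(metrics.homogeneity_score(iris.target, agg.fit_predict(iris.data)))
# Affinity propagation chooses the number of clusters on its own.
ap = AffinityPropagation()
print(metrics.homogeneity_score(iris.target, ap.fit_predict(iris.data)))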