Machine Learning Using scikit-learn Fresco Play Handson Solution HackerRank

Machine Learning Using scikit-learn Fresco Play Hands on Preprocessing, Nearest Neighbors Technique, Decision Tree, Ensemble method, SVM, Clustering.
Machine Learning Using scikit-learn Fresco Play Handson Solution HackerRank - www.pdfcup.com

If you find any issue, please comment below.

Machine Learning Using scikit-learn Fresco Play Hands on Solution Hacker Rank:-

LAB 1: Welcome to Machine Learning Using Scikit-Learn | 1 | Preprocessing

Solution 1: Preprocessing.



#Write your code here
# Lab 1 script: normalization, one-hot encoding and mean imputation on iris.
from sklearn import datasets
from sklearn import preprocessing
from sklearn.impute import SimpleImputer
import numpy as np

# NOTE: the legacy `sklearn.preprocessing.Imputer` class is intentionally not
# imported any more. It was removed in scikit-learn 0.22, so importing it
# aborts the whole script on current releases; SimpleImputer replaces it.

# Task 1:
# Load Popular iris data set from sklearn.datasets module and assign it to variable 'iris'.
iris = datasets.load_iris()

# Perform Normalization on iris.data with l2 norm and save the transformed
# data in variable iris_normalized.
normalizer = preprocessing.Normalizer(norm='l2')
iris_normalized = normalizer.fit_transform(iris.data)

# Print the mean of every column.
print(iris_normalized.mean(axis=0))


# Task 2:
# Convert the categorical integer list iris.target into three binary attribute
# representation and store the result in variable 'iris_target_onehot'.
# OneHotEncoder expects a 2-D input, hence the reshape to a single column.
enc = preprocessing.OneHotEncoder()
iris_target_onehot = enc.fit_transform(iris.target.reshape(-1, 1))

# Show the encoded rows 0, 50 and 100.
print(iris_target_onehot.toarray()[[0, 50, 100]])


# Task 3:
# Set first 50 row values of iris.data to Null values. Use numpy.nan.
iris.data[0:50, :] = np.nan

# Perform Imputation on 'iris.data' and save the transformed data in variable
# 'iris_imputed'. The missing-value marker must be np.nan itself: the string
# 'NaN' is not a valid marker for SimpleImputer on numeric data.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
iris_imputed = imputer.fit_transform(iris.data)

# Print the mean of every column.
print(iris_imputed.mean(axis=0))
   
   

LAB 2: Welcome to Machine Learning Using Scikit-Learn | 2 | Nearest Neighbors

Solution 1: Hands-On KNN.


#Write your code here
from sklearn import datasets
from sklearn import model_selection
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import numpy as np

# Task 1:
# Load the iris data set from sklearn.datasets into `iris`.
iris = datasets.load_iris()

# Stratified split of iris.data / iris.target into train and test parts,
# with random_state fixed to 30.
X_train, X_test, Y_train, Y_test = train_test_split(
    iris.data,
    iris.target,
    stratify=iris.target,
    random_state=30,
)

# Shapes of the training and testing feature matrices.
print(X_train.shape)
print(X_test.shape)

# Task 2
# Fit a K nearest neighbors model with default parameters; the fitted model
# is named knn_clf.
knn_classifier = KNeighborsClassifier()
knn_clf = knn_classifier.fit(X_train, Y_train)

# Accuracy on the training set, then on the testing set.
print(knn_clf.score(X_train, Y_train))
print(knn_clf.score(X_test, Y_test))


# Task 3:
# Fit one K nearest neighbors model per n_neighbors candidate.
# NOTE(review): np.arange excludes its upper bound, so k = 10 is never
# tried - confirm whether the task intends an inclusive range.
starting_index = 3
ending_index = 10
neighbors = np.arange(starting_index, ending_index)
train_accuracy = np.empty(len(neighbors))
test_accuracy = np.empty(len(neighbors))

# Record train/test accuracy for every candidate k.
for position, n_neighbors in enumerate(neighbors):
    knn = KNeighborsClassifier(n_neighbors=n_neighbors).fit(X_train, Y_train)
    train_accuracy[position] = knn.score(X_train, Y_train)
    test_accuracy[position] = knn.score(X_test, Y_test)

# Print the n_neighbors value of the model with highest test accuracy.
# np.argmax returns the first position of the maximum, so ties resolve to
# the smallest k.
f = neighbors[np.argmax(test_accuracy)]
print(f)
   

LAB 3: Welcome to Machine Learning Using Scikit-Learn | 3 | Decision Trees.

Solution 1: Decision Tree - Hands-On.


#Write your code here
import numpy as np
from sklearn import datasets
from sklearn import model_selection
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split

# Task 1:
# Seed NumPy's global random generator with 100.
np.random.seed(100)

# Load the Boston housing data set into `boston`.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 - confirm the lab environment pins an older release.
boston = datasets.load_boston()

# Split boston.data / boston.target into train and test parts with
# random_state fixed to 30.
X_train, X_test, Y_train, Y_test = train_test_split(
    boston.data,
    boston.target,
    random_state=30,
)

# Shapes of the training and testing feature matrices.
print(X_train.shape)
print(X_test.shape)


# Task 2:
# Build a decision tree regressor with default parameters, named dt_reg.
dt_reg = DecisionTreeRegressor().fit(X_train, Y_train)

# Score on the training set.
print(dt_reg.score(X_train, Y_train))

# Score on the testing set.
print(dt_reg.score(X_test, Y_test))

# Predicted housing price for the first two samples of X_test.
print(dt_reg.predict(X_test[:2]))


# Task 3:
# Fit one decision tree regressor per max_depth candidate and keep each
# model's score on the testing set.
# NOTE(review): range() excludes its upper bound, so max_depth = 5 is never
# tried - confirm whether the task intends an inclusive range.
starting_index = 2
ending_index = 5

test_accuracy = [
    DecisionTreeRegressor(max_depth=depth)
    .fit(X_train, Y_train)
    .score(X_test, Y_test)
    for depth in range(starting_index, ending_index)
]

# Print the max_depth value of the model with highest test score; list.index
# returns the first match, so ties resolve to the shallowest tree.
best_position = test_accuracy.index(max(test_accuracy))
print(best_position + starting_index)



   

LAB 4: Welcome to Machine Learning Using Scikit-Learn | 4 | Ensemble Methods.

Solution 1: Hands-On - Ensemble.


#Write your code here
import numpy as np
from sklearn import datasets
from sklearn import model_selection
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

# Task 1:
# Import numpy and set random seed to 100
np.random.seed(100)

# Load popular Boston Dataset from sklearn.datasets module and assign it to variable boston.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 - confirm the lab environment pins an older release.
boston = datasets.load_boston()

# Split boston.data into X_train and X_test, and boston.target into Y_train
# and Y_test, with random_state set to 30.
X_train, X_test, Y_train, Y_test = train_test_split(
    boston.data, boston.target, random_state=30
)

# Print the shape of X_train dataset.
print(X_train.shape)

# Print the shape of X_test dataset.
print(X_test.shape)

# Task 2:
# Build a Random Forest Regressor model from X_train set and Y_train labels,
# with default parameters. Name the model as rf_reg.
rfr = RandomForestRegressor()
rf_reg = rfr.fit(X_train, Y_train)

# Evaluate the model on the training data set and print its score.
print(rf_reg.score(X_train, Y_train))

# Evaluate the model on the testing data set and print its score.
print(rf_reg.score(X_test, Y_test))

# Predict the housing price for first two samples of X_test set and print them.
print(rf_reg.predict(X_test[:2]))

# Task 3:
# Build multiple Random Forest Regressors with max_depth changing from 3 to 5
# (inclusive) and n_estimators set to each of 50, 100 and 200.
starting_index = 3
ending_index = 5

c_estimators = [50, 100, 200]

# Test-set score of every (max_depth, n_estimators) combination. Every
# n_estimators candidate is evaluated, not just a single hard-coded one, so
# the reported pair is the best over the whole grid.
test_accuracy = {}
for max_depth in range(starting_index, ending_index + 1):
    for n_estimators in c_estimators:
        candidate = RandomForestRegressor(
            n_estimators=n_estimators, max_depth=max_depth
        )
        candidate.fit(X_train, Y_train)
        test_accuracy[(max_depth, n_estimators)] = candidate.score(X_test, Y_test)

# Print the parameter value in the form of tuple (a,b).
# 'a' refers to max_depth value and 'b' refers to n_estimators.
# On ties, max() keeps the first combination that was evaluated.
best_params = max(test_accuracy, key=test_accuracy.get)
print(best_params)



LAB 5: Welcome to Machine Learning Using Scikit-Learn | 5 | SVM.

Solution 1: Hands-On - SVM.


#Write your code here
import numpy as np
from sklearn import datasets
from sklearn import model_selection
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import sklearn.preprocessing as preprocessing

# Task 1:
# Load popular digits dataset from sklearn.datasets module and assign it to variable digits.
digits = datasets.load_digits()

# Split digits.data into X_train and X_test, and digits.target into Y_train
# and Y_test. Set random_state to 30 and perform stratified sampling.
X_train, X_test, Y_train, Y_test = train_test_split(
    digits.data, digits.target, stratify=digits.target, random_state=30
)

# Print the shape of X_train dataset.
print(X_train.shape)

# Print the shape of X_test dataset.
print(X_test.shape)


# Task 2:
# Build an SVM classifier from X_train set and Y_train labels, with default
# parameters. Name the model as svm_clf.
svm_clf = SVC().fit(X_train, Y_train)

# Evaluate the model accuracy on testing data set and print its score.
print(svm_clf.score(X_test, Y_test))


# Task 3:
# Standardize digits.data and store the transformed data in variable
# digits_standardized.
scaler = preprocessing.StandardScaler()
digits_standardized = scaler.fit_transform(digits.data)

# Again, split digits_standardized into X_train and X_test, and digits.target
# into Y_train and Y_test. Set random_state to 30 and perform stratified
# sampling.
X_train, X_test, Y_train, Y_test = train_test_split(
    digits_standardized, digits.target, stratify=digits.target, random_state=30
)

# Build another SVM classifier from X_train set and Y_train labels, with
# default parameters. Name the model as svm_clf2.
# A fresh SVC instance is used on purpose: fit() returns the estimator itself,
# so refitting the first estimator would make svm_clf and svm_clf2 the same
# object and silently overwrite the model trained on the unscaled data.
svm_clf2 = SVC().fit(X_train, Y_train)

# Evaluate the model accuracy on testing data set and print its score.
print(svm_clf2.score(X_test, Y_test))


LAB 6: Welcome to Machine Learning Using Scikit-Learn | 6 | Clustering.

Solution 1: Hands-On - Clustering.


#Write your code here
from sklearn import metrics
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.cluster import AgglomerativeClustering 
from sklearn.cluster import AffinityPropagation
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.metrics import homogeneity_score

# Please try this lab yourself; if you face any error, comment below.


About the author

D Shwari
I'm a professor at National University's Department of Computer Science. My main areas are data science and data analysis, along with project management for many computer science-related sectors. My next project is on AI with deep learning.

8 comments

  1. Anonymous
    i will try on six code machine learning but i comes error please help me
    1. Anonymous
      Error is related to what? Could you elaborate once?
  2. Anonymous
    i will try on six code machine learning but i comes error please help me
  3. Anonymous
    can you please provide full code for "LAB 6: Welcome to Machine Learning Using Scikit-Learn | 6 | Clustering."
    1. Anonymous
      #Write your code here
      from sklearn import metrics
      from sklearn import datasets
      from sklearn.cluster import KMeans
      from sklearn.cluster import AgglomerativeClustering
      from sklearn.cluster import AffinityPropagation
      from sklearn.model_selection import train_test_split
      from sklearn.datasets import load_iris
      from sklearn.metrics import homogeneity_score

      # Task 1: K-means with 3 clusters and a fixed seed; report how
      # homogeneous the clusters are with respect to the true iris labels.
      iris = load_iris()

      km_cls = KMeans(n_clusters=3, random_state=42)
      km_cls.fit(iris.data)
      km_labels = km_cls.labels_
      km_h_score = metrics.homogeneity_score(iris.target, km_labels)
      print('{:.4f}'.format(km_h_score))

      # Task 2: agglomerative clustering with 3 clusters, scored the same way.
      agg_cls = AgglomerativeClustering(n_clusters=3)
      agg_cls.fit(iris.data)
      agg_labels = agg_cls.labels_
      agg_h_score = metrics.homogeneity_score(iris.target, agg_labels)
      print('{:.4f}'.format(agg_h_score))

      # Task 3: affinity propagation with default parameters, scored the
      # same way.
      af_cls = AffinityPropagation()
      af_cls.fit(iris.data)
      af_labels = af_cls.labels_
      af_h_score = metrics.homogeneity_score(iris.target, af_labels)
      print('{:.4f}'.format(af_h_score))
  4. Anonymous
    can you please provide full code for "LAB 6: Welcome to Machine Learning Using Scikit-Learn | 6 | Clustering."
    1. Anonymous
      Glad to help you, please share the runtime error details you are getting.
  5. Anonymous
    #Write your code here
    from sklearn import metrics
    from sklearn import datasets
    from sklearn.cluster import KMeans
    from sklearn.cluster import AgglomerativeClustering
    from sklearn.cluster import AffinityPropagation
    from sklearn.model_selection import train_test_split
    from sklearn.datasets import load_iris
    from sklearn.metrics import homogeneity_score

    # Task 1: K-means with 3 clusters and a fixed seed; report how
    # homogeneous the clusters are with respect to the true iris labels.
    iris = load_iris()

    km_cls = KMeans(n_clusters=3, random_state=42)
    km_cls.fit(iris.data)
    km_labels = km_cls.labels_
    km_h_score = metrics.homogeneity_score(iris.target, km_labels)
    print('{:.4f}'.format(km_h_score))

    # Task 2: agglomerative clustering with 3 clusters, scored the same way.
    agg_cls = AgglomerativeClustering(n_clusters=3)
    agg_cls.fit(iris.data)
    agg_labels = agg_cls.labels_
    agg_h_score = metrics.homogeneity_score(iris.target, agg_labels)
    print('{:.4f}'.format(agg_h_score))

    # Task 3: affinity propagation with default parameters, scored the
    # same way.
    af_cls = AffinityPropagation()
    af_cls.fit(iris.data)
    af_labels = af_cls.labels_
    af_h_score = metrics.homogeneity_score(iris.target, af_labels)
    print('{:.4f}'.format(af_h_score))