If you find any issue, comment below.
Machine Learning Using scikit-learn Fresco Play Hands-On Solutions (Hacker Rank):
LAB 1: Welcome to Machine Learning Using Scikit-Learn | 1 | Preprocessing
Solution 1: Preprocessing.
#Write your code here
import numpy as np
from sklearn import datasets
from sklearn import preprocessing
from sklearn.impute import SimpleImputer

# Task 1:
# Load the popular iris data set from the sklearn.datasets module and assign it to variable 'iris'.
iris = datasets.load_iris()

# Perform normalization on iris.data with the l2 norm and save the transformed data in variable iris_normalized.
iris_normalized = preprocessing.Normalizer(norm='l2').fit(iris.data)
iris_normalized = iris_normalized.transform(iris.data)

# Print the mean of every column using the below command.
print(iris_normalized.mean(axis=0))

# Task 2:
# Convert the categorical integer list iris.target into a three-column binary attribute representation
# and store the result in variable 'iris_target_onehot'.
enc = preprocessing.OneHotEncoder()
iris_target_onehot = enc.fit_transform(iris.target.reshape(-1, 1))

# Execute the following print statement
print(iris_target_onehot.toarray()[[0, 50, 100]])

# Task 3:
# Set the first 50 row values of iris.data to null values. Use numpy.nan.
iris.data[0:50, :] = np.nan

# Perform imputation on 'iris.data' and save the transformed data in variable 'iris_imputed'.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer = imputer.fit(iris.data)
iris_imputed = imputer.transform(iris.data)

# Note: on older scikit-learn versions (< 0.22) the now-removed Imputer class was used instead:
# from sklearn.preprocessing import Imputer
# imputer = Imputer(missing_values='NaN', strategy='mean', axis=0)

# Print the mean of every column using the below command.
print(iris_imputed.mean(axis=0))
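As a side note (not part of the graded solution), scikit-learn also offers the functional shortcut preprocessing.normalize, which produces the same l2-normalized array in a single call; a minimal sketch, assuming the iris data is loaded as above:

import numpy as np
from sklearn import datasets, preprocessing

iris = datasets.load_iris()

# One-call equivalent of fitting a Normalizer and then transforming iris.data.
iris_normalized = preprocessing.normalize(iris.data, norm='l2')
print(iris_normalized.mean(axis=0))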
LAB 2: Welcome to Machine Learning Using Scikit-Learn | 2 | Nearest Neighbors
Solution 1: Hands-On KNN.
#Write your code here
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Task 1:
# Import the popular iris data set from the sklearn.datasets module and assign it to variable iris.
iris = datasets.load_iris()

# Split iris.data into two sets named X_train, X_test.
# Also, split iris.target into two sets Y_train, Y_test. Set the random_state to 30.
X_train, X_test, Y_train, Y_test = train_test_split(iris.data, iris.target,
                                                    stratify=iris.target, random_state=30)

# Print the shape of the X_train dataset.
# Print the shape of the X_test dataset.
print(X_train.shape)
print(X_test.shape)

# Task 2:
# Fit a K nearest neighbors model on X_train data and Y_train labels, with default parameters.
# Name the model knn_clf.
knn_classifier = KNeighborsClassifier()
knn_clf = knn_classifier.fit(X_train, Y_train)

# Evaluate the model accuracy on the training data set and print its score.
# Evaluate the model accuracy on the testing data set and print its score.
print(knn_clf.score(X_train, Y_train))
print(knn_clf.score(X_test, Y_test))

# Task 3:
# Fit multiple K nearest neighbors models on X_train data and Y_train labels with the
# n_neighbors parameter value changing from 3 to 10.
starting_index = 3
ending_index = 10
neighbors = np.arange(starting_index, ending_index)
train_accuracy = np.empty(len(neighbors))
test_accuracy = np.empty(len(neighbors))

# Evaluate each model's accuracy on the testing data set. Hint: use a for loop.
for i, k in enumerate(neighbors):
    # Set up a knn classifier with k neighbors
    knn = KNeighborsClassifier(n_neighbors=k)
    # Fit the model
    knn.fit(X_train, Y_train)
    # Compute accuracy on the training set
    train_accuracy[i] = knn.score(X_train, Y_train)
    # Compute accuracy on the test set
    test_accuracy[i] = knn.score(X_test, Y_test)

index_of_max_accuracy = list(np.where(test_accuracy == max(test_accuracy)))

# Print the n_neighbors value of the model with the highest accuracy.
print(index_of_max_accuracy[0][0] + starting_index)
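As an aside (not required by the hands-on), the same n_neighbors sweep can also be expressed with scikit-learn's GridSearchCV. A minimal sketch, assuming the X_train/Y_train split created above; note that GridSearchCV cross-validates on the training set instead of scoring on the test set, so the selected k may differ from the lab's answer:

from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# Search n_neighbors from 3 to 9 with 5-fold cross-validation on the training set.
param_grid = {'n_neighbors': list(range(3, 10))}
grid = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5)
grid.fit(X_train, Y_train)

print(grid.best_params_)           # best n_neighbors found by cross-validation
print(grid.score(X_test, Y_test))  # accuracy of the refit best model on the test set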
LAB 3: Welcome to Machine Learning Using Scikit-Learn | 3 | Decision Trees.
Solution 1: Decision Tree - Hands-On.
#Write your code here
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

# Task 1:
# Import numpy and set the random seed to 100.
np.random.seed(100)

# Load the popular Boston dataset from the sklearn.datasets module and assign it to variable boston.
# Note: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2; see the note after this block if it is unavailable.
boston = datasets.load_boston()

# Split boston.data into two sets named X_train and X_test. Also, split boston.target into two sets Y_train and Y_test.
# Set random_state to 30.
X_train, X_test, Y_train, Y_test = train_test_split(boston.data, boston.target, random_state=30)

# Print the shape of the X_train dataset.
# Print the shape of the X_test dataset.
print(X_train.shape)
print(X_test.shape)

# Task 2:
# Build a Decision tree Regressor model from the X_train set and Y_train labels, with default parameters.
# Name the model dt_reg.
dt_reg = DecisionTreeRegressor()
dt_reg = dt_reg.fit(X_train, Y_train)

# Evaluate the model accuracy on the training data set and print its score.
print(dt_reg.score(X_train, Y_train))

# Evaluate the model accuracy on the testing data set and print its score.
print(dt_reg.score(X_test, Y_test))

# Predict the housing price for the first two samples of the X_test set and print them. (Hint: use the predict() function.)
print(dt_reg.predict(X_test[:2]))

# Task 3:
# Fit multiple Decision tree regressors on X_train data and Y_train labels with the max_depth parameter changing from 2 to 5.
starting_index = 2
ending_index = 5

# Evaluate each model's accuracy on the testing data set. Hint: make use of a for loop.
test_accuracy = []
for depth in range(starting_index, ending_index):
    dt = DecisionTreeRegressor(max_depth=depth)
    # Fit the model
    dt = dt.fit(X_train, Y_train)
    # Compute accuracy on the test set
    test_accuracy.append(dt.score(X_test, Y_test))

# Print the max_depth value of the model with the highest accuracy.
print(test_accuracy.index(max(test_accuracy)) + starting_index)
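If you are on scikit-learn 1.2 or newer, load_boston has been removed. The snippet below is a workaround sketch that loads the same data directly from the original CMU StatLib source (the URL and parsing follow the guidance scikit-learn printed in its deprecation warning); it assumes the mirror is still reachable and that pandas is installed:

import numpy as np
import pandas as pd

# Assumption: the CMU StatLib mirror is still available at this URL.
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)

# Each sample is spread across two physical rows in the raw file.
boston_data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
boston_target = raw_df.values[1::2, 2]

# Use boston_data / boston_target in place of boston.data / boston.target above.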
LAB 4: Welcome to Machine Learning Using Scikit-Learn | 4 | Ensemble Methods.
Solution 1: Hands-On - Ensemble.
#Write your code here
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

# Task 1:
# Import numpy and set the random seed to 100.
np.random.seed(100)

# Load the popular Boston dataset from the sklearn.datasets module and assign it to variable boston.
boston = datasets.load_boston()

# Split boston.data into two sets named X_train and X_test.
# Also, split boston.target into two sets Y_train and Y_test.
# Set random_state to 30.
X_train, X_test, Y_train, Y_test = train_test_split(boston.data, boston.target, random_state=30)

# Print the shape of the X_train dataset.
print(X_train.shape)
# Print the shape of the X_test dataset.
print(X_test.shape)

# Task 2:
# Build a Random Forest Regressor model from the X_train set and Y_train labels, with default parameters.
# Name the model rf_reg.
rfr = RandomForestRegressor()
rf_reg = rfr.fit(X_train, Y_train)

# Evaluate the model accuracy on the training data set and print its score.
print(rf_reg.score(X_train, Y_train))
# Evaluate the model accuracy on the testing data set and print its score.
print(rf_reg.score(X_test, Y_test))

# Predict the housing price for the first two samples of the X_test set and print them.
print(rf_reg.predict(X_test[:2]))

# Task 3:
# Build multiple Random Forest Regressors on the X_train set and Y_train labels with the max_depth parameter
# changing from 3 to 5, and n_estimators set to one of the values 50, 100, 200.
# (This solution fixes n_estimators at 100 and varies only max_depth; a sweep over both parameters is sketched below.)
starting_index = 3
ending_index = 5
c_estimators = [50, 100, 200]
test_accuracy = []

# Evaluate each model's accuracy on the testing data set. Hint: use a for loop.
for depth in range(starting_index, ending_index + 1):
    rfr = RandomForestRegressor(n_estimators=c_estimators[1], max_depth=depth)
    # Fit the model
    rfr_fit = rfr.fit(X_train, Y_train)
    test_accuracy.append(rfr_fit.score(X_test, Y_test))

# Print the parameter values as a tuple (a, b), where 'a' is the max_depth value and 'b' is n_estimators.
index = test_accuracy.index(max(test_accuracy)) + starting_index
print((index, c_estimators[1]))
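A minimal sketch of the fuller sweep mentioned above, trying every (max_depth, n_estimators) combination and reporting the best pair. This is my own illustration rather than the graded solution, and it reuses the X_train/X_test split from the lab:

from sklearn.ensemble import RandomForestRegressor

best_score, best_params = -float("inf"), None
for depth in range(3, 6):            # max_depth values 3, 4, 5
    for n_est in (50, 100, 200):     # candidate n_estimators values
        model = RandomForestRegressor(n_estimators=n_est, max_depth=depth,
                                      random_state=30).fit(X_train, Y_train)
        score = model.score(X_test, Y_test)
        if score > best_score:
            best_score, best_params = score, (depth, n_est)

print(best_params)  # (max_depth, n_estimators) of the best-scoring model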
LAB 5: Welcome to Machine Learning Using Scikit-Learn | 5 | SVM.
Solution 1: Hands-On - SVM.
#Write your code here
import numpy as np
from sklearn import datasets
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Task 1:
# Load the popular digits dataset from the sklearn.datasets module and assign it to variable digits.
digits = datasets.load_digits()

# Split digits.data into two sets named X_train and X_test.
# Also, split digits.target into two sets Y_train and Y_test.
# Set random_state to 30.
X_train, X_test, Y_train, Y_test = train_test_split(digits.data, digits.target,
                                                    stratify=digits.target, random_state=30)

# Print the shape of the X_train dataset.
print(X_train.shape)
# Print the shape of the X_test dataset.
print(X_test.shape)

# Task 2:
# Build an SVM classifier from the X_train set and Y_train labels, with default parameters.
# Name the model svm_clf.
svm_classifier = SVC()
svm_clf = svm_classifier.fit(X_train, Y_train)

# Evaluate the model accuracy on the testing data set and print its score.
print(svm_clf.score(X_test, Y_test))

# Task 3:
# Standardize digits.data with StandardScaler and store the transformed data in variable digits_standardized.
scaler = preprocessing.StandardScaler()
digits_standardized = scaler.fit_transform(digits.data)

# Again, split digits_standardized into two sets named X_train and X_test.
# Also, split digits.target into two sets Y_train and Y_test.
# Set random_state to 30 and perform stratified sampling.
X_train, X_test, Y_train, Y_test = train_test_split(digits_standardized, digits.target,
                                                    stratify=digits.target, random_state=30)

# Build another SVM classifier from the X_train set and Y_train labels, with default parameters.
# Name the model svm_clf2. (Use a fresh SVC instance so svm_clf is not overwritten by re-fitting.)
svm_classifier2 = SVC()
svm_clf2 = svm_classifier2.fit(X_train, Y_train)

# Evaluate the model accuracy on the testing data set and print its score.
print(svm_clf2.score(X_test, Y_test))
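As an aside (not required by the hands-on), the scaling and classification steps can be combined into a single estimator with a scikit-learn Pipeline. A minimal sketch, assuming X_train/X_test hold the raw digits split from Task 1:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# The pipeline fits the scaler on the training data only, which avoids
# leaking test-set statistics into the scaling step.
scaled_svm = make_pipeline(StandardScaler(), SVC())
scaled_svm.fit(X_train, Y_train)
print(scaled_svm.score(X_test, Y_test))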
LAB 6: Welcome to Machine Learning Using Scikit-Learn | 6 | Clustering.
Solution 1: Hands-On - Clustering.
#Write your code here
from sklearn import metrics
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import AffinityPropagation
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.metrics import homogeneity_score

# Please try this one yourself; if you face any error, comment below.
# A hedged sketch of a possible solution is given after this block.
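The exact task prompts for this lab are not reproduced above, so the following is only a minimal sketch of the usual pattern for this hands-on: cluster the iris training data with KMeans, AgglomerativeClustering, and AffinityPropagation, and score each against the true labels with homogeneity_score. The variable names and the split are assumptions, not the graded solution:

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans, AgglomerativeClustering, AffinityPropagation
from sklearn.metrics import homogeneity_score

iris = load_iris()
X_train, X_test, Y_train, Y_test = train_test_split(iris.data, iris.target,
                                                    stratify=iris.target, random_state=30)

# KMeans with as many clusters as there are iris species.
km_cls = KMeans(n_clusters=3, random_state=30).fit(X_train)
print(homogeneity_score(Y_train, km_cls.labels_))

# Agglomerative (hierarchical) clustering with the same number of clusters.
agg_cls = AgglomerativeClustering(n_clusters=3).fit(X_train)
print(homogeneity_score(Y_train, agg_cls.labels_))

# Affinity Propagation chooses the number of clusters itself.
af_cls = AffinityPropagation().fit(X_train)
print(homogeneity_score(Y_train, af_cls.labels_))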