Note: If you run into any programming errors with these solutions, please comment below.
LAB 1: Descriptive Statistics.
Question 1: Welcome to Statistics with Python | 1 | Descriptive Statistics
Solution 1:
#!/bin/python3
# Write your code here
import numpy as np
from scipy import stats


def compute_descriptive_stats(data):
    """Compute descriptive statistics for a sample.

    Parameters
    ----------
    data : array-like
        Sample values; converted with ``np.array`` before use.

    Returns
    -------
    mean : float
        Mean value of the sample.
    median : float
        Median value of the sample.
    mode
        First element of ``scipy.stats.mode(sample)`` (the modal value).
    percentile : numpy.ndarray
        25th and 75th percentiles, computed with the 'lower' rule.
    iqr : float
        Inter-quartile range, computed with the 'lower' rule.
    skew : float
        Skewness of the sample.
    kurtosis : float
        Kurtosis of the sample as returned by ``scipy.stats.kurtosis``.
    """
    sample = np.array(data)

    # Task 1: mean of the sample.
    mean = np.mean(sample)

    # Task 2: median of the sample.
    median = np.median(sample)

    # Task 3: mode of the sample.
    mode = stats.mode(sample)[0]

    # Task 4: 25th and 75th percentiles as a numpy array.
    # NumPy 1.22 renamed the `interpolation` keyword to `method`; prefer the
    # new spelling and fall back only on older NumPy that does not know it.
    try:
        percentile = np.percentile(sample, [25, 75], method='lower')
    except TypeError:
        percentile = np.percentile(sample, [25, 75], interpolation='lower')

    # Task 5: inter-quartile range.
    iqr = stats.iqr(sample, interpolation='lower')

    # Task 6: skewness.
    skew = stats.skew(sample)

    # Task 7: kurtosis.
    kurtosis = stats.kurtosis(sample)

    return mean, median, mode, percentile, iqr, skew, kurtosis
LAB 2: Random Distributions.
Question 2: Welcome to Statistics with Python | 2 | Random Distributions.
Solution 2:
#!/bin/python3
# Write your code here
from scipy import stats
import numpy as np


def compute_absolute_difference(mean, std, seed):
    """Return |sample mean - distribution mean| for a seeded normal sample.

    Parameters
    ----------
    mean : float
        Mean value for the normal distribution.
    std : float
        Standard deviation value for the normal distribution.
    seed : int
        Seed value for NumPy's global random generator.

    Returns
    -------
    absolute_difference : float
        Absolute (non-negative) difference between the mean of a
        100-element random sample and the distribution mean.
    """
    # Task 1: normal distribution with the requested mean / standard deviation.
    normal_distribution = stats.norm(loc=mean, scale=std)

    # Task 2: seed the global generator, then draw 100 elements.
    np.random.seed(seed)
    random_sample = normal_distribution.rvs(100)

    # Task 3: absolute difference between the sample mean and the
    # distribution mean. abs() is required: the raw difference is negative
    # whenever the sample mean falls below `mean`.
    sample_mean = np.mean(random_sample)
    absolute_difference = abs(sample_mean - mean)

    return absolute_difference
LAB 3: Random Distribution 2
Question 3: Welcome to Statistics with Python | 3 | Random Experiment.
Solution 3:
#!/bin/python3
# Write your code here
import sys
import numpy as np
from scipy.stats import binom


def count_random_heads(number_sample, random_state):
    """Simulate coin tosses and count how many came up Heads.

    The values 0 and 1 represent Heads and Tails respectively.

    Parameters
    ----------
    number_sample : int
        Number of times the coin-toss experiment is repeated.
    random_state : int
        Seed value for NumPy's global random generator.

    Returns
    -------
    head_count : int
        Number of tosses that produced Heads (value 0).
    """
    # Task 1: set the random state.
    np.random.seed(random_state)

    # Task 2: draw `number_sample` Bernoulli(0.5) outcomes.
    tosses = binom.rvs(n=1, p=0.5, size=number_sample)

    # Task 3: count the zeros (Heads). Counting directly instead of indexing
    # np.bincount(...)[0] avoids an IndexError when the sample is empty.
    head_count = int(np.count_nonzero(tosses == 0))

    return head_count
LAB 4: Exercise - Hypothesis testing.
Question 4: Welcome to Statistics with Python | 4 | Hypothesis Testing 1.
Solution 4:
#!/bin/python3
# Write your code here
from scipy import stats


def perform_ttest(sample1, sample2):
    """Run an independent two-sample t-test.

    The samples represent the life satisfaction score (computed through a
    methodology) of older adults and younger adults respectively.

    Parameters
    ----------
    sample1 : list
        Scores taken from group 1.
    sample2 : list
        Scores taken from group 2.

    Returns
    -------
    t_score : float
        t-score of the t-test.
    p_value : float
        p-value of the t-test.
    """
    # Task 1: the two samples are independent, so use ttest_ind.
    t_score, p_value = stats.ttest_ind(sample1, sample2)
    return t_score, p_value
LAB 5: Exercise - Hypothesis testing.
Question 5: Welcome to Statistics with Python | 5 | Hypothesis Testing 2.
Solution 5:
#!/bin/python3
# Write your code here
from scipy import stats


def perform_ttest(sample1, sample2):
    """Run a paired (related-samples) t-test.

    A researcher noted the number of chocolate chips consumed by 10 rats,
    with and without electrical stimulation. `sample1` represents
    consumption with stimulation, and `sample2` without stimulation.

    Parameters
    ----------
    sample1 : list
        Chocolate chips consumption with stimulation.
    sample2 : list
        Chocolate chips consumption without stimulation.

    Returns
    -------
    t_score : float
        t-score of the t-test.
    p_value : float
        p-value of the t-test.
    """
    # Task 1: both samples come from the same subjects, so use ttest_rel.
    t_score, p_value = stats.ttest_rel(sample1, sample2)
    return t_score, p_value
LAB 6: Linear Regression 1.
Question 6: Welcome to Statistics with Python | 6 | Linear Regression 1.
Solution 6:
#!/bin/python3
# Write your code here
import statsmodels.api as sm
import pandas as pd


def build_lr():
    """Fit an OLS regression of `mpg` on `wt` for the R `mtcars` dataset.

    Returns
    -------
    r_squared : float
        R-squared value of the fitted linear regression model.
    """
    # Task 1: load the R dataset mtcars as a pandas dataframe.
    # This statement must be live code: everything below depends on it.
    mtcars_dataset = sm.datasets.get_rdataset("mtcars", "datasets")
    df = pd.DataFrame(mtcars_dataset.data)

    # Task 2: independent variable `wt` (plus an intercept column, which
    # sm.OLS does not add on its own), dependent variable `mpg`.
    x = sm.add_constant(df['wt'])
    y = df['mpg']
    model = sm.OLS(y, x).fit()

    # Task 3: return the R-squared value as a float.
    r_squared = float(model.rsquared)
    return r_squared
LAB 7: Linear Regression 2.
Question 7: Welcome to Statistics with Python | 7 | Linear Regression 2.
Solution 7:
#!/bin/python3
# Write your code here
import statsmodels.api as sm
import statsmodels.formula.api as smf
import pandas as pd
import numpy as np


def build_lr():
    """Fit an OLS regression of log(`mpg`) on log(`wt`) for `mtcars`.

    Returns
    -------
    r_squared : float
        R-squared value of the fitted linear regression model.
    """
    # Task 1: load the R dataset mtcars as a pandas dataframe.
    mtcars_dataset = sm.datasets.get_rdataset("mtcars", "datasets")
    df = pd.DataFrame(mtcars_dataset.data)

    # Task 2: log of independent variable `wt`, log of dependent variable
    # `mpg`. `np` is referenced inside the formula string, so the numpy
    # import above is required even though no Python expression uses it.
    model = smf.ols(formula='np.log(mpg) ~ np.log(wt)', data=df).fit()

    # Task 3: return the R-squared value as a float.
    r_squared = float(model.rsquared)
    return r_squared
LAB 8: Exercise - Logistic Regression.
Question 8: Welcome to Statistics with Python | 8 | Logistic Regression.
Solution 8:
#!/bin/python3
# Write your code here
import statsmodels.api as sm
import statsmodels.formula.api as smf
import pandas as pd


def build_log_reg():
    """Fit a logistic regression of `Class` on `V1` for the MASS `biopsy` data.

    Returns
    -------
    r_squared : float
        Pseudo R-squared value of the fitted logistic regression model.
    """
    # Task 1: load the R dataset biopsy from the MASS package.
    biopsy_dataset = sm.datasets.get_rdataset("biopsy", "MASS")
    df = pd.DataFrame(biopsy_dataset.data)

    # Task 2: rename the column `class` to `Class`.
    df = df.rename(columns={'class': 'Class'})

    # Task 3: encode benign -> 0 and malignant -> 1.
    # Assign the result back rather than calling replace(..., inplace=True)
    # on df['Class']: that chained in-place form acts on a temporary Series,
    # warns on recent pandas and leaves `df` unchanged under copy-on-write.
    df['Class'] = df['Class'].map({'benign': 0, 'malignant': 1})

    # Task 4: independent variable `V1`, dependent variable `Class`.
    model = smf.logit("Class ~ V1", data=df).fit()

    # Task 5: return the pseudo R-squared value as a float.
    r_squared = float(model.prsquared)
    return r_squared
LAB 9: Exercise - Poisson Regression.
Question 9: Welcome to Statistics with Python | 9 | Poisson Regression.
Solution 9:
#!/bin/python3
# Write your code here
import statsmodels.api as sm
import statsmodels.formula.api as smf
import numpy as np
import pandas as pd


def build_pos_reg():
    """Fit a Poisson regression of `Claims` on log(`Holders`).

    Uses the `Insurance` dataset from the R MASS package.

    Returns
    -------
    residuals_sum : float
        Sum of the residuals of the fitted Poisson regression model.
    """
    # Task 1: load the R dataset Insurance from the MASS package.
    insurance_dataset = sm.datasets.get_rdataset("Insurance", "MASS")
    df = pd.DataFrame(insurance_dataset.data)

    # Task 2: log of independent variable `Holders`, dependent variable
    # `Claims`. assign() returns a new frame, so the loaded dataset object
    # is not mutated.
    df = df.assign(Holders_New=np.log(df['Holders']))
    poisson_model = smf.poisson('Claims ~ Holders_New', df).fit()

    # Task 3: return the sum of the residuals as a float.
    residuals_sum = float(np.sum(poisson_model.resid))
    return residuals_sum
LAB 10: Exercise - ANOVA 1.
Question 10: Welcome to Statistics with Python | 10 | ANOVA 1.
Solution 10:
#!/bin/python3
# Write your code here
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats import anova
import numpy as np
import pandas as pd


def build_anova():
    """Run ANOVA on an OLS regression of `mpg` on `wt` for `mtcars`.

    Returns
    -------
    f1_score : float
        F-statistic value of the `wt` term in the ANOVA table.
    """
    # Task 1: load the R dataset mtcars as a pandas dataframe.
    mtcars_dataset = sm.datasets.get_rdataset("mtcars", "datasets")
    df = pd.DataFrame(mtcars_dataset.data)

    # Task 2: independent variable `wt`, dependent variable `mpg`.
    mtcars_model = smf.ols('mpg ~ wt', df).fit()

    # Task 3: ANOVA on the fitted model; pick the F value of the `wt` row.
    anova_table = anova.anova_lm(mtcars_model)
    f1_score = float(anova_table.F["wt"])
    return f1_score
LAB 11: Exercise - ANOVA 2
Question 11: Welcome to Statistics with Python | 11 | ANOVA 2.
Solution 11:
#!/bin/python3
# Write your code here
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats import anova
import numpy as np
import pandas as pd


def build_anova():
    """Run ANOVA on an OLS regression of log(`mpg`) on log(`wt`) for `mtcars`.

    Returns
    -------
    f1_score : float
        F-statistic value of the `np.log(wt)` term in the ANOVA table.
    """
    # Task 1: load the R dataset mtcars as a pandas dataframe.
    mtcars_dataset = sm.datasets.get_rdataset("mtcars", "datasets")
    df = pd.DataFrame(mtcars_dataset.data)

    # Task 2: log of independent variable `wt`, log of dependent variable
    # `mpg`. `np` is referenced inside the formula string.
    model = smf.ols(formula='np.log(mpg) ~ np.log(wt)', data=df).fit()

    # Task 3: ANOVA on the fitted model; the row label is the formula term.
    anova_table = anova.anova_lm(model)
    f1_score = float(anova_table.F["np.log(wt)"])
    return f1_score