Turing Machine Data Scientist Program: Use-case 4
Statistical Inference - Mini Project
# The setup code below must be run before any of the questions.
import numpy as np
import math
import scipy.stats as sts
import dill
# Load the pickled quiz class first, then the quiz instance that the answers are passed to.
# NOTE: unpickling can execute arbitrary code; only load .pik files from a trusted source.
with open("QuizClass.pik", "rb") as quiz_class_file:
    Quiz = dill.load(quiz_class_file)
with open("Quiz.pik", "rb") as quiz_file:
    quiz = dill.load(quiz_file)
Question 1: Suppose a variable X has a bell-shaped distribution with a mean of 150 and a standard deviation of 20.
Solution 1: Suppose a variable X has a bell-shaped distribution with a mean of 150 and a standard deviation of 20.
import numpy as np
import math
import scipy.stats as sts
import dill
# Load the pickled quiz class first, then the quiz instance that the answers are passed to.
# NOTE: unpickling can execute arbitrary code; only load .pik files from a trusted source.
with open("QuizClass.pik", "rb") as quiz_class_file:
    Quiz = dill.load(quiz_class_file)
with open("Quiz.pik", "rb") as quiz_file:
    quiz = dill.load(quiz_file)
#Task 1:
# Suppose a variable X has a bell-shaped distribution with a mean of 150 and a standard deviation of 20.
# a. What percentage of X values lies between 130 and 170?
# b. What percentage of X values lies between 110 and 190?
# c. What percentage of X values lies above 190?
# Task 1 solution: the bell-shaped variable is modelled with scipy's normal
# distribution (mean 150, standard deviation 20).
# NOTE(review): the empirical 68-95-99.7 rule would give 68 / 95 / 2.5 instead;
# confirm which convention the grader expects for part (c).
mean = 150
std_dev = 20

# End points of the intervals for parts (a) and (b); same index = same interval.
lies_Lower = [130, 110]
lies_Upper = [170, 190]

# Cumulative probability P(X <= x) at every end point.
lies_Lower_percent = [sts.norm.cdf(x, loc=mean, scale=std_dev) for x in lies_Lower]
lies_Upper_percent = [sts.norm.cdf(x, loc=mean, scale=std_dev) for x in lies_Upper]

# Probability mass between the paired end points, expressed as a percentage.
percentage = [
    (upper - lower) * 100
    for lower, upper in zip(lies_Lower_percent, lies_Upper_percent)
]

# Part (c): everything above 190 is the complement of the CDF at 190.
cdf_190 = sts.norm.cdf(190, loc=mean, scale=std_dev)
percentage_above_190 = (1 - cdf_190) * 100

print(percentage)
print(percentage_above_190)

### Assign your answers here
# Parts (a) and (b) are reported as whole percentages, part (c) to two decimals.
a_1 = math.floor(round(percentage[0], 2))
b_1 = math.floor(round(percentage[1], 2))
c_1 = round(percentage_above_190, 2)

### For evaluation
ans_1 = {"a_1": a_1, "b_1": b_1, "c_1": c_1}
# Pass the question-1 answers to the quiz object loaded from Quiz.pik
# (its eval method is defined in that pickle, not in this file).
quiz.eval(1, ans_1)
Question 2: Variable X has a mean of 15 and a standard deviation of 2
Solution 2: Variable X has a mean of 15 and a standard deviation of 2
import numpy as np
import math
import scipy.stats as sts
import dill
# Load the pickled quiz class first, then the quiz instance that the answers are passed to.
# NOTE: unpickling can execute arbitrary code; only load .pik files from a trusted source.
with open("QuizClass.pik", "rb") as quiz_class_file:
    Quiz = dill.load(quiz_class_file)
with open("Quiz.pik", "rb") as quiz_file:
    quiz = dill.load(quiz_file)
import numpy as np
import math
import scipy.stats as sts
# Task2: Variable X has a mean of 15 and a standard deviation of 2.
# a. What percentage of X values will lie within 1.5 standard deviation of the mean?
# b. What is the minimum percentage of X values that lie between 8 and 17?
# Task 2 solution, computed under a normal-distribution model.
# NOTE(review): the task does not state that X is normal, and "minimum
# percentage" in part (b) suggests a Chebyshev-style bound may be expected;
# confirm against the grader before relying on these values.
mean = 15
std_dev = 2

# Part (a): "within 1.5 standard deviations" is symmetric in z-units.
z_upper = 1.5
z_lower = -z_upper
prob_upper = sts.norm.cdf(z_upper)
prob_lower = sts.norm.cdf(z_lower)
a = round(100 * (prob_upper - prob_lower), 2)

# Part (b): standardise 8 and 17, then take the probability mass between them.
z_8 = (8 - mean) / std_dev
z_17 = (17 - mean) / std_dev
prob_8 = sts.norm.cdf(z_8)
prob_17 = sts.norm.cdf(z_17)
b = round(100 * (prob_17 - prob_8), 2)

print(a, b)

### Assign your answers here
a_2 = a
b_2 = b

### For evaluation
ans_2 = {"a_2": a_2, "b_2": b_2}
# Pass the question-2 answers to the quiz object loaded from Quiz.pik
# (its eval method is defined in that pickle, not in this file).
quiz.eval(2, ans_2)
Question 3: What is the 25th percentile of the samples below?
Solution 3: What is the 25th percentile of the samples below?
import numpy as np
import math
import scipy.stats as sts
import dill
# Load the pickled quiz class first, then the quiz instance that the answers are passed to.
# NOTE: unpickling can execute arbitrary code; only load .pik files from a trusted source.
with open("QuizClass.pik", "rb") as quiz_class_file:
    Quiz = dill.load(quiz_class_file)
with open("Quiz.pik", "rb") as quiz_file:
    quiz = dill.load(quiz_file)
# Task 3: What is the 25 percentile of the below samples
# [3.09, 2.48, 2.02, 2.98, 3.53, 2.41, 2.01, 2.95, 2.63, 3.09, 3.26, 2.04, 3.74, 2.99, 2.34, 2.77, 3.05, 3.29, 3.14, 3.17]
# Task 3 solution: 25th percentile of the given samples.
samples = [
    3.09, 2.48, 2.02, 2.98, 3.53, 2.41, 2.01, 2.95, 2.63, 3.09,
    3.26, 2.04, 3.74, 2.99, 2.34, 2.77, 3.05, 3.29, 3.14, 3.17,
]

# np.percentile returns the sample VALUE at the requested percentile
# (not the percentage rank of a score), rounded here to two decimals.
a_3 = round(np.percentile(samples, 25), 2)

### Assign your answers here
# Separate copy of the sample values kept under the name `data`.
data = list(samples)

### For evaluation
ans_3 = {"a_3": a_3}
print(ans_3)
# Pass the question-3 answer to the quiz object loaded from Quiz.pik
# (its eval method is defined in that pickle, not in this file).
quiz.eval(3, ans_3)
Question 4: Suppose a marble is randomly selected from a jar containing 12 red, 4 black, and 8 blue marbles. Find the probability of the following.
Solution 4: Suppose a marble is randomly selected from a jar containing 12 red, 4 black, and 8 blue marbles. Find the probability of the following.
import numpy as np
import math
import scipy.stats as sts
import dill
# Load the pickled quiz class first, then the quiz instance that the answers are passed to.
# NOTE: unpickling can execute arbitrary code; only load .pik files from a trusted source.
with open("QuizClass.pik", "rb") as quiz_class_file:
    Quiz = dill.load(quiz_class_file)
with open("Quiz.pik", "rb") as quiz_file:
    quiz = dill.load(quiz_file)
# Task 4: Suppose a marble is randomly selected from a jar containing 12 red, 4 black, and 8 blue marbles. Find the probability of the following:
# a. The marble is red or black
# b. The marble is black or blue
# c. The marble is not blue
# d. The marble is red or not blue
# Task 4 solution: one marble drawn at random, so every probability is
# (number of favourable marbles) / (total marbles), rounded to two decimals.
red, black, blue = 12, 4, 8
total_marbles = red + black + blue

# a. Red or black: the colours are mutually exclusive, so the counts add.
prob_red_or_black = round((red + black) / total_marbles, 2)

# b. Black or blue.
prob_black_or_blue = round((black + blue) / total_marbles, 2)

# c. Not blue: every marble except the blue ones.
prob_not_blue = round((total_marbles - blue) / total_marbles, 2)

# d. Red or not blue: red marbles are already "not blue", so the favourable
#    set is again the red and black marbles.
prob_red_or_not_blue = round((red + black) / total_marbles, 2)

### Assign your answers here
a_4, b_4 = prob_red_or_black, prob_black_or_blue
c_4, d_4 = prob_not_blue, prob_red_or_not_blue

### For evaluation
ans_4 = {"a_4": a_4, "b_4": b_4, "c_4": c_4, "d_4": d_4}
# Pass the question-4 answers to the quiz object loaded from Quiz.pik
# (its eval method is defined in that pickle, not in this file).
quiz.eval(4, ans_4)
Question 5: Let A and B be events with P(A) = 0.2, P(B) = 0.8, and P(A ∩ B) = 0.1 Find the following-
Solution 5: Let A and B be events with P(A) = 0.2, P(B) = 0.8, and P(A ∩ B) = 0.1 Find the following-
import numpy as np
import math
import scipy.stats as sts
import dill
# Load the pickled quiz class first, then the quiz instance that the answers are passed to.
# NOTE: unpickling can execute arbitrary code; only load .pik files from a trusted source.
with open("QuizClass.pik", "rb") as quiz_class_file:
    Quiz = dill.load(quiz_class_file)
with open("Quiz.pik", "rb") as quiz_file:
    quiz = dill.load(quiz_file)
# Task 5: Let A and B be events with P(A) = 0.2, P(B) = 0.8, and P(A ∩ B) = 0.1 Find the following
# a. 𝑃(𝐵¯)
# b. 𝑃(𝐴¯∩𝐵¯)
# c. 𝑃(𝐵¯ | 𝐴)
# d. 𝑃(𝐴¯∩𝐵)
# Events A and B with given probabilities
# Task 5 solution: probabilities derived from P(A), P(B) and P(A ∩ B).
P_A = 0.2
P_B = 0.8
P_A_and_B = 0.1

# a. P(B') by the complement rule.
P_B_complement = round(1 - P_B, 2)

# b. P(A' ∩ B') = 1 - P(A ∪ B) (De Morgan), with P(A ∪ B) = P(A) + P(B) - P(A ∩ B).
P_A_complement_and_B_complement = round(1 - (P_A + P_B - P_A_and_B), 2)

# c. P(B' | A) = P(A ∩ B') / P(A), where P(A ∩ B') = P(A) - P(A ∩ B).
P_B_complement_given_A = round((P_A - P_A_and_B) / P_A, 2)

# d. P(A' ∩ B) = P(B) - P(A ∩ B).
P_A_complement_and_B = round(P_B - P_A_and_B, 2)

# Print each value with a label; f-strings are used so the number itself is
# shown rather than a one-element set.
print(f"P(B') = {P_B_complement}")
print(f"P(A' ∩ B') = {P_A_complement_and_B_complement}")
print(f"P(B' | A) = {P_B_complement_given_A}")
print(f"P(A' ∩ B) = {P_A_complement_and_B}")

### Assign your answers here
a_5 = P_B_complement
b_5 = P_A_complement_and_B_complement
c_5 = P_B_complement_given_A
d_5 = P_A_complement_and_B

### For evaluation
ans_5 = {"a_5": a_5, "b_5": b_5, "c_5": c_5, "d_5": d_5}
# Pass the question-5 answers to the quiz object loaded from Quiz.pik
# (its eval method is defined in that pickle, not in this file).
quiz.eval(5, ans_5)
Question 6: Given a sample of size n = 60 taken from a continuous population distribution with mean 56 and standard deviation 25, find the variance of the sample mean.
Solution 6: Given a sample of size n = 60 taken from a continuous population distribution with mean 56 and standard deviation 25, find the variance of the sample mean.
import numpy as np
import math
import scipy.stats as sts
import dill
# Load the pickled quiz class first, then the quiz instance that the answers are passed to.
# NOTE: unpickling can execute arbitrary code; only load .pik files from a trusted source.
with open("QuizClass.pik", "rb") as quiz_class_file:
    Quiz = dill.load(quiz_class_file)
with open("Quiz.pik", "rb") as quiz_file:
    quiz = dill.load(quiz_file)
#Task 6: Given a sample of size n = 60 taken from a continuous population distribution with mean 56 and standard deviation 25, find the variance of the sample mean.
# Task 6 solution: the variance of the sample mean is the population
# variance divided by the sample size.
sample_size = 60
population_variance = 25 ** 2  # standard deviation 25, squared

### Assign your answers here
variance = round(population_variance / sample_size, 2)

### For evaluation
ans_6 = {"variance": variance}
# Pass the question-6 answer to the quiz object loaded from Quiz.pik
# (its eval method is defined in that pickle, not in this file).
quiz.eval(6, ans_6)
Question 7: 55% of all engineering students prefer internship over final year project. Suppose 12 students are randomly selected and the number in favor of internship is recorded. Find the following:
Solution 7: 55% of all engineering students prefer internship over final year project. Suppose 12 students are randomly selected and the number in favor of internship is recorded. Find the following:
import numpy as np
import math
import scipy.stats as sts
import dill
# Load the pickled quiz class first, then the quiz instance that the answers are passed to.
# NOTE: unpickling can execute arbitrary code; only load .pik files from a trusted source.
with open("QuizClass.pik", "rb") as quiz_class_file:
    Quiz = dill.load(quiz_class_file)
with open("Quiz.pik", "rb") as quiz_file:
    quiz = dill.load(quiz_file)
# Task 7: 55% of all engineering students prefer internship over final year project. Suppose 12 students are randomly selected and the number in favor of internship is recorded. Find the following:
# a. The probability that exactly seven of them choose internship.
# b. The probability that at most eight of them choose internship.
# c. The probability that at least five of them choose internship.
# d. The probability that at least seven, but no more than 10, choose internship.
# Task 7 solution: the number of students preferring an internship is
# modelled as Binomial(n = 12, p = 0.55).
n_students = 12
p_internship = 0.55

# a. Exactly seven: P(X = 7).
prob_exactly_7 = round(sts.binom.pmf(7, n_students, p_internship), 2)

# b. At most eight: P(X <= 8).
prob_at_most_8 = round(sts.binom.cdf(8, n_students, p_internship), 2)

# c. At least five: P(X >= 5) = 1 - P(X <= 4).
prob_at_least_5 = round(1 - sts.binom.cdf(4, n_students, p_internship), 2)

# d. At least seven but no more than ten: P(7 <= X <= 10) = P(X <= 10) - P(X <= 6).
prob_at_least_7_and_at_most_10 = round(
    sts.binom.cdf(10, n_students, p_internship)
    - sts.binom.cdf(6, n_students, p_internship),
    2,
)

# Print each value with a label; f-strings are used so the number itself is
# shown rather than a one-element set.
print(f"P(X = 7) = {prob_exactly_7}")
print(f"P(X <= 8) = {prob_at_most_8}")
print(f"P(X >= 5) = {prob_at_least_5}")
print(f"P(7 <= X <= 10) = {prob_at_least_7_and_at_most_10}")

### Assign your answers here
a_7 = prob_exactly_7
b_7 = prob_at_most_8
c_7 = prob_at_least_5
d_7 = prob_at_least_7_and_at_most_10

### For evaluation
ans_7 = {"a_7": a_7, "b_7": b_7, "c_7": c_7, "d_7": d_7}
# Pass the question-7 answers to the quiz object loaded from Quiz.pik
# (its eval method is defined in that pickle, not in this file).
quiz.eval(7, ans_7)
Question 8: Suppose the population variable X is N(3, 0.3) and n = 20. How large an interval must be chosen so that the probability is 0.95 that the sample mean 𝑋¯ lies within ±a units of the population mean μ?
Solution 8: Suppose the population variable X is N(3, 0.3) and n = 20. How large an interval must be chosen so that the probability is 0.95 that the sample mean 𝑋¯ lies within ±a units of the population mean μ?
import numpy as np
import math
import scipy.stats as sts
import dill
# Load the pickled quiz class first, then the quiz instance that the answers are passed to.
# NOTE: unpickling can execute arbitrary code; only load .pik files from a trusted source.
with open("QuizClass.pik", "rb") as quiz_class_file:
    Quiz = dill.load(quiz_class_file)
with open("Quiz.pik", "rb") as quiz_file:
    quiz = dill.load(quiz_file)
# Task 8: Suppose the population variable X is N(3, 0.3) and n = 20. How large an interval must be chosen so that the probability is 0.95 that the sample mean 𝑋¯ lies within ±a units of the population mean μ?
# Parameters for Normal Distribution
# Task 8 solution: interval around mu that contains the sample mean with
# probability 0.95.
# NOTE(review): N(3, 0.3) is read here as (mean, standard deviation). If the
# second parameter is meant as a variance, sigma should be math.sqrt(0.3) — confirm.
mu = 3  # Population mean
sigma = 0.3  # Population standard deviation
n = 20  # Sample size
alpha = 0.95  # Probability that the sample mean falls inside the interval

# The mean of n independent draws has standard deviation sigma / sqrt(n), so
# the interval is built from this standard error, not from sigma itself.
standard_error = sigma / math.sqrt(n)

# Central interval holding `alpha` of the sampling distribution; a and b are
# its lower and upper end points. The level is passed positionally because
# SciPy renamed this keyword from `alpha` to `confidence`.
a, b = sts.norm.interval(alpha, loc=mu, scale=standard_error)

# Full width of the interval (twice the half-width in "mu ± a").
# NOTE(review): confirm whether the grader expects the full width or the half-width.
width = round(b - a, 2)
print(width)

### Assign your answers here
interval = width

### For evaluation
ans_8 = {"interval": np.around(interval, 1)}
# Pass the question-8 answer to the quiz object loaded from Quiz.pik
# (its eval method is defined in that pickle, not in this file).
quiz.eval(8, ans_8)
Question 9: A random variable X is N(25, 4). Find the indicated percentile for X:-
Solution 9: A random variable X is N(25, 4). Find the indicated percentile for X:-
import numpy as np
import math
import scipy.stats as sts
import dill
# Load the pickled quiz class first, then the quiz instance that the answers are passed to.
# NOTE: unpickling can execute arbitrary code; only load .pik files from a trusted source.
with open("QuizClass.pik", "rb") as quiz_class_file:
    Quiz = dill.load(quiz_class_file)
with open("Quiz.pik", "rb") as quiz_file:
    quiz = dill.load(quiz_file)
# Task 9: A random variable X is N(25, 4). Find the indicated percentile for X:
# a. The 10th percentile
# b. The 90th percentile
# c. The 80th percentile
# d. The 50th percentile
Please try to solve this one on your own.