Mini-Project for Data Scientist - Statistical Inference Use-case 4 Fresco Play Hands on Solution Hacker Rank

Learn how to implement statistical inference concepts in Python using the SciPy stats module: normal distributions, event draws, random selection, and more.
Mini-Project for Data Scientist - Statistical Inference Use-case 4 Fresco Play Hands on Solution Hacker Rank - www.pdfcup.com

Turing Machine Data Scientist Program: Use-case 4
Statistical Inference - Mini Project


# The code below must be run before any of the solutions

import numpy as np
import math
import scipy.stats as sts
import dill

# Load the pickled `Quiz` object and the `quiz` instance that the solution
# cells call via `quiz.eval(...)`.
# NOTE: unpickling can execute arbitrary code; only load .pik files that come
# from a trusted source.
with open("QuizClass.pik", "rb") as quiz_class_file:
    Quiz = dill.load(quiz_class_file)
with open("Quiz.pik", "rb") as quiz_file:
    quiz = dill.load(quiz_file)

Question 1: Suppose a variable X has a bell-shaped distribution with a mean of 150 and a standard deviation of 20.

Solution 1: Suppose a variable X has a bell-shaped distribution with a mean of 150 and a standard deviation of 20.


import numpy as np
import math
import scipy.stats as sts

import dill

# Load the pickled `Quiz` object and the `quiz` instance that the solution
# cells call via `quiz.eval(...)`.
# NOTE: unpickling can execute arbitrary code; only load .pik files that come
# from a trusted source.
with open("QuizClass.pik", "rb") as quiz_class_file:
    Quiz = dill.load(quiz_class_file)
with open("Quiz.pik", "rb") as quiz_file:
    quiz = dill.load(quiz_file)

#Task 1:
# Suppose a variable X has a bell-shaped distribution with a mean of 150 and a standard deviation of 20.
# a. What percentage of X values lies between 130 and 170?
# b. What percentage of X values lies between 110 and 190?
# c. What percentage of X values lies above 190?


# Task 1 model: X is treated as Normal(mean=150, std_dev=20).
mean = 150
std_dev = 20
x_dist = sts.norm(loc=mean, scale=std_dev)

# Bounds for parts (a) and (b); entries at the same position form one interval.
lies_Lower = [130, 110]
lies_Upper = [170, 190]

# Cumulative probability P(X <= bound) at every lower and upper bound.
lies_Lower_percent = [x_dist.cdf(bound) for bound in lies_Lower]
lies_Upper_percent = [x_dist.cdf(bound) for bound in lies_Upper]

# Percentage of X inside each interval: (CDF(upper) - CDF(lower)) * 100.
percentage = [
    (upper_cdf - lower_cdf) * 100
    for lower_cdf, upper_cdf in zip(lies_Lower_percent, lies_Upper_percent)
]

# Part (c): upper-tail percentage beyond 190.
cdf_190 = x_dist.cdf(190)
percentage_above_190 = (1 - cdf_190) * 100

print(percentage)
print(percentage_above_190)

### Assign your answers here
# (a) and (b) are reported as whole percentages (rounded to 2 places, then
# floored); (c) keeps two decimals.
a_1 = math.floor(round(percentage[0], 2))
b_1 = math.floor(round(percentage[1], 2))
c_1 = round(percentage_above_190, 2)

### For evaluation
# a_1 is already an int, so no further flooring is needed here.
ans_1 = {"a_1": a_1, "b_1": b_1, "c_1": c_1}
quiz.eval(1, ans_1)

Question 2: Variable X has a mean of 15 and a standard deviation of 2

Solution 2: Variable X has a mean of 15 and a standard deviation of 2


import numpy as np
import math
import scipy.stats as sts

import dill

# Load the pickled `Quiz` object and the `quiz` instance that the solution
# cells call via `quiz.eval(...)`.
# NOTE: unpickling can execute arbitrary code; only load .pik files that come
# from a trusted source.
with open("QuizClass.pik", "rb") as quiz_class_file:
    Quiz = dill.load(quiz_class_file)
with open("Quiz.pik", "rb") as quiz_file:
    quiz = dill.load(quiz_file)

import numpy as np
import math
import scipy.stats as sts

# Task2: Variable X has a mean of 15 and a standard deviation of 2.
# a. What percentage of X values will lie within 1.5 standard deviation of the mean?
# b. What is the minimum percentage of X values that lie between 8 and 17?

# Task 2 model: X has mean 15 and standard deviation 2; probabilities are
# read off the standard normal CDF.
mean = 15
std_dev = 2

# Part (a): the band of +/-1.5 standard deviations, expressed as z-scores.
z_lower, z_upper = -1.5, 1.5
prob_lower = sts.norm.cdf(z_lower)
prob_upper = sts.norm.cdf(z_upper)

# a. Percentage of X within 1.5 standard deviations of the mean.
a = round((prob_upper - prob_lower) * 100, 2)

# Part (b): convert the raw bounds 8 and 17 to z-scores, then take the CDF.
z_8 = (8 - mean) / std_dev
z_17 = (17 - mean) / std_dev
prob_8 = sts.norm.cdf(z_8)
prob_17 = sts.norm.cdf(z_17)

# b. Percentage of X between 8 and 17 under the normal model.
# NOTE(review): the task asks for a *minimum* percentage, which may call for
# a distribution-free bound (e.g. Chebyshev) rather than the normal CDF --
# confirm against the quiz's expected answer.
b = round((prob_17 - prob_8) * 100, 2)

print(a, b)

### Assign your answers here
a_2 = a
b_2 = b

### For evaluation
ans_2 = {"a_2": a_2, "b_2": b_2}
quiz.eval(2, ans_2)

Question 3: What is the 25 percentile of the below samples

Solution 3: What is the 25 percentile of the below samples


import numpy as np
import math
import scipy.stats as sts

import dill

# Load the pickled `Quiz` object and the `quiz` instance that the solution
# cells call via `quiz.eval(...)`.
# NOTE: unpickling can execute arbitrary code; only load .pik files that come
# from a trusted source.
with open("QuizClass.pik", "rb") as quiz_class_file:
    Quiz = dill.load(quiz_class_file)
with open("Quiz.pik", "rb") as quiz_file:
    quiz = dill.load(quiz_file)

# Task 3: What is the 25 percentile of the below samples
# [3.09, 2.48, 2.02, 2.98, 3.53, 2.41, 2.01, 2.95, 2.63, 3.09, 3.26, 2.04, 3.74, 2.99, 2.34, 2.77, 3.05, 3.29, 3.14, 3.17]

# Task 3: the 20 observations whose 25th percentile is requested.
samples = [
    3.09, 2.48, 2.02, 2.98, 3.53, 2.41, 2.01, 2.95, 2.63, 3.09,
    3.26, 2.04, 3.74, 2.99, 2.34, 2.77, 3.05, 3.29, 3.14, 3.17,
]

# np.percentile returns the *value* at the requested percentile (with its
# default interpolation), not the percentage rank of a score.
first_quartile = np.percentile(samples, 25)

### Assign your answers here
data = list(samples)  # same observations, kept under the name `data`
a_3 = round(first_quartile, 2)

### For evaluation
ans_3 = {"a_3": a_3}
print(ans_3)
quiz.eval(3, ans_3)

Question 4: Suppose a marble is randomly selected from a jar containing 12 red, 4 black, and 8 blue marbles. Find the probability of the following.

Solution 4: Suppose a marble is randomly selected from a jar containing 12 red, 4 black, and 8 blue marbles. Find the probability of the following.


import numpy as np
import math
import scipy.stats as sts

import dill

# Load the pickled `Quiz` object and the `quiz` instance that the solution
# cells call via `quiz.eval(...)`.
# NOTE: unpickling can execute arbitrary code; only load .pik files that come
# from a trusted source.
with open("QuizClass.pik", "rb") as quiz_class_file:
    Quiz = dill.load(quiz_class_file)
with open("Quiz.pik", "rb") as quiz_file:
    quiz = dill.load(quiz_file)

# Task 4: Suppose a marble is randomly selected from a jar containing 12 red, 4 black, and 8 blue marbles. Find the probability of the following:
# a. The marble is red or black
# b. The marble is black or blue
# c. The marble is not blue
# d. The marble is red or not blue

# Task 4: marble counts in the jar.
red, black, blue = 12, 4, 8
total_marbles = red + black + blue

# a. Red or black: the colours are mutually exclusive, so the counts add.
prob_red_or_black = round((red + black) / total_marbles, 2)

# b. Black or blue.
prob_black_or_blue = round((black + blue) / total_marbles, 2)

# c. Not blue: every marble that is not blue (i.e. red or black).
not_blue_count = total_marbles - blue
prob_not_blue = round(not_blue_count / total_marbles, 2)

# d. Red or not blue: red marbles are already "not blue", so the union is
#    exactly the not-blue marbles.
prob_red_or_not_blue = round(not_blue_count / total_marbles, 2)

### Assign your answers here
a_4 = prob_red_or_black
b_4 = prob_black_or_blue
c_4 = prob_not_blue
d_4 = prob_red_or_not_blue

### For evaluation
ans_4 = {"a_4": a_4, "b_4": b_4, "c_4": c_4, "d_4": d_4}
quiz.eval(4, ans_4)

Question 5: Let A and B be events with P(A) = 0.2, P(B) = 0.8, and P(A ∩ B) = 0.1 Find the following-

Solution 5: Let A and B be events with P(A) = 0.2, P(B) = 0.8, and P(A ∩ B) = 0.1 Find the following-


import numpy as np
import math
import scipy.stats as sts

import dill

# Load the pickled `Quiz` object and the `quiz` instance that the solution
# cells call via `quiz.eval(...)`.
# NOTE: unpickling can execute arbitrary code; only load .pik files that come
# from a trusted source.
with open("QuizClass.pik", "rb") as quiz_class_file:
    Quiz = dill.load(quiz_class_file)
with open("Quiz.pik", "rb") as quiz_file:
    quiz = dill.load(quiz_file)

# Task 5: Let A and B be events with P(A) = 0.2, P(B) = 0.8, and P(A ∩ B) = 0.1 Find the following
# a.  𝑃(𝐵¯)
# b.  𝑃(𝐴¯∩𝐵¯)
# c.  𝑃(𝐵¯ | 𝐴)
# d.  𝑃(𝐴¯∩𝐵)

# Events A and B with given probabilities
# Task 5: events A and B with the given probabilities.
P_A = 0.2
P_B = 0.8
P_A_and_B = 0.1

# a. P(B') -- complement rule.
P_B_complement = round(1 - P_B, 2)

# b. P(A' n B') = 1 - P(A u B) (De Morgan), with
#    P(A u B) = P(A) + P(B) - P(A n B).
P_A_complement_and_B_complement = round(1 - (P_A + P_B - P_A_and_B), 2)

# c. P(B' | A) = P(A n B') / P(A), where P(A n B') = P(A) - P(A n B).
P_B_complement_given_A = round((P_A - P_A_and_B) / P_A, 2)

# d. P(A' n B) = P(B) - P(A n B).
P_A_complement_and_B = round(P_B - P_A_and_B, 2)

# Labelled output; the previous `print({value})` calls printed one-element
# set literals such as `{0.2}` instead of the bare values.
print(f"P(B') = {P_B_complement}")
print(f"P(A' n B') = {P_A_complement_and_B_complement}")
print(f"P(B' | A) = {P_B_complement_given_A}")
print(f"P(A' n B) = {P_A_complement_and_B}")

### Assign your answers here
a_5 = P_B_complement
b_5 = P_A_complement_and_B_complement
c_5 = P_B_complement_given_A
d_5 = P_A_complement_and_B

### For evaluation
ans_5 = {"a_5": a_5, "b_5": b_5, "c_5": c_5, "d_5": d_5}

quiz.eval(5, ans_5)

Question 6: Given a sample of size n = 60 taken from a continuous population distribution with mean 56 and standard deviation 25, find the variance of the sample mean.

Solution 6: Given a sample of size n = 60 taken from a continuous population distribution with mean 56 and standard deviation 25, find the variance of the sample mean.


import numpy as np
import math
import scipy.stats as sts

import dill

# Load the pickled `Quiz` object and the `quiz` instance that the solution
# cells call via `quiz.eval(...)`.
# NOTE: unpickling can execute arbitrary code; only load .pik files that come
# from a trusted source.
with open("QuizClass.pik", "rb") as quiz_class_file:
    Quiz = dill.load(quiz_class_file)
with open("Quiz.pik", "rb") as quiz_file:
    quiz = dill.load(quiz_file)

#Task 6: Given a sample of size n = 60 taken from a continuous population distribution with mean 56 and standard deviation 25, find the variance of the sample mean.

# Task 6: Var(sample mean) = population variance / sample size.
population_std_dev = 25
population_variance = population_std_dev ** 2
sample_size = 60

### Assign your answers here
variance = round(population_variance / sample_size, 2)

### For evaluation
ans_6 = {"variance": variance}
quiz.eval(6, ans_6)

Question 7: 55% of all engineering students prefer internship over final year project. Suppose 12 students are randomly selected and the number in favor of internship is recorded. Find the following:

Solution 7: 55% of all engineering students prefer internship over final year project. Suppose 12 students are randomly selected and the number in favor of internship is recorded. Find the following:


import numpy as np
import math
import scipy.stats as sts

import dill

# Load the pickled `Quiz` object and the `quiz` instance that the solution
# cells call via `quiz.eval(...)`.
# NOTE: unpickling can execute arbitrary code; only load .pik files that come
# from a trusted source.
with open("QuizClass.pik", "rb") as quiz_class_file:
    Quiz = dill.load(quiz_class_file)
with open("Quiz.pik", "rb") as quiz_file:
    quiz = dill.load(quiz_file)

# Task 7: 55% of all engineering students prefer internship over final year project. Suppose 12 students are randomly selected and the number in favor of internship is recorded. # Find the following:
# a. The probability that exactly seven of them choose internship.
# b. The probability that at most eight of them choose internship.
# c. The probability that at least five of them choose internship.
# d. The probability that at least seven, but no more than 10, choose internship.


# Task 7 model: the number of students (out of 12) preferring an internship
# is Binomial(n=12, p=0.55).
n_students = 12
p_internship = 0.55

# a. Exactly seven: P(X = 7).
prob_exactly_7 = round(sts.binom.pmf(7, n_students, p_internship), 2)

# b. At most eight: P(X <= 8).
prob_at_most_8 = round(sts.binom.cdf(8, n_students, p_internship), 2)

# c. At least five: P(X >= 5) = 1 - P(X <= 4).
prob_at_least_5 = round(1 - sts.binom.cdf(4, n_students, p_internship), 2)

# d. At least seven but no more than ten: P(7 <= X <= 10) = P(X <= 10) - P(X <= 6).
prob_at_least_7_and_at_most_10 = round(
    sts.binom.cdf(10, n_students, p_internship)
    - sts.binom.cdf(6, n_students, p_internship),
    2,
)

# Labelled output; the previous `print({value})` calls printed one-element
# set literals such as `{0.22}` instead of the bare values.
print(f"P(X = 7) = {prob_exactly_7}")
print(f"P(X <= 8) = {prob_at_most_8}")
print(f"P(X >= 5) = {prob_at_least_5}")
print(f"P(7 <= X <= 10) = {prob_at_least_7_and_at_most_10}")

### Assign your answers here
a_7 = prob_exactly_7
b_7 = prob_at_most_8
c_7 = prob_at_least_5
d_7 = prob_at_least_7_and_at_most_10

### For evaluation
ans_7 = {"a_7": a_7, "b_7": b_7, "c_7": c_7, "d_7": d_7}

quiz.eval(7, ans_7)

Question 8: Suppose the population variable X is N(3, 0.3) and n = 20. How large an interval must be chosen so that the probability is 0.95 that the sample mean 𝑋¯ lies within ±a units of the population mean μ?

Solution 8: Suppose the population variable X is N(3, 0.3) and n = 20. How large an interval must be chosen so that the probability is 0.95 that the sample mean 𝑋¯ lies within ±a units of the population mean μ?


import numpy as np
import math
import scipy.stats as sts

import dill

# Load the pickled `Quiz` object and the `quiz` instance that the solution
# cells call via `quiz.eval(...)`.
# NOTE: unpickling can execute arbitrary code; only load .pik files that come
# from a trusted source.
with open("QuizClass.pik", "rb") as quiz_class_file:
    Quiz = dill.load(quiz_class_file)
with open("Quiz.pik", "rb") as quiz_file:
    quiz = dill.load(quiz_file)

# Task 8: Suppose the population variable X is N(3, 0.3) and n = 20. How large an interval must be chosen so that the probability is 0.95 that the sample mean  𝑋¯ lies within ±a units of the population mean μ?

# Parameters for Normal Distribution
# Task 8 parameters: X ~ N(3, 0.3), sample size n = 20.
mu = 3  # Population mean
# NOTE(review): 0.3 is used as the population standard deviation, as in the
# original solution; if the task means N(mean, variance), use math.sqrt(0.3).
sigma = 0.3
n = 20  # Sample size
alpha = 0.95  # Required probability that the sample mean lies within +/-a of mu

# The sample mean of n draws is normal with mean mu and standard deviation
# sigma / sqrt(n) (the standard error), so the interval must be built from
# the standard error rather than from sigma itself.
standard_error = sigma / math.sqrt(n)

# Central interval that contains the sample mean with probability `alpha`.
# The probability is passed positionally: the keyword was renamed from
# `alpha` to `confidence` in SciPy, and the old keyword was later removed.
a, b = sts.norm.interval(alpha, loc=mu, scale=standard_error)

# Width of the interval (upper bound - lower bound, i.e. twice the +/-a margin).
width = round(b - a, 2)
print(width)


### Assign your answers here
interval = width

### For evaluation
ans_8 = {"interval": np.around(interval, 1)}
quiz.eval(8, ans_8)

Question 9: A random variable X is N(25, 4). Find the indicated percentile for X:-

Solution 9: A random variable X is N(25, 4). Find the indicated percentile for X:-


import numpy as np
import math
import scipy.stats as sts

import dill

# Load the pickled `Quiz` object and the `quiz` instance that the solution
# cells call via `quiz.eval(...)`.
# NOTE: unpickling can execute arbitrary code; only load .pik files that come
# from a trusted source.
with open("QuizClass.pik", "rb") as quiz_class_file:
    Quiz = dill.load(quiz_class_file)
with open("Quiz.pik", "rb") as quiz_file:
    quiz = dill.load(quiz_file)

# Task 9: A random variable X is N(25, 4). Find the indicated percentile for X:
# a. The 10th percentile
# b. The 90th percentile
# c. The 80th percentile
# d. The 50th percentile

Please try to solve this one on your own.

About the author

D Shwari
I'm a professor at National University's Department of Computer Science. My main areas are data science and data analysis, along with project management for many computer-science-related sectors. My next project is on AI with deep learning.

1 comment

  1. Anonymous
    ### Assign your answers here
    import math

    import scipy.stats as sts

    # Task 9 parameters: X ~ N(25, 4).
    mu = 25
    # NOTE(review): 4 is used as the standard deviation here, not the variance
    # (the original author marked this line with "!!???") -- confirm against
    # the quiz.
    sigma = 4


    def ceil2(x):
        """Return x rounded up to two decimal places."""
        return math.ceil(x * 100) / 100


    def trunc2(x):
        """Return x rounded down to two decimal places.

        This floors the value, so a negative x moves away from zero
        (e.g. -1.234 -> -1.24) rather than being truncated toward zero.
        """
        return math.floor(x * 100) / 100


    # Percentiles come from the inverse CDF (ppf) of Normal(mu, sigma).
    # The rounding direction differs per answer exactly as in the original
    # comment -- presumably chosen to match the values the quiz accepts.
    a_9 = ceil2(sts.norm.ppf(0.10, loc=mu, scale=sigma))  # 19.88
    b_9 = trunc2(sts.norm.ppf(0.90, loc=mu, scale=sigma))  # 30.12
    c_9 = trunc2(sts.norm.ppf(0.80, loc=mu, scale=sigma))  # 28.36
    d_9 = round(sts.norm.ppf(0.50, loc=mu, scale=sigma), 2)  # 25.00

    print({'a_9': a_9, 'b_9': b_9, 'c_9': c_9, 'd_9': d_9})

    ### For evaluation
    ans_9 = {"a_9": a_9, "b_9": b_9, "c_9": c_9, "d_9": d_9}
    quiz.eval(9, ans_9)