Kindly note: if you have any problems, please comment below.
LAB 1: Welcome to Python Pandas | 1 | Data Structures in Pandas
Problem 1: Pandas Data Structures - Hands-on
Solution 1: Pandas Data Structures |1|.
#!/bin/python3
# Write your code here
import pandas as pd
import numpy as np
# Task 1
# Build the Series heights_A with the heights of the five class A students
# (176.2, 158.4, 167.6, 156.2, 161.4), labelled s1..s5, and display its shape.
heights_A = pd.Series(
    [176.2, 158.4, 167.6, 156.2, 161.4],
    index=['s1', 's2', 's3', 's4', 's5'],
)
print(heights_A.shape)

# Task 2
# Build the Series weights_A with the weights of the same five students
# (85.1, 90.2, 76.8, 80.4, 78.9), labelled s1..s5, and display the data type
# of its values.
weights_A = pd.Series(
    [85.1, 90.2, 76.8, 80.4, 78.9],
    index=['s1', 's2', 's3', 's4', 's5'],
)
print(weights_A.dtype)

# Task 3
# Combine both Series into the DataFrame df_A with the columns
# Student_height and Student_weight (rows s1..s5) and display its shape.
df_A = pd.DataFrame({
    'Student_height': heights_A,
    'Student_weight': weights_A,
})
print(df_A.shape)
# Task 4
# Create the Series heights_B from a 1-D numpy array of 5 samples drawn from
# a normal distribution with mean 170.0 and standard deviation 25.0.
# Create the Series weights_B from a 1-D numpy array of 5 samples drawn from
# a normal distribution with mean 75.0 and standard deviation 12.0.
# * The random seed must be set to 100 before EACH series is created, so
#   both series are derived from the same underlying random draws.
# * Label the elements of both series as s1, s2, s3, s4, s5.
# * Print the mean of heights_B.
my_mean = 170.0
my_std = 25.0
np.random.seed(100)
heights_B = pd.Series(np.random.normal(loc=my_mean, scale=my_std, size=5))
heights_B.index = ['s1', 's2', 's3', 's4', 's5']

my_mean1 = 75.0
my_std1 = 12.0
# Re-seed here: without it weights_B would continue the generator state left
# behind by heights_B instead of starting again from seed 100 as required.
np.random.seed(100)
weights_B = pd.Series(np.random.normal(loc=my_mean1, scale=my_std1, size=5))
weights_B.index = ['s1', 's2', 's3', 's4', 's5']
print(heights_B.mean())

# Task 5
# Create the DataFrame df_B holding the heights and weights of the class B
# students s1..s5, built from heights_B and weights_B.
# * Label the columns Student_height and Student_weight respectively.
# * Display the column names of df_B.
df_B = pd.DataFrame()
df_B['Student_height'] = heights_B
df_B['Student_weight'] = weights_B
print(df_B.columns)
LAB 2: Welcome to Python Pandas 2 Accessing Pandas Data Structures.
Problem 2: Access Elements in Data Structures.
Solution 2: Access Elements in Data Structures.
#!/bin/python3
#Write your code here
# Task 1
# Create the Series heights_A with values 176.2, 158.4, 167.6, 156.2, 161.4
# (heights of 5 students of class A), labelled s1..s5, and print its second
# element.
import pandas as pd
import numpy as np
heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])
heights_A.index = ['s1', 's2', 's3', 's4', 's5']
# The index holds string labels, so positional access must go through .iloc;
# a bare integer key (heights_A[1]) relies on a deprecated positional fallback.
print(heights_A.iloc[1])

# Task 2
# Print the middle three elements of heights_A (positions 1, 2 and 3).
print(heights_A.iloc[1:4])

# Task 3
# Create the Series weights_A with values 85.1, 90.2, 76.8, 80.4, 78.9
# (weights of the same 5 students), labelled s1..s5.
weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9])
weights_A.index = ['s1', 's2', 's3', 's4', 's5']
# Create the DataFrame df_A with columns Student_height and Student_weight,
# then select the height column and store it in the variable height.
my_DataFrame = {
    "Student_height": heights_A,
    "Student_weight": weights_A,
}
df_A = pd.DataFrame(my_DataFrame)
height = df_A["Student_height"]
# Print the type of the object held in height.
print(type(height))

# Task 4
# Select the rows of students s1 and s2 into the DataFrame df_s1s2 and print it.
df_s1s2 = df_A.loc[["s1", "s2"]]
print(df_s1s2)

# Task 5
# Select the rows of students s1, s2 and s5 in the order s2, s5, s1 into the
# DataFrame df_s2s5s1 and print it.
df_s2s5s1 = df_A.loc[["s2", "s5", "s1"]]
print(df_s2s5s1)

# Task 6
# Select the rows whose index label ends with 1 or 4 into df_s1s4 and print it.
# The regex matches on the label suffix rather than a hard-coded label list,
# so it keeps working if more rows (e.g. "s11", "s14") are added.
df_s1s4 = df_A.filter(regex=r"[14]$", axis=0)
print(df_s1s4)
LAB 3: Welcome to Python Pandas | 3 | Working with CSV files
Problem 3: Working with CSVs - Hands-on
Solution 3: Working with CSVs |3|.
#!/bin/python3
#Write your code here
import pandas as pd
import numpy as np
# Task 1
# Build heights_A (176.2, 158.4, 167.6, 156.2, 161.4) and weights_A
# (85.1, 90.2, 76.8, 80.4, 78.9) for the five class A students s1..s5,
# combine them into the DataFrame df_A with the columns Student_height and
# Student_weight, and write df_A (index included) to classA.csv.
heights_A = pd.Series(
    [176.2, 158.4, 167.6, 156.2, 161.4],
    index=['s1', 's2', 's3', 's4', 's5'],
)
weights_A = pd.Series(
    [85.1, 90.2, 76.8, 80.4, 78.9],
    index=['s1', 's2', 's3', 's4', 's5'],
)
d = dict(Student_height=heights_A, Student_weight=weights_A)
df_A = pd.DataFrame(d)
df_A.to_csv("classA.csv")

# Task 2
# Read classA.csv back into df_A2 with default settings (the saved index
# comes back as an ordinary first column) and display it.
df_A2 = pd.read_csv('classA.csv')
print(df_A2)

# Task 3
# Read classA.csv into df_A3, this time treating the first column as the
# index, and display it.
df_A3 = pd.read_csv('classA.csv', index_col=0)
print(df_A3)
# Task 4
# Build heights_B from 5 samples of a normal distribution (mean 170.0,
# standard deviation 25.0) and weights_B from 5 samples of a normal
# distribution (mean 75.0, standard deviation 12.0), seeding the generator
# with 100 before each draw. Label both series s1..s5, combine them into
# df_B (columns Student_height / Student_weight) and write df_B WITHOUT its
# index to classB.csv. The file can be inspected from a shell with
# "cat classB.csv".
m, sd = 170.0, 25.0
np.random.seed(100)
heights_B = pd.Series(
    np.random.normal(loc=m, scale=sd, size=5),
    index=['s1', 's2', 's3', 's4', 's5'],
)

m2, sd2 = 75.0, 12.0
np.random.seed(100)
weights_B = pd.Series(
    np.random.normal(loc=m2, scale=sd2, size=5),
    index=['s1', 's2', 's3', 's4', 's5'],
)

d2 = dict(Student_height=heights_B, Student_weight=weights_B)
df_B = pd.DataFrame(d2)
df_B.to_csv("classB.csv", index=False)

# Task 5
# Read classB.csv into df_B2 with default settings and display it.
df_B2 = pd.read_csv('classB.csv')
print(df_B2)

# Task 6
# Read classB.csv into df_B3 with header=None, so the file is read as if it
# had no header line (the header text becomes the first data row), and
# display it.
df_B3 = pd.read_csv('classB.csv', header=None)
print(df_B3)

# Task 7
# Read classB.csv into df_B4 with header=None and skiprows=2 (the first two
# lines of the file are skipped) and display it.
df_B4 = pd.read_csv('classB.csv', header=None, skiprows=2)
print(df_B4)
LAB 4: Welcome to Python Pandas 4 Indexing Dataframes
Problem 4: Hands-on with Indexes.
Solution 4: Hands-on with Indexes |4|.
#!/bin/python3
#Write your code here
import pandas as pd
import numpy as np
# Task 1
# Create an index named dates covering every day from 1-Sep-2017 to
# 15-Sep-2017 and print its 3rd element.
dates = pd.date_range(start="2017-09-01", end="2017-09-15")
# Backward-compatible alias: earlier versions of this script exposed the
# range under the name DatetimeIndex.
DatetimeIndex = dates
print(dates[2])

# Task 2
# Convert the date strings in datelist into datetime objects, capture the
# result in dates_to_be_searched and print it.
datelist = ['14-Sep-2017', '9-Sep-2017']
dates_to_be_searched = pd.to_datetime(datelist)
print(dates_to_be_searched)

# Task 3
# Check whether each element of dates_to_be_searched is present in the
# DatetimeIndex dates created above, and print the result.
# The membership test must run against dates; testing against datelist would
# only compare the values with their own source strings.
print(dates_to_be_searched.isin(dates))

# Task 4
# Create a two-level MultiIndex named mi_index from the parallel arrays in
# arraylist and print its levels.
arraylist = [["classA"] * 5 + ["classB"] * 5, ['s1', 's2', 's3', 's4', 's5'] * 2]
# from_arrays pairs the two 10-element arrays element by element (10 entries);
# from_product would build their 100-entry Cartesian product instead.
mi_index = pd.MultiIndex.from_arrays(arraylist, names=["First Level", "Second Level"])
print(mi_index.levels)
LAB 5: Welcome to Python Pandas 5 Data Cleaning
Problem 5: Data Cleaning - Handson.
Solution 5: Data Cleaning - Handson |5|.
#!/bin/python3
#Write your code here
import pandas as pd
import numpy as np
# Task 1
# Create the Series heights_A with values 176.2, 158.4, 167.6, 156.2, 161.4
# (heights of 5 students of class A), labelled s1..s5.
heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])
heights_A.index = ['s1', 's2', 's3', 's4', 's5']

# Task 2
# Create the Series weights_A with values 85.1, 90.2, 76.8, 80.4, 78.9
# (weights of the same 5 students), labelled s1..s5.
weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9])
weights_A.index = ['s1', 's2', 's3', 's4', 's5']

# Task 3
# Create the DataFrame df_A with columns Student_height and Student_weight.
# * Set the height and weight of student s3 to NaN.
# * Set the weight of student s5 to NaN.
# * Drop every row that has a null value in any column and assign the result
#   to df_A2.
my_dataframe = {
    "Student_height": heights_A,
    "Student_weight": weights_A,
}
df_A = pd.DataFrame(my_dataframe)
df_A.loc["s3"] = np.nan
# Assign through a single .loc[row, column] call. The chained form
# df_A.loc["s5"][1] = ... writes into an intermediate Series that may be a
# copy, so df_A is not guaranteed to change (and never does under
# copy-on-write); it also relies on deprecated positional integer access.
df_A.loc["s5", "Student_weight"] = np.nan
df_A2 = df_A.dropna(how="any")

# Task 4
# Display the DataFrame df_A2.
print(df_A2)
LAB 6: Welcome to Python Pandas 6 Data Aggregation
Problem 6: Data Aggregation - Handson.
Solution 6: Data Aggregation - Handson |6|.
#!/bin/python3
#Write your code here
import pandas as pd
import numpy as np
# Task 1
# Create the Series heights_A with values 176.2, 158.4, 167.6, 156.2, 161.4
# (heights of 5 students of class A), labelled s1..s5.
heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])
heights_A.index = ['s1', 's2', 's3', 's4', 's5']
# Create the Series weights_A with values 85.1, 90.2, 76.8, 80.4, 78.9
# (weights of the same 5 students), labelled s1..s5.
weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9])
weights_A.index = ['s1', 's2', 's3', 's4', 's5']
# Create the DataFrame df_A with columns Student_height and Student_weight.
# Keep the rows with Student_height > 160.0 and Student_weight < 80.0 in
# df_A_filter1 and print it.
my_DataFrame = {
    "Student_height": heights_A,
    "Student_weight": weights_A,
}
df_A = pd.DataFrame(my_DataFrame)
df_A_filter1 = df_A[(df_A.Student_height > 160.0) & (df_A.Student_weight < 80.0)]
print(df_A_filter1)

# Task 2
# Keep the rows of df_A whose index label ends with 5 in df_A_filter2 and
# print it. The regex matches on the label suffix rather than a hard-coded
# label, so a later row such as "s15" would be picked up as well.
df_A_filter2 = df_A.filter(regex=r"5$", axis=0)
print(df_A_filter2)

# Task 3
# Add the column Gender with values ["M","F","M","M","F"] to df_A, group
# df_A by Gender into df_groups, and print the mean height and weight of
# each group.
df_A["Gender"] = ["M", "F", "M", "M", "F"]
df_groups = df_A.groupby("Gender")
print(df_groups.mean())
LAB 7: Welcome to Python Pandas 7 Data Merging 1.
Problem 7: Data Merge 1 - Hands-on.
Solution 7: Data Merge 1 - Hands-on.
#!/bin/python3
#Write your code here
import pandas as pd
import numpy as np
#
# Task 1
#
# Create the Series heights_A with values 176.2, 158.4, 167.6, 156.2, 161.4
# (heights of 5 students of class A), labelled s1..s5.
heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])
heights_A.index = ['s1', 's2', 's3', 's4', 's5']
# Create the Series weights_A with values 85.1, 90.2, 76.8, 80.4, 78.9
# (weights of the same 5 students), labelled s1..s5.
weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9])
weights_A.index = ['s1', 's2', 's3', 's4', 's5']
# Create the DataFrame df_A with columns Student_height and Student_weight.
DataFrame_A = {
    "Student_height": heights_A,
    "Student_weight": weights_A,
}
# Add the column Gender with values ["M","F","M","M","F"] to df_A.
df_A = pd.DataFrame(DataFrame_A)
df_A["Gender"] = ["M", "F", "M", "M", "F"]
# Create the Series s from the list [165.4, 82.7, "F"], indexed by the df_A
# column names and named "s6" (the name becomes the new row label).
s = pd.Series(
    [165.4, 82.7, "F"],
    index=["Student_height", "Student_weight", "Gender"],
    name="s6",
)
# Append s to df_A as a new row, store the result in df_AA and display it.
# DataFrame.append was removed in pandas 2.0, so the series is turned into a
# one-row frame and concatenated instead. infer_objects() restores float
# dtypes on that row (the mixed-type series is object dtype), which keeps the
# height/weight columns of df_AA numeric.
df_AA = pd.concat([df_A, s.to_frame().T.infer_objects()])
print(df_AA)
#
# Task 2
#
# Create the Series heights_B from 5 samples of a normal distribution with
# mean 170.0 and standard deviation 25.0, seeding with 100 first.
mean_A = 170.0
s_deviation_A = 25.0
np.random.seed(100)
heights_B = pd.Series(
    np.random.normal(loc=mean_A, scale=s_deviation_A, size=5),
    index=['s1', 's2', 's3', 's4', 's5'],
)
# Create the Series weights_B from 5 samples of a normal distribution with
# mean 75.0 and standard deviation 12.0, seeding with 100 again first.
# Both series are labelled s1..s5.
mean_B = 75.0
s_deviation_B = 12.0
np.random.seed(100)
weights_B = pd.Series(
    np.random.normal(loc=mean_B, scale=s_deviation_B, size=5),
    index=['s1', 's2', 's3', 's4', 's5'],
)
# Create the DataFrame df_B with columns Student_height and Student_weight
# for the class B students.
DataFrame_B = {
    "Student_height": heights_B,
    "Student_weight": weights_B,
}
df_B = pd.DataFrame(DataFrame_B)
# Relabel the rows of df_B as s7..s11 and add the Gender column with values
# ["F","M","F","F","M"].
df_B.index = ["s7", "s8", "s9", "s10", "s11"]
df_B["Gender"] = ["F", "M", "F", "F", "M"]
# Concatenate df_AA and df_B into df and display it.
df = pd.concat([df_AA, df_B])
print(df)
LAB 8: Welcome to Python Pandas 8 Data Merging 2.
Problem 8: Data Merge - Hands-on 2.
Solution 8: Data Merge - Hands-on 2.
#!/bin/python3
#Write your code here
import pandas as pd
import numpy as np
#
# Task 1
# Build two Series: nameid holding the ids 101..110 and name holding the
# strings "person1".."person10".
nameid = pd.Series(range(101, 111))
name = pd.Series([f"person{i}" for i in range(1, 11)])
# Combine them into the DataFrame master (columns nameid, name).
my_DataFrame = dict(nameid=nameid, name=name)
master = pd.DataFrame(my_DataFrame)
# Build the DataFrame transaction mapping name ids to purchased products.
transaction = pd.DataFrame({
    "nameid": [108, 108, 108, 103],
    "product": ['iPhone', 'Nokia', 'Micromax', 'Vivo'],
})
# Inner-join master with transaction on nameid and keep the result in mdf;
# its columns come out in the order nameid, name, product.
mdf = master.merge(transaction, how="inner", on="nameid")
# Display mdf.
print(mdf)