Please note: if you run into any problems, leave a comment below.
LAB 1: Welcome to Python Pandas | 1 | Data Structures in Pandas
Problem 1: Pandas Data Structures - Hands-on
Solution 1: Pandas Data Structures |1|.
#!/bin/python3
# Lab 1: Pandas data structures (Series and DataFrame basics).
import pandas as pd
import numpy as np

# Every series in this lab is labelled with the same five student ids.
student_ids = ['s1', 's2', 's3', 's4', 's5']

# Task 1
# Create a Series named heights_A with values 176.2, 158.4, 167.6, 156.2
# and 161.4. These values represent the heights of 5 students of class A.
# * Label each student as s1, s2, s3, s4, s5.
# * Determine the shape of heights_A and display it.
heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4], index=student_ids)
print(heights_A.shape)

# Task 2
# Create another series named weights_A with values 85.1, 90.2, 76.8, 80.4,
# 78.9.
# * Label each student as s1, s2, s3, s4, s5.
# * Determine the data type of the values in weights_A and display it.
weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9], index=student_ids)
print(weights_A.dtype)

# Task 3
# Create a dataframe named df_A, which contains the heights and weights of
# the five students s1..s5.
# * Label the columns as Student_height and Student_weight, respectively.
# * Display the shape of df_A.
df_A = pd.DataFrame({
    'Student_height': heights_A,
    'Student_weight': weights_A,
})
print(df_A.shape)

# Task 4
# Create heights_B from a 1-D numpy array of 5 elements drawn from the
# normal distribution with mean 170.0 and standard deviation 25.0.
# * Set the random seed to 100 before creating heights_B.
# Create weights_B from a 1-D numpy array of 5 elements drawn from the
# normal distribution with mean 75.0 and standard deviation 12.0.
# * Set the random seed to 100 before creating weights_B.
# Label both series elements as s1, s2, s3, s4, s5.
# Print the mean of series heights_B.
my_mean = 170.0
my_std = 25.0
np.random.seed(100)
heights_B = pd.Series(
    np.random.normal(loc=my_mean, scale=my_std, size=5),
    index=student_ids,
)

my_mean1 = 75.0
my_std1 = 12.0
# The task asks for the seed to be reset here as well; without this call the
# weights would continue the random stream started for heights_B.
np.random.seed(100)
weights_B = pd.Series(
    np.random.normal(loc=my_mean1, scale=my_std1, size=5),
    index=student_ids,
)
print(heights_B.mean())

# Task 5
# Create a dataframe df_B containing the height and weight of students
# s1..s5 belonging to class B, using heights_B and weights_B from Task 4.
# * Label the columns as Student_height and Student_weight, respectively.
# * Display the column names of df_B.
df_B = pd.DataFrame({
    'Student_height': heights_B,
    'Student_weight': weights_B,
})
print(df_B.columns)
LAB 2: Welcome to Python Pandas | 2 | Accessing Pandas Data Structures
Problem 2: Access Elements in Data Structures.
Solution 2: Access Elements in Data Structures.
#!/bin/python3
# Lab 2: accessing elements of Series and DataFrames.
import pandas as pd
import numpy as np

# Task 1
# Create a series named heights_A with values 176.2, 158.4, 167.6, 156.2,
# 161.4. These values represent heights of 5 students of class A.
# Label each student as s1, s2, s3, s4, s5.
# Print the second element of series heights_A.
heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])
heights_A.index = ['s1', 's2', 's3', 's4', 's5']
# The index holds string labels, so positional access must go through
# .iloc; a bare integer key on such a Series is deprecated in pandas.
print(heights_A.iloc[1])

# Task 2
# Print the middle three elements of series heights_A.
print(heights_A.iloc[1:4])

# Task 3
# Create another series named weights_A with values 85.1, 90.2, 76.8, 80.4,
# 78.9. These values represent the weights of 5 students of class A.
# Label each student as s1, s2, s3, s4, s5.
weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9])
weights_A.index = ['s1', 's2', 's3', 's4', 's5']

# Create a dataframe named df_A which contains the height and weight of the
# five students s1..s5.
# * Label the columns as Student_height and Student_weight, respectively.
# Select the column of df_A referring to student heights and store it in
# the variable height.
my_DataFrame = {
    "Student_height": heights_A,
    "Student_weight": weights_A,
}
df_A = pd.DataFrame(my_DataFrame)
height = df_A["Student_height"]

# Print the type of object of the variable height.
print(type(height))

# Task 4
# Select the rows corresponding to students s1, s2 of df_A and capture them
# in another dataframe df_s1s2.
df_s1s2 = df_A.loc[["s1", "s2"]]
print(df_s1s2)

# Task 5
# Select the rows corresponding to students s1, s2 and s5 of df_A in the
# order s2, s5, s1, and capture them in another dataframe df_s2s5s1.
df_s2s5s1 = df_A.loc[["s2", "s5", "s1"]]
print(df_s2s5s1)

# Task 6
# Select those rows of df_A whose index values end with 1 or 4, and capture
# them in another dataframe df_s1s4.
# Test the last character of each label instead of listing the labels, so
# the filter expresses the stated rule rather than its current result.
ends_with_1_or_4 = df_A.index.str[-1].isin(["1", "4"])
df_s1s4 = df_A[ends_with_1_or_4]
print(df_s1s4)
LAB 3: Welcome to Python Pandas | 3 | Working with CSV files
Problem 3: Working with CSVs - Hands-on
Solution 3: Working with CSVs |3|.
#!/bin/python3
# Lab 3: writing the class A / class B student tables to CSV and reading
# them back with different read_csv options.
import pandas as pd
import numpy as np

labels = ['s1', 's2', 's3', 's4', 's5']

# Task 1
# Create a Series named heights_A with values 176.2, 158.4, 167.6, 156.2 and
# 161.4 (heights of 5 students of class A), labelled s1..s5.
# Create another series named weights_A with values 85.1, 90.2, 76.8, 80.4,
# 78.9 (weights of the same students), labelled s1..s5.
# Create a dataframe named df_A from both series.
# * Label the columns as Student_height and Student_weight, respectively.
# * Write the contents of df_A to a CSV file named classA.csv.
heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4], index=labels)
weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9], index=labels)
df_A = pd.DataFrame({
    "Student_height": heights_A,
    "Student_weight": weights_A,
})
df_A.to_csv('classA.csv')

# Task 2
# Read the contents of the CSV file classA.csv into a dataframe named df_A2.
# Display the dataframe df_A2.
df_A2 = pd.read_csv('classA.csv')
print(df_A2)

# Task 3
# Read the contents of classA.csv into a dataframe named df_A3, such that
# the first column's values are treated as the index of df_A3.
# Display the dataframe df_A3.
df_A3 = pd.read_csv('classA.csv', index_col=0)
print(df_A3)

# Task 4
# Create heights_B from a 1-D numpy array of 5 elements drawn from the
# normal distribution with mean 170.0 and standard deviation 25.0.
# * Set the random seed to 100 before creating heights_B.
# Create weights_B from a 1-D numpy array of 5 elements drawn from the
# normal distribution with mean 75.0 and standard deviation 12.0.
# * Set the random seed to 100 again before creating weights_B.
# Label both series elements as s1..s5.
# Create a dataframe named df_B from both series.
# * Label the columns as Student_height and Student_weight, respectively.
# * Write the contents of df_B without the index to a CSV file named
#   classB.csv.
# * Display the contents of classB.csv using the shell command
#   "cat classB.csv".
np.random.seed(100)
heights_B = pd.Series(np.random.normal(170.0, 25.0, 5), index=labels)

np.random.seed(100)
weights_B = pd.Series(np.random.normal(75.0, 12.0, 5), index=labels)

df_B = pd.DataFrame({
    "Student_height": heights_B,
    "Student_weight": weights_B,
})
df_B.to_csv('classB.csv', index=False)

# Task 5
# Read the data from the classB.csv file into a dataframe df_B2.
# Display the dataframe df_B2.
df_B2 = pd.read_csv('classB.csv')
print(df_B2)

# Task 6
# Read data from classB.csv into a dataframe df_B3 with the header argument
# of read_csv set to None, so the file is read as if it had no header row.
# Display the dataframe df_B3.
df_B3 = pd.read_csv('classB.csv', header=None)
print(df_B3)

# Task 7
# Read data from classB.csv into a dataframe df_B4 with header set to None
# and skiprows set to 2.
# Display the dataframe df_B4.
df_B4 = pd.read_csv('classB.csv', header=None, skiprows=2)
print(df_B4)
LAB 4: Welcome to Python Pandas | 4 | Indexing Dataframes
Problem 4: Hands-on with Indexes.
Solution 4: Hands-on with Indexes |4|.
#!/bin/python3
# Lab 4: working with DatetimeIndex and MultiIndex objects.
import pandas as pd
import numpy as np

# Task 1
# Create an index named dates representing a range of dates starting from
# 1-Sep-2017 to 15-Sep-2017.
# Print the 3rd element of the created DatetimeIndex.
dates = pd.date_range(start="09/01/2017", end="09/15/2017")
print(dates[2])

# Task 2
# Convert the following date strings into datetime objects:
#   datelist = ['14-Sep-2017', '9-Sep-2017']
# Capture the result in the variable dates_to_be_searched and print it.
datelist = ['14-Sep-2017', '9-Sep-2017']
dates_to_be_searched = pd.to_datetime(datelist)
print(dates_to_be_searched)

# Task 3
# Check whether the elements of dates_to_be_searched are present in the
# DatetimeIndex dates created above, and print the output.
# The membership test must run against `dates`; testing against `datelist`
# would only compare the parsed values with the strings they came from.
print(dates_to_be_searched.isin(dates))

# Task 4
# Create a multi index named mi_index of two levels, represented by the
# following array list:
#   arraylist = [["classA"]*5 + ["classB"]*5, ['s1','s2','s3','s4','s5']*2]
# Print the levels of mi_index.
arraylist = [
    ["classA"] * 5 + ["classB"] * 5,
    ['s1', 's2', 's3', 's4', 's5'] * 2,
]
# arraylist already holds one aligned entry per row (10 in total), so the
# index is built element-wise with from_arrays. from_product would form the
# cartesian product of the two 10-element lists and yield 100 entries.
mi_index = pd.MultiIndex.from_arrays(
    arraylist, names=["First Level", "Second Level"]
)
print(mi_index.levels)
LAB 5: Welcome to Python Pandas | 5 | Data Cleaning
Problem 5: Data Cleaning - Handson.
Solution 5: Data Cleaning - Handson |5|.
#!/bin/python3
# Lab 5: data cleaning - introducing and dropping missing values.
import pandas as pd
import numpy as np

# Task 1
# Create a series named heights_A with values 176.2, 158.4, 167.6, 156.2,
# 161.4. These values represent heights of 5 students of class A.
# Label each student as s1, s2, s3, s4, s5.
heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])
heights_A.index = ['s1', 's2', 's3', 's4', 's5']

# Task 2
# Create another series named weights_A with values 85.1, 90.2, 76.8, 80.4,
# 78.9. These values represent the weights of 5 students of class A.
# * Label each student as s1, s2, s3, s4, s5.
weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9])
weights_A.index = ['s1', 's2', 's3', 's4', 's5']

# Task 3
# Create a dataframe named df_A which contains the height and weight of the
# five students s1..s5.
# * Label the columns as Student_height and Student_weight, respectively.
# * Set the height and weight values of student s3 to NaN.
# * Set the weight of s5 to NaN.
# * Drop the rows having null values in any of the columns, and assign the
#   result to df_A2.
my_dataframe = {
    "Student_height": heights_A,
    "Student_weight": weights_A,
}
df_A = pd.DataFrame(my_dataframe)
df_A.loc["s3"] = np.nan
# Assign through a single .loc[row, column] call. The chained form
# df_A.loc["s5"][1] = ... writes into an intermediate object that may be a
# copy, so the change is not guaranteed to reach df_A.
df_A.loc["s5", "Student_weight"] = np.nan
df_A2 = df_A.dropna(how="any")

# Task 4
# Display the dataframe df_A2.
print(df_A2)
LAB 6: Welcome to Python Pandas | 6 | Data Aggregation
Problem 6: Data Aggregation - Handson.
Solution 6: Data Aggregation - Handson |6|.
#!/bin/python3
# Lab 6: filtering rows and aggregating groups of a DataFrame.
import pandas as pd
import numpy as np

# Task 1
# Create a series named heights_A with values 176.2, 158.4, 167.6, 156.2,
# 161.4. These values represent heights of 5 students of class A.
# Label each student as s1, s2, s3, s4, s5.
heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])
heights_A.index = ['s1', 's2', 's3', 's4', 's5']

# Create another series named weights_A with values 85.1, 90.2, 76.8, 80.4,
# 78.9. These values represent the weights of 5 students of class A.
# * Label each student as s1, s2, s3, s4, s5.
weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9])
weights_A.index = ['s1', 's2', 's3', 's4', 's5']

# Create a dataframe named df_A which contains the height and weight of the
# five students s1..s5.
# * Label the columns as Student_height and Student_weight, respectively.
# * Filter the rows from df_A whose Student_height > 160.0 and
#   Student_weight < 80.0, and capture them in another dataframe
#   df_A_filter1.
# Print the dataframe df_A_filter1.
my_DataFrame = {
    "Student_height": heights_A,
    "Student_weight": weights_A,
}
df_A = pd.DataFrame(my_DataFrame)
df_A_filter1 = df_A[
    (df_A["Student_height"] > 160.0) & (df_A["Student_weight"] < 80.0)
]
print(df_A_filter1)

# Task 2
# Filter the rows of df_A whose index values end with 5, and capture them in
# another dataframe df_A_filter2.
# Print the dataframe df_A_filter2.
# The suffix is tested directly, so any label ending in "5" matches rather
# than only the single label "s5".
df_A_filter2 = df_A[df_A.index.str.endswith("5")]
print(df_A_filter2)

# Task 3
# Create a new column Gender in the dataframe df_A using the command:
#   df_A['Gender'] = ["M", "F", "M", "M", "F"]
# Group df_A based on Gender, and capture the result in df_groups.
# Print the mean height and weight of each group.
df_A["Gender"] = ["M", "F", "M", "M", "F"]
df_groups = df_A.groupby("Gender")
print(df_groups.mean())
LAB 7: Welcome to Python Pandas | 7 | Data Merging 1
Problem 7: Data Merge 1 - Hands-on.
Solution 7: Data Merge 1 - Hands-on.
#!/bin/python3
# Lab 7: appending a row to a DataFrame and concatenating DataFrames.
import pandas as pd
import numpy as np

#
# Task 1
#
# Create a series named heights_A with values 176.2, 158.4, 167.6, 156.2,
# 161.4. These values represent heights of 5 students of class A.
# Label each student as s1, s2, s3, s4, s5.
heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])
heights_A.index = ['s1', 's2', 's3', 's4', 's5']

# Create another series named weights_A with values 85.1, 90.2, 76.8, 80.4,
# 78.9. These values represent the weights of 5 students of class A.
# * Label each student as s1, s2, s3, s4, s5.
weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9])
weights_A.index = ['s1', 's2', 's3', 's4', 's5']

# Create a dataframe named df_A which contains the height and weight of the
# five students s1..s5.
# * Label the columns as Student_height and Student_weight, respectively.
DataFrame_A = {
    "Student_height": heights_A,
    "Student_weight": weights_A,
}
df_A = pd.DataFrame(DataFrame_A)

# Add a new column Gender to the dataframe df_A with values
# ["M", "F", "M", "M", "F"].
df_A["Gender"] = ["M", "F", "M", "M", "F"]

# Create another Series s from the list [165.4, 82.7, "F"].
# Use the column names of df_A as its index so the values line up with the
# right columns, and set the "name" argument of the series to s6.
s = pd.Series(
    [165.4, 82.7, "F"],
    index=["Student_height", "Student_weight", "Gender"],
    name="s6",
)

# Append the series s to the dataframe df_A, and store the result in df_AA.
# Display the dataframe df_AA.
# DataFrame.append was removed in pandas 2.0, so the series is turned into a
# one-row frame (its name becomes the row label) and concatenated instead.
# infer_objects() restores numeric dtypes on that row, because the
# mixed-type series is stored as object; without it the height and weight
# columns of the result would become object dtype as well.
s_row = s.to_frame().T.infer_objects()
df_AA = pd.concat([df_A, s_row])
print(df_AA)

#
# Task 2
#
# Create heights_B from a 1-D numpy array of 5 elements drawn from the
# normal distribution with mean 170.0 and standard deviation 25.0.
# Set the random seed to 100 before creating the heights_B series.
mean_A = 170.0
s_deviation_A = 25.0
np.random.seed(100)
heights_B = pd.Series(
    np.random.normal(loc=mean_A, scale=s_deviation_A, size=5),
    index=['s1', 's2', 's3', 's4', 's5'],
)

# Create weights_B from a 1-D numpy array of 5 elements drawn from the
# normal distribution with mean 75.0 and standard deviation 12.0.
# Set the random seed to 100 before creating the weights_B series.
# Label both series elements as s1, s2, s3, s4, s5.
mean_B = 75.0
s_deviation_B = 12.0
np.random.seed(100)
weights_B = pd.Series(
    np.random.normal(loc=mean_B, scale=s_deviation_B, size=5),
    index=['s1', 's2', 's3', 's4', 's5'],
)

# Create a dataframe df_B containing the height and weight of students
# s1..s5 belonging to class B.
# Label the columns as Student_height and Student_weight, respectively.
DataFrame_B = {
    "Student_height": heights_B,
    "Student_weight": weights_B,
}
df_B = pd.DataFrame(DataFrame_B)

# Change the index of df_B to ["s7", "s8", "s9", "s10", "s11"].
# Create the Gender column in df_B with values ["F", "M", "F", "F", "M"].
df_B.index = ["s7", "s8", "s9", "s10", "s11"]
df_B["Gender"] = ["F", "M", "F", "F", "M"]

# Concatenate the two dataframes df_AA and df_B, and assign the result to df.
# Display the dataframe df.
df = pd.concat([df_AA, df_B])
print(df)
LAB 8: Welcome to Python Pandas | 8 | Data Merging 2
Problem 8: Data Merge - Hands-on 2.
Solution 8: Data Merge - Hands-on 2.
#!/bin/python3
# Lab 8: joining a master table with a transaction table on a shared key.
import pandas as pd
import numpy as np

#
# Task 1
#
# Create the following two Series:
#   nameid = pd.Series(range(101, 111))
#   name = pd.Series(['person' + str(i) for i in range(1, 11)])
nameid = pd.Series(range(101, 111))
person_labels = []
for number in range(1, 11):
    person_labels.append("person" + str(number))
name = pd.Series(person_labels)

# Create the dataframe master with series nameid and name respectively.
master = pd.DataFrame({"nameid": nameid, "name": name})

# Create the dataframe transaction using the command:
#   transaction = pd.DataFrame({
#       "nameid": [108, 108, 108, 103],
#       "product": ['iPhone', 'Nokia', 'Micromax', 'Vivo']})
transaction_rows = {
    "nameid": [108, 108, 108, 103],
    "product": ['iPhone', 'Nokia', 'Micromax', 'Vivo'],
}
transaction = pd.DataFrame(transaction_rows)

# Merge master with transaction on nameid and save the merged dataframe as
# mdf, performing an inner join.
# mdf should have the columns in this order: nameid, name, product.
mdf = master.merge(transaction, on="nameid", how="inner")

# Display mdf.
print(mdf)