LAB 1: Welcome to Data Handling using R-1 - Data Frames and Datasets in R.
Practice Exercises on Data Frames
Solution: Practice Exercises on Data Frames
#' Practice exercises on data frames.
#'
#' Builds a small employee data frame and then explores the built-in
#' `mtcars` dataset (structure, summary, column selection, sorting and
#' filtering). Every result is printed to stdout.
#'
#' @return Invisibly, the data frame printed for Task 6.
data_handle_1 <- function() {
  # Task 1 ----
  # A. Create 3 vectors (name, age, department) with 5 observations each.
  name <- c("A", "B", "C", "D", "E")
  department <- c("AA", "BB", "CC", "DD", "EE")
  age <- c(10, 11, 12, 13, 14)

  # B. Create a data frame from these three vectors.
  empdetails_df <- data.frame(name, age, department)

  # C. Print the data frame, then its structure.
  print(empdetails_df)
  # str() writes the structure itself and returns NULL, so the wrapping
  # print() also emits a trailing "NULL" line. Kept because the exercise
  # asks for the structure to be "printed".
  print(str(empdetails_df))

  # Task 2 ----
  # A. Structure of mtcars (same trailing NULL as above).
  print(str(mtcars))
  # B. Summary of mtcars.
  print(summary(mtcars))

  # Task 3 ----
  # First, second and tenth columns of mtcars.
  print(mtcars[, c(1, 2, 10)])

  # Task 4 ----
  # mtcars sorted by mpg (ascending).
  print(mtcars[order(mtcars$mpg), ])

  # Task 5 ----
  # All cars that provide more than 30 mpg.
  task5 <- mtcars[mtcars$mpg > 30, ]
  print(task5)

  # Task 6 ----
  # Same cars in decreasing order of mpg; rows with equal mpg are ordered by
  # decreasing disp. Sorting on both keys makes the tie-break explicit
  # instead of depending on the incoming row order.
  print(task5[order(task5$mpg, task5$disp, decreasing = TRUE), ])
}

data_handle_1()
Lab 2: Welcome to Data Handling using R-2 - Data Tables in R
Practice using Data Tables.
Solution:
#' Practice exercise on data.table.
#'
#' Converts `mtcars` to a data.table, keeps the cars with mpg above 15 and
#' prints the mean horsepower (AvgHp) and mean weight (AvgWt) for each
#' combination of cyl and carb.
data_table <- function() {
  library(data.table)

  cars_dt <- as.data.table(mtcars)

  # i: row filter, j: aggregates, by: grouping columns.
  avg_by_group <- cars_dt[
    mpg > 15,
    .(AvgHp = mean(hp), AvgWt = mean(wt)),
    by = .(cyl, carb)
  ]

  print(avg_by_group)
}

data_table()
Lab 3: Welcome to Data Handling using R-3 - Loop Functions
Practice Exercises on Loop Functions
Solution:
#' Practice exercises on the apply family of loop functions.
#'
#' Works through lapply(), sapply(), apply(), tapply() and mapply() on small
#' hand-built inputs and on the built-in `mtcars` dataset, printing each
#' result to stdout.
loops <- function() {
  # Task 1 ----
  # Marks of 5 students in three subjects, collected in a list; the class
  # average per subject is computed with lapply().
  subject_marks <- list(
    c(70, 75, 80, 85, 90), # maths
    c(71, 76, 81, 86, 91), # science
    c(73, 78, 83, 88, 93)  # english
  )
  print(lapply(subject_marks, mean))

  # Task 2 ----
  # Marks of 4 students in Maths, Science and English; the mean score per
  # student is computed with sapply().
  student_marks <- list(
    c(70, 80, 90),
    c(71, 81, 91),
    c(72, 82, 92),
    c(73, 83, 93)
  )
  print(sapply(student_marks, mean))

  # Task 3 ----
  # 10 x 5 matrix of 1..50 filled column-wise; column averages via apply().
  grid <- matrix(1:50, nrow = 10, ncol = 5, byrow = FALSE)
  print(apply(grid, 2, mean))

  # Task 4 ----
  # Average mpg for each combination of cylinder count (rows) and
  # transmission type (columns), via tapply().
  mpg_by_cyl_am <- with(mtcars, tapply(mpg, list(cyl, am), mean))
  print(mpg_by_cyl_am)

  # Task 5 ----
  # Element-wise product of 1..5 and 6..10 via mapply().
  x <- 1:5
  y <- 6:10
  products <- mapply(function(a, b) a * b, x, y)
  print(products)
}

loops()
LAB 4: Welcome to Data Handling Using R-4 - Introduction to dplyr
Introduction to dplyr
Solution:
library(dplyr, warn.conflicts = FALSE)

#' Introduction to dplyr on the hflights dataset.
#'
#' Reads the dataset from CSV and prints its dimensions, head/tail, a
#' glimpse, the carriers of the first fifty rows, and the first ten rows
#' after adding a `Carrier` column with the full carrier name.
#'
#' @param path Path to the hflights CSV file. Defaults to "hflights.csv" in
#'   the working directory, as in the original exercise.
#' @return Invisibly, the first ten rows of the augmented data frame.
dplyrs <- function(path = "hflights.csv") {
  # Task 1 ----
  # Read the dataset and print its dimensions (rows, columns).
  hflights <- read.csv(path)
  print(dim(hflights))

  # Task 2 ----
  # First and last records.
  print(head(hflights))
  print(tail(hflights))

  # Task 3 ----
  # First 20 records.
  print(head(hflights, 20))

  # Task 4 ----
  # glimpse() prints by itself; do not wrap it in print().
  glimpse(hflights)

  # Task 5 ----
  # A. First fifty rows. head() does not pad with NA rows when the file has
  #    fewer than fifty records, unlike indexing with 1:50.
  first_fifty <- head(hflights, 50)
  # B. Convert to a tbl.
  hflights1 <- as_tibble(first_fifty)
  # C. Extract a column with standard R syntax.
  carriers <- hflights1$UniqueCarrier
  print(carriers)

  # Task 6 ----
  # A. Lookup from carrier code to carrier name. The assignment operator
  #    must be written `<-` without a space: `x < - c(...)` is a comparison
  #    against a negated vector and fails at runtime.
  abrCarrier <- c(
    "AA" = "American", "AS" = "Alaska", "B6" = "JetBlue",
    "CO" = "Continental", "DL" = "Delta", "OO" = "SkyWest",
    "UA" = "United", "US" = "US_Airways", "WN" = "Southwest",
    "EV" = "Atlantic_Southeast", "F9" = "Frontier", "FL" = "AirTran",
    "MQ" = "American_Eagle", "XE" = "ExpressJet", "YV" = "Mesa"
  )

  # B. Add the Carrier column. Index the lookup by character so a factor
  #    column is matched by label rather than by its integer code; codes
  #    missing from the lookup become "Error".
  codes <- as.character(hflights$UniqueCarrier)
  hflights$Carrier <- ifelse(
    codes %in% names(abrCarrier),
    unname(abrCarrier[codes]),
    "Error"
  )

  # C. First 10 rows, including the new column.
  print(head(hflights, 10))
}

dplyrs()
LAB 5: Welcome to Data Handling Using R-5 - dplyr Verbs and Functions
dplyr Verbs and Functions
Solution: dplyr Verbs and Functions.
library(dplyr, warn.conflicts = FALSE)

#' dplyr verbs (filter, select, mutate, arrange, summarise) on hflights.
#'
#' Reads the dataset from CSV and prints the result of each task to stdout.
#'
#' @param path Path to the hflights CSV file. Defaults to "hflights.csv" in
#'   the working directory, as in the original exercise.
#' @return Invisibly, the per-month summary printed for Task 6.
dplyr_verbs <- function(path = "hflights.csv") {
  hflights <- read.csv(path)

  # Task 1 ----
  # Flights with flight number 428 or 460.
  filtered_flights <- hflights %>%
    filter(FlightNum == 428 | FlightNum == 460)
  print(filtered_flights)

  # Task 2 ----
  # First twenty rows, then FlightNum plus every column whose name
  # contains "Time".
  hflights1 <- hflights %>%
    slice_head(n = 20)
  hflights1_filtered <- hflights1 %>%
    select(FlightNum, contains("Time"))
  print(hflights1_filtered)

  # Task 3 ----
  # Speed as Distance per hour; AirTime is divided by 60, i.e. treated as
  # minutes.
  # NOTE(review): the exercise asks for Km/Hr, but the unit of Distance is
  # not visible here. If it is recorded in miles, a 1.609344 factor is
  # needed. Confirm against the dataset documentation.
  hflights1_speed <- hflights1 %>%
    mutate(velocity = Distance / (AirTime / 60))
  print(hflights1_speed)

  # Task 4 ----
  # Records sorted by arrival delay, largest delay first.
  hflights1_sorted <- hflights1 %>%
    arrange(desc(ArrDelay))
  print(hflights1_sorted)

  # Task 5 ----
  # Total and cancelled flights per carrier.
  cancelled_flights_by_carrier <- hflights %>%
    group_by(UniqueCarrier) %>%
    summarize(
      n_flights = n(),
      n_canc = sum(Cancelled)
    )
  print(cancelled_flights_by_carrier)

  # Task 6 ----
  # Number of flights per month and their average arrival delay. The count
  # is required by the task and was missing from the summary; both columns
  # are named so the printed header is readable.
  # NOTE(review): only Origin == "IAH" is kept; confirm whether other
  # Houston origin codes in the data should be included as well.
  summary_flights <- hflights %>%
    filter(Origin == "IAH") %>%
    group_by(Month) %>%
    summarize(
      n_flights = n(),
      avg_arr_delay = mean(ArrDelay, na.rm = TRUE)
    )
  print(summary_flights)
}

dplyr_verbs()
LAB 6: Welcome to Data Handling Using R-6 - Set Operations in dplyr
Set Operations in dplyr
Solution:
#' Set operations in dplyr on two overlapping slices of `mtcars`.
#'
#' Builds `first` (rows 1 to 6) and `second` (rows 6 to 15), drops their row
#' names, and prints the result of intersect(), union(), union_all() and
#' setdiff() applied to the pair, in that order.
sets <- function() {
  library(dplyr)

  # Task 1 ----
  # Two data frames sharing row 6; row names are reset so the shared row
  # compares equal on its column values.
  first <- head(mtcars, 6)
  second <- mtcars[6:15, ]
  row.names(first) <- NULL
  row.names(second) <- NULL

  # Task 2 ----
  # Apply each set operation to (first, second) and print the result.
  set_ops <- list(intersect, union, union_all, setdiff)
  for (set_op in set_ops) {
    print(set_op(first, second))
  }
}

sets()
LAB 7: Welcome to Data Handling Using R-7 - Contingency Tables
Practice Creating Contingency Tables
Solution:
# Task 1: Apply the functions table(), cut() and pretty() to the built-in
# dataset mtcars: bin the mpg values at "pretty" break points and print the
# resulting contingency table of counts per bin.
column_vector <- mtcars$mpg
break_point <- pretty(column_vector)
result <- table(cut(column_vector, breaks = break_point))
print(result)
LAB 8: Welcome to Data Handling Using R - Test Your Knowledge
Test Your Knowledge
Solution:
# Note: If you face any issue while compiling the code, kindly discuss the
# issue in the comment box.
# Enter your code here. Read input from STDIN. Print output to STDOUT.
library(dplyr)

#' Placeholder for the "Test Your Knowledge" exercise.
#'
#' No solution is provided on purpose. The hint is kept as a comment so the
#' file parses; the function does nothing and returns NULL invisibly.
data_oper <- function() {
  # Try to solve it by yourself.
  invisible(NULL)
}

data_oper()