Data Handling Using R Fresco Play Handson Solution HackerRank

Learn Data Science using R with hands-on exercises on Data Frames, Data Tables, Loop Functions, Read-write operations, dplyr verbs, Set Operations, etc.
Data Handling Using R Fresco Play Handson Solution HackerRank - www.pdfcup.com

LAB 1: Welcome to Data Handling using R-1 - Data Frames and Datasets in R.

Practice Exercises on Data Frames

Solution: Practice Exercises on Data Frames


# Lab 1 - data frame exercises.
#
# Builds a small employee data frame, then inspects, subsets, sorts and
# filters the built-in `mtcars` dataset, printing the result of every task.
#
# Returns (invisibly, via the final print()) the cars with mpg > 30 ordered
# by decreasing mpg, ties broken by decreasing disp.
data_handle_1 <- function()
{
    # Task 1
    # A. Three vectors (name, age, department) with 5 observations each.
    name <- c("A", "B", "C", "D", "E")
    department <- c("AA", "BB", "CC", "DD", "EE")
    age <- c(10, 11, 12, 13, 14)

    # B. Combine the three vectors into a data frame.
    empdetails_df <- data.frame(name, age, department)

    # C. Print the data frame and its structure.
    # str() writes its report itself and returns NULL, so wrapping it in
    # print() also emits a trailing "NULL" line; kept so the output is unchanged.
    print(empdetails_df)
    print(str(empdetails_df))

    #######################
    # Task 2
    # A. Structure of mtcars (same print(str()) behaviour as above).
    print(str(mtcars))

    # B. Summary of mtcars.
    print(summary(mtcars))

    #######################
    # Task 3: first, second and tenth columns of mtcars.
    print(mtcars[, c(1, 2, 10)])

    #######################
    # Task 4: mtcars sorted by mpg (ascending).
    print(mtcars[order(mtcars$mpg), ])

    #######################
    # Task 5: all cars that provide more than 30 mpg.
    task5 <- mtcars[mtcars$mpg > 30, ]
    print(task5)

    # Task 6: the same cars in decreasing order of mpg; rows with equal mpg
    # are ordered by decreasing disp. Both keys are passed to order() so the
    # tie-break no longer depends on the rows' original positions.
    print(task5[order(task5$mpg, task5$disp, decreasing = TRUE), ])
}
data_handle_1()
 

Lab 2: Welcome to Data Handling using R-2 - Data Tables in R

Practice using Data Tables.

Solution:


# Lab 2 - data.table exercise.
#
# For the cars in `mtcars` whose mpg is above 15, prints the mean horsepower
# (AvgHp) and mean weight (AvgWt) of every (cyl, carb) combination.
data_table <- function() {
    library(data.table)

    cars_dt <- as.data.table(mtcars)

    # Keep the fuel-efficient cars first, then aggregate per group.
    efficient_cars <- cars_dt[mpg > 15]
    group_means <- efficient_cars[
        ,
        list(AvgHp = mean(hp), AvgWt = mean(wt)),
        by = .(cyl, carb)
    ]

    print(group_means)
}

data_table()

 

Lab 3: Welcome to Data Handling using R-3 - Loop Functions

Practice Exercises on Loop Functions

Solution:


# Lab 3 - loop (apply-family) function exercises.
#
# Demonstrates lapply(), vapply(), apply(), tapply() and mapply() on small
# hand-made vectors and on the built-in `mtcars` dataset, printing the result
# of each task.
#
# Returns (invisibly, via the final print()) the element-wise products of
# 1:5 and 6:10.
loops <- function(){

    #############################
    # Task 1: a class of 5 students.
    # A. Marks scored in Maths, Science and English.
    maths <- c(70, 75, 80, 85, 90)
    science <- c(71, 76, 81, 86, 91)
    english <- c(73, 78, 83, 88, 93)

    # B. Collect the three vectors in a list.
    student_list <- list(maths, science, english)

    # C. Class average per subject with lapply().
    subject_means <- lapply(student_list, mean)
    print(subject_means)

    #############################
    # Task 2: a class of 4 students.
    # A. Marks of each student in Maths, Science and English.
    s1 <- c(70, 80, 90)
    s2 <- c(71, 81, 91)
    s3 <- c(72, 82, 92)
    s4 <- c(73, 83, 93)

    # B. Collect the four vectors in a list.
    student_list2 <- list(s1, s2, s3, s4)

    # C. Mean score per student. vapply() declares the result type up front,
    # so the outcome is always a numeric vector (sapply() would silently
    # change shape on empty or ragged input).
    total_score <- vapply(student_list2, mean, numeric(1))
    print(total_score)

    #############################
    # Task 3
    # A. 10 x 5 matrix of 1..50, filled column by column.
    mtrix <- matrix(1:50, nrow = 10, ncol = 5, byrow = FALSE)

    # B. Mean of every column (MARGIN = 2) with apply().
    avg_cl <- apply(mtrix, 2, mean)
    print(avg_cl)

    #############################
    # Task 4: average mpg in mtcars for every combination of number of
    # cylinders (rows) and transmission type (columns) with tapply().
    avg_mpg <- tapply(mtcars$mpg, list(mtcars$cyl, mtcars$am), mean)
    print(avg_mpg)

    #############################
    # Task 5
    # A./B. Vectors x (1 to 5) and y (6 to 10). Named as the task requests;
    # the previous name `c` masked base::c() inside this function.
    x <- 1:5
    y <- 6:10

    # C. Multiply x and y element by element with mapply().
    multiply_pair <- function(a, b) {
        a * b
    }
    products <- mapply(multiply_pair, x, y)
    print(products)
}

loops()
 

LAB 4: Welcome to Data Handling Using R-4 - Introduction to dplyr

Introduction to dplyr

Solution:


library(dplyr, warn.conflicts = FALSE)  
# Lab 4 - introduction to dplyr.
#
# Reads "hflights.csv" from the working directory, prints its dimensions and
# several previews, then adds a `Carrier` column holding the full carrier name
# for each `UniqueCarrier` code ("Error" for codes missing from the lookup).
# Relies on dplyr being attached (glimpse(), as_tibble()).
#
# Returns (invisibly, via the final print()) the first 10 rows of the
# augmented data frame.
dplyrs <- function(){

    # Task 1: read the dataset and print its dimensions (rows, columns).
    hflights <- read.csv("hflights.csv")
    print(dim(hflights))

    # Task 2: first and last records, printed separately.
    print(head(hflights))
    print(tail(hflights))

    # Task 3: first 20 records.
    print(head(hflights, 20))

    # Task 4: glimpse() shows every column, one per line. It prints by
    # itself, so it is deliberately not wrapped in print().
    glimpse(hflights)

    # Task 5
    # A./B. First fifty rows, converted into a tbl.
    first_fifty <- hflights[1:50, ]
    hflights1 <- as_tibble(first_fifty)

    # C. A tbl still supports standard `$` extraction.
    carriers <- hflights1$UniqueCarrier
    print(carriers)

    # Task 6
    # A. Lookup from carrier code to carrier name.
    # (The original line read `abrCarrier < - c(...)`, which R parses as the
    # comparison `abrCarrier < (-c(...))` and therefore fails at run time.)
    abrCarrier <- c(
        "AA" = "American", "AS" = "Alaska", "B6" = "JetBlue",
        "CO" = "Continental", "DL" = "Delta", "OO" = "SkyWest",
        "UA" = "United", "US" = "US_Airways", "WN" = "Southwest",
        "EV" = "Atlantic_Southeast", "F9" = "Frontier", "FL" = "AirTran",
        "MQ" = "American_Eagle", "XE" = "ExpressJet", "YV" = "Mesa"
    )

    # B. Add the Carrier column. as.character() guarantees a lookup by name
    # even if read.csv() produced a factor (indexing by a factor would use
    # its integer codes instead).
    carrier_code <- as.character(hflights$UniqueCarrier)
    carrier_name <- unname(abrCarrier[carrier_code])
    carrier_name[is.na(carrier_name)] <- "Error"
    hflights$Carrier <- carrier_name

    # C. First 10 rows, including the newly added column.
    print(head(hflights, 10))

}

dplyrs()
 

LAB 5: Welcome to Data Handling Using R-5 - dplyr Verbs and Functions

dplyr Verbs and Functions

Solution: dplyr Verbs and Functions.


library(dplyr, warn.conflicts = FALSE)  
# Lab 5 - dplyr verbs and functions.
#
# Reads "hflights.csv" from the working directory and prints the result of
# six exercises built on filter(), select(), mutate(), arrange(),
# group_by() and summarize(). Relies on dplyr being attached.
#
# Returns (invisibly, via the final print()) the per-month summary of Task 6.
dplyr_verbs <- function (){

    hflights <- read.csv("hflights.csv")

    # Task 1: flights whose flight number is 428 or 460.
    filtered_flights <- hflights %>%
        filter(FlightNum == 428 | FlightNum == 460)
    print(filtered_flights)

    # Task 2: hflights1 holds the first twenty rows of hflights; from it,
    # select FlightNum and every column whose name contains "Time".
    hflights1 <- hflights %>%
        slice_head(n = 20)

    hflights1_filtered <- hflights1 %>%
        select(FlightNum, contains("Time"))
    print(hflights1_filtered)

    # Task 3: speed from Distance and AirTime, stored in a new `velocity`
    # column. AirTime / 60 converts to hours on the assumption that AirTime
    # is in minutes.
    # NOTE(review): the task asks for Km/Hr, but no unit conversion is applied
    # to Distance - confirm the unit of the Distance column.
    hflights1_speed <- hflights1 %>%
        mutate(velocity = Distance / (AirTime / 60))
    print(hflights1_speed)

    # Task 4: hflights1 sorted by arrival delay, largest delay first.
    hflights1_sorted <- hflights1 %>%
        arrange(desc(ArrDelay))
    print(hflights1_sorted)

    # Task 5: per carrier, the number of flights and how many were cancelled.
    cancelled_flights_by_carrier <- hflights %>%
        group_by(UniqueCarrier) %>%
        summarize(n_flights = n(), n_canc = sum(Cancelled))
    print(cancelled_flights_by_carrier)

    # Task 6: per month, the number of flights that left Houston and their
    # average arrival delay. The flight count was previously missing and the
    # mean column was unnamed.
    # NOTE(review): only Origin == "IAH" is treated as Houston here; confirm
    # whether rows with other Houston origins (e.g. HOU) should be included.
    summary_flights <- hflights %>%
        filter(Origin == "IAH") %>%
        group_by(Month) %>%
        summarize(
            n_flights = n(),
            avg_arr_delay = mean(ArrDelay, na.rm = TRUE)
        )
    print(summary_flights)

}

dplyr_verbs()

 

LAB 6: Welcome to Data Handling Using R-6 - Set Operations in dplyr

Set Operations in dplyr

Solution:


# Lab 6 - set operations in dplyr.
#
# Splits `mtcars` into two overlapping data frames (rows 1-6 and rows 6-15,
# row names removed) and prints the result of intersect(), union(),
# union_all() and setdiff() applied to them, in that order.
#
# Returns (invisibly) the setdiff() result, i.e. the value of the last print.
sets <- function() {
    library(dplyr)

    # Task 1: the two operands; row 6 is shared by both.
    first <- mtcars[1:6, ]
    second <- mtcars[6:15, ]
    rownames(first) <- NULL
    rownames(second) <- NULL

    # Task 2: run and print every set operation in turn.
    set_operations <- list(intersect, union, union_all, setdiff)
    printed <- lapply(set_operations, function(operation) {
        print(operation(first, second))
    })

    invisible(printed[[length(printed)]])
}

sets()

 

LAB 7: Welcome to Data Handling Using R-7 - Contingency Tables

Practice Creating Contingency Tables

Solution:


# Lab 7 - contingency table.
# Task 1: combine table(), cut() and pretty() on the built-in dataset
# `mtcars` to count how many cars fall into each mpg interval, then print
# the resulting table.
column_vector <- mtcars[["mpg"]]

# pretty() proposes evenly spaced break points covering the mpg values.
break_point <- pretty(column_vector)

# cut() assigns every mpg value to an interval; table() counts per interval.
result <- table(cut(column_vector, breaks = break_point))
print(result)

 

LAB 8: Welcome to Data Handling Using R - Test Your Knowledge

Test Your Knowledge

Solution:



#Note: If you face any issue while running the code, kindly discuss it in the comment box.

# Enter your code here. Read input from STDIN. Print output to STDOUT.
library(dplyr)
# Lab 8 - "Test Your Knowledge" placeholder.
#
# The solution is intentionally left to the reader. The reminder is kept as a
# comment (as bare text it was a syntax error), so the function parses and
# simply returns NULL invisibly until it is filled in.
data_oper <- function() {
    # Try to solve it by yourself.
    invisible(NULL)
}


data_oper()
 

About the author

D Shwari
I'm a professor at National University's Department of Computer Science. My main areas are data science and data analysis, along with project management for many computer science-related sectors. My next project is on AI with deep learning.

9 comments

  1. Anonymous
    Hi, could you please help on LAB4 - Introduction to dplyr. My code is not compiling properly?
    1. Anonymous
      I am getting error in Lab 8, what is the solution please?
  2. Anonymous
    Please help on Lab 4 & 5, code is not compiling properly
  3. Anonymous
    Why the solutions for LAB5 to LAB8 are not displayed? Anyone can see them?
  4. Anonymous
    I am facing difficulty in lab5 , do anyone have any solution for this?
  5. Anonymous
    Hi, can anyone share the Lab *8 solution???
    1. Anonymous
      Can anyone please share the LAB 8 solution, please?
  6. Anonymous
    Test your Knowledge


    # Enter your code here. Read input from STDIN. Print output to STDOUT.
    library(dplyr,warn.conflicts = FALSE)

    # "Test Your Knowledge" exercises on `mtcars` (requires dplyr attached).
    #
    # Every task result is wrapped in print(): inside a function a bare
    # expression is evaluated but not auto-printed, so without print() only
    # the last value would ever reach the output.
    #
    # Returns (invisibly, via the final print()) the mpg category table.
    data_oper <- function(){
        # Task 1: first 6 rows after ordering by mpg.
        mtcars_sorted <- mtcars %>%
            arrange(mpg)
        print(head(mtcars_sorted, 6))

        # Task 2: rows with mpg greater than 30 and gear equal to 4.
        filtered_data <- mtcars %>%
            filter(mpg > 30, gear == 4)
        print(filtered_data)

        # Task 3: columns mpg, cyl, disp and gear; first 5 rows.
        select_data <- mtcars %>%
            select(mpg, cyl, disp, gear)
        print(head(select_data, 5))

        # Task 4: round mpg, store the result in s2; first 5 rows.
        s2 <- mtcars %>% mutate(mpg = round(mpg))
        print(head(s2, 5))

        # Task 5: s2 arranged by mpg, cyl and disp; first 6 rows.
        arranged_data <- s2 %>%
            arrange(mpg, cyl, disp)
        print(head(arranged_data, 6))

        # Task 6: summary of column wt.
        print(summary(mtcars$wt))

        # Task 7: bin mpg into (0,15], (15,20] and (20,30], labelled "Low",
        # "Medium" and "High", and tabulate the categories. Values outside
        # the requested breaks become NA and are not counted by table().
        categorised <- mtcars %>%
            mutate(mpg_category = cut(mpg,
                                      breaks = c(0, 15, 20, 30),
                                      labels = c("Low", "Medium", "High")))

        mpg_table <- table(categorised$mpg_category)
        print(mpg_table)
    }

    data_oper()
  7. Anonymous
    # Enter your code here. Read input from STDIN. Print output to STDOUT.
    library(dplyr, warn.conflicts = FALSE)
    # "Test Your Knowledge" exercises on `mtcars` (requires dplyr attached).
    # Each task pipes its result straight into print().
    #
    # Returns (invisibly, via the final print()) the mpg category table.
    data_oper <- function() {
        # Task 1: the six rows with the lowest mpg.
        mtcars %>%
            arrange(mpg) %>%
            head(6) %>%
            print()

        # Task 2: cars with mpg above 30 and exactly 4 gears.
        mtcars %>%
            filter(mpg > 30, gear == 4) %>%
            print()

        # Task 3: first five rows of the mpg / cyl / disp / gear columns.
        mtcars %>%
            select(mpg, cyl, disp, gear) %>%
            head(5) %>%
            print()

        # Task 4: s2 is mtcars with mpg rounded; show its first five rows.
        s2 <- mutate(mtcars, mpg = round(mpg))
        s2 %>%
            head(5) %>%
            print()

        # Task 5: first six rows of s2 ordered by mpg, then cyl, then disp.
        s2 %>%
            arrange(mpg, cyl, disp) %>%
            head(6) %>%
            print()

        # Task 6: summary statistics of the wt column.
        print(summary(mtcars$wt))

        # Task 7: label mpg as "Low" (0-15), "Medium" (15-20) or
        # "High" (20-30) and print how many cars fall into each label.
        with_category <- mutate(
            mtcars,
            mpg_category = cut(mpg,
                               breaks = c(0, 15, 20, 30),
                               labels = c("Low", "Medium", "High"))
        )
        print(table(with_category$mpg_category))
    }

    data_oper()