Challenge 1

Read new_diamonds dataset and see compute time differences when specifying columns while using read.csv(). Do this analysis using profvis.

Solution

library(profvis)
# newDiamonds=rbind(diamonds, diamonds,diamonds, diamonds)
# newDiamonds2 = newDiamonds %>% select(carat,cut, price)
# write.csv2(newDiamonds2,file="new_diamonds.csv",row.names=FALSE)

profvis({
new_diamonds = read.csv2("datasets/new_diamonds.csv")

new_diamonds2 = read.csv2("datasets/new_diamonds.csv", colClasses = c("numeric",
                                                 "character","numeric"))
})

Challenge 2

Verify the differences in compute time when calculating means of the numeric values of new_diamonds.

Solution


library(microbenchmark)

diamonds_num = new_diamonds[,c(1,3)]

microbenchmark(
  base::colMeans(diamonds_num),
  apply(diamonds_num,2,mean)
)
Unit: milliseconds

Challenge 3

Verify the compute time differences between:

  1. Preallocating the memory and create a new vector with newvector=1:1000
  2. Create the same vector by generating a loop that grows the vector size with newvector=c(newvector,x)

Solution


create_vector =function(n){
  vec = c()
  for(i in 1:n){
    vec=c(vec,i)
  }
}

microbenchmark(
  newvector = c(1:1000),
  newvector2 = create_vector(1000)
)
Unit: microseconds

Challenge 4

What does the lapply2 function do? Check compute time difference between lapply2 and lapply2_c. - To do this you could appley the function sd to x=list(1:200,1:100,100:340,10:90000)

Solution

lapply2 applies a function to each elelement of a list.

lapply2 <- function(x, f, ...) {
  out <- vector("list", length(x))
  for (i in seq_along(x)) {
    out[[i]] <- f(x[[i]], ...)
  }
  out
}

lapply2_c <- compiler::cmpfun(lapply2)

x=list(1:200,1:100,100:340,10:90000)
microbenchmark(
  lapply2(x,sd),
  lapply2_c(x,sd),
  lapply(x,sd)
)
Unit: microseconds

Challenge 5

Modify the add function to return multiplication for 3 integers and check compute time differences with standard R operator. Do you expect a difference?

Can you extend the function to multiplication of 3 number in general?

Solution

library(Rcpp)

cppFunction('int mult(int x, int y) {
  int prod = x * y;
  return prod;
}')


x=17635
y=10002

microbenchmark(
  a=x*y,
  b=mult(x,y)
)
Unit: nanoseconds

Challenge 6 (Advanced)

Sample 20 passengers from the titanic and calculate the mean_age. Do this 10000 times (in parallel).

Use the titanic DataSet: titanic <- read.csv("https://goo.gl/4Gqsnz")

Tip: Remove NA’s using na.omit(titanic)

Solution


library(dplyr)

Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(parallel)
titanic = read.csv("https://goo.gl/4Gqsnz")
titanic= na.omit(titanic)

sample_mean =function(i){
  mean_Age = titanic %>% 
    sample_n(20) %>% 
    summarise(mean_age=mean(Age)) %>% 
    pull(mean_age)
  return(mean_Age)
}

system.time(lapply(1:10000, sample_mean))
   user  system elapsed 
 16.005   0.163  17.155 
system.time(mclapply(1:10000, sample_mean))
   user  system elapsed 
  8.704   0.261   9.362 
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSkKYGBgCgojIENoYWxsZW5nZSAxCgoqKlJlYWQgbmV3X2RpYW1vbmRzIGRhdGFzZXQgYW5kIHNlZSBjb21wdXRlIHRpbWUgZGlmZmVyZW5jZXMgd2hlbiBzcGVjaWZ5aW5nIGNvbHVtbnMgd2hpbGUgdXNpbmcgcmVhZC5jc3YoKS4gRG8gdGhpcyBhbmFseXNpcyB1c2luZyBwcm9mdmlzLioqCgojIyMgU29sdXRpb24KYGBge3J9CmxpYnJhcnkocHJvZnZpcykKIyBuZXdEaWFtb25kcz1yYmluZChkaWFtb25kcywgZGlhbW9uZHMsZGlhbW9uZHMsIGRpYW1vbmRzKQojIG5ld0RpYW1vbmRzMiA9IG5ld0RpYW1vbmRzICU+JSBzZWxlY3QoY2FyYXQsY3V0LCBwcmljZSkKIyB3cml0ZS5jc3YyKG5ld0RpYW1vbmRzMixmaWxlPSJuZXdfZGlhbW9uZHMuY3N2Iixyb3cubmFtZXM9RkFMU0UpCgpwcm9mdmlzKHsKbmV3X2RpYW1vbmRzID0gcmVhZC5jc3YyKCJkYXRhc2V0cy9uZXdfZGlhbW9uZHMuY3N2IikKCm5ld19kaWFtb25kczIgPSByZWFkLmNzdjIoImRhdGFzZXRzL25ld19kaWFtb25kcy5jc3YiLCBjb2xDbGFzc2VzID0gYygibnVtZXJpYyIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAiY2hhcmFjdGVyIiwibnVtZXJpYyIpKQp9KQoKYGBgCgojIENoYWxsZW5nZSAyCgpWZXJpZnkgdGhlIGRpZmZlcmVuY2VzIGluIGNvbXB1dGUgdGltZSB3aGVuIGNhbGN1bGF0aW5nIG1lYW5zIG9mIHRoZSBudW1lcmljIHZhbHVlcyBvZiBgbmV3X2RpYW1vbmRzYC4KCi0gU3Vic2V0IHRoZSByZWxldmFudCBjb2x1bW5zIGBuZXdfZGlhbW9uZHNfbnVtID0gbmV3X2RpYW1vbmRzID0gbmV3X2RpYW1vbmRzWyxjKDEsMyldYAotIFVzZSBtaWNyb2JlbmNobWFyayBvbiBgY29sTWVhbnMoKWAgYW5kICBgYXBwbHlgCgojIyMgU29sdXRpb24KYGBge3J9CgpsaWJyYXJ5KG1pY3JvYmVuY2htYXJrKQoKZGlhbW9uZHNfbnVtID0gbmV3X2RpYW1vbmRzWyxjKDEsMyldCgptaWNyb2JlbmNobWFyaygKICBiYXNlOjpjb2xNZWFucyhkaWFtb25kc19udW0pLAogIGFwcGx5KGRpYW1vbmRzX251bSwyLG1lYW4pCikKCgpgYGAKCiMgQ2hhbGxlbmdlIDMKCioqVmVyaWZ5IHRoZSBjb21wdXRlIHRpbWUgZGlmZmVyZW5jZXMgYmV0d2VlbjoqKgoKMi4gUHJlYWxsb2NhdGluZyB0aGUgbWVtb3J5IGFuZCBjcmVhdGUgYSBuZXcgdmVjdG9yIHdpdGggYG5ld3ZlY3Rvcj0xOjEwMDBgCjEuIENyZWF0ZSB0aGUgc2FtZSB2ZWN0b3IgYnkgZ2VuZXJhdGluZyBhIGxvb3AgdGhhdCBncm93cyB0aGUgdmVjdG9yIHNpemUgd2l0aCBgbmV3dmVjdG9yPWMobmV3dmVjdG9yLHgpYAoKIyMjIFNvbHV0aW9uCmBgYHtyfQoKY3JlYXRlX3ZlY3RvciA9ZnVuY3Rpb24obil7CiAgdmVjID0gYygpCiAgZm9yKGkgaW4gMTpuKXsKICAgIHZlYz1jKHZlYyxpKQogIH0KfQoKbWljcm9iZW5jaG1hcmsoCiAgbmV3dmVjdG9yID0gYygxOjEwMDApLAogIG5ld3ZlY3RvcjIgPSBjcmVhdGVfdmVjdG9yKDEwMDApCikKYGBgCgoKIyBDaGFsbGVuZ2UgNAoKKipXaGF0IGRvZXMgdGhlIGxhcHBseTIgZnVuY3Rpb24gZG8/IENoZWNrIGNvbXB1dGUgdGltZSBkaWZmZXJlbmNlIGJldHdlZW4gYGxhcHBseTJgIGFuZCBgbGFwcGx5Ml9jYC4qKgotIFRvIGRvIHRoaXMgeW91IGNvdWxkIGFwcGxleSB0aGUgZnVuY3Rpb24gYHNkYCB0byBgeD1saXN0KDE6MjAwLDE6MTAwLDEwMDozNDAsMTA6OTAwMDApYAoKIyMjIFNvbHV0aW9uCgpgbGFwcGx5MmAgYXBwbGllcyBhIGZ1bmN0aW9uIHRvIGVhY2ggZWxlbGVtZW50IG9mIGEgbGlzdC4KYGBge3IgZXZhbD1UUlVFfQpsYXBwbHkyIDwtIGZ1bmN0aW9uKHgsIGYsIC4uLikgewogIG91dCA8LSB2ZWN0b3IoImxpc3QiLCBsZW5ndGgoeCkpCiAgZm9yIChpIGluIHNlcV9hbG9uZyh4KSkgewogICAgb3V0W1tpXV0gPC0gZih4W1tpXV0sIC4uLikKICB9CiAgb3V0Cn0KCmxhcHBseTJfYyA8LSBjb21waWxlcjo6Y21wZnVuKGxhcHBseTIpCgp4PWxpc3QoMToyMDAsMToxMDAsMTAwOjM0MCwxMDo5MDAwMCkKbWljcm9iZW5jaG1hcmsoCiAgbGFwcGx5Mih4LHNkKSwKICBsYXBwbHkyX2MoeCxzZCksCiAgbGFwcGx5KHgsc2QpCikKCmBgYAoKIyBDaGFsbGVuZ2UgNQoKTW9kaWZ5IHRoZSBgYWRkYCBmdW5jdGlvbiB0byByZXR1cm4gbXVsdGlwbGljYXRpb24gZm9yIDMgaW50ZWdlcnMgYW5kIGNoZWNrIGNvbXB1dGUgdGltZSBkaWZmZXJlbmNlcyB3aXRoIHN0YW5kYXJkIFIgb3BlcmF0b3IuIERvIHlvdSBleHBlY3QgYSBkaWZmZXJlbmNlPwoKQ2FuIHlvdSBleHRlbmQgdGhlIGZ1bmN0aW9uIHRvIG11bHRpcGxpY2F0aW9uIG9mIDMgbnVtYmVyIGluIGdlbmVyYWw/CgojIyMgU29sdXRpb24KYGBge3J9CmxpYnJhcnkoUmNwcCkKCmNwcEZ1bmN0aW9uKCdpbnQgbXVsdChpbnQgeCwgaW50IHkpIHsKICBpbnQgcHJvZCA9IHggKiB5OwogIHJldHVybiBwcm9kOwp9JykKCgp4PTE3NjM1Cnk9MTAwMDIKCm1pY3JvYmVuY2htYXJrKAogIGE9eCp5LAogIGI9bXVsdCh4LHkpCikKCmBgYAoKCiMgQ2hhbGxlbmdlIDYgKEFkdmFuY2VkKQoKU2FtcGxlIDIwIHBhc3NlbmdlcnMgZnJvbSB0aGUgdGl0YW5pYyBhbmQgY2FsY3VsYXRlIHRoZSBtZWFuX2FnZS4gRG8gdGhpcyAxMDAwMCB0aW1lcyAoaW4gcGFyYWxsZWwpLgoKVXNlIHRoZSB0aXRhbmljIERhdGFTZXQ6IGB0aXRhbmljIDwtIHJlYWQuY3N2KCJodHRwczovL2dvby5nbC80R3FzbnoiKWAKCi0gRG9lcyB0aGUgY29kZSBydW4gZmFzdGVyIGlmIHJ1biBpbiBwYXJhbGxlbD8KCipUaXA6IFJlbW92ZSBOQSdzIHVzaW5nIGBuYS5vbWl0KHRpdGFuaWMpYCoKCiMjIyBTb2x1dGlvbgpgYGB7cn0KCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkocGFyYWxsZWwpCnRpdGFuaWMgPSByZWFkLmNzdigiaHR0cHM6Ly9nb28uZ2wvNEdxc256IikKdGl0YW5pYz0gbmEub21pdCh0aXRhbmljKQoKc2FtcGxlX21lYW4gPWZ1bmN0aW9uKGkpewogIG1lYW5fQWdlID0gdGl0YW5pYyAlPiUgCiAgICBzYW1wbGVfbigyMCkgJT4lIAogICAgc3VtbWFyaXNlKG1lYW5fYWdlPW1lYW4oQWdlKSkgJT4lIAogICAgcHVsbChtZWFuX2FnZSkKICByZXR1cm4obWVhbl9BZ2UpCn0KCnN5c3RlbS50aW1lKGxhcHBseSgxOjEwMDAwLCBzYW1wbGVfbWVhbikpCgpzeXN0ZW0udGltZShtY2xhcHBseSgxOjEwMDAwLCBzYW1wbGVfbWVhbikpCgpgYGAKCgoKCg==