Data Frames: Exercice Solutions

cars$wt <- cars$wt * 0.45
cars <- cars[cars$am == 1,]
cars[cars$am == 1,]$gear <- -1
cars[cars$cyl > 6, "model"]
## [1] "Ford Pantera L" "Maserati Bora"
sum(cars[cars$cyl > 6,"hp"])
## [1] 599
cars[,setdiff(colnames(cars), "disp")]
##             model  mpg cyl  hp drat      wt am gear
## 1       Mazda RX4 21.0   6 110 3.90 1.17900  1   -1
## 2   Mazda RX4 Wag 21.0   6 110 3.90 1.29375  1   -1
## 3      Datsun 710 22.8   4  93 3.85 1.04400  1   -1
## 18       Fiat 128 32.4   4  66 4.08 0.99000  1   -1
## 19    Honda Civic 30.4   4  52 4.93 0.72675  1   -1
## 20 Toyota Corolla 33.9   4  65 4.22 0.82575  1   -1
## 26      Fiat X1-9 27.3   4  66 4.08 0.87075  1   -1
## 27  Porsche 914-2 26.0   4  91 4.43 0.96300  1   -1
## 28   Lotus Europa 30.4   4 113 3.77 0.68085  1   -1
## 29 Ford Pantera L 15.8   8 264 4.22 1.42650  1   -1
## 30   Ferrari Dino 19.7   6 175 3.62 1.24650  1   -1
## 31  Maserati Bora 15.0   8 335 3.54 1.60650  1   -1
## 32     Volvo 142E 21.4   4 109 4.11 1.25100  1   -1

More Matrices: Exercise Solution

most_deviant <- function(x) {
  
  most_deviant_in_row <- function(row) {
    row[which.max(abs(median(row) - row))]
  }
  
  apply(x,1,most_deviant_in_row)
  
}

z <- rbind(c(4,7,6,1),c(0,9,0,2),c(5,3,0,7))
most_deviant(z)
## [1] 1 9 0

Walk through to solution

Here’s the kind of sequence I would go through to get to the solution:

#Walk through to solution 

row <- z[1,]
row
## [1] 4 7 6 1
??median
median(row) - row
## [1]  1 -2 -1  4
abs(median(row) - row)
## [1] 1 2 1 4
max(abs(median(row) - row))
## [1] 4
which.max(abs(median(row)) -row )
## [1] 4
row[which.max(abs(median(row) - row))]
## [1] 1
most_deviant_in_row <- function(row) {
  row[which.max(abs(median(row) - row))]
}

apply(z,1,most_deviant_in_row)
## [1] 1 9 0
most_deviant <- function(x) {
  
  most_deviant_in_row <- function(row) {
    m <- max(abs(median(row) - row))
    row[which.max(abs(median(row) - row))]
  }
  
  apply(x,1,most_deviant_in_row)
  
}

lapply and sapply: Exercise Solutions

t <- list(23,6,4,9)
a <- lapply(t,sample)

censor <- function(r) {
  ifelse(r > 10, 10, r)
}

lapply(a, censor)
## [[1]]
##  [1]  1 10 10 10 10 10 10  2 10  5  6 10  8 10 10  3 10  4 10 10  9  7 10
## 
## [[2]]
## [1] 2 5 4 6 1 3
## 
## [[3]]
## [1] 4 1 3 2
## 
## [[4]]
## [1] 8 3 4 7 1 6 9 2 5
standardise <- function(x) {
    x / mean(x)
}

lapply(a, standardise)
## [[1]]
##  [1] 0.08333333 1.83333333 1.16666667 1.58333333 1.50000000 1.66666667
##  [7] 1.75000000 0.16666667 1.25000000 0.41666667 0.50000000 1.00000000
## [13] 0.66666667 0.91666667 1.33333333 0.25000000 0.83333333 0.33333333
## [19] 1.08333333 1.41666667 0.75000000 0.58333333 1.91666667
## 
## [[2]]
## [1] 0.5714286 1.4285714 1.1428571 1.7142857 0.2857143 0.8571429
## 
## [[3]]
## [1] 1.6 0.4 1.2 0.8
## 
## [[4]]
## [1] 1.6 0.6 0.8 1.4 0.2 1.2 1.8 0.4 1.0
cars <- as.data.frame(
  read.table("http://git.io/vmFKV",header=T, stringsAsFactors = F)
  ) 

numeric_columns <- sapply(cars, is.numeric)

sapply(cars[,numeric_columns], function(x) {
  cars$model[which.max(x)]
})
##                   mpg                   cyl                  disp 
##      "Toyota Corolla"   "Hornet Sportabout"  "Cadillac Fleetwood" 
##                    hp                  drat                    wt 
##       "Maserati Bora"         "Honda Civic" "Lincoln Continental" 
##                    am                  gear 
##           "Mazda RX4"       "Porsche 914-2"
  1. b
library(stringr)
source(
  "http://markwestcott34.github.io/economics/teaching/ss2015/R/code/numbers2words.r"
  )

d <- as.data.frame(lapply(1:100, sample, 50,replace=T))
names(d) <- numbers2words(1:100)

withDroppedColumns <- d

withDroppedColumns[which(
  str_detect(names(d),"two")
)] <- list(NULL)

head(names(withDroppedColumns))
## [1] "one"   "three" "four"  "five"  "six"   "seven"
  1. c
removeMatchingElements <- function(l, test_strings)
{
  t <- sapply(test_strings, function(x) { str_detect(names(l),x) })
  l[as.logical(apply(t, 1, max))] <- list(NULL)
}

removeMatchingElements(d, c("one","two","three",
                                 "four","five","six",
                                 "seven","eight","nine"))

Debugging: Exercise Solutions

Exercise 1

findruns <- function(x,k) {
  n <- length(x)
  runs <- NULL
  for (i in 1:(n-k+1)) {
    if (all(x[i:(i+k-1)]==1))
      runs <- c(runs,i)
  }
  return(runs) 
}

Exercise 2

imin <- function(x) {
  lx <- length(x)
  
  i <- x[lx] #number of row from original matrix
  
  # find the minimum entry in the part of the row corresponding to the upper-right triangle 
  j <- which.min(x[(i+1):(lx-1)])
  
  # j is the position of the minimum entry in the x[(i+1):(lx-1)]
  # convert to the position of the minimum entry in the row
  k <- i+j
  
  #return the position of the entry and the value
  return( c(k, x[k]) )
}

mind <- function(d) {
  
  #make sure we are being passed a symetric matrix
  stopifnot(isSymmetric.matrix(d))
  n <- nrow(d)
  
  #add a column to the matrix which identifies the row number
  dd <- cbind(d,1:n)
  
  #find the minimum value and position of that value in each row (apart from the last)
  wmins <- apply(dd[-n,],1,imin)
  
  #wmins now contains two rows
  #row 1 are indices to the lowest value of each original row
  #row 2 the actual value
  
  #now find which original row contained the overall lowest number
  i <- which.min(wmins[2,])
  
  #and the lowest number in that row
  j <- wmins[1,i]
  
  #return the overall minimum value and the position in the matrix  
  return(c(d[i,j],i,j))
}