Read an excel file with separate range of cells

306 views Asked by At

I am trying to import several separate (non-contiguous) ranges of cells from an xlsx file using the most common functions (at least for me). I know you can import all the data and then select the columns, but I guess it's possible to do it the other way around and select the ranges while importing.

#Option 1
dat <- readxl::read_xlsx("C:/Users/javie/Documents/Estadística/Vitamina B12/Exoma_B12.xlsx", col_names = F, sheet = 2, skip = 1, range = cell_cols(c("A:L", "AP:BA")))

#Option 2
dat <- readxl::read_xlsx("C:/Users/javie/Documents/Estadística/Vitamina B12/Exoma_B12.xlsx", col_names = F, sheet = 2, skip = 1, range = c("A:L", "AP:BA"))


#Option 3
dat <- readxl::read_excel("C:/Users/javie/Documents/Estadística/Vitamina B12/Exoma_B12.xlsx", col_names = F, sheet = 2, skip = 1, range = cell_cols(c("A:L", "AP:BA")))

#Option 4
dat <- readxl::read_excel("C:/Users/javie/Documents/Estadística/Vitamina B12/Exoma_B12.xlsx", col_names = F, sheet = 2, skip = 1, range = c("A:L", "AP:BA"))

I guess this is easy, but as far as I could find on SO there is no clear solution.

2

There are 2 answers

1
Otto Kässi On

readxl almost covers this use case (link), but does not support non-contiguous columns (e.g. c('a:b','e:f')).

For example:

library(readxl)
library(cellranger)

# Path to the example workbook that ships with readxl.
example_file <- readxl_example("datasets.xlsx")
# Read only columns C:D of the second sheet, using the first row as header.
# NOTE(review): readxl documents that `range` takes precedence over `skip`,
# so `skip = 1` may have no effect here -- confirm.
foo <- read_excel(example_file, range = cell_cols("c:d"), sheet = 2, skip = 1, col_names = TRUE)

Output:

> foo
# A tibble: 32 × 2
    disp    hp
   <dbl> <dbl>
 1  160    110
 2  160    110
 3  108     93
 4  258    110
 5  360    175
 6  225    105
 7  360    245
 8  147.    62
 9  141.    95
10  168.   123
# … with 22 more rows
# ℹ Use `print(n = ...)` to see more rows

openxlsx::read.xlsx also kind of works, but it does not respect the requested column ordering:

> read.xlsx(example_excel, sheet=2, cols=c(1,3,4)) 

        mpg  disp  hp
    1  21.0 160.0 110
    2  21.0 160.0 110
    3  22.8 108.0  93
    4  21.4 258.0 110
    5  18.7 360.0 175
    6  18.1 225.0 105
    7  14.3 360.0 245

mpg is column 1 in the raw data, and it is always returned as the left-most column.

As a third option, it is also relatively straightforward to wrap read_excel in another function that supports non-contiguous columns. Here's my attempt at that:

library(tidyverse); library(cellranger); library(readxl)
#' Expand spreadsheet column specifications into individual column letters
#'
#' @param vec Character vector of single columns (`"h"`) and/or
#'   colon-separated column ranges (`"a:c"`, `"AP:BA"`).
#' @return Character vector with one element per column, in input order.
#'   A range whose endpoints are written entirely in lower case expands to
#'   lower-case letters; any other range expands to upper-case letters.
#'   Single columns are returned unchanged.
expand_sequence <- function(vec) {
  # Column letters are a bijective base-26 number: "A" = 1, "Z" = 26,
  # "AA" = 27, ... Working on that index (instead of the raw byte of a
  # single character) is what allows multi-letter columns such as "AP".
  col_to_index <- function(col) {
    digits <- utf8ToInt(toupper(col)) - utf8ToInt("A") + 1L
    Reduce(function(acc, digit) acc * 26L + digit, digits, 0L)
  }

  index_to_col <- function(index) {
    out <- character(0)
    while (index > 0L) {
      out <- c(LETTERS[(index - 1L) %% 26L + 1L], out)
      index <- (index - 1L) %/% 26L
    }
    paste(out, collapse = "")
  }

  expand_one <- function(parts) {
    # No ":" in the specification: a single column, passed through as is.
    if (length(parts) <= 1) {
      return(as.character(parts))
    }

    ends <- parts[1:2]
    if (!all(grepl("^[A-Za-z]+$", ends))) {
      stop(
        "Column ranges must look like \"A:C\", got \"",
        paste(parts, collapse = ":"), "\"",
        call. = FALSE
      )
    }

    cols <- vapply(
      seq(from = col_to_index(ends[1]), to = col_to_index(ends[2])),
      index_to_col,
      character(1)
    )

    # Keep the caller's case so lower-case ranges expand to lower-case letters.
    if (all(ends == tolower(ends))) tolower(cols) else cols
  }

  unlist(lapply(strsplit(vec, ":"), expand_one))
}

    # Read a set of (possibly non-contiguous) columns from one Excel sheet.
    #
    # file:    path to the workbook, passed to read_excel().
    # columns: character vector of column letters and/or "a:c"-style ranges;
    #          expanded to single columns by expand_sequence().
    # sheet:   sheet to read, passed to read_excel().
    # skip:    passed to read_excel().
    #          NOTE(review): readxl documents that `range` takes precedence
    #          over `skip`, so this argument may have no effect -- confirm.
    #
    # Returns the per-column results joined side by side with cbind(), in the
    # order the columns were requested.
    better_read_excel <- function(file, columns, sheet, skip) {
      columnvec <- expand_sequence(columns)

      # Every column is read with its own single-column range. The first row
      # of each range supplies the column name; col_names = TRUE is spelled
      # out for all columns (it is also read_excel()'s documented default).
      column_tables <- lapply(columnvec, function(column) {
        read_excel(
          file,
          range = cell_cols(column),
          sheet = sheet,
          skip = skip,
          col_names = TRUE
        )
      })

      do.call(cbind, column_tables)
    }

better_read_excel(example_file, columns=c('a:c','h','i'), sheet=2, skip=1)

> better_read_excel(example_file, columns=c('a:c','h','i'), sheet=2, skip=1)
    mpg cyl  disp vs am                                                                                                               
1  21.0   6 160.0  0  1
2  21.0   6 160.0  0  1
3  22.8   4 108.0  1  1
4  21.4   6 258.0  1  0
5  18.7   8 360.0  0  0
0
Jan Marvin On

If I understand the question correctly, you want to read specific columns of a workbook. This can be achieved using openxlsx2 release 0.6.1 as follows:

library(openxlsx2)

# create example file
tmp <- temp_xlsx()
write_xlsx(list(head(iris), head(mtcars)), tmp)

# read from second sheet, start at row 2 and avoid column names
# column L is included even though it is not written
read_xlsx(tmp, sheet = 2, startRow = 2, colNames = FALSE, cols = c("A:C", "J:L"))
#>      A B   C J K  L
#> 2 21.0 6 160 4 4 NA
#> 3 21.0 6 160 4 4 NA
#> 4 22.8 4 108 4 1 NA
#> 5 21.4 6 258 3 1 NA
#> 6 18.7 8 360 3 2 NA
#> 7 18.1 6 225 3 1 NA