Reading and writing files and the concept of Working Directory

Silvie Cinková

2025-07-24

Working directory

  • folder from which R sees other files and folders


Interactive control of Working Directory location

Print path to your current Working Directory

 getwd() # prints the path of the current working directory; run it in the console or an R file
[1] "/lnet/aic/personal/cinkova/NPFL112_2025_ZS"

Set a different Working Directory

  • ~ your Home

  • .. one folder up

  • . current folder

#setwd("~/folder/subfolder/")

RStudio Projects

  • .Rproj file stores project configuration

  • When you open this project next time, RStudio tries to restore the workspace from last time.

Project List in RStudio

Working directory in Quarto code chunks

  • working directory = location of the qmd script file
# Inside a Quarto chunk this is the folder that contains the qmd file.
getwd()
[1] "/lnet/aic/personal/cinkova/SCRIPTS.NPFL112"

How to render your copy of this file

  • store part of file path in a variable

  • combine it with file names by using file.path()

# Root folder of the course project; "~" stands for your home folder.
project_path <- "~/NPFL112_2025_ZS" # Silvie's path.
# Replace it with a path that works on your machine.
# file.path() only builds the path string from its parts, joined with "/".
file.path(project_path, "DATA.NPFL112")
[1] "~/NPFL112_2025_ZS/DATA.NPFL112"
# Name of the subfolder of project_path that the chunks below read from
# and write to.
datasaving_folder <- "DATA.NPFL112" # change this
# You must create this folder yourself, in a place where you can write!

List files from DATA.NPFL112

# Names of the entries directly inside the DATA.NPFL112 subfolder.
list.files(file.path(project_path, "DATA.NPFL112"))
 [1] "billionaires_combined.tsv"                                                               
 [2] "DataGeographies-v2-by-Gapminder.xlsx"                                                    
 [3] "ddf--gapminder--systema_globalis_files_from_GitHubAPI.json"                              
 [4] "Founders_Network.csv"                                                                    
 [5] "gapminder_billionaires_ddf--entities--person.csv"                                        
 [6] "gapminder_countries.tsv"                                                                 
 [7] "gapminder_ddf--concepts.csv"                                                             
 [8] "gapminder_ddf--datapoints--daily_income--by--person--time.csv"                           
 [9] "gapminder_firstmarriage.csv"                                                             
[10] "gapminder_geonames.xlsx"                                                                 
[11] "gapminder_hourly_labour_cost_constant_2017_                       usd--by--geo--time.csv"
[12] "gapminder_hourly_labour_cost_constant_2017_usd--by--geo--time.csv"                       
[13] "gapminder_laborcost_cze_deu.csv"                                                         
[14] "gapminder_metadata_filenames.tsv"                                                        
[15] "GitHubURLs_Gapminder_SystemaGlobalis.tsv"                                                
[16] "jrc_1.tsv"                                                                               
[17] "jrc_2.tsv"                                                                               
[18] "jrc_3.tsv"                                                                               
[19] "jrc_4.tsv"                                                                               
[20] "jrc_latin_4.tsv"                                                                         
[21] "jrc_latin.tsv"                                                                           
[22] "JRC_Names"                                                                               
[23] "JRC_Names.tsv"                                                                           
[24] "migrants.tsv"                                                                            
[25] "titanic.csv"                                                                             

Open a csv file from DATA.NPFL112

# Load the first-marriage table from the data folder into a data frame,
# then show its first six rows.
marriage <- read.csv(
  file = file.path(
    project_path, datasaving_folder, "gapminder_firstmarriage.csv"
  )
)
head(marriage)
      country    X2005
1 Afghanistan 17.83968
2     Albania 23.32651
3     Algeria 29.60000
4      Angola       NA
5   Argentina 23.26396
6     Armenia 22.98603

Individual files from (GitHub) URL

GitHub default view

Switched to raw file URL

Download a file (e.g. from GitHub)

library(glue) # glue() lets a long string continue on the next line with \\
# Raw-file URL of the hourly-labour-cost table in the Gapminder GitHub repo,
# split across lines to stay readable; glue() joins the pieces again.
URL <- glue("https://raw.githubusercontent.com/open-numbers/ddf--gapminder--\\
systema_globalis/refs/heads/master/countries-etc-datapoints/\\
ddf--datapoints--hourly_labour_cost_constant_2017_usd--by--geo--\\
time.csv")
# The file name is short enough for one line, so it goes into file.path() as
# a plain string. Running glue() over the whole path would also interpret
# any { } in the user-supplied project_path as R code.
my_destination <- file.path(
  project_path, datasaving_folder,
  "gapminder_hourly_labour_cost_constant_2017_usd--by--geo--time.csv"
)
# download.file() does not create folders: stop early with a readable
# message instead of a "cannot open destfile" error.
if (!dir.exists(dirname(my_destination))) {
  stop(
    "Folder does not exist, create it first: ", dirname(my_destination),
    call. = FALSE
  )
}
download.file(
  url = URL,
  destfile = my_destination
)

https://www.gapminder.org/

Gapminder

Introducing Gapminder

Read files with base R or tidyverse

  • File too big to open in a text editor?

  • Inspect it by reading it as text lines (here: the first 3 lines)

# Path to the labour-cost CSV in the data folder. The file name is a plain
# string, so glue() never has to parse the user-supplied project path
# (it would treat any { } in it as R code).
mypath <- file.path(
  project_path, datasaving_folder,
  "gapminder_hourly_labour_cost_constant_2017_usd--by--geo--time.csv"
)
library(readr)
# Peek at the first three lines as raw text, without parsing the table.
read_lines(
  file = mypath,
  n_max = 3
)
[1] "geo,time,hourly_labour_cost_constant_2017_usd"
[2] "arg,2011,0.92"                                
[3] "arg,2012,1.04"                                
# Base R counterpart of readr::read_lines(): the first three lines as text.
readLines(
  con = mypath,
  n = 3)
[1] "geo,time,hourly_labour_cost_constant_2017_usd"
[2] "arg,2011,0.92"                                
[3] "arg,2012,1.04"                                

Reading a table with readr

  • read_csv, read_csv2, read_tsv: tailored to the common separators: comma (,), semicolon (;) and tab

  • read_delim : you name the separator (aka delimiter), more arguments

# Parse the comma-separated file into a tibble.
read_csv(file = mypath, 
         n_max = 3) # just the first 3 data rows
# A tibble: 3 × 3
  geo    time hourly_labour_cost_constant_2017_usd
  <chr> <dbl>                                <dbl>
1 arg    2011                                 0.92
2 arg    2012                                 1.04
3 arm    2011                                 4.23

Other arguments in read_csv

# Supplying col_names makes read_csv() treat the file as having no header
# row: the file's own header line comes back as the first data row and all
# columns are read as character.
# NOTE(review): add skip = 1 if the original header line should be dropped.
read_csv(file = mypath, 
         col_names = c("country", "year", "USD_hour_2017"), 
         n_max = 3)
# A tibble: 3 × 3
  country year  USD_hour_2017                       
  <chr>   <chr> <chr>                               
1 geo     time  hourly_labour_cost_constant_2017_usd
2 arg     2011  0.92                                
3 arg     2012  1.04                                

Read directly from URL

# The same raw-file URL, split over several lines; glue() joins the pieces
# at each trailing backslash into one string.
URL2 <- glue("https://raw.githubusercontent.com/open-numbers/ddf--gapminder--\\
systema_globalis/refs/heads/master/countries-etc-datapoints/\\
ddf--datapoints--hourly_labour_cost_constant_2017_usd--by--geo--\\
time.csv")
# read_csv() takes a URL in place of a local path; nothing is saved to disk.
read_csv(file = URL2, 
         n_max = 3)
# A tibble: 3 × 3
  geo    time hourly_labour_cost_constant_2017_usd
  <chr> <dbl>                                <dbl>
1 arg    2011                                 0.92
2 arg    2012                                 1.04
3 arm    2011                                 4.23

Download an Excel file

# Export link that delivers the Google spreadsheet as an .xlsx workbook.
URL3 <- glue("https://docs.google.com/spreadsheets/d/1qHalit8s\\
XC0R8oVXibc2wa2gY7bkwGzOybEMTWp-08o/export?format=xlsx")
# Save the workbook into the data folder as gapminder_geonames.xlsx.
download.file(url = URL3, 
              destfile = 
                file.path(project_path, datasaving_folder, "gapminder_geonames.xlsx"), 
              mode = "wb") # binary mode: a text-mode download can corrupt .xlsx on Windows

Read Excel

  • readxl reads only local file paths, not URLs.
library(readxl)
# Read the downloaded workbook (first sheet by default), keeping only the
# first three rows.
read_xlsx(
  path = file.path(project_path, datasaving_folder, "gapminder_geonames.xlsx"),
  n_max = 3
)
# A tibble: 3 × 7
  Data: Geographies — v…¹ ...2  ...3  Free data from www.g…² ...5  id    version
  <chr>                   <chr> <lgl> <chr>                  <lgl> <chr> <chr>  
1 Updated: July 1, 2021   <NA>  NA    CC BY 4.0 LICENCE      NA    geo   v2     
2 Concept:                Geog… NA    Are you seeing this o… NA    <NA>  <NA>   
3 Unit:                   <NA>  NA    gapm.io/datageo        NA    <NA>  <NA>   
# ℹ abbreviated names: ¹​`Data: Geographies — v2`,
#   ²​`Free data from www.gapminder.org`
# readxl::read_xlsx(path = file.path(project_path, datasaving_folder, "DataGeographies-v2-by-Gapminder.xlsx")) # the same file

Excel sheets listed

  • read_xlsx reads the first sheet by default
  • List the worksheets of the workbook:
# Names of all worksheets in the workbook.
readxl::excel_sheets(
  path = file.path(project_path, datasaving_folder, "gapminder_geonames.xlsx")
)
[1] "ABOUT"                 "list-of-countries-etc" "list-of-regions"      
[4] "list-of-income-levels" "global"                "geo-names"            
# Read the first three rows of the second worksheet; a sheet can be chosen
# by position or by name.
readxl::read_xlsx(
  path = file.path(project_path, datasaving_folder, "gapminder_geonames.xlsx"),
  sheet = 2, # or sheet = "list-of-countries-etc"
  n_max = 3
)
# A tibble: 3 × 13
  geo   name    four_regions eight_regions six_regions members_oecd_g77 Latitude
  <chr> <chr>   <chr>        <chr>         <chr>       <chr>               <dbl>
1 aus   Austra… asia         east_asia_pa… east_asia_… oecd                -25  
2 brn   Brunei  asia         east_asia_pa… east_asia_… g77                   4.5
3 khm   Cambod… asia         east_asia_pa… east_asia_… g77                  13  
# ℹ 6 more variables: Longitude <dbl>, `UN member since` <dttm>,
#   `World bank region` <chr>, `World bank, 4 income groups 2017` <chr>,
#   `World bank, 3 income groups 2017` <chr>, UNHCR <chr>

Google sheets

  • inspect it manually and pick one worksheet
library(googlesheets4)
# Browser URL of the Google spreadsheet; glue() joins the line that was
# split at the trailing backslash.
shURL <- glue("https://docs.google.com/spreadsheets/d/1qHalit8sXC\\
              0R8oVXibc2wa2gY7bkwGzOybEMTWp-08o/edit?gid=425865495#gid=425865495")
gs4_deauth() # skip logging in at GoogleDrive
# Read the first three rows of the second worksheet straight from the web.
googlesheets4::read_sheet(shURL, sheet = 2, 
                          n_max = 3)
# A tibble: 3 × 13
  geo   name    four_regions eight_regions six_regions members_oecd_g77 Latitude
  <chr> <chr>   <chr>        <chr>         <chr>       <chr>               <dbl>
1 aus   Austra… asia         east_asia_pa… east_asia_… oecd                -25  
2 brn   Brunei  asia         east_asia_pa… east_asia_… g77                   4.5
3 khm   Cambod… asia         east_asia_pa… east_asia_… g77                  13  
# ℹ 6 more variables: Longitude <dbl>, `UN member since` <dttm>,
#   `World bank region` <chr>, `World bank, 4 income groups 2017` <chr>,
#   `World bank, 3 income groups 2017` <chr>, UNHCR <chr>

Saving tabular files

# Read the second worksheet of the downloaded workbook into a tibble.
# NOTE(review): n_max = 3 keeps only the first three rows, so the file written
# below holds just those rows; drop n_max to save the whole sheet.
gapminder_countries <- readxl::read_xlsx(
  path = file.path(project_path, datasaving_folder, "gapminder_geonames.xlsx"),
  sheet = 2,
  n_max = 3
)
# Write the tibble to the data folder as a tab-separated file.
readr::write_tsv(
  x = gapminder_countries,
  file = file.path(project_path, datasaving_folder, "gapminder_countries.tsv")
)

Create a folder

  • create a folder to save your exercise scripts
# Create the HOMEWORK folder in the working directory unless it is already
# there, so that re-running this chunk does not raise an "already exists"
# warning.
if (!dir.exists("HOMEWORK")) {
  # mode "750" requests owner rwx, group r-x, others nothing on Unix-alikes;
  # it is ignored on Windows.
  dir.create(path = "HOMEWORK", mode = "750", recursive = TRUE)
}

List files into a vector

# Full paths of the files directly inside project_path whose names contain
# "qmd"; subfolders are neither searched nor listed.
list.files(
  path = project_path,
  pattern = "qmd", # regular expression, matched anywhere in the file name
  full.names = TRUE,
  recursive = FALSE,
  include.dirs = FALSE
)
character(0)
  • list files in a folder

  • just those with qmd in their names

  • recursive: search in subfolders?

Set up a file path

  • creates a character vector, not a file!
# Only builds the path string; nothing is created on disk.
file.path(project_path, "a_new_file.XXX")
[1] "~/NPFL112_2025_ZS/a_new_file.XXX"