--- title: "hysplit" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{hysplit} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup} library(rtorf) library(data.table) library(rslurm) ``` Assuming you have a receptor file as shown in the previous articles/vignettes: 1. Get sure you know your working directory. You can use `getwd()` to check it. You can set it as ```{r, eval = F} setwd("/path/to/myfootprints/") ``` Read receptor ```{r, eval = F} x <- fread("receptor.csv") ``` Rename variables and add the expected footprint id name ```{r, eval = F} x$altitude <- x$altitude_final x[, id := obs_footname(year = year, month = month, day = day, hour = hour, minute = minute, lat = latitude, lon = longitude, alt = altitude)] ``` for instance ```{r, eval = F} x$id[1] [1] "2020x12x30x09x54x03.2133Nx030.9131Ex00497" ``` Which include - year (for digits) - x - month (two digits) - x - day (two digits) - x - hour (two digits) - x - minute (two digits) - x - latitude (floating point 6 digits, 4 decimals) - Capital N or S - longitude (floating point 7 digits, 4 decimals) - Capital for E or W - x - altitude (five digits) Now we need to create the directories where we will be running hysplit ```{r, eval = F} x[, dir := paste0("/Path/To/Footprints/", year, "/", sprintf("%02d", month), "/tmp_", id)] ``` For instance ```{r, eval = F} x$dir[1] [1] "/Path/To/Footprints/2020/12/tmp_2020x12x30x09x54x03.2133Nx030.9131Ex00497" ``` The structure is: - path - year (four digits) - month (2 digits) - tmp_ + id Now we can create the directories recursively. If you run this command again you will receive warning that the directory exists and nothing will be overwritten ```{r, eval = F} invisible(sapply(x$dir, dir.create, recursive = T)) ``` We can add an index for each row ```{r, eval = F} x[, idn := 1:.N] ``` Now we can create the SETUP in each directory ```{r, eval = F} for(i in seq_along(x$dir)) { obs_hysplit_setup(setup = paste0(x$dir[i], "/SETUP.CFG")) } ``` Now we can create the ASCDATA in each directory ```{r, eval = F} for(i in seq_along(x$dir)) { obs_hysplit_ascdata(ascdata = paste0(x$dir[i], "/ASCDATA.CFG")) } ``` And now the same with the CONTROL files: ```{r, eval = F} for(i in seq_along(x$dir)) { # print(paste0(x$dir[i], "/CONTROL")) obs_hysplit_control(df = x[i], top_model_domain = 10000, met = "gfs0p25", metpath = "/Path/To/metfiles/gfs0p25/", emissions_rate = 0, hour_emissions = 0.01, center_conc_grids = c(5, 45), grid_spacing = c(1, 1), grid_span = c(69, 69), height_vert_levels = 50, sampling_interval_type = c(0, 1, 0), control = paste0(x$dir[i], "/CONTROL")) } ``` We can order the data.table by id, just in case ```{r, eval = F} setorderv(x, "idn") ``` We have some internal R scripts to transform hysplit outputs into a NetCDF file. We are currently porting the functions into an R package, well documented, available and easy to use. So we need to check if the file exists: ```{r, eval = F} x[, nc := paste0(dir, "/hysplit", id, ".nc")] x[, nc_exists := file.exists(paste0(dir, "/hysplit", id, ".nc"))] ``` for instance: ```{r, eval = F} x$nc[1] [1] "/Path/To/Footprints/2020/12/tmp_2020x12x30x09x54x03.2133Nx030.9131Ex00497/hysplit2020x12x30x09x54x03.2133Nx030.9131Ex00497.nc" ``` We can check the number of NetCDF generated: ```{r, eval = F} x[,.N, by = nc_exists] x <- x[nc_exists == FALSE] if(nrow(x) == 0) stop("ALL FOOTPRINTS GENERATED") ``` Now we can write the function to run parallel hysplit: ```{r, eval = F} fx <- function(dir, idn){ torf <- " *@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@& %@@@@@. %@@@@@ @@@@ @ ,@@@ @@@ *@@@@ %@@& @@@ @@@@@@.@@@@@@@@@@@# @@@ @@@ @@@@%@@* @@@@@@@@@ /@@@@@( ,@@, @@@ @@@@@ @@@@@@@@@@@# ,@( * ,@@, @@@ @@@@@@% (@@@@@@@@@ ,@@, @@@ &@@@@@@@@ @@@@@@@@@, @@* (@@@@@@@# ,@@, @@@ @@@@@@@@@@ @@@@@@@@@@@( ,@@, @@@ @@@@@@@@@ @@@@@@@@@@@@@@@& ,@@, @@@ @. .@@@@@@, @@@@@@@. ,@@, @@@ @@@ @@@@@ %@@ @@@ ,@@, @@@ @@@@. & / ,@@, @@@ ,@@, @@@ ,@@, @@@ @@@@@@@@@* @@@@@@@@@@ @@@@@@@@@@ @@@@@@@@@@ ,@@, @@@ @@@ @@@ @@@@ @@@ @@@ @@@ ,@@, @@@ @@@ #@@@ @@@ @@@@@@@@@. @@@@@@@. ,@@, @@@ @@@ @@@@* @@@@ @@@ @@@@ @@@ @@@ @@@# @@@ (@@@@@@@ @@@ @@@. @@@ @@@ @@@@ .@@@@ %@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ " setwd(dir) system("/Path/To/hysplit/exec/hycs_std") sink("log.txt") cat("") cat(torf) cat("\n\n") utils::sessionInfo() cat("\n\n") cat("Receptor:\n") cat("receptor.csv\n\n") cat("logs:\n") cat("_rslurm_rtorf_job/slurm_*.out\n\n") sink() rdir="/Path/To/rscripts/" system( paste0('Rscript ', rdir, '/hysplit_netcdf.r ', '--rsource=', rdir, ' --gridspecs=', rdir, '/gridspecs_uganda.txt', ' --plotfoot', ' --footnearfield', ' --thinpart', ' --outpath=', dir, '/')) } ``` and to submit parallel jobs we use: ```{r, eval = F} sjob <- slurm_apply(fx, x[, c("dir", "idn")], jobname = 'rtorf_job', nodes = 8, cpus_per_node = 4, submit = T) ``` You can use `submit = FALSE` and check and edit the script ```{r, eval = F} file.edit("_rslurm_rtorf_job/submit.sh") #!/bin/bash # #SBATCH --array=0-7 #SBATCH --cpus-per-task=4 #SBATCH --job-name=rtorf_job #SBATCH --output=slurm_%a.out /Path/To/R/bin/Rscript --vanilla slurm_run.R ```