---
title: "hysplit"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{hysplit}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
```{r, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
```
```{r setup}
library(rtorf)
library(data.table)
library(rslurm)
```
Assuming you have a receptor file as shown in the previous articles/vignettes:
1. Get sure you know your working directory. You can use `getwd()` to check it. You can set it as
```{r, eval = F}
setwd("/path/to/myfootprints/")
```
Read receptor
```{r, eval = F}
x <- fread("receptor.csv")
```
Rename variables and add the expected footprint id name
```{r, eval = F}
x$altitude <- x$altitude_final
x[, id := obs_footname(year = year,
month = month,
day = day,
hour = hour,
minute = minute,
lat = latitude,
lon = longitude,
alt = altitude)]
```
for instance
```{r, eval = F}
x$id[1]
[1] "2020x12x30x09x54x03.2133Nx030.9131Ex00497"
```
Which include
- year (for digits)
- x
- month (two digits)
- x
- day (two digits)
- x
- hour (two digits)
- x
- minute (two digits)
- x
- latitude (floating point 6 digits, 4 decimals)
- Capital N or S
- longitude (floating point 7 digits, 4 decimals)
- Capital for E or W
- x
- altitude (five digits)
Now we need to create the directories where we will be running
hysplit
```{r, eval = F}
x[, dir := paste0("/Path/To/Footprints/",
year,
"/",
sprintf("%02d",
month),
"/tmp_",
id)]
```
For instance
```{r, eval = F}
x$dir[1]
[1] "/Path/To/Footprints/2020/12/tmp_2020x12x30x09x54x03.2133Nx030.9131Ex00497"
```
The structure is:
- path
- year (four digits)
- month (2 digits)
- tmp_ + id
Now we can create the directories recursively. If you run this command
again you will receive warning that the directory exists and nothing will be
overwritten
```{r, eval = F}
invisible(sapply(x$dir, dir.create, recursive = T))
```
We can add an index for each row
```{r, eval = F}
x[, idn := 1:.N]
```
Now we can create the SETUP in each directory
```{r, eval = F}
for(i in seq_along(x$dir)) {
obs_hysplit_setup(setup = paste0(x$dir[i], "/SETUP.CFG"))
}
```
Now we can create the ASCDATA in each directory
```{r, eval = F}
for(i in seq_along(x$dir)) {
obs_hysplit_ascdata(ascdata = paste0(x$dir[i], "/ASCDATA.CFG"))
}
```
And now the same with the CONTROL files:
```{r, eval = F}
for(i in seq_along(x$dir)) {
# print(paste0(x$dir[i], "/CONTROL"))
obs_hysplit_control(df = x[i],
top_model_domain = 10000,
met = "gfs0p25",
metpath = "/Path/To/metfiles/gfs0p25/",
emissions_rate = 0,
hour_emissions = 0.01,
center_conc_grids = c(5, 45),
grid_spacing = c(1, 1),
grid_span = c(69, 69),
height_vert_levels = 50,
sampling_interval_type = c(0, 1, 0),
control = paste0(x$dir[i], "/CONTROL"))
}
```
We can order the data.table by id, just in case
```{r, eval = F}
setorderv(x, "idn")
```
We have some internal R scripts to transform hysplit outputs into a NetCDF file.
We are currently porting the functions into an R package, well documented,
available and easy to use. So we need to check if the file exists:
```{r, eval = F}
x[, nc := paste0(dir, "/hysplit", id, ".nc")]
x[, nc_exists := file.exists(paste0(dir, "/hysplit", id, ".nc"))]
```
for instance:
```{r, eval = F}
x$nc[1]
[1] "/Path/To/Footprints/2020/12/tmp_2020x12x30x09x54x03.2133Nx030.9131Ex00497/hysplit2020x12x30x09x54x03.2133Nx030.9131Ex00497.nc"
```
We can check the number of NetCDF generated:
```{r, eval = F}
x[,.N, by = nc_exists]
x <- x[nc_exists == FALSE]
if(nrow(x) == 0) stop("ALL FOOTPRINTS GENERATED")
```
Now we can write the function to run parallel hysplit:
```{r, eval = F}
fx <- function(dir, idn){
torf <- "
*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@&
%@@@@@. %@@@@@
@@@@ @ ,@@@
@@@ *@@@@ %@@&
@@@ @@@@@@.@@@@@@@@@@@# @@@
@@@ @@@@%@@* @@@@@@@@@ /@@@@@( ,@@,
@@@ @@@@@ @@@@@@@@@@@# ,@( * ,@@,
@@@ @@@@@@% (@@@@@@@@@ ,@@,
@@@ &@@@@@@@@ @@@@@@@@@, @@* (@@@@@@@# ,@@,
@@@ @@@@@@@@@@ @@@@@@@@@@@( ,@@,
@@@ @@@@@@@@@ @@@@@@@@@@@@@@@& ,@@,
@@@ @. .@@@@@@, @@@@@@@. ,@@,
@@@ @@@ @@@@@ %@@ @@@ ,@@,
@@@ @@@@. & / ,@@,
@@@ ,@@,
@@@ ,@@,
@@@ @@@@@@@@@* @@@@@@@@@@ @@@@@@@@@@ @@@@@@@@@@ ,@@,
@@@ @@@ @@@ @@@@ @@@ @@@ @@@ ,@@,
@@@ @@@ #@@@ @@@ @@@@@@@@@. @@@@@@@. ,@@,
@@@ @@@ @@@@* @@@@ @@@ @@@@ @@@ @@@
@@@# @@@ (@@@@@@@ @@@ @@@. @@@ @@@
@@@@ .@@@@
%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
"
setwd(dir)
system("/Path/To/hysplit/exec/hycs_std")
sink("log.txt")
cat("")
cat(torf)
cat("\n\n")
utils::sessionInfo()
cat("\n\n")
cat("Receptor:\n")
cat("receptor.csv\n\n")
cat("logs:\n")
cat("_rslurm_rtorf_job/slurm_*.out\n\n")
sink()
rdir="/Path/To/rscripts/"
system(
paste0('Rscript ',
rdir,
'/hysplit_netcdf.r ',
'--rsource=',
rdir,
' --gridspecs=',
rdir,
'/gridspecs_uganda.txt',
' --plotfoot',
' --footnearfield',
' --thinpart',
' --outpath=',
dir,
'/'))
}
```
and to submit parallel jobs we use:
```{r, eval = F}
sjob <- slurm_apply(fx,
x[, c("dir", "idn")],
jobname = 'rtorf_job',
nodes = 8,
cpus_per_node = 4,
submit = T)
```
You can use `submit = FALSE` and check and edit the script
```{r, eval = F}
file.edit("_rslurm_rtorf_job/submit.sh")
#!/bin/bash
#
#SBATCH --array=0-7
#SBATCH --cpus-per-task=4
#SBATCH --job-name=rtorf_job
#SBATCH --output=slurm_%a.out
/Path/To/R/bin/Rscript --vanilla slurm_run.R
```