Title: | Dynamic Function-Oriented 'Make'-Like Declarative Pipelines |
---|---|
Description: | Pipeline tools coordinate the pieces of computationally demanding analysis projects. The 'targets' package is a 'Make'-like pipeline tool for statistics and data science in R. The package skips costly runtime for tasks that are already up to date, orchestrates the necessary computation with implicit parallel computing, and abstracts files as R objects. If all the current output matches the current upstream code and data, then the whole pipeline is up to date, and the results are more trustworthy than otherwise. The methodology in this package borrows from GNU 'Make' (2015, ISBN:978-9881443519) and 'drake' (2018, <doi:10.21105/joss.00550>). |
Authors: | William Michael Landau [aut, cre] , Matthew T. Warkentin [ctb], Mark Edmondson [ctb] , Samantha Oliver [rev] , Tristan Mahr [rev] , Eli Lilly and Company [cph, fnd] |
Maintainer: | William Michael Landau <[email protected]> |
License: | MIT + file LICENSE |
Version: | 1.8.0.9009 |
Built: | 2024-11-08 18:21:30 UTC |
Source: | https://github.com/ropensci/targets |
A pipeline toolkit for statistics and data science in R,
the targets
package brings function-oriented programming to
Make-like declarative pipelines. targets
orchestrates a pipeline
as a graph of dependencies,
skips steps that are already up to date, runs the necessary
computations with optional parallel workers, abstracts files as
R objects, and provides tangible evidence that the results are
reproducible given the underlying code and data.
The methodology in this package
borrows from GNU Make (2015, ISBN:978-9881443519)
and drake
(2018, doi:10.21105/joss.00550).
Other help:
tar_reprex()
,
use_targets()
,
use_targets_rmd()
Return TRUE
if called in a target or _targets.R
and
the pipeline is running.
tar_active()
tar_active()
Logical of length 1, TRUE
if called in a target or _targets.R
and the pipeline is running (FALSE
otherwise).
Other utilities:
tar_backoff()
,
tar_call()
,
tar_cancel()
,
tar_definition()
,
tar_described_as()
,
tar_envir()
,
tar_format_get()
,
tar_group()
,
tar_name()
,
tar_path()
,
tar_path_script()
,
tar_path_script_support()
,
tar_path_store()
,
tar_path_target()
,
tar_source()
,
tar_store()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_active() # FALSE tar_script({ library(targets) library(tarchetypes) message("Pipeline running? ", tar_active()) tar_target(x, tar_active()) }) tar_manifest() # prints "Pipeline running? FALSE" tar_make() # prints "pipeline running? TRUE" tar_read(x) # TRUE }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_active() # FALSE tar_script({ library(targets) library(tarchetypes) message("Pipeline running? ", tar_active()) tar_target(x, tar_active()) }) tar_manifest() # prints "Pipeline running? FALSE" tar_make() # prints "pipeline running? TRUE" tar_read(x) # TRUE }) }
These functions assert the correctness of user inputs
and generate custom error conditions as needed. Useful
for writing packages built on top of targets
.
tar_assert_chr(x, msg = NULL) tar_assert_dbl(x, msg = NULL) tar_assert_df(x, msg = NULL) tar_assert_equal_lengths(x, msg = NULL) tar_assert_envir(x, msg = NULL) tar_assert_expr(x, msg = NULL) tar_assert_flag(x, choices, msg = NULL) tar_assert_file(x) tar_assert_finite(x, msg = NULL) tar_assert_function(x, msg = NULL) tar_assert_function_arguments(x, args, msg = NULL) tar_assert_ge(x, threshold, msg = NULL) tar_assert_identical(x, y, msg = NULL) tar_assert_in(x, choices, msg = NULL) tar_assert_not_dirs(x, msg = NULL) tar_assert_not_dir(x, msg = NULL) tar_assert_not_in(x, choices, msg = NULL) tar_assert_inherits(x, class, msg = NULL) tar_assert_int(x, msg = NULL) tar_assert_internet(msg = NULL) tar_assert_lang(x, msg = NULL) tar_assert_le(x, threshold, msg = NULL) tar_assert_list(x, msg = NULL) tar_assert_lgl(x, msg = NULL) tar_assert_name(x) tar_assert_named(x, msg = NULL) tar_assert_names(x, msg = NULL) tar_assert_nonempty(x, msg = NULL) tar_assert_null(x, msg = NULL) tar_assert_not_expr(x, msg = NULL) tar_assert_nzchar(x, msg = NULL) tar_assert_package(package, msg = NULL) tar_assert_path(path, msg = NULL) tar_assert_match(x, pattern, msg = NULL) tar_assert_nonmissing(x, msg = NULL) tar_assert_positive(x, msg = NULL) tar_assert_scalar(x, msg = NULL) tar_assert_store(store) tar_assert_target(x, msg = NULL) tar_assert_target_list(x) tar_assert_true(x, msg = NULL) tar_assert_unique(x, msg = NULL) tar_assert_unique_targets(x)
tar_assert_chr(x, msg = NULL) tar_assert_dbl(x, msg = NULL) tar_assert_df(x, msg = NULL) tar_assert_equal_lengths(x, msg = NULL) tar_assert_envir(x, msg = NULL) tar_assert_expr(x, msg = NULL) tar_assert_flag(x, choices, msg = NULL) tar_assert_file(x) tar_assert_finite(x, msg = NULL) tar_assert_function(x, msg = NULL) tar_assert_function_arguments(x, args, msg = NULL) tar_assert_ge(x, threshold, msg = NULL) tar_assert_identical(x, y, msg = NULL) tar_assert_in(x, choices, msg = NULL) tar_assert_not_dirs(x, msg = NULL) tar_assert_not_dir(x, msg = NULL) tar_assert_not_in(x, choices, msg = NULL) tar_assert_inherits(x, class, msg = NULL) tar_assert_int(x, msg = NULL) tar_assert_internet(msg = NULL) tar_assert_lang(x, msg = NULL) tar_assert_le(x, threshold, msg = NULL) tar_assert_list(x, msg = NULL) tar_assert_lgl(x, msg = NULL) tar_assert_name(x) tar_assert_named(x, msg = NULL) tar_assert_names(x, msg = NULL) tar_assert_nonempty(x, msg = NULL) tar_assert_null(x, msg = NULL) tar_assert_not_expr(x, msg = NULL) tar_assert_nzchar(x, msg = NULL) tar_assert_package(package, msg = NULL) tar_assert_path(path, msg = NULL) tar_assert_match(x, pattern, msg = NULL) tar_assert_nonmissing(x, msg = NULL) tar_assert_positive(x, msg = NULL) tar_assert_scalar(x, msg = NULL) tar_assert_store(store) tar_assert_target(x, msg = NULL) tar_assert_target_list(x) tar_assert_true(x, msg = NULL) tar_assert_unique(x, msg = NULL) tar_assert_unique_targets(x)
x |
R object, input to be validated. The kind of object depends on the specific assertion function called. |
msg |
Character of length 1, a message to be printed to the console
if |
choices |
Character vector of choices of |
args |
Character vector of expected function argument names. Order matters. |
threshold |
Numeric of length 1, lower/upper bound for
assertions like |
y |
R object, value to compare against |
class |
Character vector of expected class names. |
package |
Character of length 1, name of an R package. |
path |
Character, file path. |
pattern |
Character of length 1, a |
store |
Character of length 1, path to the data store of the pipeline. |
Other utilities to extend targets:
tar_condition
,
tar_language
,
tar_test()
tar_assert_chr("123") try(tar_assert_chr(123))
tar_assert_chr("123") try(tar_assert_chr(123))
Superseded: configure exponential backoff while polling for tasks during the pipeline.
tar_backoff(min = 0.001, max = 0.1, rate = 1.5)
tar_backoff(min = 0.001, max = 0.1, rate = 1.5)
min |
Positive numeric of length 1,
minimum polling interval in seconds.
Must be at least |
max |
Positive numeric of length 1,
maximum polling interval in seconds.
Must be at least |
rate |
Positive numeric of length 1, greater than or equal to 1.
Multiplicative rate parameter that allows the exponential backoff
minimum polling interval to increase from |
This function is superseded and is now only relevant to other
superseded functions tar_make_clustermq()
and tar_make_future()
.
tar_make()
uses crew
in an efficient non-polling way, making
exponential backoff unnecessary.
In high-performance computing it can be expensive to repeatedly poll the
priority queue if no targets are ready to process. The number of seconds
between polls is runif(1, min, min(max, min * rate ^ index))
,
where index
is the number of consecutive polls so far that found
no targets ready to skip or run, and min
, max
, and rate
are arguments to tar_backoff()
.
(If no target is ready, index
goes up by 1. If a target is ready,
index
resets to 0. For more information on exponential
backoff, visit https://en.wikipedia.org/wiki/Exponential_backoff).
Raising min
or max
is kinder to the CPU etc. but may incur delays
in some instances.
Other utilities:
tar_active()
,
tar_call()
,
tar_cancel()
,
tar_definition()
,
tar_described_as()
,
tar_envir()
,
tar_format_get()
,
tar_group()
,
tar_name()
,
tar_path()
,
tar_path_script()
,
tar_path_script_support()
,
tar_path_store()
,
tar_path_target()
,
tar_source()
,
tar_store()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_option_set(backoff = tar_backoff(min = 0.001, max = 0.1, rate = 1.5)) }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_option_set(backoff = tar_backoff(min = 0.001, max = 0.1, rate = 1.5)) }) }
Get the integer indexes of individual branch names within their corresponding dynamic branching targets.
tar_branch_index(names, store = targets::tar_config_get("store"))
tar_branch_index(names, store = targets::tar_config_get("store"))
names |
Character vector of branch names. |
store |
Character of length 1, path to the
|
A named integer vector of branch indexes.
Other branching:
tar_branch_names()
,
tar_branches()
,
tar_pattern()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(x, seq_len(4)), tar_target(y, 2 * x, pattern = map(x)), tar_target(z, y, pattern = map(y)) ) }, ask = FALSE) tar_make() names <- c( tar_meta(y, children)$children[[1]][c(2, 3)], tar_meta(z, children)$children[[1]][2] ) names tar_branch_index(names) # c(2, 3, 2) }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(x, seq_len(4)), tar_target(y, 2 * x, pattern = map(x)), tar_target(z, y, pattern = map(y)) ) }, ask = FALSE) tar_make() names <- c( tar_meta(y, children)$children[[1]][c(2, 3)], tar_meta(z, children)$children[[1]][2] ) names tar_branch_index(names) # c(2, 3, 2) }) }
Get the branch names of a dynamic branching target
using numeric indexes.
tar_branch_names()
expects an unevaluated symbol
for the name
argument, whereas tar_branch_names_raw()
expects a character string for name
.
tar_branch_names(name, index, store = targets::tar_config_get("store")) tar_branch_names_raw(name, index, store = targets::tar_config_get("store"))
tar_branch_names(name, index, store = targets::tar_config_get("store")) tar_branch_names_raw(name, index, store = targets::tar_config_get("store"))
name |
Name of the dynamic branching target.
|
index |
Integer vector of branch indexes. |
store |
Character string, directory path to the
|
A character vector of branch names.
Other branching:
tar_branch_index()
,
tar_branches()
,
tar_pattern()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(x, seq_len(4)), tar_target(y, 2 * x, pattern = map(x)), tar_target(z, y, pattern = map(y)) ) }, ask = FALSE) tar_make() tar_branch_names(z, c(2, 3)) }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(x, seq_len(4)), tar_target(y, 2 * x, pattern = map(x)), tar_target(z, y, pattern = map(y)) ) }, ask = FALSE) tar_make() tar_branch_names(z, c(2, 3)) }) }
Given a branching pattern, use available metadata to reconstruct branch names and the names of each branch's dependencies. The metadata of each target must already exist and be consistent with the metadata of the other targets involved.
tar_branches( name, pattern = NULL, script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
tar_branches( name, pattern = NULL, script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
name |
Symbol, name of the target. |
pattern |
Language to define branching for a target
(just like in |
script |
Character of length 1, path to the
target script file. Defaults to |
store |
Character of length 1, path to the
|
The results from this function can help you retroactively figure out correspondences between upstream branches and downstream branches. However, it does not always correctly predict what the names of the branches will be after the next run of the pipeline. Dynamic branching happens while the pipeline is running, so we cannot always know what the names of the branches will be in advance (or even how many there will be).
A tibble
with one row per branch and one column for each target
(including the branched-over targets and the target with the pattern.)
Other branching:
tar_branch_index()
,
tar_branch_names()
,
tar_pattern()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(x, seq_len(2)), tar_target(y, head(letters, 2)), tar_target(z, head(LETTERS, 2)), tar_target(dynamic, c(x, y, z), pattern = cross(z, map(x, y))) ) }, ask = FALSE) tar_make() tar_branches(dynamic) tar_branches(dynamic, pattern = cross(z, map(x, y))) }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(x, seq_len(2)), tar_target(y, head(letters, 2)), tar_target(z, head(LETTERS, 2)), tar_target(dynamic, c(x, y, z), pattern = cross(z, map(x, y))) ) }, ask = FALSE) tar_make() tar_branches(dynamic) tar_branches(dynamic, pattern = cross(z, map(x, y))) }) }
Identify the called targets
function. Get the name of the currently running targets
interface function. Returns NULL
if not invoked inside
a target or _targets.R
(i.e. if not directly invoked
by tar_make()
, tar_visnetwork()
, etc.).
tar_call()
tar_call()
Character of length 1, name of the currently running targets
interface function. For example, suppose you have a call to
tar_call()
inside a target or _targets.R
. Then if you run
tar_make()
, tar_call()
will return "tar_make"
.
Other utilities:
tar_active()
,
tar_backoff()
,
tar_cancel()
,
tar_definition()
,
tar_described_as()
,
tar_envir()
,
tar_format_get()
,
tar_group()
,
tar_name()
,
tar_path()
,
tar_path_script()
,
tar_path_script_support()
,
tar_path_store()
,
tar_path_target()
,
tar_source()
,
tar_store()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_call() # NULL tar_script({ library(targets) library(tarchetypes) message("called function: ", tar_call()) tar_target(x, tar_call()) }) tar_manifest() # prints "called function: tar_manifest" tar_make() # prints "called function: tar_make" tar_read(x) # "tar_make" }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_call() # NULL tar_script({ library(targets) library(tarchetypes) message("called function: ", tar_call()) tar_target(x, tar_call()) }) tar_manifest() # prints "called function: tar_manifest" tar_make() # prints "called function: tar_make" tar_read(x) # "tar_make" }) }
Cancel a target while its command is running if a condition is met.
tar_cancel(condition = TRUE)
tar_cancel(condition = TRUE)
condition |
Logical of length 1, whether to cancel the target. |
Must be invoked by the target itself. tar_cancel()
cannot interrupt a target from another process.
Other utilities:
tar_active()
,
tar_backoff()
,
tar_call()
,
tar_definition()
,
tar_described_as()
,
tar_envir()
,
tar_format_get()
,
tar_group()
,
tar_name()
,
tar_path()
,
tar_path_script()
,
tar_path_script_support()
,
tar_path_store()
,
tar_path_target()
,
tar_source()
,
tar_store()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script(tar_target(x, tar_cancel(1 > 0))) tar_make() # Should cancel target x. }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script(tar_target(x, tar_cancel(1 > 0))) tar_make() # Should cancel target x. }) }
List targets whose progress is "canceled"
.
tar_canceled(names = NULL, store = targets::tar_config_get("store"))
tar_canceled(names = NULL, store = targets::tar_config_get("store"))
names |
Optional, names of the targets. If supplied, the
output is restricted to the selected targets.
The object supplied to |
store |
Character of length 1, path to the
|
A character vector of canceled targets.
Other progress:
tar_completed()
,
tar_dispatched()
,
tar_errored()
,
tar_poll()
,
tar_progress()
,
tar_progress_branches()
,
tar_progress_summary()
,
tar_skipped()
,
tar_watch()
,
tar_watch_server()
,
tar_watch_ui()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(x, seq_len(2)), tar_target(y, 2 * x, pattern = map(x)) ) }, ask = FALSE) tar_make() tar_canceled() tar_canceled(starts_with("y_")) # see also any_of() }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(x, seq_len(2)), tar_target(y, 2 * x, pattern = map(x)) ) }, ask = FALSE) tar_make() tar_canceled() tar_canceled(starts_with("y_")) # see also any_of() }) }
List targets whose progress is "completed"
.
tar_completed(names = NULL, store = targets::tar_config_get("store"))
tar_completed(names = NULL, store = targets::tar_config_get("store"))
names |
Optional, names of the targets. If supplied, the
output is restricted to the selected targets.
The object supplied to |
store |
Character of length 1, path to the
|
A character vector of completed targets.
Other progress:
tar_canceled()
,
tar_dispatched()
,
tar_errored()
,
tar_poll()
,
tar_progress()
,
tar_progress_branches()
,
tar_progress_summary()
,
tar_skipped()
,
tar_watch()
,
tar_watch_server()
,
tar_watch_ui()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(x, seq_len(2)), tar_target(y, 2 * x, pattern = map(x)) ) }, ask = FALSE) tar_make() tar_completed() tar_completed(starts_with("y_")) # see also any_of() }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(x, seq_len(2)), tar_target(y, 2 * x, pattern = map(x)) ) }, ask = FALSE) tar_make() tar_completed() tar_completed(starts_with("y_")) # see also any_of() }) }
These functions throw custom
targets
-specific error conditions.
Useful for error handling in packages built on top of targets
.
tar_message_run(...) tar_throw_file(...) tar_throw_run(..., class = character(0)) tar_throw_validate(...) tar_warn_deprecate(...) tar_warn_run(...) tar_warn_validate(...) tar_message_validate(...) tar_print(...) tar_error(message, class) tar_warning(message, class) tar_message(message, class)
tar_message_run(...) tar_throw_file(...) tar_throw_run(..., class = character(0)) tar_throw_validate(...) tar_warn_deprecate(...) tar_warn_run(...) tar_warn_validate(...) tar_message_validate(...) tar_print(...) tar_error(message, class) tar_warning(message, class) tar_message(message, class)
... |
zero or more objects which can be coerced to character (and which are pasted together with no separator) or a single condition object. |
class |
Character vector of S3 classes of the message. |
message |
Character of length 1, text of the message. |
Other utilities to extend targets:
tar_assert
,
tar_language
,
tar_test()
try(tar_throw_validate("something is not valid"))
try(tar_throw_validate("something is not valid"))
Read the custom settings for the current project in the optional YAML configuration file.
tar_config_get( name, config = Sys.getenv("TAR_CONFIG", "_targets.yaml"), project = Sys.getenv("TAR_PROJECT", "main") )
tar_config_get( name, config = Sys.getenv("TAR_CONFIG", "_targets.yaml"), project = Sys.getenv("TAR_PROJECT", "main") )
name |
Character of length 1, name of the specific configuration setting to retrieve. |
config |
Character of length 1, file path of the YAML
configuration file with |
project |
Character of length 1, name of the current
|
The value of the configuration setting from
the YAML configuration file (default: _targets.yaml
)
or the default value if the setting is not available.
The data type of the return value depends on your choice
of name
.
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
For several key functions like tar_make()
, the
default values of arguments are controlled through
tar_config_get()
. tar_config_get()
retrieves data
from an optional YAML configuration file.
You can control the settings in the YAML
file programmatically with tar_config_set()
.
The default file path of this YAML file is _targets.yaml
, and you can
set another path globally using the TAR_CONFIG
environment variable. The YAML file can store configuration
settings for multiple projects, and you can globally
set the default project with the TAR_PROJECT
environment
variable.
The structure of the YAML file
follows rules similar to the config
R package, e.g.
projects can inherit settings from one another using the inherits
field.
Exceptions include:
There is no requirement to have a configuration named "default"
.
Other projects do not inherit from the default project automatically.
Not all fields need values because targets
already has defaults.
targets
does not actually invoke
the config
package. The implementation in targets
was written from scratch without viewing or copying any
part of the source code of config
.
Other configuration:
tar_config_projects()
,
tar_config_set()
,
tar_config_unset()
,
tar_config_yaml()
,
tar_envvars()
,
tar_option_get()
,
tar_option_reset()
,
tar_option_set()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script(list(tar_target(x, 1 + 1))) tar_config_get("store") # "_targets" store_path <- tempfile() tar_config_set(store = store_path) tar_config_get("store") # Shows a temp file. tar_make() # Writes to the custom data store identified in _targets.yaml. tar_read(x) # tar_read() knows about _targets.yaml too. file.exists("_targets") # FALSE file.exists(store_path) # TRUE }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script(list(tar_target(x, 1 + 1))) tar_config_get("store") # "_targets" store_path <- tempfile() tar_config_set(store = store_path) tar_config_get("store") # Shows a temp file. tar_make() # Writes to the custom data store identified in _targets.yaml. tar_read(x) # tar_read() knows about _targets.yaml too. file.exists("_targets") # FALSE file.exists(store_path) # TRUE }) }
List the names of projects defined in _targets.yaml
.
tar_config_projects(config = Sys.getenv("TAR_CONFIG", "_targets.yaml"))
tar_config_projects(config = Sys.getenv("TAR_CONFIG", "_targets.yaml"))
config |
Character of length 1, file path of the YAML
configuration file with |
Character vector of names of projects defined in _targets.yaml
.
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
For several key functions like tar_make()
, the
default values of arguments are controlled through
tar_config_get()
. tar_config_get()
retrieves data
from an optional YAML configuration file.
You can control the settings in the YAML
file programmatically with tar_config_set()
.
The default file path of this YAML file is _targets.yaml
, and you can
set another path globally using the TAR_CONFIG
environment variable. The YAML file can store configuration
settings for multiple projects, and you can globally
set the default project with the TAR_PROJECT
environment
variable.
The structure of the YAML file
follows rules similar to the config
R package, e.g.
projects can inherit settings from one another using the inherits
field.
Exceptions include:
There is no requirement to have a configuration named "default"
.
Other projects do not inherit from the default project automatically.
Not all fields need values because targets
already has defaults.
targets
does not actually invoke
the config
package. The implementation in targets
was written from scratch without viewing or copying any
part of the source code of config
.
Other configuration:
tar_config_get()
,
tar_config_set()
,
tar_config_unset()
,
tar_config_yaml()
,
tar_envvars()
,
tar_option_get()
,
tar_option_reset()
,
tar_option_set()
yaml <- tempfile() tar_config_set(store = "my_store_a", config = yaml, project = "project_a") tar_config_set(store = "my_store_b", config = yaml, project = "project_b") tar_config_projects(config = yaml)
yaml <- tempfile() tar_config_set(store = "my_store_a", config = yaml, project = "project_a") tar_config_set(store = "my_store_b", config = yaml, project = "project_b") tar_config_projects(config = yaml)
tar_config_set()
writes special custom settings
for the current project to an optional YAML configuration file.
tar_config_set( inherits = NULL, as_job = NULL, garbage_collection = NULL, label = NULL, label_width = NULL, level_separation = NULL, reporter_make = NULL, reporter_outdated = NULL, script = NULL, seconds_meta_append = NULL, seconds_meta_upload = NULL, seconds_reporter = NULL, seconds_interval = NULL, store = NULL, shortcut = NULL, use_crew = NULL, workers = NULL, config = Sys.getenv("TAR_CONFIG", "_targets.yaml"), project = Sys.getenv("TAR_PROJECT", "main") )
tar_config_set( inherits = NULL, as_job = NULL, garbage_collection = NULL, label = NULL, label_width = NULL, level_separation = NULL, reporter_make = NULL, reporter_outdated = NULL, script = NULL, seconds_meta_append = NULL, seconds_meta_upload = NULL, seconds_reporter = NULL, seconds_interval = NULL, store = NULL, shortcut = NULL, use_crew = NULL, workers = NULL, config = Sys.getenv("TAR_CONFIG", "_targets.yaml"), project = Sys.getenv("TAR_PROJECT", "main") )
inherits |
Character of length 1, name of the project from which
the current project should inherit configuration settings.
The current project is the |
as_job |
Logical of length 1, |
garbage_collection |
Deprecated. Use the |
label |
Argument of |
label_width |
Argument of |
level_separation |
Argument of |
reporter_make |
Character of length 1, |
reporter_outdated |
Character of length 1, |
script |
Character of length 1, path to the target script file
that defines the pipeline ( |
seconds_meta_append |
Argument of |
seconds_meta_upload |
Argument of |
seconds_reporter |
Argument of |
seconds_interval |
Deprecated on 2023-08-24 (version 1.2.2.9001).
Use |
store |
Character of length 1, path to the data store of the pipeline.
If |
shortcut |
Logical of length 1, default |
use_crew |
Logical of length 1, whether to use |
workers |
Positive numeric of length 1, |
config |
Character of length 1, file path of the YAML
configuration file with |
project |
Character of length 1, name of the current
|
NULL
(invisibly)
For several key functions like tar_make()
, the
default values of arguments are controlled through
tar_config_get()
. tar_config_get()
retrieves data
from an optional YAML configuration file.
You can control the settings in the YAML
file programmatically with tar_config_set()
.
The default file path of this YAML file is _targets.yaml
, and you can
set another path globally using the TAR_CONFIG
environment variable. The YAML file can store configuration
settings for multiple projects, and you can globally
set the default project with the TAR_PROJECT
environment
variable.
The structure of the YAML file
follows rules similar to the config
R package, e.g.
projects can inherit settings from one another using the inherits
field.
Exceptions include:
There is no requirement to have a configuration named "default"
.
Other projects do not inherit from the default project automatically.
Not all fields need values because targets
already has defaults.
targets
does not actually invoke
the config
package. The implementation in targets
was written from scratch without viewing or copying any
part of the source code of config
.
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
Other configuration:
tar_config_get()
,
tar_config_projects()
,
tar_config_unset()
,
tar_config_yaml()
,
tar_envvars()
,
tar_option_get()
,
tar_option_reset()
,
tar_option_set()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script(list(tar_target(x, 1 + 1))) tar_config_get("store") # NULL (data store defaults to "_targets/") store_path <- tempfile() tar_config_set(store = store_path) tar_config_get("store") # Shows a temp file. tar_make() # Writes to the custom data store identified in _targets.yaml. tar_read(x) # tar_read() knows about _targets.yaml too. file.exists("_targets") # FALSE file.exists(store_path) # TRUE }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script(list(tar_target(x, 1 + 1))) tar_config_get("store") # NULL (data store defaults to "_targets/") store_path <- tempfile() tar_config_set(store = store_path) tar_config_get("store") # Shows a temp file. tar_make() # Writes to the custom data store identified in _targets.yaml. tar_read(x) # tar_read() knows about _targets.yaml too. file.exists("_targets") # FALSE file.exists(store_path) # TRUE }) }
Unset (i.e. delete) one or more
custom settings for the current project
from the optional YAML configuration file.
After that, tar_config_get()
will return the original
default values for those settings for the project.
tar_config_unset( names = character(0), config = Sys.getenv("TAR_CONFIG", "_targets.yaml"), project = Sys.getenv("TAR_PROJECT", "main") )
tar_config_unset( names = character(0), config = Sys.getenv("TAR_CONFIG", "_targets.yaml"), project = Sys.getenv("TAR_PROJECT", "main") )
names |
Character vector of configuration settings to delete from the current project. |
config |
Character of length 1, file path of the YAML
configuration file with |
project |
Character of length 1, name of the current
|
NULL
(invisibly)
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
For several key functions like tar_make()
, the
default values of arguments are controlled through
tar_config_get()
. tar_config_get()
retrieves data
from an optional YAML configuration file.
You can control the settings in the YAML
file programmatically with tar_config_set()
.
The default file path of this YAML file is _targets.yaml
, and you can
set another path globally using the TAR_CONFIG
environment variable. The YAML file can store configuration
settings for multiple projects, and you can globally
set the default project with the TAR_PROJECT
environment
variable.
The structure of the YAML file
follows rules similar to the config
R package, e.g.
projects can inherit settings from one another using the inherits
field.
Exceptions include:
There is no requirement to have a configuration named "default"
.
Other projects do not inherit from the default project automatically.
Not all fields need values because targets
already has defaults.
targets
does not actually invoke
the config
package. The implementation in targets
was written from scratch without viewing or copying any
part of the source code of config
.
Other configuration:
tar_config_get()
,
tar_config_projects()
,
tar_config_set()
,
tar_config_yaml()
,
tar_envvars()
,
tar_option_get()
,
tar_option_reset()
,
tar_option_set()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script(list(tar_target(x, 1 + 1))) tar_config_get("store") # "_targets" store_path <- tempfile() tar_config_set(store = store_path) tar_config_get("store") # Shows a temp file. tar_config_unset("store") tar_config_get("store") # _targets }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script(list(tar_target(x, 1 + 1))) tar_config_get("store") # "_targets" store_path <- tempfile() tar_config_set(store = store_path) tar_config_get("store") # Shows a temp file. tar_config_unset("store") tar_config_get("store") # _targets }) }
Read _targets.yaml
.
Read the YAML content of _targets.yaml
.
tar_config_yaml(config = Sys.getenv("TAR_CONFIG", "_targets.yaml"))
tar_config_yaml(config = Sys.getenv("TAR_CONFIG", "_targets.yaml"))
config |
Character of length 1, file path of the YAML
configuration file with |
Nested list of fields defined in _targets.yaml
.
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
For several key functions like tar_make()
, the
default values of arguments are controlled through
tar_config_get()
. tar_config_get()
retrieves data
from an optional YAML configuration file.
You can control the settings in the YAML
file programmatically with tar_config_set()
.
The default file path of this YAML file is _targets.yaml
, and you can
set another path globally using the TAR_CONFIG
environment variable. The YAML file can store configuration
settings for multiple projects, and you can globally
set the default project with the TAR_PROJECT
environment
variable.
The structure of the YAML file
follows rules similar to the config
R package, e.g.
projects can inherit settings from one another using the inherits
field.
Exceptions include:
There is no requirement to have a configuration named "default"
.
Other projects do not inherit from the default project automatically.
Not all fields need values because targets
already has defaults.
targets
does not actually invoke
the config
package. The implementation in targets
was written from scratch without viewing or copying any
part of the source code of config
.
Other configuration:
tar_config_get()
,
tar_config_projects()
,
tar_config_set()
,
tar_config_unset()
,
tar_envvars()
,
tar_option_get()
,
tar_option_reset()
,
tar_option_set()
yaml <- tempfile() tar_config_set(store = "my_store_a", config = yaml, project = "project_a") tar_config_set(store = "my_store_b", config = yaml, project = "project_b") str(tar_config_yaml(config = yaml))
yaml <- tempfile() tar_config_set(store = "my_store_a", config = yaml, project = "project_a") tar_config_set(store = "my_store_b", config = yaml, project = "project_b") str(tar_config_yaml(config = yaml))
For the most recent run of the pipeline with tar_make()
where a crew
controller was started, get summary-level information
of the workers.
tar_crew(store = targets::tar_config_get("store"))
tar_crew(store = targets::tar_config_get("store"))
store |
Character of length 1, path to the
|
A data frame with one row per crew
worker and the following columns:
controller
: name of the crew
controller.
launches
: number of times the worker was launched.
seconds
: number of seconds the worker spent running tasks.
targets
: number of targets the worker completed and delivered.
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
Other data:
tar_pid()
,
tar_process()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. if (requireNamespace("crew", quietly = TRUE)) { tar_script({ library(targets) library(tarchetypes) tar_option_set(controller = crew::crew_controller_local()) list( tar_target(x, seq_len(2)), tar_target(y, 2 * x, pattern = map(x)) ) }, ask = FALSE) tar_make() tar_process() tar_process(pid) } }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. if (requireNamespace("crew", quietly = TRUE)) { tar_script({ library(targets) library(tarchetypes) tar_option_set(controller = crew::crew_controller_local()) list( tar_target(x, seq_len(2)), tar_target(y, 2 * x, pattern = map(x)) ) }, ask = FALSE) tar_make() tar_process() tar_process(pid) } }) }
Declare the rules that mark a target as outdated.
tar_cue( mode = c("thorough", "always", "never"), command = TRUE, depend = TRUE, format = TRUE, repository = TRUE, iteration = TRUE, file = TRUE, seed = TRUE )
tar_cue( mode = c("thorough", "always", "never"), command = TRUE, depend = TRUE, format = TRUE, repository = TRUE, iteration = TRUE, file = TRUE, seed = TRUE )
mode |
Cue mode. If |
command |
Logical, whether to rerun the target if command changed since last time. |
depend |
Logical, whether to rerun the target if the value of one of the dependencies changed. |
format |
Logical, whether to rerun the target if the user-specified
storage format changed. The storage format is user-specified through
|
repository |
Logical, whether to rerun the target if the user-specified
storage repository changed. The storage repository is user-specified
through |
iteration |
Logical, whether to rerun the target if the user-specified
iteration method changed. The iteration method is user-specified through
|
file |
Logical, whether to rerun the target if the file(s) with the return value changed or at least one is missing. |
seed |
Logical, whether to rerun the target if pseudo-random
number generator seed either changed or is |
targets
uses internal metadata and special cues
to decide whether a target is up to date (can skip)
or is outdated/invalidated (needs to rerun). By default,
targets
moves through the following list of cues
and declares a target outdated if at least one cue is activated.
There is no metadata record of the target.
The target errored last run.
The target has a different class than it did before.
The cue mode equals "always"
.
The cue mode does not equal "never"
.
The command
metadata field (the hash of the R command)
is different from last time.
The depend
metadata field (the hash of the immediate upstream
dependency targets and global objects) is different from last time.
The storage format is different from last time.
The iteration mode is different from last time.
A target's file (either the one in _targets/objects/
or a dynamic file) does not exist or changed since last time.
The user can suppress many of the above cues using the tar_cue()
function, which creates the cue
argument of tar_target()
.
Cue objects also constitute more nuanced target invalidation rules.
The tarchetypes
package has many such examples, including
tar_age()
, tar_download()
, tar_cue_age()
, tar_cue_force()
,
and tar_cue_skip()
.
If the cue of a target has depend = TRUE
(default) then the target
is marked invalidated/outdated when its upstream dependencies change.
A target's dependencies include upstream targets,
user-defined functions, and other global objects populated
in the target script file (default: _targets.R
).
To determine if a given dependency changed
since the last run of the pipeline, targets
computes hashes.
The hash of a target is computed on its files in storage
(usually a file in _targets/objects/
). The hash of a
non-function global object dependency is computed directly on its
in-memory data. User-defined functions are hashed in the following way:
Deparse the function with targets:::tar_deparse_safe()
. This
function computes a string representation of the function
body and arguments. This string representation is invariant to
changes in comments and whitespace, which means
trivial changes to formatting do not cue targets to rerun.
Manually remove any literal pointers from the function string
using targets:::mask_pointers()
. Such pointers arise from
inline compiled C/C++ functions.
Using static code analysis (i.e. tar_deps()
, which is based on
codetools::findGlobals()
) identify any user-defined functions
and global objects that the current function depends on.
Append the hashes of those dependencies to the string representation
of the current function.
Compute the hash of the final string representation using
targets:::hash_object()
.
Above, (3) is important because user-defined functions
have dependencies of their own, such as other user-defined
functions and other global objects. (3) ensures that a change to
a function's dependencies invalidates the function itself, which
in turn invalidates any calling functions and any targets downstream
with the depend
cue turned on.
Other targets:
tar_target()
# The following target will always run when the pipeline runs. x <- tar_target(x, download_data(), cue = tar_cue(mode = "always"))
# The following target will always run when the pipeline runs. x <- tar_target(x, download_data(), cue = tar_cue(mode = "always"))
For developers only: get the full definition of the
target currently running. This target definition is the same kind
of object produced by tar_target()
.
tar_definition( default = targets::tar_target_raw("target_name", quote(identity())) )
tar_definition( default = targets::tar_target_raw("target_name", quote(identity())) )
default |
Environment, value to return if |
Most users should not use tar_definition()
because accidental
modifications could break the pipeline.
tar_definition()
only exists in order to support third-party interface
packages, and even then the returned target definition is not modified.
If called from a running target, tar_definition()
returns
the target object of the currently running target.
See the "Target objects" section for details.
Functions like tar_target()
produce target objects,
special objects with specialized sets of S3 classes.
Target objects represent skippable steps of the analysis pipeline
as described at https://books.ropensci.org/targets/.
Please read the walkthrough at
https://books.ropensci.org/targets/walkthrough.html
to understand the role of target objects in analysis pipelines.
For developers, https://wlandau.github.io/targetopia/contributing.html#target-factories explains target factories (functions like this one which generate targets) and the design specification at https://books.ropensci.org/targets-design/ details the structure and composition of target objects.
Other utilities:
tar_active()
,
tar_backoff()
,
tar_call()
,
tar_cancel()
,
tar_described_as()
,
tar_envir()
,
tar_format_get()
,
tar_group()
,
tar_name()
,
tar_path()
,
tar_path_script()
,
tar_path_script_support()
,
tar_path_store()
,
tar_path_target()
,
tar_source()
,
tar_store()
class(tar_definition()) tar_definition()$name if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script( tar_target(x, tar_definition()$settings$memory, memory = "transient") ) tar_make(x) tar_read(x) }) }
class(tar_definition()) tar_definition()$name if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script( tar_target(x, tar_definition()$settings$memory, memory = "transient") ) tar_make(x) tar_read(x) }) }
Delete the output values of targets in _targets/objects/
(or the cloud if applicable)
but keep the records in the metadata.
tar_delete( names, cloud = TRUE, batch_size = 1000L, verbose = TRUE, store = targets::tar_config_get("store") )
tar_delete( names, cloud = TRUE, batch_size = 1000L, verbose = TRUE, store = targets::tar_config_get("store") )
names |
Optional, names of the targets to delete. If supplied, the
|
cloud |
Logical of length 1, whether to delete objects
from the cloud if applicable (e.g. AWS, GCP). If |
batch_size |
Positive integer between 1 and 1000, number of target objects to delete from the cloud with each HTTP API request. Currently only supported for AWS. Cannot be more than 1000. |
verbose |
Logical of length 1, whether to print console messages to show progress when deleting each batch of targets from each cloud bucket. Batched deletion with verbosity is currently only supported for AWS. |
store |
Character of length 1, path to the
|
If you have a small number of data-heavy targets you
need to discard to conserve storage, this function can help.
Local external files (i.e. format = "file"
and repository = "local"
) are not deleted.
For targets with repository
not equal to "local"
, tar_delete()
attempts
to delete the file and errors out if the deletion is unsuccessful.
If deletion fails, either log into the cloud platform
and manually delete the file (e.g. the AWS web console
in the case of repository = "aws"
) or call
tar_invalidate()
on that target so that targets
does not try to delete the object.
For patterns recorded in the metadata, all the branches
will be deleted. For patterns no longer in the metadata,
branches are left alone.
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
Some buckets in Amazon S3 or Google Cloud Storage are "versioned",
which means they track historical versions of each data object.
If you use targets
with cloud storage
(https://books.ropensci.org/targets/cloud-storage.html)
and versioning is turned on, then targets
will record each
version of each target in its metadata.
Functions like tar_read()
and tar_load()
load the version recorded in the local metadata,
which may not be the same as the "current" version of the
object in the bucket. Likewise, functions tar_delete()
and tar_destroy()
only remove
the version ID of each target as recorded in the local
metadata.
If you want to interact with the latest version of an object instead of the version ID recorded in the local metadata, then you will need to delete the object from the metadata.
Make sure your local copy of the metadata is current and
up to date. You may need to run tar_meta_download()
or
tar_meta_sync()
first.
Run tar_unversion()
to remove the recorded version IDs of
your targets in the local metadata.
With the version IDs gone from the local metadata,
functions like tar_read()
and tar_destroy()
will use the
latest version of each target data object.
Optional: to back up the local metadata file with the version IDs
deleted, use tar_meta_upload()
.
Other clean:
tar_destroy()
,
tar_invalidate()
,
tar_prune()
,
tar_prune_list()
,
tar_unversion()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1), tar_target(z, y1 + y2) ) }, ask = FALSE) tar_make() tar_delete(starts_with("y")) # Only deletes y1 and y2. tar_make() # y1 and y2 rerun but return the same values, so z is up to date. }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1), tar_target(z, y1 + y2) ) }, ask = FALSE) tar_make() tar_delete(starts_with("y")) # Only deletes y1 and y2. tar_make() # y1 and y2 rerun but return the same values, so z is up to date. }) }
List the dependencies of a function or expression.
tar_deps()
expects the expr
argument to be an unevaluated
expression,
whereas tar_deps_raw()
expects expr
to be an evaluated
expression object. Functions can be passed normally in either case.
tar_deps(expr) tar_deps_raw(expr)
tar_deps(expr) tar_deps_raw(expr)
expr |
An R expression or function.
|
targets
detects the dependencies of commands using
static code analysis. Use tar_deps()
to run the
code analysis and see the dependencies for yourself.
Character vector of the dependencies of a function or expression.
Other inspect:
tar_manifest()
,
tar_network()
,
tar_outdated()
,
tar_sitrep()
,
tar_validate()
tar_deps(x <- y + z) tar_deps(quote(x <- y + z)) tar_deps({ x <- 1 x + a }) tar_deps(function(a = b) map_dfr(data, ~do_row(.x))) tar_deps_raw(function(a = b) map_dfr(data, ~do_row(.x)))
tar_deps(x <- y + z) tar_deps(quote(x <- y + z)) tar_deps({ x <- 1 x + a }) tar_deps(function(a = b) map_dfr(data, ~do_row(.x))) tar_deps_raw(function(a = b) map_dfr(data, ~do_row(.x)))
Select a subset of targets in the _targets.R
file
based on their custom descriptions.
tar_described_as( described_as = NULL, tidyselect = TRUE, callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function), envir = parent.frame(), script = targets::tar_config_get("script") )
tar_described_as( described_as = NULL, tidyselect = TRUE, callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function), envir = parent.frame(), script = targets::tar_config_get("script") )
described_as |
A |
tidyselect |
If |
callr_function |
A function from |
callr_arguments |
A list of arguments to |
envir |
An environment, where to run the target R script
(default: The |
script |
Character of length 1, path to the
target script file. Defaults to |
Targets with empty descriptions are ignored.
If tidyselect
is TRUE
, then tar_described_as()
returns
a call to tidyselect::all_of()
which can be supplied to the names
argument of functions like tar_manifest()
and tar_make()
.
This allows functions like tar_manifest()
and tar_make()
to focus on only the targets with the matching descriptions.
If tidyselect
is FALSE
, then tar_described_as()
returns
a simple character vector of the names of all the targets in the
pipeline with matching descriptions.
Other utilities:
tar_active()
,
tar_backoff()
,
tar_call()
,
tar_cancel()
,
tar_definition()
,
tar_envir()
,
tar_format_get()
,
tar_group()
,
tar_name()
,
tar_path()
,
tar_path_script()
,
tar_path_script_support()
,
tar_path_store()
,
tar_path_target()
,
tar_source()
,
tar_store()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(b2, TRUE, description = "blue two"), tar_target(b3, TRUE, description = "blue three"), tar_target(g2, TRUE, description = "green two"), tar_target(g3, TRUE, description = "green three"), tar_target(g4, TRUE, description = "green three") ) }, ask = FALSE) tar_described_as(starts_with("green"), tidyselect = FALSE) tar_make(names = tar_described_as(starts_with("green"))) tar_progress() # Only `g2`, `g3`, and `g4` ran. }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(b2, TRUE, description = "blue two"), tar_target(b3, TRUE, description = "blue three"), tar_target(g2, TRUE, description = "green two"), tar_target(g3, TRUE, description = "green three"), tar_target(g4, TRUE, description = "green three") ) }, ask = FALSE) tar_described_as(starts_with("green"), tidyselect = FALSE) tar_make(names = tar_described_as(starts_with("green"))) tar_progress() # Only `g2`, `g3`, and `g4` ran. }) }
Destroy the data store written by the pipeline.
tar_destroy( destroy = c("all", "cloud", "local", "meta", "process", "progress", "objects", "scratch", "workspaces", "user"), batch_size = 1000L, verbose = TRUE, ask = NULL, script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
tar_destroy( destroy = c("all", "cloud", "local", "meta", "process", "progress", "objects", "scratch", "workspaces", "user"), batch_size = 1000L, verbose = TRUE, ask = NULL, script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
destroy |
Character of length 1, what to destroy. Choices:
|
batch_size |
Positive integer between 1 and 1000, number of target objects to delete from the cloud with each HTTP API request. Currently only supported for AWS. Cannot be more than 1000. |
verbose |
Logical of length 1, whether to print console messages to show progress when deleting each batch of targets from each cloud bucket. Batched deletion with verbosity is currently only supported for AWS. |
ask |
Logical of length 1, whether to pause with a menu prompt
before deleting files. To disable this menu, set the |
script |
Character of length 1, path to the
target script file. Defaults to |
store |
Character of length 1, path to the
|
The data store is a folder created by tar_make()
(or tar_make_future()
or tar_make_clustermq()
).
The details of the data store are explained at
https://books.ropensci.org/targets/data.html#local-data-store.
The data store folder contains the output data
and metadata of the targets in the pipeline. Usually,
the data store is a folder called _targets/
(see tar_config_set()
to customize), and it may
link to data on the cloud if you used AWS or GCP
buckets. By default, tar_destroy()
deletes the entire
_targets/
folder (or wherever the data store is located),
including custom user-supplied files in _targets/user/
,
as well as any cloud data that the pipeline uploaded.
See the destroy
argument to customize this behavior
and only delete part of the data store, and see functions like
tar_invalidate()
, tar_delete()
, and tar_prune()
to remove
information pertaining to some but not all targets in the pipeline.
After calling tar_destroy()
with default arguments,
the entire data store is gone, which means all the output data from
previous runs of the pipeline is gone (except for
input/output files tracked with tar_target(..., format = "file")
).
The next run of the pipeline will start from scratch,
and it will not skip any targets.
NULL
(invisibly).
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
Some buckets in Amazon S3 or Google Cloud Storage are "versioned",
which means they track historical versions of each data object.
If you use targets
with cloud storage
(https://books.ropensci.org/targets/cloud-storage.html)
and versioning is turned on, then targets
will record each
version of each target in its metadata.
Functions like tar_read()
and tar_load()
load the version recorded in the local metadata,
which may not be the same as the "current" version of the
object in the bucket. Likewise, functions tar_delete()
and tar_destroy()
only remove
the version ID of each target as recorded in the local
metadata.
If you want to interact with the latest version of an object instead of the version ID recorded in the local metadata, then you will need to delete the object from the metadata.
Make sure your local copy of the metadata is current and
up to date. You may need to run tar_meta_download()
or
tar_meta_sync()
first.
Run tar_unversion()
to remove the recorded version IDs of
your targets in the local metadata.
With the version IDs gone from the local metadata,
functions like tar_read()
and tar_destroy()
will use the
latest version of each target data object.
Optional: to back up the local metadata file with the version IDs
deleted, use tar_meta_upload()
.
Other clean:
tar_delete()
,
tar_invalidate()
,
tar_prune()
,
tar_prune_list()
,
tar_unversion()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list(tar_target(x, 1 + 1)) }) tar_make() # Creates the _targets/ data store. tar_destroy() print(file.exists("_targets")) # Should be FALSE. }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list(tar_target(x, 1 + 1)) }) tar_make() # Creates the _targets/ data store. tar_destroy() print(file.exists("_targets")) # Should be FALSE. }) }
List the targets with progress status "dispatched"
.
tar_dispatched(names = NULL, store = targets::tar_config_get("store"))
tar_dispatched(names = NULL, store = targets::tar_config_get("store"))
names |
Optional, names of the targets. If supplied, the
function restricts its output to these targets.
You can supply symbols
or |
store |
Character of length 1, path to the
|
A target is "dispatched"
if it is sent off to be run. Depending
on your high-performance computing configuration via the crew
package,
the target may not actually start right away. This may happen if the target
is ready to start but all available parallel workers are busy.
A character vector of dispatched targets.
Other progress:
tar_canceled()
,
tar_completed()
,
tar_errored()
,
tar_poll()
,
tar_progress()
,
tar_progress_branches()
,
tar_progress_summary()
,
tar_skipped()
,
tar_watch()
,
tar_watch_server()
,
tar_watch_ui()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(x, seq_len(2)), tar_target(y, 2 * x, pattern = map(x)) ) }, ask = FALSE) tar_make() tar_dispatched() tar_dispatched(starts_with("y_")) # see also any_of() }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(x, seq_len(2)), tar_target(y, 2 * x, pattern = map(x)) ) }, ask = FALSE) tar_make() tar_dispatched() tar_dispatched(starts_with("y_")) # see also any_of() }) }
Open the target script file for editing.
Requires the usethis
package.
tar_edit(script = targets::tar_config_get("script"))
tar_edit(script = targets::tar_config_get("script"))
script |
Character of length 1, path to the
target script file. Defaults to |
The target script file is an R code file
that defines the pipeline. The default path is _targets.R
,
but the default for the current project
can be configured with tar_config_set()
Other scripts:
tar_github_actions()
,
tar_helper()
,
tar_renv()
,
tar_script()
knitr
engine
knitr
language engine that runs targets
code chunks in Target Markdown.
tar_engine_knitr(options)
tar_engine_knitr(options)
options |
A named list of |
Character, output generated from knitr::engine_output()
.
Target Markdown has two modes:
Non-interactive mode. This is the default when you
run knitr::knit()
or rmarkdown::render()
.
Here, the code in targets
code chunks gets written
to special script files in order to set up a targets
pipeline to run later.
Interactive mode: here, no scripts are written to set up a pipeline. Rather, the globals or targets in question are run in the current environment and the values are assigned to that environment.
The mode is interactive if !isTRUE(getOption("knitr.in.progress"))
is TRUE
. The knitr.in.progress
option is TRUE
when you run knitr::knit()
or rmarkdown::render()
and NULL
if you are running one chunk at a time interactively
in an integrated development environment, e.g. the
notebook interface in RStudio:
https://bookdown.org/yihui/rmarkdown/notebook.html.
You can choose the mode with the tar_interactive
chunk option.
(In targets
0.6.0, tar_interactive
defaults to interactive()
instead of !isTRUE(getOption("knitr.in.progress"))
.)
Target Markdown introduces the following knitr
code chunk options.
Most other standard knitr
code chunk options should just work
in non-interactive mode. In interactive mode, not all of them are guaranteed to work.
tar_globals
: Logical of length 1,
whether to define globals or targets.
If TRUE
, the chunk code defines functions, objects, and options
common to all the targets. If FALSE
or NULL
(default),
then the chunk returns formal targets for the pipeline.
tar_interactive
: Logical of length 1, whether to run in
interactive mode or non-interactive mode.
See the "Target Markdown interactive mode" section of this
help file for details.
tar_name
: name to use for writing helper script files
(e.g. _targets_r/targets/target_script.R
)
and specifying target names if the tar_simple
chunk option
is TRUE
. All helper scripts and target names must have
unique names, so please do not set this option globally
with knitr::opts_chunk$set()
.
tar_script
: Character of length 1, where to write the
target script file in non-interactive mode. Most users can
skip this option and stick with the default _targets.R
script path.
Helper script files are always written next to the target script in
a folder with an "_r"
suffix. The tar_script
path must either be
absolute or be relative to the project root
(where you call tar_make()
or similar).
If not specified, the target script path defaults to
tar_config_get("script")
(default: _targets.R
;
helpers default: _targets_r/
). When you run tar_make()
etc.
with a non-default target script, you must select the correct target
script file either with the script
argument or with
tar_config_set(script = ...)
. The function will source()
the script file from the current working directory
(i.e. with chdir = FALSE
in source()
).
tar_simple
: Logical of length 1.
Set to TRUE
to define a single target with a simplified interface.
In code chunks with tar_simple
equal to TRUE
, the chunk label
(or the tar_name
chunk option if you set it)
becomes the name, and the chunk code becomes the command.
In other words, a code chunk with label targetname
and
command mycommand()
automatically gets converted to
tar_target(name = targetname, command = mycommand())
.
All other arguments of tar_target()
remain at their default
values (configurable with tar_option_set()
in a
tar_globals = TRUE
chunk).
https://books.ropensci.org/targets/literate-programming.html
Other Target Markdown:
tar_interactive()
,
tar_noninteractive()
,
tar_toggle()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN # Register the engine. if (requireNamespace("knitr", quietly = TRUE)) { knitr::knit_engines$set(targets = targets::tar_engine_knitr) } # Then, `targets` code chunks in a knitr report will run # as described at # <https://books.ropensci.org/targets/literate-programming.html>. }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN # Register the engine. if (requireNamespace("knitr", quietly = TRUE)) { knitr::knit_engines$set(targets = targets::tar_engine_knitr) } # Then, `targets` code chunks in a knitr report will run # as described at # <https://books.ropensci.org/targets/literate-programming.html>. }
For developers only: get the environment where a
target runs its command. Designed to be called
while the target is running. The environment
inherits from tar_option_get("envir")
.
tar_envir(default = parent.frame())
tar_envir(default = parent.frame())
default |
Environment, value to return if |
Most users should not use tar_envir()
because accidental
modifications to parent.env(tar_envir())
could break the pipeline.
tar_envir()
only exists in order to support third-party interface
packages, and even then the returned environment is not modified.
If called from a running target, tar_envir()
returns
the environment where the target runs its command.
If called outside a pipeline, the return value is
whatever the user supplies to default
(which defaults to parent.frame()
).
Other utilities:
tar_active()
,
tar_backoff()
,
tar_call()
,
tar_cancel()
,
tar_definition()
,
tar_described_as()
,
tar_format_get()
,
tar_group()
,
tar_name()
,
tar_path()
,
tar_path_script()
,
tar_path_script_support()
,
tar_path_store()
,
tar_path_target()
,
tar_source()
,
tar_store()
tar_envir() tar_envir(default = new.env(parent = emptyenv())) if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script(tar_target(x, tar_envir(default = parent.frame()))) tar_make(x) tar_read(x) }) }
tar_envir() tar_envir(default = new.env(parent = emptyenv())) if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script(tar_target(x, tar_envir(default = parent.frame()))) tar_make(x) tar_read(x) }) }
targets
environment variables.
Show all the special environment variables
available for customizing targets
.
tar_envvars(unset = "")
tar_envvars(unset = "")
unset |
Character of length 1, value to return for any environment variable that is not set. |
You can customize the behavior of targets
with special environment variables. The sections in this help file
describe each environment variable, and the tar_envvars()
function
lists their current values.
If you modify environment variables, please set them
in a project-level .Renviron
file so you do not lose your
configuration when you restart your R session.
Modify the project-level .Renviron
file with
usethis::edit_r_environ(scope = "project")
. Restart
your R session after you are done editing.
For targets that run on parallel workers
created by tar_make_clustermq()
or tar_make_future()
,
only the environment variables listed by tar_envvars()
are specifically exported to the targets.
For all other environment variables, you will have to set
the values manually, e.g. in a project-level .Renviron
file
(for workers that have access to the local file system).
A data frame with one row per environment variable
and columns with the name and current value of each.
An unset environment variable will have a value of ""
by default. (Customize with the unset
argument).
The TAR_ASK
environment variable accepts values "true"
and "false"
.
If TAR_ASK
is not set, or if it is set to "true"
,
then targets
asks permission in a menu
before overwriting certain files, such as the target script file
(default: _targets.R
) in tar_script()
.
If TAR_ASK
is "false"
, then targets
overwrites the old files
with the new ones without asking. Once you are comfortable with
tar_script()
, tar_github_actions()
, and similar functions,
you can safely set TAR_ASK
to "false"
in either a project-level
or user-level .Renviron
file.
The TAR_CONFIG
environment variable controls the file path to the
optional YAML configuration file with project settings.
See the help file of tar_config_set()
for details.
The TAR_PROJECT
environment variable sets the name of the project
to set and get settings when working with the YAML configuration file.
See the help file of tar_config_set()
for details.
The TAR_WARN
environment variable accepts values "true"
and "false"
.
If TAR_WARN
is not set, or if it is set to "true"
,
then targets
throws warnings in certain edge cases,
such as target/global name conflicts and dangerous use of
devtools::load_all()
. If TAR_WARN
is "false"
, then targets
does not throw warnings in these cases.
These warnings can detect potentially serious
issues with your pipeline, so please do not set TAR_WARN
to "false" unless your use case absolutely requires it.
Other configuration:
tar_config_get()
,
tar_config_projects()
,
tar_config_set()
,
tar_config_unset()
,
tar_config_yaml()
,
tar_option_get()
,
tar_option_reset()
,
tar_option_set()
tar_envvars()
tar_envvars()
List targets whose progress is "errored"
.
tar_errored(names = NULL, store = targets::tar_config_get("store"))
tar_errored(names = NULL, store = targets::tar_config_get("store"))
names |
Optional, names of the targets. If supplied, the
output is restricted to the selected targets.
The object supplied to |
store |
Character of length 1, path to the
|
A character vector of errored targets.
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
Other progress:
tar_canceled()
,
tar_completed()
,
tar_dispatched()
,
tar_poll()
,
tar_progress()
,
tar_progress_branches()
,
tar_progress_summary()
,
tar_skipped()
,
tar_watch()
,
tar_watch_server()
,
tar_watch_ui()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(x, seq_len(2)), tar_target(y, 2 * x, pattern = map(x)) ) }, ask = FALSE) tar_make() tar_errored() tar_errored(starts_with("y_")) # see also any_of() }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(x, seq_len(2)), tar_target(y, 2 * x, pattern = map(x)) ) }, ask = FALSE) tar_make() tar_errored() tar_errored(starts_with("y_")) # see also any_of() }) }
Check if the target metadata file _targets/meta/meta
exists for the current project.
tar_exist_meta(store = targets::tar_config_get("store"))
tar_exist_meta(store = targets::tar_config_get("store"))
store |
Character of length 1, path to the
|
To learn more about data storage in targets
, visit
https://books.ropensci.org/targets/data.html.
Logical of length 1, whether the current project's metadata exists.
Other existence:
tar_exist_objects()
,
tar_exist_process()
,
tar_exist_progress()
,
tar_exist_script()
tar_exist_meta()
tar_exist_meta()
Check if output target data exists in either
_targets/objects/
or the cloud for one or more targets.
tar_exist_objects( names, cloud = TRUE, store = targets::tar_config_get("store") )
tar_exist_objects( names, cloud = TRUE, store = targets::tar_config_get("store") )
names |
Character vector of target names.
Not |
cloud |
Logical of length 1, whether to include
cloud targets in the output
(e.g. |
store |
Character of length 1, path to the
|
If a target has no metadata or if the repository
argument of tar_target()
was set to "local"
,
then the _targets/objects/
folder is checked. Otherwise,
if there is metadata and repository
is not "local"
,
then tar_exist_objects()
checks the cloud repository
selected.
Logical of length length(names)
, whether
each given target has an existing file in either
_targets/objects/
or the cloud.
Other existence:
tar_exist_meta()
,
tar_exist_process()
,
tar_exist_progress()
,
tar_exist_script()
tar_exist_objects(c("target1", "target2"))
tar_exist_objects(c("target1", "target2"))
Check if the process metadata file _targets/meta/process
exists for the current project.
tar_exist_process(store = targets::tar_config_get("store"))
tar_exist_process(store = targets::tar_config_get("store"))
store |
Character of length 1, path to the
|
To learn more about data storage in targets
, visit
https://books.ropensci.org/targets/data.html.
Logical of length 1, whether the current project's process metadata file exists.
Other existence:
tar_exist_meta()
,
tar_exist_objects()
,
tar_exist_progress()
,
tar_exist_script()
tar_exist_process()
tar_exist_process()
Check if the progress metadata file _targets/meta/progress
exists for the current project.
tar_exist_progress(store = targets::tar_config_get("store"))
tar_exist_progress(store = targets::tar_config_get("store"))
store |
Character of length 1, path to the
|
To learn more about data storage in targets
, visit
https://books.ropensci.org/targets/data.html.
Logical of length 1, whether the current project's progress metadata file exists.
Other existence:
tar_exist_meta()
,
tar_exist_objects()
,
tar_exist_process()
,
tar_exist_script()
tar_exist_progress()
tar_exist_progress()
Check if the target script file exists for the
current project. The target script is _targets.R
by default,
but the path can be configured for the current project
using tar_config_set()
.
tar_exist_script(script = targets::tar_config_get("script"))
tar_exist_script(script = targets::tar_config_get("script"))
script |
Character of length 1, path to the
target script file. Defaults to |
Logical of length 1, whether the current project's target script file exists.
Other existence:
tar_exist_meta()
,
tar_exist_objects()
,
tar_exist_process()
,
tar_exist_progress()
tar_exist_script()
tar_exist_script()
Define a custom target storage format for the
format
argument of tar_target()
or tar_option_set()
.
tar_format( read = NULL, write = NULL, marshal = NULL, unmarshal = NULL, convert = NULL, copy = NULL, substitute = list(), repository = NULL )
tar_format( read = NULL, write = NULL, marshal = NULL, unmarshal = NULL, convert = NULL, copy = NULL, substitute = list(), repository = NULL )
read |
A function with a single argument named |
write |
A function with two arguments: |
marshal |
A function with a single argument named |
unmarshal |
A function with a single argument named |
convert |
The |
copy |
The |
substitute |
Named list of values to be inserted into the
body of each custom function in place of symbols in the body.
For example, if
Please do not include temporary or sensitive information
such as authentication credentials.
If you do, then |
repository |
Deprecated. Use the |
A character string of length 1 encoding the custom format.
You can supply this string directly to the format
argument of tar_target()
or tar_option_set()
.
If an object can only be used in the R session
where it was created, it is called "non-exportable".
Examples of non-exportable R objects are Keras models,
Torch objects, xgboost
matrices, xml2
documents,
rstan
model objects, sparklyr
data objects, and
database connection objects. These objects cannot be
exported to parallel workers (e.g. for tar_make_future()
)
without special treatment. To send a non-exportable
object to a parallel worker, the object must be marshalled:
converted into a form that can be exported safely
(similar to serialization but not always the same).
Then, the worker must unmarshal the object: convert it
into a form that is usable and valid in the current R session.
Arguments marshal
and unmarshal
of tar_format()
let you control how marshalling and unmarshalling happens.
In tar_format()
, functions like read
, write
,
marshal
, and unmarshal
must be perfectly pure
and perfectly self-sufficient.
They must load or namespace all their own packages,
and they must not depend on any custom user-defined
functions or objects in the global environment of your pipeline.
targets
converts each function to and from text,
so it must not rely on any data in the closure.
This disqualifies functions produced by Vectorize()
,
for example.
The write
function must write only a single file,
and the file it writes must not be a directory.
The functions to read and write the object
should not do any conversions on the object. That is the job
of the convert
argument. The convert
argument is a function
that accepts the object returned by the command of the target
and changes it into an acceptable format (e.g. can be
saved with the write
function). Working with the convert
function is best because it ensures the in-memory copy
of an object during the running pipeline session
is the same as the copy of the object that is saved
to disk.
Other storage:
tar_load()
,
tar_load_everything()
,
tar_objects()
,
tar_read()
# The following target is equivalent to the current superseded # tar_target(name, command(), format = "keras"). # An improved version of this would supply a `convert` argument # to handle NULL objects, which are returned by the target if it # errors and the error argument of tar_target() is "null". tar_target( name = keras_target, command = your_function(), format = tar_format( read = function(path) { keras::load_model_hdf5(path) }, write = function(object, path) { keras::save_model_hdf5(object = object, filepath = path) }, marshal = function(object) { keras::serialize_model(object) }, unmarshal = function(object) { keras::unserialize_model(object) } ) ) # And the following is equivalent to the current superseded # tar_target(name, torch::torch_tensor(seq_len(4)), format = "torch"), # except this version has a `convert` argument to handle # cases when `NULL` is returned (e.g. if the target errors out # and the `error` argument is "null" in tar_target() # or tar_option_set()) tar_target( name = torch_target, command = torch::torch_tensor(), format = tar_format( read = function(path) { torch::torch_load(path) }, write = function(object, path) { torch::torch_save(obj = object, path = path) }, marshal = function(object) { con <- rawConnection(raw(), open = "wr") on.exit(close(con)) torch::torch_save(object, con) rawConnectionValue(con) }, unmarshal = function(object) { con <- rawConnection(object, open = "r") on.exit(close(con)) torch::torch_load(con) } ) )
# The following target is equivalent to the current superseded # tar_target(name, command(), format = "keras"). # An improved version of this would supply a `convert` argument # to handle NULL objects, which are returned by the target if it # errors and the error argument of tar_target() is "null". tar_target( name = keras_target, command = your_function(), format = tar_format( read = function(path) { keras::load_model_hdf5(path) }, write = function(object, path) { keras::save_model_hdf5(object = object, filepath = path) }, marshal = function(object) { keras::serialize_model(object) }, unmarshal = function(object) { keras::unserialize_model(object) } ) ) # And the following is equivalent to the current superseded # tar_target(name, torch::torch_tensor(seq_len(4)), format = "torch"), # except this version has a `convert` argument to handle # cases when `NULL` is returned (e.g. if the target errors out # and the `error` argument is "null" in tar_target() # or tar_option_set()) tar_target( name = torch_target, command = torch::torch_tensor(), format = tar_format( read = function(path) { torch::torch_load(path) }, write = function(object, path) { torch::torch_save(obj = object, path = path) }, marshal = function(object) { con <- rawConnection(raw(), open = "wr") on.exit(close(con)) torch::torch_save(object, con) rawConnectionValue(con) }, unmarshal = function(object) { con <- rawConnection(object, open = "r") on.exit(close(con)) torch::torch_load(con) } ) )
Get the storage format of the target currently running.
tar_format_get()
tar_format_get()
This function is meant to be called inside a target in a
running pipeline. If it is called outside a target in the running
pipeline, it will return the default format given by
tar_option_get("format")
.
A character string, storage format of the target currently
running in the pipeline. If called outside a target in the running
pipeline, tar_format_get()
will return the default format given by
tar_option_get("format")
.
Other utilities:
tar_active()
,
tar_backoff()
,
tar_call()
,
tar_cancel()
,
tar_definition()
,
tar_described_as()
,
tar_envir()
,
tar_group()
,
tar_name()
,
tar_path()
,
tar_path_script()
,
tar_path_script_support()
,
tar_path_store()
,
tar_path_target()
,
tar_source()
,
tar_store()
tar_target(x, tar_format_get(), format = "qs")
tar_target(x, tar_format_get(), format = "qs")
Writes a GitHub Actions workflow file so the pipeline
runs on every push to GitHub. Historical runs accumulate in the
targets-runs
branch, and the latest output is restored before
tar_make()
so up-to-date targets do not rerun.
tar_github_actions( path = file.path(".github", "workflows", "targets.yaml"), ask = NULL )
tar_github_actions( path = file.path(".github", "workflows", "targets.yaml"), ask = NULL )
path |
Character of length 1, file path to write the GitHub Actions workflow file. |
ask |
Logical, whether to ask before writing if the workflow file
already exists. If |
Steps to set up continuous deployment:
Ensure your pipeline stays within the resource limitations of
GitHub Actions and repositories, both for storage and compute.
For storage, you may wish to reduce the burden with
an alternative repository (e.g. tar_target(..., repository = "aws")
).
Ensure Actions are enabled in your GitHub repository. You may have to visit the Settings tab.
Call targets::tar_renv(extras = character(0))
to expose hidden package dependencies.
Set up renv
for your project (with renv::init()
or renv::snapshot()
). Details at
https://rstudio.github.io/renv/articles/ci.html.
Commit the renv.lock
file to the main
(recommended)
or master
Git branch.
Run tar_github_actions()
to create the workflow file.
Commit this file to main
(recommended) or master
in Git.
Push your project to GitHub. Verify that a GitHub Actions
workflow runs and pushes results to targets-runs
.
Subsequent runs will only recompute the outdated targets.
Nothing (invisibly). This function writes a GitHub Actions workflow file as a side effect.
Other scripts:
tar_edit()
,
tar_helper()
,
tar_renv()
,
tar_script()
tar_github_actions(tempfile())
tar_github_actions(tempfile())
Analyze the pipeline defined in the target script file
(default: _targets.R
)
and visualize the directed acyclic graph of targets.
Unlike tar_visnetwork()
, tar_glimpse()
does not account for
metadata or progress information, which means the graph
renders faster. Also, tar_glimpse()
omits functions and other global
objects by default (but you can include them with targets_only = FALSE
).
tar_glimpse( targets_only = TRUE, names = NULL, shortcut = FALSE, allow = NULL, exclude = ".Random.seed", label = targets::tar_config_get("label"), label_width = targets::tar_config_get("label_width"), level_separation = targets::tar_config_get("level_separation"), degree_from = 1L, degree_to = 1L, zoom_speed = 1, physics = FALSE, callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function), envir = parent.frame(), script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
tar_glimpse( targets_only = TRUE, names = NULL, shortcut = FALSE, allow = NULL, exclude = ".Random.seed", label = targets::tar_config_get("label"), label_width = targets::tar_config_get("label_width"), level_separation = targets::tar_config_get("level_separation"), degree_from = 1L, degree_to = 1L, zoom_speed = 1, physics = FALSE, callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function), envir = parent.frame(), script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
targets_only |
Logical, whether to restrict the output to just targets
( |
names |
Names of targets. The graph visualization will operate
only on these targets (and unless |
shortcut |
Logical of length 1, how to interpret the |
allow |
Optional, define the set of allowable vertices in the graph.
Unlike |
exclude |
Optional, define the set of vertices to exclude from the graph.
Unlike |
label |
Character vector of one or more aesthetics to add to the
vertex labels. Currently, the only option is |
label_width |
Positive numeric of length 1, maximum width (in number of characters) of the node labels. |
level_separation |
Numeric of length 1,
|
degree_from |
Integer of length 1. When you click on a node,
the graph highlights a neighborhood of that node. |
degree_to |
Integer of length 1. When you click on a node,
the graph highlights a neighborhood of that node. |
zoom_speed |
Positive numeric of length 1, scaling factor on the zoom speed. Above 1 zooms faster than default, below 1 zooms slower than default. |
physics |
Logical of length 1, whether to implement interactive physics in the graph, e.g. edge elasticity. |
callr_function |
A function from |
callr_arguments |
A list of arguments to |
envir |
An environment, where to run the target R script
(default: The |
script |
Character of length 1, path to the
target script file. Defaults to |
store |
Character of length 1, path to the
|
A visNetwork
HTML widget object.
The dependency graph of a pipeline is a directed acyclic graph (DAG)
where each node indicates a target or global object and each directed
edge indicates where a downstream node depends on an upstream node.
The DAG is not always a tree, but it never contains a cycle because
no target is allowed to directly or indirectly depend on itself.
The dependency graph should show a natural progression of work from
left to right. targets
uses static code analysis to create the graph,
so the order of tar_target()
calls in the _targets.R
file
does not matter. However, targets does not support self-referential
loops or other cycles. For more information on the dependency graph,
please read
https://books.ropensci.org/targets/targets.html#dependencies.
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
Other visualize:
tar_mermaid()
,
tar_visnetwork()
if (identical(Sys.getenv("TAR_INTERACTIVE_EXAMPLES"), "true")) { tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set() list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1), tar_target(z, y1 + y2) ) }, ask = FALSE) tar_glimpse() tar_glimpse(allow = starts_with("y")) # see also any_of() }) }
if (identical(Sys.getenv("TAR_INTERACTIVE_EXAMPLES"), "true")) { tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set() list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1), tar_target(z, y1 + y2) ) }, ask = FALSE) tar_glimpse() tar_glimpse(allow = starts_with("y")) # see also any_of() }) }
Like dplyr::group_by()
, but for patterns.
tar_group()
allows you to map or cross over subsets of data frames.
Requires iteration = "group"
on the target. See the example.
tar_group(x)
tar_group(x)
x |
Grouped data frame from |
The goal of tar_group()
is to post-process the return value
of a data frame target to allow downstream targets to branch over
subsets of rows. It takes the groups defined by dplyr::group_by()
and translates that information into a special tar_group
column.
tar_group
is a vector of positive integers
from 1 to the number of groups. Rows with the same integer in tar_group
belong to the same group, and branches are arranged in increasing order
with respect to the integers in tar_group
.
The assignment of tar_group
integers to group levels
depends on the orderings inside the grouping variables and not the order
of rows in the dataset. dplyr::group_keys()
on the grouped data frame
shows how the grouping variables correspond to the integers in the
tar_group
column.
A data frame with a special tar_group
column that
targets
will use to find subsets of your data frame.
Other utilities:
tar_active()
,
tar_backoff()
,
tar_call()
,
tar_cancel()
,
tar_definition()
,
tar_described_as()
,
tar_envir()
,
tar_format_get()
,
tar_name()
,
tar_path()
,
tar_path_script()
,
tar_path_script_support()
,
tar_path_store()
,
tar_path_target()
,
tar_source()
,
tar_store()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN # The tar_group() function simply creates # a tar_group column to partition the rows # of a data frame. data.frame( x = seq_len(6), id = rep(letters[seq_len(3)], each = 2) ) %>% dplyr::group_by(id) %>% tar_group() # We use tar_group() below to branch over # subsets of a data frame defined with dplyr::group_by(). tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(dplyr) library(targets) library(tarchetypes) list( tar_target( data, data.frame( x = seq_len(6), id = rep(letters[seq_len(3)], each = 2) ) %>% group_by(id) %>% tar_group(), iteration = "group" ), tar_target( sums, sum(data$x), pattern = map(data), iteration = "vector" ) ) }) tar_make() tar_read(sums) # Should be c(3, 7, 11). }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN # The tar_group() function simply creates # a tar_group column to partition the rows # of a data frame. data.frame( x = seq_len(6), id = rep(letters[seq_len(3)], each = 2) ) %>% dplyr::group_by(id) %>% tar_group() # We use tar_group() below to branch over # subsets of a data frame defined with dplyr::group_by(). tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(dplyr) library(targets) library(tarchetypes) list( tar_target( data, data.frame( x = seq_len(6), id = rep(letters[seq_len(3)], each = 2) ) %>% group_by(id) %>% tar_group(), iteration = "group" ), tar_target( sums, sum(data$x), pattern = map(data), iteration = "vector" ) ) }) tar_make() tar_read(sums) # Should be c(3, 7, 11). }) }
Write a helper R script for a targets
pipeline.
Could be supporting functions or the target script file
(default: _targets.R
) itself.
tar_helper()
expects an unevaluated expression for the code
argument, whereas tar_helper_raw()
expects an evaluated
expression object.
tar_helper(path = NULL, code = NULL, tidy_eval = TRUE, envir = parent.frame()) tar_helper_raw(path = NULL, code = NULL)
tar_helper(path = NULL, code = NULL, tidy_eval = TRUE, envir = parent.frame()) tar_helper_raw(path = NULL, code = NULL)
path |
Character of length 1, path to write (or overwrite) |
code |
Code to write to |
tidy_eval |
Logical, whether to use tidy evaluation on |
envir |
Environment for tidy evaluation. |
tar_helper()
is a specialized version of tar_script()
with flexible paths and tidy evaluation.
NULL
(invisibly)
Other scripts:
tar_edit()
,
tar_github_actions()
,
tar_renv()
,
tar_script()
# Without tidy evaluation: path <- tempfile() tar_helper(path, code = x <- 1) tar_helper_raw(path, code = quote(x <- 1)) # equivalent writeLines(readLines(path)) # With tidy evaluation: y <- 123 tar_helper(path, x <- !!y) writeLines(readLines(path))
# Without tidy evaluation: path <- tempfile() tar_helper(path, code = x <- 1) tar_helper_raw(path, code = quote(x <- 1)) # equivalent writeLines(readLines(path)) # With tidy evaluation: y <- 123 tar_helper(path, x <- !!y) writeLines(readLines(path))
In Target Markdown, run the enclosed code only if interactive mode is activated. Otherwise, do not run the code.
tar_interactive(code)
tar_interactive(code)
code |
R code to run if Target Markdown interactive mode is turned on. |
Visit <https://books.ropensci.org/targets/literate-programming.html> to learn about Target Markdown and interactive mode.
If Target Markdown interactive mode is turned on,
the function returns the result of running the code.
Otherwise, the function invisibly returns NULL
.
Other Target Markdown:
tar_engine_knitr()
,
tar_noninteractive()
,
tar_toggle()
tar_interactive(message("In interactive mode."))
tar_interactive(message("In interactive mode."))
Delete the metadata of records in _targets/meta/meta
but keep the return values of targets in _targets/objects/
.
tar_invalidate(names, store = targets::tar_config_get("store"))
tar_invalidate(names, store = targets::tar_config_get("store"))
names |
Names of the targets to remove from the metadata list.
The object supplied to |
store |
Character of length 1, path to the
|
This function forces one or more targets to rerun
on the next tar_make()
, regardless of the cues and regardless
of how those targets are stored. After tar_invalidate()
,
you will still be able to locate the data files with tar_path_target()
and manually salvage them in an emergency.
However, tar_load()
and tar_read()
will not be able to
read the data into R, and subsequent calls to tar_make()
will attempt to rerun those targets.
For patterns recorded in the metadata, all the branches
will be invalidated. For patterns no longer in the metadata,
branches are left alone.
NULL
(invisibly).
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
Other clean:
tar_delete()
,
tar_destroy()
,
tar_prune()
,
tar_prune_list()
,
tar_unversion()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1), tar_target(z, y1 + y2) ) }, ask = FALSE) tar_make() tar_invalidate(starts_with("y")) # Only invalidates y1 and y2. tar_make() # y1 and y2 rerun but return same values, so z is up to date. }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1), tar_target(z, y1 + y2) ) }, ask = FALSE) tar_make() tar_invalidate(starts_with("y")) # Only invalidates y1 and y2. tar_make() # y1 and y2 rerun but return same values, so z is up to date. }) }
These functions help with metaprogramming in
packages built on top of targets
.
tar_deparse_language(expr) tar_deparse_safe(expr, collapse = "\n", backtick = TRUE) tar_tidy_eval(expr, envir, tidy_eval) tar_tidyselect_eval(names_quosure, choices)
tar_deparse_language(expr) tar_deparse_safe(expr, collapse = "\n", backtick = TRUE) tar_tidy_eval(expr, envir, tidy_eval) tar_tidyselect_eval(names_quosure, choices)
expr |
A language object to modify or deparse. |
collapse |
Character of length 1, delimiter in deparsing. |
backtick |
logical indicating whether symbolic names should be enclosed in backticks if they do not follow the standard syntax. |
envir |
An environment to find objects for tidy evaluation. |
tidy_eval |
Logical of length 1, whether to apply tidy evaluation. |
names_quosure |
An |
choices |
A character vector of choices for character elements returned by tidy evaluation. |
tar_deparse_language()
is a wrapper around tar_deparse_safe()
which leaves character vectors and NULL
objects alone,
which helps with subsequent user input validation.
tar_deparse_safe()
is a wrapper around base::deparse()
with a custom set of fast default settings and guardrails
to ensure the output always has length 1.
tar_tidy_eval()
applies tidy evaluation to a language object
and returns another language object.
tar_tidyselect_eval()
applies tidyselect
selection with
some special guardrails around NULL
inputs.
Other utilities to extend targets:
tar_assert
,
tar_condition
,
tar_test()
tar_deparse_language(quote(run_model()))
tar_deparse_language(quote(run_model()))
Load the return values of targets into the current environment
(or the environment of your choosing). For a typical target, the return
value lives in a file in _targets/objects/
. For dynamic files (i.e.
format = "file"
) the paths are loaded in place of the values.
tar_load_everything()
is shorthand for tar_load(everything())
to load all targets.
tar_load()
uses non-standard evaluation in the names
argument
(example: tar_load(names = everything())
), whereas tar_load_raw()
uses standard evaluation for names
(example: tar_load_raw(names = quote(everything()))
).
tar_load( names, branches = NULL, meta = targets::tar_meta(targets_only = TRUE, store = store), strict = TRUE, silent = FALSE, envir = parent.frame(), store = targets::tar_config_get("store") ) tar_load_raw( names, branches = NULL, meta = tar_meta(store = store), strict = TRUE, silent = FALSE, envir = parent.frame(), store = targets::tar_config_get("store") )
tar_load( names, branches = NULL, meta = targets::tar_meta(targets_only = TRUE, store = store), strict = TRUE, silent = FALSE, envir = parent.frame(), store = targets::tar_config_get("store") ) tar_load_raw( names, branches = NULL, meta = tar_meta(store = store), strict = TRUE, silent = FALSE, envir = parent.frame(), store = targets::tar_config_get("store") )
names |
Names of the targets to load.
The object supplied to |
branches |
Integer of indices of the branches to load for any targets that are patterns. |
meta |
Data frame of target metadata from |
strict |
Logical of length 1, whether to error out
if one of the selected targets is in the metadata
but cannot be loaded.
Set to |
silent |
Logical of length 1. Only relevant when
|
envir |
R environment in which to load target return values. |
store |
Character of length 1, directory path to the data store of the pipeline. |
Nothing.
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
Some buckets in Amazon S3 or Google Cloud Storage are "versioned",
which means they track historical versions of each data object.
If you use targets
with cloud storage
(https://books.ropensci.org/targets/cloud-storage.html)
and versioning is turned on, then targets
will record each
version of each target in its metadata.
Functions like tar_read()
and tar_load()
load the version recorded in the local metadata,
which may not be the same as the "current" version of the
object in the bucket. Likewise, functions tar_delete()
and tar_destroy()
only remove
the version ID of each target as recorded in the local
metadata.
If you want to interact with the latest version of an object instead of the version ID recorded in the local metadata, then you will need to delete the object from the metadata.
Make sure your local copy of the metadata is current and
up to date. You may need to run tar_meta_download()
or
tar_meta_sync()
first.
Run tar_unversion()
to remove the recorded version IDs of
your targets in the local metadata.
With the version IDs gone from the local metadata,
functions like tar_read()
and tar_destroy()
will use the
latest version of each target data object.
Optional: to back up the local metadata file with the version IDs
deleted, use tar_meta_upload()
.
Other storage:
tar_format()
,
tar_load_everything()
,
tar_objects()
,
tar_read()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1), tar_target(z, y1 + y2) ) }, ask = FALSE) tar_make() ls() # Does not have "y1", "y2", or "z". tar_load(starts_with("y")) ls() # Has "y1" and "y2" but not "z". tar_load_raw(quote(any_of("z"))) ls() # Has "y1", "y2", and "z". }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1), tar_target(z, y1 + y2) ) }, ask = FALSE) tar_make() ls() # Does not have "y1", "y2", or "z". tar_load(starts_with("y")) ls() # Has "y1" and "y2" but not "z". tar_load_raw(quote(any_of("z"))) ls() # Has "y1", "y2", and "z". }) }
Shorthand for tar_load(everything())
to load all
targets with entries in the metadata.
tar_load_everything( branches = NULL, meta = tar_meta(targets_only = TRUE, store = store), strict = TRUE, silent = FALSE, envir = parent.frame(), store = targets::tar_config_get("store") )
tar_load_everything( branches = NULL, meta = tar_meta(targets_only = TRUE, store = store), strict = TRUE, silent = FALSE, envir = parent.frame(), store = targets::tar_config_get("store") )
branches |
Integer of indices of the branches to load for any targets that are patterns. |
meta |
Data frame of target metadata from |
strict |
Logical of length 1, whether to error out
if one of the selected targets is in the metadata
but cannot be loaded.
Set to |
silent |
Logical of length 1. Only relevant when
|
envir |
R environment in which to load target return values. |
store |
Character of length 1, directory path to the data store of the pipeline. |
Nothing.
Other storage:
tar_format()
,
tar_load()
,
tar_objects()
,
tar_read()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1), tar_target(z, y1 + y2) ) }, ask = FALSE) tar_make() ls() # Does not have "y1", "y2", or "z". tar_load_everything() ls() # Has "y1", "y2", and "z". }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1), tar_target(z, y1 + y2) ) }, ask = FALSE) tar_make() ls() # Does not have "y1", "y2", or "z". tar_load_everything() ls() # Has "y1", "y2", and "z". }) }
Load user-defined packages, functions, global objects, and
settings defined in the target script file (default: _targets.R
).
This function is for debugging, testing, and prototyping only.
It is not recommended for use inside a serious pipeline
or to report the results of a serious pipeline.
tar_load_globals( envir = parent.frame(), script = targets::tar_config_get("script") )
tar_load_globals( envir = parent.frame(), script = targets::tar_config_get("script") )
envir |
Environment to source the target script (default: |
script |
Character of length 1, path to the target script file
that defines the pipeline ( |
This function first sources the target script file
(default: _targets.R
)
to load all user-defined functions, global objects, and settings
into the current R process. Then, it loads all the packages defined
in tar_option_get("packages")
(default: (.packages())
)
using library()
with lib.loc
defined in tar_option_get("library")
(default: NULL
).
NULL
(invisibly).
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
Other debug:
tar_traceback()
,
tar_workspace()
,
tar_workspaces()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set(packages = "callr") analyze_data <- function(data) { summary(data) } list( tar_target(x, 1 + 1), tar_target(y, 1 + 1) ) }, ask = FALSE) tar_load_globals() print(analyze_data) print("callr" %in% (.packages())) }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set(packages = "callr") analyze_data <- function(data) { summary(data) } list( tar_target(x, 1 + 1), tar_target(y, 1 + 1) ) }, ask = FALSE) tar_load_globals() print(analyze_data) print("callr" %in% (.packages())) }) }
Run the pipeline you defined in the targets
script file (default: _targets.R
). tar_make()
runs the correct targets in the correct order and stores the return
values in _targets/objects/
. Use tar_read()
to read a target
back into R, and see
https://docs.ropensci.org/targets/reference/index.html#clean
to manage output files.
tar_make( names = NULL, shortcut = targets::tar_config_get("shortcut"), reporter = targets::tar_config_get("reporter_make"), seconds_meta_append = targets::tar_config_get("seconds_meta_append"), seconds_meta_upload = targets::tar_config_get("seconds_meta_upload"), seconds_reporter = targets::tar_config_get("seconds_reporter"), seconds_interval = targets::tar_config_get("seconds_interval"), callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function, reporter), envir = parent.frame(), script = targets::tar_config_get("script"), store = targets::tar_config_get("store"), garbage_collection = NULL, use_crew = targets::tar_config_get("use_crew"), terminate_controller = TRUE, as_job = targets::tar_config_get("as_job") )
tar_make( names = NULL, shortcut = targets::tar_config_get("shortcut"), reporter = targets::tar_config_get("reporter_make"), seconds_meta_append = targets::tar_config_get("seconds_meta_append"), seconds_meta_upload = targets::tar_config_get("seconds_meta_upload"), seconds_reporter = targets::tar_config_get("seconds_reporter"), seconds_interval = targets::tar_config_get("seconds_interval"), callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function, reporter), envir = parent.frame(), script = targets::tar_config_get("script"), store = targets::tar_config_get("store"), garbage_collection = NULL, use_crew = targets::tar_config_get("use_crew"), terminate_controller = TRUE, as_job = targets::tar_config_get("as_job") )
names |
Names of the targets to run or check. Set to |
shortcut |
Logical of length 1, how to interpret the |
reporter |
Character of length 1, name of the reporter to use.
Controls how messages are printed as targets run in the pipeline.
Defaults to
|
seconds_meta_append |
Positive numeric of length 1 with the minimum
number of seconds between saves to the local metadata and progress files
in the data store.
Higher values generally make the pipeline run faster, but unsaved
work (in the event of a crash) is not up to date.
When the pipeline ends,
all the metadata and progress data is saved immediately,
regardless of |
seconds_meta_upload |
Positive numeric of length 1 with the minimum
number of seconds between uploads of the metadata and progress data
to the cloud
(see https://books.ropensci.org/targets/cloud-storage.html).
Higher values generally make the pipeline run faster, but unsaved
work (in the event of a crash) may not be backed up to the cloud.
When the pipeline ends,
all the metadata and progress data is uploaded immediately,
regardless of |
seconds_reporter |
Positive numeric of length 1 with the minimum number of seconds between times when the reporter prints progress messages to the R console. |
seconds_interval |
Deprecated on 2023-08-24 (version 1.2.2.9001).
Use |
callr_function |
A function from |
callr_arguments |
A list of arguments to |
envir |
An environment, where to run the target R script
(default: The |
script |
Character of length 1, path to the
target script file. Defaults to |
store |
Character of length 1, path to the
|
garbage_collection |
Deprecated. Use the |
use_crew |
Logical of length 1, whether to use |
terminate_controller |
Logical of length 1. For a |
as_job |
|
NULL
except if callr_function = callr::r_bg()
, in which case
a handle to the callr
background process is returned. Either way,
the value is invisibly returned.
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
Other pipeline:
tar_make_clustermq()
,
tar_make_future()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1), tar_target(z, y1 + y2) ) }, ask = FALSE) tar_make(starts_with("y")) # Only processes y1 and y2. # Distributed computing with crew: if (requireNamespace("crew", quietly = TRUE)) { tar_script({ library(targets) library(tarchetypes) tar_option_set(controller = crew::controller_local()) list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1), tar_target(z, y1 + y2) ) }, ask = FALSE) tar_make() } }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1), tar_target(z, y1 + y2) ) }, ask = FALSE) tar_make(starts_with("y")) # Only processes y1 and y2. # Distributed computing with crew: if (requireNamespace("crew", quietly = TRUE)) { tar_script({ library(targets) library(tarchetypes) tar_option_set(controller = crew::controller_local()) list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1), tar_target(z, y1 + y2) ) }, ask = FALSE) tar_make() } }) }
clustermq
workers. Superseded. Use tar_make()
with crew
:
https://books.ropensci.org/targets/crew.html.
tar_make_clustermq( names = NULL, shortcut = targets::tar_config_get("shortcut"), reporter = targets::tar_config_get("reporter_make"), seconds_meta_append = targets::tar_config_get("seconds_meta_append"), seconds_meta_upload = targets::tar_config_get("seconds_meta_upload"), seconds_reporter = targets::tar_config_get("seconds_reporter"), seconds_interval = targets::tar_config_get("seconds_interval"), workers = targets::tar_config_get("workers"), log_worker = FALSE, callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function, reporter), envir = parent.frame(), script = targets::tar_config_get("script"), store = targets::tar_config_get("store"), garbage_collection = NULL )
tar_make_clustermq( names = NULL, shortcut = targets::tar_config_get("shortcut"), reporter = targets::tar_config_get("reporter_make"), seconds_meta_append = targets::tar_config_get("seconds_meta_append"), seconds_meta_upload = targets::tar_config_get("seconds_meta_upload"), seconds_reporter = targets::tar_config_get("seconds_reporter"), seconds_interval = targets::tar_config_get("seconds_interval"), workers = targets::tar_config_get("workers"), log_worker = FALSE, callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function, reporter), envir = parent.frame(), script = targets::tar_config_get("script"), store = targets::tar_config_get("store"), garbage_collection = NULL )
names |
Names of the targets to run or check. Set to |
shortcut |
Logical of length 1, how to interpret the |
reporter |
Character of length 1, name of the reporter to use.
Controls how messages are printed as targets run in the pipeline.
Defaults to
|
seconds_meta_append |
Positive numeric of length 1 with the minimum
number of seconds between saves to the local metadata and progress files
in the data store.
Higher values generally make the pipeline run faster, but unsaved
work (in the event of a crash) is not up to date.
When the pipeline ends,
all the metadata and progress data is saved immediately,
regardless of |
seconds_meta_upload |
Positive numeric of length 1 with the minimum
number of seconds between uploads of the metadata and progress data
to the cloud
(see https://books.ropensci.org/targets/cloud-storage.html).
Higher values generally make the pipeline run faster, but unsaved
work (in the event of a crash) may not be backed up to the cloud.
When the pipeline ends,
all the metadata and progress data is uploaded immediately,
regardless of |
seconds_reporter |
Positive numeric of length 1 with the minimum number of seconds between times when the reporter prints progress messages to the R console. |
seconds_interval |
Deprecated on 2023-08-24 (version 1.2.2.9001).
Use |
workers |
Positive integer, number of persistent |
log_worker |
Logical, whether to write a log file for each worker.
Same as the |
callr_function |
A function from |
callr_arguments |
A list of arguments to |
envir |
An environment, where to run the target R script
(default: The |
script |
Character of length 1, path to the
target script file. Defaults to |
store |
Character of length 1, path to the
|
garbage_collection |
Deprecated. Use the |
tar_make_clustermq()
is like tar_make()
except that targets
run in parallel on persistent workers. A persistent worker is an
R process that runs for a long time and runs multiple
targets during its lifecycle. Persistent
workers launch as soon as the pipeline reaches an outdated
target with deployment = "worker"
, and they keep running
until the pipeline starts to wind down.
To configure tar_make_clustermq()
, you must configure
the clustermq
package. To do this, set global options
clustermq.scheduler
and clustermq.template
inside the target script file (default: _targets.R
).
To read more about configuring clustermq
for your scheduler, visit
https://mschubert.github.io/clustermq/articles/userguide.html#configuration
or https://books.ropensci.org/targets/hpc.html.
clustermq
is not a strict dependency of targets
,
so you must install clustermq
yourself.
NULL
except if callr_function = callr::r_bg()
, in which case
a handle to the callr
background process is returned. Either way,
the value is invisibly returned.
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
Other pipeline:
tar_make()
,
tar_make_future()
if (!identical(tolower(Sys.info()[["sysname"]]), "windows")) { if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) options(clustermq.scheduler = "multiprocess") # Does not work on Windows. tar_option_set() list(tar_target(x, 1 + 1)) }, ask = FALSE) tar_make_clustermq() }) } }
if (!identical(tolower(Sys.info()[["sysname"]]), "windows")) { if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) options(clustermq.scheduler = "multiprocess") # Does not work on Windows. tar_option_set() list(tar_target(x, 1 + 1)) }, ask = FALSE) tar_make_clustermq() }) } }
future
workers. Superseded. Use tar_make()
with crew
:
https://books.ropensci.org/targets/crew.html.
tar_make_future( names = NULL, shortcut = targets::tar_config_get("shortcut"), reporter = targets::tar_config_get("reporter_make"), seconds_meta_append = targets::tar_config_get("seconds_meta_append"), seconds_meta_upload = targets::tar_config_get("seconds_meta_upload"), seconds_reporter = targets::tar_config_get("seconds_reporter"), seconds_interval = targets::tar_config_get("seconds_interval"), workers = targets::tar_config_get("workers"), callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function, reporter), envir = parent.frame(), script = targets::tar_config_get("script"), store = targets::tar_config_get("store"), garbage_collection = NULL )
tar_make_future( names = NULL, shortcut = targets::tar_config_get("shortcut"), reporter = targets::tar_config_get("reporter_make"), seconds_meta_append = targets::tar_config_get("seconds_meta_append"), seconds_meta_upload = targets::tar_config_get("seconds_meta_upload"), seconds_reporter = targets::tar_config_get("seconds_reporter"), seconds_interval = targets::tar_config_get("seconds_interval"), workers = targets::tar_config_get("workers"), callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function, reporter), envir = parent.frame(), script = targets::tar_config_get("script"), store = targets::tar_config_get("store"), garbage_collection = NULL )
names |
Names of the targets to run or check. Set to |
shortcut |
Logical of length 1, how to interpret the |
reporter |
Character of length 1, name of the reporter to use.
Controls how messages are printed as targets run in the pipeline.
Defaults to
|
seconds_meta_append |
Positive numeric of length 1 with the minimum
number of seconds between saves to the local metadata and progress files
in the data store.
Higher values generally make the pipeline run faster, but unsaved
work (in the event of a crash) is not up to date.
When the pipeline ends,
all the metadata and progress data is saved immediately,
regardless of |
seconds_meta_upload |
Positive numeric of length 1 with the minimum
number of seconds between uploads of the metadata and progress data
to the cloud
(see https://books.ropensci.org/targets/cloud-storage.html).
Higher values generally make the pipeline run faster, but unsaved
work (in the event of a crash) may not be backed up to the cloud.
When the pipeline ends,
all the metadata and progress data is uploaded immediately,
regardless of |
seconds_reporter |
Positive numeric of length 1 with the minimum number of seconds between times when the reporter prints progress messages to the R console. |
seconds_interval |
Deprecated on 2023-08-24 (version 1.2.2.9001).
Use |
workers |
Positive integer, maximum number of transient
|
callr_function |
A function from |
callr_arguments |
A list of arguments to |
envir |
An environment, where to run the target R script
(default: The |
script |
Character of length 1, path to the
target script file. Defaults to |
store |
Character of length 1, path to the
|
garbage_collection |
Deprecated. Use the |
This function is like tar_make()
except that targets
run in parallel with transient future
workers. It requires
that you declare your future::plan()
inside the
target script file (default: _targets.R
).
future
is not a strict dependency of targets
,
so you must install future
yourself.
To configure tar_make_future()
with a computing cluster,
see the future.batchtools
package documentation.
NULL
except if callr_function = callr::r_bg()
, in which case
a handle to the callr
background process is returned. Either way,
the value is invisibly returned.
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
Other pipeline:
tar_make()
,
tar_make_clustermq()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) future::plan(future::multisession, workers = 2) list( tar_target(x, 1 + 1), tar_target(y, 1 + 1) ) }, ask = FALSE) tar_make_future() }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) future::plan(future::multisession, workers = 2) list( tar_target(x, 1 + 1), tar_target(y, 1 + 1) ) }, ask = FALSE) tar_make_future() }) }
Along with tar_visnetwork()
and tar_glimpse()
,
tar_manifest()
helps check that you constructed your pipeline correctly.
tar_manifest( names = NULL, fields = tidyselect::any_of(c("name", "command", "pattern", "description")), drop_missing = TRUE, callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function), envir = parent.frame(), script = targets::tar_config_get("script") )
tar_manifest( names = NULL, fields = tidyselect::any_of(c("name", "command", "pattern", "description")), drop_missing = TRUE, callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function), envir = parent.frame(), script = targets::tar_config_get("script") )
names |
Names of the targets to show. Set to |
fields |
Names of the fields, or columns, to show. Set to
|
drop_missing |
Logical of length 1, whether to automatically omit empty columns and columns with all missing values. |
callr_function |
A function from |
callr_arguments |
A list of arguments to |
envir |
An environment, where to run the target R script
(default: The |
script |
Character of length 1, path to the
target script file. Defaults to |
A data frame of information about the targets in the pipeline. Rows appear in topological order (the order they will run without any influence from parallel computing or priorities).
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
Other inspect:
tar_deps()
,
tar_network()
,
tar_outdated()
,
tar_sitrep()
,
tar_validate()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set() list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1), tar_target(z, y1 + y2), tar_target(m, z, pattern = map(z), description = "branching over z"), tar_target(c, z, pattern = cross(z)) ) }, ask = FALSE) tar_manifest() tar_manifest(fields = any_of(c("name", "command"))) tar_manifest(fields = any_of("command")) tar_manifest(fields = starts_with("cue")) }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set() list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1), tar_target(z, y1 + y2), tar_target(m, z, pattern = map(z), description = "branching over z"), tar_target(c, z, pattern = cross(z)) ) }, ask = FALSE) tar_manifest() tar_manifest(fields = any_of(c("name", "command"))) tar_manifest(fields = any_of("command")) tar_manifest(fields = starts_with("cue")) }) }
mermaid.js
dependency graph. Visualize the dependency graph with a static mermaid.js
graph.
tar_mermaid( targets_only = FALSE, names = NULL, shortcut = FALSE, allow = NULL, exclude = ".Random.seed", outdated = TRUE, label = targets::tar_config_get("label"), label_width = targets::tar_config_get("label_width"), legend = TRUE, color = TRUE, reporter = targets::tar_config_get("reporter_outdated"), seconds_reporter = targets::tar_config_get("seconds_reporter"), callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function), envir = parent.frame(), script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
tar_mermaid( targets_only = FALSE, names = NULL, shortcut = FALSE, allow = NULL, exclude = ".Random.seed", outdated = TRUE, label = targets::tar_config_get("label"), label_width = targets::tar_config_get("label_width"), legend = TRUE, color = TRUE, reporter = targets::tar_config_get("reporter_outdated"), seconds_reporter = targets::tar_config_get("seconds_reporter"), callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function), envir = parent.frame(), script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
targets_only |
Logical, whether to restrict the output to just targets
( |
names |
Names of targets. The graph visualization will operate
only on these targets (and unless |
shortcut |
Logical of length 1, how to interpret the |
allow |
Optional, define the set of allowable vertices in the graph.
Unlike |
exclude |
Optional, define the set of vertices to exclude from the graph.
Unlike |
outdated |
Logical, whether to show colors to distinguish outdated
targets from up-to-date targets. (Global functions and objects
still show these colors.) Looking for outdated targets
takes a lot of time for large pipelines with lots of branches,
and setting |
label |
Character vector of one or more aesthetics to add to the
vertex labels. Can contain |
label_width |
Positive numeric of length 1, maximum width (in number of characters) of the node labels. |
legend |
Logical of length 1, whether to display the legend. |
color |
Logical of length 1, whether to color the graph vertices by status. |
reporter |
Character of length 1, name of the reporter to use. Controls how messages are printed as targets are checked. Choices:
|
seconds_reporter |
Positive numeric of length 1 with the minimum number of seconds between times when the reporter prints progress messages to the R console. |
callr_function |
A function from |
callr_arguments |
A list of arguments to |
envir |
An environment, where to run the target R script
(default: The |
script |
Character of length 1, path to the
target script file. Defaults to |
store |
Character of length 1, path to the
|
mermaid.js
is a JavaScript library for constructing
static visualizations of graphs.
A character vector of lines of code of the mermaid.js
graph.
You can visualize the graph by copying the text
into a public online mermaid.js
editor or a mermaid
GitHub code chunk
(https://github.blog/2022-02-14-include-diagrams-markdown-files-mermaid/
).
Alternatively, you can render it inline in an R Markdown or Quarto
document using a results = "asis"
code chunk like so:
```{r, results = "asis", echo = FALSE} cat(c("```{mermaid}", targets::tar_mermaid(), "```"), sep = "\n") ```
The dependency graph of a pipeline is a directed acyclic graph (DAG)
where each node indicates a target or global object and each directed
edge indicates where a downstream node depends on an upstream node.
The DAG is not always a tree, but it never contains a cycle because
no target is allowed to directly or indirectly depend on itself.
The dependency graph should show a natural progression of work from
left to right. targets
uses static code analysis to create the graph,
so the order of tar_target()
calls in the _targets.R
file
does not matter. However, targets does not support self-referential
loops or other cycles. For more information on the dependency graph,
please read
https://books.ropensci.org/targets/targets.html#dependencies.
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
Other visualize:
tar_glimpse()
,
tar_visnetwork()
if (identical(Sys.getenv("TAR_INTERACTIVE_EXAMPLES"), "true")) { tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set() list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1), tar_target(z, y1 + y2, description = "sum of two other sums") ) }) # Copy the text into a mermaid.js online editor # or a mermaid GitHub code chunk: tar_mermaid() }) }
if (identical(Sys.getenv("TAR_INTERACTIVE_EXAMPLES"), "true")) { tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set() list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1), tar_target(z, y1 + y2, description = "sum of two other sums") ) }) # Copy the text into a mermaid.js online editor # or a mermaid GitHub code chunk: tar_mermaid() }) }
Read the metadata of all recorded targets and global objects.
tar_meta( names = NULL, fields = NULL, targets_only = FALSE, complete_only = FALSE, store = targets::tar_config_get("store") )
tar_meta( names = NULL, fields = NULL, targets_only = FALSE, complete_only = FALSE, store = targets::tar_config_get("store") )
names |
Optional, names of the targets. If supplied, |
fields |
Optional, names of columns/fields to select. If supplied,
|
targets_only |
Logical, whether to just show information about targets or also return metadata on functions and other global objects. |
complete_only |
Logical, whether to return only complete rows
(no |
store |
Character of length 1, path to the
|
A metadata row only updates when the target completes.
tar_progress()
shows information on targets that are running.
That is why the number of branches may disagree between tar_meta()
and tar_progress()
for actively running pipelines.
A data frame with one row per target/object and the selected fields.
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
Metadata files help targets
read data objects and decide if the pipeline is up to date.
Usually, these metadata files live in the local
_targets/meta/
folder in your project, e.g. _targets/meta/meta
.
But in addition, if you set repository
to anything other than
"local"
in tar_option_set()
in _targets.R
, then tar_make()
continuously uploads the metadata files to the bucket you specify
in resources
. tar_meta_delete()
will delete those files from the
cloud, and so will tar_destroy()
if destroy
is
set to either "all"
or "cloud"
.
Other functions in targets
, such as tar_meta()
,
tar_visnetwork()
, tar_outdated()
, and tar_invalidate()
,
use the local metadata only and ignore the copies on the cloud.
So if you are working on a different computer than the
one running the pipeline, you will need to download the cloud metadata
to your current machine using tar_meta_download()
. Other functions
tar_meta_upload()
, tar_meta_sync()
, and tar_meta_delete()
also manage metadata across the cloud and the local file system.
Remarks:
The repository_meta
option in tar_option_set()
is actually
what controls where the metadata lives in the cloud, but it defaults
to repository
.
Like tar_make()
, tar_make_future()
and tar_make_clustermq()
also continuously upload metadata files to the cloud bucket
specified in resources
.
tar_meta_download()
and related functions need to run _targets.R
to detect tar_option_set()
options repository_meta
and resources
,
so please be aware of side effects that may happen running your
custom _targets.R
file.
Other metadata:
tar_meta_delete()
,
tar_meta_download()
,
tar_meta_sync()
,
tar_meta_upload()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(x, seq_len(2)), tar_target(y, 2 * x, pattern = map(x)) ) }, ask = FALSE) tar_make() tar_meta() tar_meta(starts_with("y_")) # see also any_of() }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(x, seq_len(2)), tar_target(y, 2 * x, pattern = map(x)) ) }, ask = FALSE) tar_make() tar_meta() tar_meta(starts_with("y_")) # see also any_of() }) }
Delete the project metadata files from the local file system, the cloud, or both.
tar_meta_delete( meta = TRUE, progress = TRUE, process = TRUE, crew = TRUE, verbose = TRUE, delete = "all", script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
tar_meta_delete( meta = TRUE, progress = TRUE, process = TRUE, crew = TRUE, verbose = TRUE, delete = "all", script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
meta |
Logical of length 1, whether to process the main metadata file
at |
progress |
Logical of length 1, whether to process the progress file at
|
process |
Logical of length 1, whether to process the process file at
|
crew |
Logical of length 1, whether to process the |
verbose |
Logical of length 1, whether to print informative console messages. |
delete |
Character of length 1, which location to delete the files from.
Choose |
script |
Character of length 1, path to the
target script file. Defaults to |
store |
Character of length 1, path to the
|
Other metadata:
tar_meta()
,
tar_meta_download()
,
tar_meta_sync()
,
tar_meta_upload()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set( resources = tar_resources( aws = tar_resources_aws( bucket = "YOUR_BUCKET_NAME", prefix = "YOUR_PROJECT_NAME" ) ), repository = "aws" ) list( tar_target(x, data.frame(x = seq_len(2), y = seq_len(2))) ) }) tar_make() tar_meta_delete() }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set( resources = tar_resources( aws = tar_resources_aws( bucket = "YOUR_BUCKET_NAME", prefix = "YOUR_PROJECT_NAME" ) ), repository = "aws" ) list( tar_target(x, data.frame(x = seq_len(2), y = seq_len(2))) ) }) tar_make() tar_meta_delete() }) }
Download metadata files from the cloud location
(repository, bucket, and prefix) you set in
tar_option_set()
in _targets.R
.
tar_meta_download( meta = TRUE, progress = TRUE, process = TRUE, crew = TRUE, verbose = TRUE, strict = FALSE, script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
tar_meta_download( meta = TRUE, progress = TRUE, process = TRUE, crew = TRUE, verbose = TRUE, strict = FALSE, script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
meta |
Logical of length 1, whether to process the main metadata file
at |
progress |
Logical of length 1, whether to process the progress file at
|
process |
Logical of length 1, whether to process the process file at
|
crew |
Logical of length 1, whether to process the |
verbose |
Logical of length 1, whether to print informative console messages. |
strict |
Logical of length 1. |
script |
Character of length 1, path to the
target script file. Defaults to |
store |
Character of length 1, path to the
|
Other metadata:
tar_meta()
,
tar_meta_delete()
,
tar_meta_sync()
,
tar_meta_upload()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set( resources = tar_resources( aws = tar_resources_aws( bucket = "YOUR_BUCKET_NAME", prefix = "YOUR_PROJECT_NAME" ) ), repository = "aws" ) list( tar_target(x, data.frame(x = seq_len(2), y = seq_len(2))) ) }) tar_make() tar_meta_download() }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set( resources = tar_resources( aws = tar_resources_aws( bucket = "YOUR_BUCKET_NAME", prefix = "YOUR_PROJECT_NAME" ) ), repository = "aws" ) list( tar_target(x, data.frame(x = seq_len(2), y = seq_len(2))) ) }) tar_make() tar_meta_download() }) }
Synchronize metadata in a cloud bucket with metadata in the local data store.
tar_meta_sync( meta = TRUE, progress = TRUE, process = TRUE, crew = TRUE, verbose = TRUE, prefer_local = TRUE, script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
tar_meta_sync( meta = TRUE, progress = TRUE, process = TRUE, crew = TRUE, verbose = TRUE, prefer_local = TRUE, script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
meta |
Logical of length 1, whether to process the main metadata file
at |
progress |
Logical of length 1, whether to process the progress file at
|
process |
Logical of length 1, whether to process the process file at
|
crew |
Logical of length 1, whether to process the |
verbose |
Logical of length 1, whether to print informative console messages. |
prefer_local |
Logical of length 1 to control which copy of each
metadata file takes precedence if the local hash and cloud hash
are different but the time stamps are the same. Set to |
script |
Character of length 1, path to the
target script file. Defaults to |
store |
Character of length 1, path to the
|
tar_meta_sync()
synchronizes the local and cloud copies
of all the metadata files of the pipeline so that both have the
most recent copy. For each metadata file,
if the local file does not exist or is older than the cloud file,
then the cloud file is downloaded to the local file path.
Conversely, if the cloud file is older or does not exist, then the local
file is uploaded to the cloud. If the time stamps of these files are
equal, use the prefer_local
argument to determine
which copy takes precedence.
Other metadata:
tar_meta()
,
tar_meta_delete()
,
tar_meta_download()
,
tar_meta_upload()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set( resources = tar_resources( aws = tar_resources_aws( bucket = "YOUR_BUCKET_NAME", prefix = "YOUR_PROJECT_NAME" ) ), repository = "aws" ) list( tar_target(x, data.frame(x = seq_len(2), y = seq_len(2))) ) }, ask = FALSE) tar_make() tar_meta_sync() }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set( resources = tar_resources( aws = tar_resources_aws( bucket = "YOUR_BUCKET_NAME", prefix = "YOUR_PROJECT_NAME" ) ), repository = "aws" ) list( tar_target(x, data.frame(x = seq_len(2), y = seq_len(2))) ) }, ask = FALSE) tar_make() tar_meta_sync() }) }
Upload local metadata files to the cloud location
(repository, bucket, and prefix) you set in
tar_option_set()
in _targets.R
.
tar_meta_upload( meta = TRUE, progress = TRUE, process = TRUE, crew = TRUE, verbose = TRUE, strict = FALSE, script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
tar_meta_upload( meta = TRUE, progress = TRUE, process = TRUE, crew = TRUE, verbose = TRUE, strict = FALSE, script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
meta |
Logical of length 1, whether to process the main metadata file
at |
progress |
Logical of length 1, whether to process the progress file at
|
process |
Logical of length 1, whether to process the process file at
|
crew |
Logical of length 1, whether to process the |
verbose |
Logical of length 1, whether to print informative console messages. |
strict |
Logical of length 1. |
script |
Character of length 1, path to the
target script file. Defaults to |
store |
Character of length 1, path to the
|
Other metadata:
tar_meta()
,
tar_meta_delete()
,
tar_meta_download()
,
tar_meta_sync()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set( resources = tar_resources( aws = tar_resources_aws( bucket = "YOUR_BUCKET_NAME", prefix = "YOUR_PROJECT_NAME" ) ), repository = "aws" ) list( tar_target(x, data.frame(x = seq_len(2), y = seq_len(2))) ) }, ask = FALSE) tar_make() tar_meta_upload() }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set( resources = tar_resources( aws = tar_resources_aws( bucket = "YOUR_BUCKET_NAME", prefix = "YOUR_PROJECT_NAME" ) ), repository = "aws" ) list( tar_target(x, data.frame(x = seq_len(2), y = seq_len(2))) ) }, ask = FALSE) tar_make() tar_meta_upload() }) }
Get the name of the target currently running.
tar_name(default = "target")
tar_name(default = "target")
default |
Character, value to return if |
Character of length 1. If called inside a pipeline,
tar_name()
returns the name of the target currently running.
Otherwise, the return value is default
.
Other utilities:
tar_active()
,
tar_backoff()
,
tar_call()
,
tar_cancel()
,
tar_definition()
,
tar_described_as()
,
tar_envir()
,
tar_format_get()
,
tar_group()
,
tar_path()
,
tar_path_script()
,
tar_path_script_support()
,
tar_path_store()
,
tar_path_target()
,
tar_source()
,
tar_store()
tar_name() tar_name(default = "custom_target_name") if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script(tar_target(x, tar_name()), ask = FALSE) tar_make() tar_read(x) }) }
tar_name() tar_name(default = "custom_target_name") if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script(tar_target(x, tar_name()), ask = FALSE) tar_make() tar_read(x) }) }
Analyze the pipeline defined in the target script file
(default: _targets.R
)
and return the vertices and edges of the directed acyclic graph
of dependency relationships.
tar_network( targets_only = FALSE, names = NULL, shortcut = FALSE, allow = NULL, exclude = NULL, outdated = TRUE, reporter = targets::tar_config_get("reporter_outdated"), seconds_reporter = targets::tar_config_get("seconds_reporter"), callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function, reporter), envir = parent.frame(), script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
tar_network( targets_only = FALSE, names = NULL, shortcut = FALSE, allow = NULL, exclude = NULL, outdated = TRUE, reporter = targets::tar_config_get("reporter_outdated"), seconds_reporter = targets::tar_config_get("seconds_reporter"), callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function, reporter), envir = parent.frame(), script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
targets_only |
Logical, whether to restrict the output to just targets
( |
names |
Names of targets. The graph visualization will operate
only on these targets (and unless |
shortcut |
Logical of length 1, how to interpret the |
allow |
Optional, define the set of allowable vertices in the graph.
Unlike |
exclude |
Optional, define the set of vertices to exclude from the graph.
Unlike |
outdated |
Logical, whether to show colors to distinguish outdated
targets from up-to-date targets. (Global functions and objects
still show these colors.) Looking for outdated targets
takes a lot of time for large pipelines with lots of branches,
and setting |
reporter |
Character of length 1, name of the reporter to use. Controls how messages are printed as targets are checked. Choices:
|
seconds_reporter |
Positive numeric of length 1 with the minimum number of seconds between times when the reporter prints progress messages to the R console. |
callr_function |
A function from |
callr_arguments |
A list of arguments to |
envir |
An environment, where to run the target R script
(default: The |
script |
Character of length 1, path to the
target script file. Defaults to |
store |
Character of length 1, path to the
|
A list with two data frames: vertices
and edges
. The
vertices data frame has one row per target and columns with
the type of the target or object (stem, branch, map, cross, function,
or object), each target's description, and each target's status
(up to date, outdated, dispatched, completed, canceled, or errored),
as well as metadata if available (seconds of runtime, bytes of
storage, and number of dynamic branches).
The edges data frame has one row for every edge and columns to
and
from
to mark the starting and terminating vertices.
The dependency graph of a pipeline is a directed acyclic graph (DAG)
where each node indicates a target or global object and each directed
edge indicates where a downstream node depends on an upstream node.
The DAG is not always a tree, but it never contains a cycle because
no target is allowed to directly or indirectly depend on itself.
The dependency graph should show a natural progression of work from
left to right. targets
uses static code analysis to create the graph,
so the order of tar_target()
calls in the _targets.R
file
does not matter. However, targets does not support self-referential
loops or other cycles. For more information on the dependency graph,
please read
https://books.ropensci.org/targets/targets.html#dependencies.
Other inspect:
tar_deps()
,
tar_manifest()
,
tar_outdated()
,
tar_sitrep()
,
tar_validate()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set() list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1, description = "y2 info"), tar_target(z, y1 + y2, description = "z info") ) }, ask = FALSE) tar_network(targets_only = TRUE) }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set() list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1, description = "y2 info"), tar_target(z, y1 + y2, description = "z info") ) }, ask = FALSE) tar_network(targets_only = TRUE) }) }
List all the targets whose last successful run occurred after a certain point in time.
tar_newer( time, names = NULL, inclusive = FALSE, store = targets::tar_config_get("store") )
tar_newer( time, names = NULL, inclusive = FALSE, store = targets::tar_config_get("store") )
time |
A |
names |
Names of eligible targets. Targets excluded from |
inclusive |
Logical of length 1, whether to include targets
completed at exactly the |
store |
Character of length 1, path to the
|
Only applies to targets with recorded time stamps:
just non-branching targets and individual dynamic branches.
As of targets
version 0.6.0, these time
stamps are available for these targets regardless of
storage format. Earlier versions of targets
do not record
time stamps for remote storage such as format = "url"
or repository = "aws"
in tar_target()
.
A character vector of names of new targets with recorded timestamp metadata.
Other time:
tar_older()
,
tar_timestamp()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list(tar_target(x, seq_len(2))) }, ask = FALSE) tar_make() # targets newer than 1 week ago tar_newer(Sys.time() - as.difftime(1, units = "weeks")) # targets newer than 1 week from now tar_newer(Sys.time() + as.difftime(1, units = "weeks")) # Everything is still up to date. tar_make() # Invalidate all targets targets newer than 1 week ago # so they run on the next tar_make(). invalidate_these <- tar_newer(Sys.time() - as.difftime(1, units = "weeks")) tar_invalidate(any_of(invalidate_these)) tar_make() }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list(tar_target(x, seq_len(2))) }, ask = FALSE) tar_make() # targets newer than 1 week ago tar_newer(Sys.time() - as.difftime(1, units = "weeks")) # targets newer than 1 week from now tar_newer(Sys.time() + as.difftime(1, units = "weeks")) # Everything is still up to date. tar_make() # Invalidate all targets targets newer than 1 week ago # so they run on the next tar_make(). invalidate_these <- tar_newer(Sys.time() - as.difftime(1, units = "weeks")) tar_invalidate(any_of(invalidate_these)) tar_make() }) }
In Target Markdown, run the enclosed code only if interactive mode is not activated. Otherwise, do not run the code.
tar_noninteractive(code)
tar_noninteractive(code)
code |
R code to run if Target Markdown interactive mode is not turned on. |
Visit https://books.ropensci.org/targets/literate-programming.html to learn about Target Markdown and interactive mode.
If Target Markdown interactive mode is not turned on,
the function returns the result of running the code.
Otherwise, the function invisibly returns NULL
.
Other Target Markdown:
tar_engine_knitr()
,
tar_interactive()
,
tar_toggle()
tar_noninteractive(message("Not in interactive mode."))
tar_noninteractive(message("Not in interactive mode."))
List targets currently saved to _targets/objects/
or the cloud. Does not include local files
with tar_target(..., format = "file", repository = "local")
.
tar_objects( names = NULL, cloud = TRUE, store = targets::tar_config_get("store") )
tar_objects( names = NULL, cloud = TRUE, store = targets::tar_config_get("store") )
names |
Names of targets to select.
The object supplied to |
cloud |
Logical of length 1, whether to include
cloud targets in the output
(e.g. |
store |
Character of length 1, path to the
|
Character vector of targets saved to _targets/objects/
.
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
Other storage:
tar_format()
,
tar_load()
,
tar_load_everything()
,
tar_read()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list(tar_target(x, "value")) }, ask = FALSE) tar_make() tar_objects() tar_objects(starts_with("x")) # see also any_of() }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list(tar_target(x, "value")) }, ask = FALSE) tar_make() tar_objects() tar_objects(starts_with("x")) # see also any_of() }) }
List all the targets whose last successful run occurred
before a certain point in time. Combined with tar_invalidate()
,
you can use tar_older()
to automatically rerun targets at
regular intervals. See the examples for a demonstration.
tar_older( time, names = NULL, inclusive = FALSE, store = targets::tar_config_get("store") )
tar_older( time, names = NULL, inclusive = FALSE, store = targets::tar_config_get("store") )
time |
A |
names |
Names of eligible targets. Targets excluded from |
inclusive |
Logical of length 1, whether to include targets
completed at exactly the |
store |
Character of length 1, path to the
|
Only applies to targets with recorded time stamps:
just non-branching targets and individual dynamic branches.
As of targets
version 0.6.0, these time
stamps are available for these targets regardless of
storage format. Earlier versions of targets
do not record
time stamps for remote storage such as format = "url"
or repository = "aws"
in tar_target()
.
A character vector of names of old targets with recorded timestamp metadata.
Other time:
tar_newer()
,
tar_timestamp()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list(tar_target(x, seq_len(2))) }, ask = FALSE) tar_make() # targets older than 1 week ago tar_older(Sys.time() - as.difftime(1, units = "weeks")) # targets older than 1 week from now tar_older(Sys.time() + as.difftime(1, units = "weeks")) # Everything is still up to date. tar_make() # Invalidate all targets targets older than 1 week from now # so they run on the next tar_make(). invalidate_these <- tar_older(Sys.time() + as.difftime(1, units = "weeks")) tar_invalidate(any_of(invalidate_these)) tar_make() }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list(tar_target(x, seq_len(2))) }, ask = FALSE) tar_make() # targets older than 1 week ago tar_older(Sys.time() - as.difftime(1, units = "weeks")) # targets older than 1 week from now tar_older(Sys.time() + as.difftime(1, units = "weeks")) # Everything is still up to date. tar_make() # Invalidate all targets targets older than 1 week from now # so they run on the next tar_make(). invalidate_these <- tar_older(Sys.time() + as.difftime(1, units = "weeks")) tar_invalidate(any_of(invalidate_these)) tar_make() }) }
Get a target option. These options include default arguments to
tar_target()
such as packages, storage format,
iteration type, and cue.
Needs to be called before any calls to tar_target()
in order to take effect.
tar_option_get(name = NULL, option = NULL)
tar_option_get(name = NULL, option = NULL)
name |
Character of length 1, name of an option to get.
Must be one of the argument names of |
option |
Deprecated, use the |
This function goes well with tar_target_raw()
when it comes
to defining external interfaces on top of the targets
package to create
pipelines.
Value of a target option.
Other configuration:
tar_config_get()
,
tar_config_projects()
,
tar_config_set()
,
tar_config_unset()
,
tar_config_yaml()
,
tar_envvars()
,
tar_option_reset()
,
tar_option_set()
tar_option_get("format") # default format before we set anything tar_target(x, 1)$settings$format tar_option_set(format = "fst_tbl") # new default format tar_option_get("format") tar_target(x, 1)$settings$format tar_option_reset() # reset the format tar_target(x, 1)$settings$format if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set(cue = tar_cue(mode = "always")) # All targets always run. list(tar_target(x, 1), tar_target(y, 2)) }) tar_make() tar_make() }) }
tar_option_get("format") # default format before we set anything tar_target(x, 1)$settings$format tar_option_set(format = "fst_tbl") # new default format tar_option_get("format") tar_target(x, 1)$settings$format tar_option_reset() # reset the format tar_target(x, 1)$settings$format if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set(cue = tar_cue(mode = "always")) # All targets always run. list(tar_target(x, 1), tar_target(y, 2)) }) tar_make() tar_make() }) }
Reset all target options you previously chose with
tar_option_set()
. These options are mostly configurable default
arguments to tar_target()
and tar_target_raw()
.
tar_option_reset()
tar_option_reset()
NULL
(invisibly).
Other configuration:
tar_config_get()
,
tar_config_projects()
,
tar_config_set()
,
tar_config_unset()
,
tar_config_yaml()
,
tar_envvars()
,
tar_option_get()
,
tar_option_set()
tar_option_get("format") # default format before we set anything tar_target(x, 1)$settings$format tar_option_set(format = "fst_tbl") # new default format tar_option_get("format") tar_target(x, 1)$settings$format tar_option_reset() # reset all options tar_target(x, 1)$settings$format if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set(cue = tar_cue(mode = "always")) tar_option_reset() # Undo option above. list(tar_target(x, 1), tar_target(y, 2)) }) tar_make() tar_make() }) }
tar_option_get("format") # default format before we set anything tar_target(x, 1)$settings$format tar_option_set(format = "fst_tbl") # new default format tar_option_get("format") tar_target(x, 1)$settings$format tar_option_reset() # reset all options tar_target(x, 1)$settings$format if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set(cue = tar_cue(mode = "always")) tar_option_reset() # Undo option above. list(tar_target(x, 1), tar_target(y, 2)) }) tar_make() tar_make() }) }
Set target options, including default arguments to
tar_target()
such as packages, storage format,
iteration type, and cue. Only the non-null arguments are actually
set as options. See currently set options with tar_option_get()
.
To use tar_option_set()
effectively, put it in your workflow's
target script file (default: _targets.R
)
before calls to tar_target()
or tar_target_raw()
.
tar_option_set( tidy_eval = NULL, packages = NULL, imports = NULL, library = NULL, envir = NULL, format = NULL, repository = NULL, repository_meta = NULL, iteration = NULL, error = NULL, memory = NULL, garbage_collection = NULL, deployment = NULL, priority = NULL, backoff = NULL, resources = NULL, storage = NULL, retrieval = NULL, cue = NULL, description = NULL, debug = NULL, workspaces = NULL, workspace_on_error = NULL, seed = NULL, controller = NULL, trust_timestamps = NULL, trust_object_timestamps = NULL )
tar_option_set( tidy_eval = NULL, packages = NULL, imports = NULL, library = NULL, envir = NULL, format = NULL, repository = NULL, repository_meta = NULL, iteration = NULL, error = NULL, memory = NULL, garbage_collection = NULL, deployment = NULL, priority = NULL, backoff = NULL, resources = NULL, storage = NULL, retrieval = NULL, cue = NULL, description = NULL, debug = NULL, workspaces = NULL, workspace_on_error = NULL, seed = NULL, controller = NULL, trust_timestamps = NULL, trust_object_timestamps = NULL )
tidy_eval |
Logical, whether to enable tidy evaluation
when interpreting |
packages |
Character vector of packages to load right before
the target runs or the output data is reloaded for
downstream targets. Use |
imports |
Character vector of package names.
For every package listed, There are several important limitations:
1. Namespaced calls, e.g. |
library |
Character vector of library paths to try
when loading |
envir |
Environment containing functions and global objects
common to all targets in the pipeline.
The If If Package environments should not be assigned to |
format |
Optional storage format for the target's return value.
With the exception of |
repository |
Character of length 1, remote repository for target storage. Choices:
Note: if |
repository_meta |
Character of length 1 with the same values as
|
iteration |
Character of length 1, name of the iteration mode of the target. Choices:
|
error |
Character of length 1, what to do if the target stops and throws an error. Options:
|
memory |
Character of length 1, memory strategy.
If |
garbage_collection |
A non-negative integer.
If |
deployment |
Character of length 1. If |
priority |
Numeric of length 1 between 0 and 1. Controls which
targets get deployed first when multiple competing targets are ready
simultaneously. Targets with priorities closer to 1 get dispatched earlier
(and polled earlier in |
backoff |
An object from |
resources |
Object returned by |
storage |
Character of length 1, only relevant to
|
retrieval |
Character of length 1, only relevant to
|
cue |
An optional object from |
description |
Character of length 1, a custom free-form human-readable
text description of the target. Descriptions appear as target labels
in functions like |
debug |
Character vector of names of targets to run in debug mode.
To use effectively, you must set |
workspaces |
Character vector of target names.
Could be non-branching targets, whole dynamic branching targets,
or individual branch names. |
workspace_on_error |
Logical of length 1, whether to save
a workspace file for each target that throws an error.
Workspace files help with debugging.
See |
seed |
Integer of length 1, seed for generating
target-specific pseudo-random number generator seeds.
These target-specific seeds are deterministic and depend on
Either the user or third-party packages built on top of The |
controller |
A controller or controller group object
produced by the |
trust_timestamps |
Logical of length 1, whether to use file system modification timestamps to check whether the target output data files in _targets/objects/ are up to date. This is an advanced setting and usually does not need to be set by the user except on old or difficult platforms. If If However, timestamp precision varies from a few
nanoseconds at best to 2 entire seconds at worst, and timestamps
with poor precision should not be fully trusted if there is any
possibility that you will manually change the file within 2 seconds
after the pipeline finishes.
If the data store is on a file system with low-precision timestamps,
then you may
consider setting To check if your
file system has low-precision timestamps, you can run
|
trust_object_timestamps |
Deprecated. Use |
NULL
(invisibly).
targets
has several built-in storage formats to control how return
values are saved and loaded from disk:
"rds"
: Default, uses saveRDS()
and readRDS()
. Should work for
most objects, but slow.
"auto"
: either "file"
or "qs"
, depending on the return value
of the target. If the return value is a character vector of
existing files (and/or directories), then the format becomes
"file"
before tar_make()
saves the target. Otherwise,
the format becomes "qs"
.
"qs"
: Uses qs::qsave()
and qs::qread()
. Should work for
most objects, much faster than "rds"
. Optionally set the
preset for qsave()
through tar_resources()
and tar_resources_qs()
.
"feather"
: Uses arrow::write_feather()
and
arrow::read_feather()
(version 2.0). Much faster than "rds"
,
but the value must be a data frame. Optionally set
compression
and compression_level
in arrow::write_feather()
through tar_resources()
and tar_resources_feather()
.
Requires the arrow
package (not installed by default).
"parquet"
: Uses arrow::write_parquet()
and
arrow::read_parquet()
(version 2.0). Much faster than "rds"
,
but the value must be a data frame. Optionally set
compression
and compression_level
in arrow::write_parquet()
through tar_resources()
and tar_resources_parquet()
.
Requires the arrow
package (not installed by default).
"fst"
: Uses fst::write_fst()
and fst::read_fst()
.
Much faster than "rds"
, but the value must be
a data frame. Optionally set the compression level for
fst::write_fst()
through tar_resources()
and tar_resources_fst()
.
Requires the fst
package (not installed by default).
"fst_dt"
: Same as "fst"
, but the value is a data.table
.
Deep copies are made as appropriate in order to protect
against the global effects of in-place modification.
Optionally set the compression level the same way as for "fst"
.
"fst_tbl"
: Same as "fst"
, but the value is a tibble
.
Optionally set the compression level the same way as for "fst"
.
"keras"
: superseded by tar_format()
and incompatible
with error = "null"
(in tar_target()
or tar_option_set()
).
Uses keras::save_model_hdf5()
and
keras::load_model_hdf5()
. The value must be a Keras model.
Requires the keras
package (not installed by default).
"torch"
: superseded by tar_format()
and incompatible
with error = "null"
(in tar_target()
or tar_option_set()
).
Uses torch::torch_save()
and torch::torch_load()
.
The value must be an object from the torch
package
such as a tensor or neural network module.
Requires the torch
package (not installed by default).
"file"
: A dynamic file. To use this format,
the target needs to manually identify or save some data
and return a character vector of paths
to the data (must be a single file path if repository
is not "local"
). (These paths must be existing files
and nonempty directories.)
Then, targets
automatically checks those files and cues
the appropriate run/skip decisions if those files are out of date.
Those paths must point to files or directories,
and they must not contain characters |
or *
.
All the files and directories you return must actually exist,
or else targets
will throw an error. (And if storage
is "worker"
,
targets
will first stall out trying to wait for the file
to arrive over a network file system.)
If the target does not create any files, the return value should be
character(0)
.
If repository
is not "local"
and format
is "file"
,
then the character vector returned by the target must be of length 1
and point to a single file. (Directories and vectors of multiple
file paths are not supported for dynamic files on the cloud.)
That output file is uploaded to the cloud and tracked for changes
where it exists in the cloud. The local file is deleted after
the target runs.
"url"
: A dynamic input URL. For this storage format,
repository
is implicitly "local"
,
URL format is like format = "file"
except the return value of the target is a URL that already exists
and serves as input data for downstream targets. Optionally
supply a custom curl
handle through
tar_resources()
and tar_resources_url()
.
In new_handle()
, nobody = TRUE
is important because it
ensures targets
just downloads the metadata instead of
the entire data file when it checks time stamps and hashes.
The data file at the URL needs to have an ETag or a Last-Modified
time stamp, or else the target will throw an error because
it cannot track the data. Also, use extreme caution when
trying to use format = "url"
to track uploads. You must be absolutely
certain the ETag and Last-Modified time stamp are fully updated
and available by the time the target's command finishes running.
targets
makes no attempt to wait for the web server.
A custom format can be supplied with tar_format()
. For this choice,
it is the user's responsibility to provide methods for (un)serialization
and (un)marshaling the return value of the target.
The formats starting with "aws_"
are deprecated as of 2022-03-13
(targets
version > 0.10.0). For cloud storage integration, use the
repository
argument instead.
Formats "rds"
, "file"
, and "url"
are general-purpose formats
that belong in the targets
package itself.
Going forward, any additional formats should be implemented with
tar_format()
in third-party packages like tarchetypes
and geotargets
(for example: tarchetypes::tar_format_nanoparquet()
).
Formats "qs"
, "fst"
, etc. are legacy formats from before the
existence of tar_format()
, and they will continue to remain in
targets
without deprecation.
Other configuration:
tar_config_get()
,
tar_config_projects()
,
tar_config_set()
,
tar_config_unset()
,
tar_config_yaml()
,
tar_envvars()
,
tar_option_get()
,
tar_option_reset()
tar_option_get("format") # default format before we set anything tar_target(x, 1)$settings$format tar_option_set(format = "fst_tbl") # new default format tar_option_get("format") tar_target(x, 1)$settings$format tar_option_reset() # reset the format tar_target(x, 1)$settings$format if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set(cue = tar_cue(mode = "always")) # All targets always run. list(tar_target(x, 1), tar_target(y, 2)) }) tar_make() tar_make() }) }
tar_option_get("format") # default format before we set anything tar_target(x, 1)$settings$format tar_option_set(format = "fst_tbl") # new default format tar_option_get("format") tar_target(x, 1)$settings$format tar_option_reset() # reset the format tar_target(x, 1)$settings$format if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set(cue = tar_cue(mode = "always")) # All targets always run. list(tar_target(x, 1), tar_target(y, 2)) }) tar_make() tar_make() }) }
Checks for outdated targets in the pipeline,
targets that will be rerun automatically if you call
tar_make()
or similar. See tar_cue()
for the rules
that decide whether a target needs to rerun.
tar_outdated( names = NULL, shortcut = targets::tar_config_get("shortcut"), branches = FALSE, targets_only = TRUE, reporter = targets::tar_config_get("reporter_outdated"), seconds_reporter = targets::tar_config_get("seconds_reporter"), seconds_interval = targets::tar_config_get("seconds_interval"), callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function, reporter), envir = parent.frame(), script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
tar_outdated( names = NULL, shortcut = targets::tar_config_get("shortcut"), branches = FALSE, targets_only = TRUE, reporter = targets::tar_config_get("reporter_outdated"), seconds_reporter = targets::tar_config_get("seconds_reporter"), seconds_interval = targets::tar_config_get("seconds_interval"), callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function, reporter), envir = parent.frame(), script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
names |
Names of the targets. |
shortcut |
Logical of length 1, how to interpret the |
branches |
Logical of length 1, whether to include branch names. Including branches could get cumbersome for large pipelines. Individual branch names are still omitted when branch-specific information is not reliable: for example, when a pattern branches over an outdated target. |
targets_only |
Logical of length 1, whether to just restrict to targets
or to include functions and other global objects from the environment
created by running the target script file (default: |
reporter |
Character of length 1, name of the reporter to use. Controls how messages are printed as targets are checked. Choices:
|
seconds_reporter |
Positive numeric of length 1 with the minimum number of seconds between times when the reporter prints progress messages to the R console. |
seconds_interval |
Deprecated on 2023-08-24 (version 1.2.2.9001).
Use |
callr_function |
A function from |
callr_arguments |
A list of arguments to |
envir |
An environment, where to run the target R script
(default: The |
script |
Character of length 1, path to the
target script file. Defaults to |
store |
Character of length 1, path to the
|
Requires that you define a pipeline
with a target script file (default: _targets.R
).
(See tar_script()
for details.)
Names of the outdated targets.
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
Other inspect:
tar_deps()
,
tar_manifest()
,
tar_network()
,
tar_sitrep()
,
tar_validate()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script(list(tar_target(x, 1 + 1))) tar_outdated() tar_script({ library(targets) library(tarchetypes) list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1), tar_target(z, y1 + y2) ) }, ask = FALSE) tar_outdated() }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script(list(tar_target(x, 1 + 1))) tar_outdated() tar_script({ library(targets) library(tarchetypes) list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1), tar_target(z, y1 + y2) ) }, ask = FALSE) tar_outdated() }) }
Identify the file path to the target script of the pipeline currently running.
tar_path_script()
tar_path_script()
Character, file path to the target script
of the pipeline currently running.
If called outside of the pipeline currently running,
tar_path_script()
returns tar_config_get("script")
.
Other utilities:
tar_active()
,
tar_backoff()
,
tar_call()
,
tar_cancel()
,
tar_definition()
,
tar_described_as()
,
tar_envir()
,
tar_format_get()
,
tar_group()
,
tar_name()
,
tar_path()
,
tar_path_script_support()
,
tar_path_store()
,
tar_path_target()
,
tar_source()
,
tar_store()
tar_path_script() if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. script <- tempfile() tar_script(tar_target(x, tar_path_script()), script = script, ask = FALSE) tar_make(script = script) tar_read(x) }) }
tar_path_script() if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. script <- tempfile() tar_script(tar_target(x, tar_path_script()), script = script, ask = FALSE) tar_make(script = script) tar_read(x) }) }
Identify the directory path to the support scripts of the current target script of the pipeline currently running.
tar_path_script_support()
tar_path_script_support()
A target script (default: _targets.R
) comes with
support scripts if it is written by Target Markdown.
These support scripts usually live in a folder called _targets_r/
,
but the path may vary from case to case. The
tar_path_script_support()
returns the path to
the folder with the support scripts.
Character, directory path to the support scripts of the target script
of the pipeline currently running.
If called outside of the pipeline currently running,
tar_path_script()
returns tar_config_get("script")
.
Other utilities:
tar_active()
,
tar_backoff()
,
tar_call()
,
tar_cancel()
,
tar_definition()
,
tar_described_as()
,
tar_envir()
,
tar_format_get()
,
tar_group()
,
tar_name()
,
tar_path()
,
tar_path_script()
,
tar_path_store()
,
tar_path_target()
,
tar_source()
,
tar_store()
tar_path_script_support() if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. script <- tempfile() tar_script( tar_target(x, tar_path_script_support()), script = script, ask = FALSE ) tar_make(script = script) tar_read(x) }) }
tar_path_script_support() if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. script <- tempfile() tar_script( tar_target(x, tar_path_script_support()), script = script, ask = FALSE ) tar_make(script = script) tar_read(x) }) }
Identify the file path to the data store of the pipeline currently running.
tar_path_store()
tar_path_store()
Character, file path to the data store
of the pipeline currently running.
If called outside of the pipeline currently running,
tar_path_store()
returns tar_config_get("store")
.
Other utilities:
tar_active()
,
tar_backoff()
,
tar_call()
,
tar_cancel()
,
tar_definition()
,
tar_described_as()
,
tar_envir()
,
tar_format_get()
,
tar_group()
,
tar_name()
,
tar_path()
,
tar_path_script()
,
tar_path_script_support()
,
tar_path_target()
,
tar_source()
,
tar_store()
tar_path_store() if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script(tar_target(x, tar_path_store()), ask = FALSE) store <- tempfile() tar_make(store = store) tar_read(x, store = store) }) }
tar_path_store() if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script(tar_target(x, tar_path_store()), ask = FALSE) store <- tempfile() tar_make(store = store) tar_read(x, store = store) }) }
Identify the file path where a target will be stored after the target finishes running in the pipeline.
tar_path_target( name = NULL, default = NA_character_, create_dir = FALSE, store = targets::tar_config_get("store") )
tar_path_target( name = NULL, default = NA_character_, create_dir = FALSE, store = targets::tar_config_get("store") )
name |
Symbol, name of a target.
If |
default |
Character, value to return if |
create_dir |
Logical of length 1,
whether to create |
store |
Character of length 1,
path to the data store if |
Character, file path of the return value of the target.
If not called from inside a running target,
tar_path_target(name = your_target)
just returns
_targets/objects/your_target
, the file path where your_target
will be saved unless format
is equal to "file"
or any of the
supported cloud-based storage formats.
For non-cloud storage formats, if you call tar_path_target()
with no arguments while target x
is running, the name
argument defaults to the name of the running target,
so tar_path_target()
returns _targets/objects/x
.
For cloud-backed formats, tar_path_target()
returns the
path to the staging file in _targets/scratch/
.
That way, even if you select a cloud repository
(e.g. tar_target(..., repository = "aws", storage = "none")
)
then you can still manually write to
tar_path_target(create_dir = TRUE)
and the targets
package will automatically hash it and
upload it to the AWS S3 bucket. This does not apply to
format = "file"
, where you would never need storage = "none"
anyway.
Other utilities:
tar_active()
,
tar_backoff()
,
tar_call()
,
tar_cancel()
,
tar_definition()
,
tar_described_as()
,
tar_envir()
,
tar_format_get()
,
tar_group()
,
tar_name()
,
tar_path()
,
tar_path_script()
,
tar_path_script_support()
,
tar_path_store()
,
tar_source()
,
tar_store()
tar_path_target() tar_path_target(your_target) if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script(tar_target(returns_path, tar_path_target()), ask = FALSE) tar_make() tar_read(returns_path) }) }
tar_path_target() tar_path_target(your_target) if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script(tar_target(returns_path, tar_path_target()), ask = FALSE) tar_make() tar_read(returns_path) }) }
Emulate the dynamic branching process outside a pipeline.
tar_pattern()
can help you understand the overall branching structure
that comes from the pattern
argument of tar_target()
.
tar_pattern(pattern, ..., seed = 0L)
tar_pattern(pattern, ..., seed = 0L)
pattern |
Function call with the pattern specification. |
... |
Named integers, each of length 1. Each name is the name of a dependency target, and each integer is the length of the target (number of branches or slices). Names must be unique. |
seed |
Integer of length 1, random number generator seed to
emulate the pattern reproducibly. (The |
Dynamic branching is a way to programmatically
create multiple new targets based on the values of other targets,
all while the pipeline is running. Use the pattern
argument of
tar_target()
to get started. pattern
accepts a function call
composed of target names and any of the following patterns:
map()
: iterate over one or more targets in sequence.
cross()
: iterate over combinations of slices of targets.
slice()
: select one or more slices by index, e.g.
slice(x, index = c(3, 4))
selects the third and fourth
slice or branch of x
.
head()
: restrict branching to the first few elements.
tail()
: restrict branching to the last few elements.
sample()
: restrict branching to a random subset of elements.
A tibble
showing the kinds of dynamic branches that
tar_target()
would create in a real pipeline with the given pattern
.
Each row is a dynamic branch, each column is a dependency target,
and each element is the name of an upstream bud or branch that the
downstream branch depends on. Buds are pieces of non-branching targets
("stems") and branches are pieces of patterns. The returned bud and branch
names are not the actual ones you will see when you run the pipeline,
but they do communicate the branching structure of the pattern.
Other branching:
tar_branch_index()
,
tar_branch_names()
,
tar_branches()
# To use dynamic map for real in a pipeline, # call map() in a target's pattern. # The following code goes at the bottom of # your target script file (default: `_targets.R`). list( tar_target(x, seq_len(2)), tar_target(y, head(letters, 2)), tar_target(dynamic, c(x, y), pattern = map(x, y)) # 2 branches ) # Likewise for more complicated patterns. list( tar_target(x, seq_len(2)), tar_target(y, head(letters, 2)), tar_target(z, head(LETTERS, 2)), tar_target(dynamic, c(x, y, z), pattern = cross(z, map(x, y))) #4 branches ) # But you can emulate dynamic branching without running a pipeline # in order to understand the patterns you are creating. Simply supply # the pattern and the length of each dependency target. # The returned data frame represents the branching structure of the pattern: # One row per new branch, one column per dependency target, and # one element per bud/branch in each dependency target. tar_pattern( cross(x, map(y, z)), x = 2, y = 3, z = 3 ) tar_pattern( head(cross(x, map(y, z)), n = 2), x = 2, y = 3, z = 3 )
# To use dynamic map for real in a pipeline, # call map() in a target's pattern. # The following code goes at the bottom of # your target script file (default: `_targets.R`). list( tar_target(x, seq_len(2)), tar_target(y, head(letters, 2)), tar_target(dynamic, c(x, y), pattern = map(x, y)) # 2 branches ) # Likewise for more complicated patterns. list( tar_target(x, seq_len(2)), tar_target(y, head(letters, 2)), tar_target(z, head(LETTERS, 2)), tar_target(dynamic, c(x, y, z), pattern = cross(z, map(x, y))) #4 branches ) # But you can emulate dynamic branching without running a pipeline # in order to understand the patterns you are creating. Simply supply # the pattern and the length of each dependency target. # The returned data frame represents the branching structure of the pattern: # One row per new branch, one column per dependency target, and # one element per bud/branch in each dependency target. tar_pattern( cross(x, map(y, z)), x = 2, y = 3, z = 3 ) tar_pattern( head(cross(x, map(y, z)), n = 2), x = 2, y = 3, z = 3 )
Get the process ID (PID) of the most recent main R process to orchestrate the targets of the current project.
tar_pid(store = targets::tar_config_get("store"))
tar_pid(store = targets::tar_config_get("store"))
store |
Character of length 1, path to the
|
The main process is the R process invoked
by tar_make()
or similar. If callr_function
is not NULL
,
this is an external process, and the pid
in the return value
will not agree with Sys.getpid()
in your current interactive session.
The process may or may not be alive. You may want to
check it with ps::ps_is_running(ps::ps_handle(targets::tar_pid()))
before running another call to tar_make()
for the same project.
Integer with the process ID (PID) of the most recent main R process to orchestrate the targets of the current project.
Other data:
tar_crew()
,
tar_process()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ list( tar_target(x, seq_len(2)), tar_target(y, 2 * x, pattern = map(x)) ) }, ask = FALSE) tar_make() Sys.getpid() tar_pid() # Different from the current PID. }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ list( tar_target(x, seq_len(2)), tar_target(y, 2 * x, pattern = map(x)) ) }, ask = FALSE) tar_make() Sys.getpid() tar_pid() # Different from the current PID. }) }
Print the information in tar_progress_summary()
at regular intervals.
tar_poll( interval = 1, timeout = Inf, fields = c("skipped", "dispatched", "completed", "errored", "canceled", "since"), store = targets::tar_config_get("store") )
tar_poll( interval = 1, timeout = Inf, fields = c("skipped", "dispatched", "completed", "errored", "canceled", "since"), store = targets::tar_config_get("store") )
interval |
Number of seconds to wait between iterations of polling progress. |
timeout |
How many seconds to run before exiting. |
fields |
Optional character vector of names of progress data
columns to read. Set to |
store |
Character of length 1, path to the
|
NULL
(invisibly). Called for its side effects.
Other progress:
tar_canceled()
,
tar_completed()
,
tar_dispatched()
,
tar_errored()
,
tar_progress()
,
tar_progress_branches()
,
tar_progress_summary()
,
tar_skipped()
,
tar_watch()
,
tar_watch_server()
,
tar_watch_ui()
if (identical(Sys.getenv("TAR_INTERACTIVE_EXAMPLES"), "true")) { tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ list( tar_target(x, seq_len(100)), tar_target(y, Sys.sleep(0.1), pattern = map(x)) ) }, ask = FALSE) px <- tar_make(callr_function = callr::r_bg, reporter = "silent") tar_poll() }) }
if (identical(Sys.getenv("TAR_INTERACTIVE_EXAMPLES"), "true")) { tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ list( tar_target(x, seq_len(100)), tar_target(y, Sys.sleep(0.1), pattern = map(x)) ) }, ask = FALSE) px <- tar_make(callr_function = callr::r_bg, reporter = "silent") tar_poll() }) }
Get info on the most recent main R process to orchestrate the targets of the current project.
tar_process(names = NULL, store = targets::tar_config_get("store"))
tar_process(names = NULL, store = targets::tar_config_get("store"))
names |
Optional, names of the data points to return.
If supplied, |
store |
Character of length 1, path to the
|
The main process is the R process invoked
by tar_make()
or similar. If callr_function
is not NULL
,
this is an external process, and the pid
in the return value
will not agree with Sys.getpid()
in your current interactive session.
The process may or may not be alive. You may want to
check the status with tar_pid() %in% ps::ps_pids()
before running another call to tar_make()
for the same project.
A data frame with metadata on the most recent main R process
to orchestrate the targets of the current project.
The output includes the pid
of the main process.
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
Other data:
tar_crew()
,
tar_pid()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(x, seq_len(2)), tar_target(y, 2 * x, pattern = map(x)) ) }, ask = FALSE) tar_make() tar_process() tar_process(pid) }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(x, seq_len(2)), tar_target(y, 2 * x, pattern = map(x)) ) }, ask = FALSE) tar_make() tar_process() tar_process(pid) }) }
Read a project's target progress data for the most recent
run of tar_make()
or similar. Only the most recent record is shown.
tar_progress( names = NULL, fields = "progress", store = targets::tar_config_get("store") )
tar_progress( names = NULL, fields = "progress", store = targets::tar_config_get("store") )
names |
Optional, names of the targets. If supplied, the
output is restricted to the selected targets.
The object supplied to |
fields |
Optional, names of progress data columns to read.
Set to |
store |
Character of length 1, path to the
|
A data frame with one row per target and the following columns:
name
: name of the target.
type
: type of target: "stem"
for non-branching targets,
"pattern"
for dynamically branching targets, and "branch"
for dynamic branches.
parent
: name of the target's parent. For branches, this is the
name of the associated pattern. For other targets, the parent
is just the target itself.
branches
: number of dynamic branches of a pattern. 0 for non-patterns.
progress
: the most recent progress update of that target.
Could be "dispatched"
, "completed"
, "skipped"
, "canceled"
,
or "errored"
. "dispatched"
means the target was sent off
to be run, but in the case of tar_make()
with a crew
controller,
the target might not actually start running right away if the crew
workers are all busy.
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
Other progress:
tar_canceled()
,
tar_completed()
,
tar_dispatched()
,
tar_errored()
,
tar_poll()
,
tar_progress_branches()
,
tar_progress_summary()
,
tar_skipped()
,
tar_watch()
,
tar_watch_server()
,
tar_watch_ui()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(x, seq_len(2)), tar_target(y, 2 * x, pattern = map(x)) ) }, ask = FALSE) tar_make() tar_progress() tar_progress(starts_with("y_")) # see also any_of() }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(x, seq_len(2)), tar_target(y, 2 * x, pattern = map(x)) ) }, ask = FALSE) tar_make() tar_progress() tar_progress(starts_with("y_")) # see also any_of() }) }
Read a project's target progress data for the most recent run of the pipeline and display the tabulated status of dynamic branches. Only the most recent record is shown.
tar_progress_branches( names = NULL, fields = NULL, store = targets::tar_config_get("store") )
tar_progress_branches( names = NULL, fields = NULL, store = targets::tar_config_get("store") )
names |
Optional, names of the targets. If supplied, |
fields |
Optional, names of progress data columns to read.
Set to |
store |
Character of length 1, path to the
|
A data frame with one row per target per progress status and the following columns.
name
: name of the pattern.
progress
: progress status: "dispatched"
, "completed"
,
"canceled"
, or "errored"
.
branches
: number of branches in the progress category.
total
: total number of branches planned for the whole pattern.
Values within the same pattern should all be equal.
Other progress:
tar_canceled()
,
tar_completed()
,
tar_dispatched()
,
tar_errored()
,
tar_poll()
,
tar_progress()
,
tar_progress_summary()
,
tar_skipped()
,
tar_watch()
,
tar_watch_server()
,
tar_watch_ui()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(x, seq_len(2)), tar_target(y, x, pattern = map(x)), tar_target(z, stopifnot(y < 1.5), pattern = map(y)) ) }, ask = FALSE) try(tar_make()) tar_progress_branches() }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(x, seq_len(2)), tar_target(y, x, pattern = map(x)), tar_target(z, stopifnot(y < 1.5), pattern = map(y)) ) }, ask = FALSE) try(tar_make()) tar_progress_branches() }) }
Summarize the progress of a run of the pipeline.
tar_progress_summary( fields = c("skipped", "dispatched", "completed", "errored", "canceled", "since"), store = targets::tar_config_get("store") )
tar_progress_summary( fields = c("skipped", "dispatched", "completed", "errored", "canceled", "since"), store = targets::tar_config_get("store") )
fields |
Optional character vector of names of progress data
columns to read. Set to |
store |
Character of length 1, path to the
|
A data frame with one row and the following
optional columns that can be selected with fields
.
(time
is omitted by default.)
skipped
: number of targets that were skipped because they were
already up to date.
dispatched
: number of targets that were sent off to run and
did not (yet) finish. These targets may not actually be running,
depending on the status and workload of parallel workers.
completed
: number of targets that completed without
error or cancellation.
errored
: number of targets that threw an error.
canceled
: number of canceled targets (see tar_cancel()
).
since
: how long ago progress last changed (Sys.time() - time
).
time
: the time when the progress last changed
(modification timestamp of the _targets/meta/progress
file).
Other progress:
tar_canceled()
,
tar_completed()
,
tar_dispatched()
,
tar_errored()
,
tar_poll()
,
tar_progress()
,
tar_progress_branches()
,
tar_skipped()
,
tar_watch()
,
tar_watch_server()
,
tar_watch_ui()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(x, seq_len(2)), tar_target(y, x, pattern = map(x)), tar_target(z, stopifnot(y < 1.5), pattern = map(y), error = "continue") ) }, ask = FALSE) try(tar_make()) tar_progress_summary() }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(x, seq_len(2)), tar_target(y, x, pattern = map(x)), tar_target(z, stopifnot(y < 1.5), pattern = map(y), error = "continue") ) }, ask = FALSE) try(tar_make()) tar_progress_summary() }) }
Remove target values from _targets/objects/
and the cloud
and remove target metadata from _targets/meta/meta
for targets that are no longer part of the pipeline.
tar_prune( cloud = TRUE, batch_size = 1000L, verbose = TRUE, callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function), envir = parent.frame(), script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
tar_prune( cloud = TRUE, batch_size = 1000L, verbose = TRUE, callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function), envir = parent.frame(), script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
cloud |
Logical of length 1, whether to delete objects
from the cloud if applicable (e.g. AWS, GCP). If |
batch_size |
Positive integer between 1 and 1000, number of target objects to delete from the cloud with each HTTP API request. Currently only supported for AWS. Cannot be more than 1000. |
verbose |
Logical of length 1, whether to print console messages to show progress when deleting each batch of targets from each cloud bucket. Batched deletion with verbosity is currently only supported for AWS. |
callr_function |
A function from |
callr_arguments |
A list of arguments to |
envir |
An environment, where to run the target R script
(default: The |
script |
Character of length 1, path to the
target script file. Defaults to |
store |
Character of length 1, path to the
|
tar_prune()
is useful if you recently worked through
multiple changes to your project and are now trying to
discard irrelevant data while keeping the results that still matter.
Global objects and local files with format = "file"
outside the
data store are unaffected. Also removes _targets/scratch/
,
which is only needed while tar_make()
, tar_make_clustermq()
,
or tar_make_future()
is running. To list the targets that will be
pruned without actually removing anything, use tar_prune_list()
.
NULL
except if callr_function
is callr::r_bg
, in which case
a handle to the callr
background process is returned. Either way,
the value is invisibly returned.
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
Some buckets in Amazon S3 or Google Cloud Storage are "versioned",
which means they track historical versions of each data object.
If you use targets
with cloud storage
(https://books.ropensci.org/targets/cloud-storage.html)
and versioning is turned on, then targets
will record each
version of each target in its metadata.
Functions like tar_read()
and tar_load()
load the version recorded in the local metadata,
which may not be the same as the "current" version of the
object in the bucket. Likewise, functions tar_delete()
and tar_destroy()
only remove
the version ID of each target as recorded in the local
metadata.
If you want to interact with the latest version of an object instead of the version ID recorded in the local metadata, then you will need to delete the object from the metadata.
Make sure your local copy of the metadata is current and
up to date. You may need to run tar_meta_download()
or
tar_meta_sync()
first.
Run tar_unversion()
to remove the recorded version IDs of
your targets in the local metadata.
With the version IDs gone from the local metadata,
functions like tar_read()
and tar_destroy()
will use the
latest version of each target data object.
Optional: to back up the local metadata file with the version IDs
deleted, use tar_meta_upload()
.
tar_prune_list()
Other clean:
tar_delete()
,
tar_destroy()
,
tar_invalidate()
,
tar_prune_list()
,
tar_unversion()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1), tar_target(z, y1 + y2) ) }, ask = FALSE) tar_make() # Remove some targets from the pipeline. tar_script(list(tar_target(y1, 1 + 1)), ask = FALSE) # Keep only the remaining targets in the data store. tar_prune() }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1), tar_target(z, y1 + y2) ) }, ask = FALSE) tar_make() # Remove some targets from the pipeline. tar_script(list(tar_target(y1, 1 + 1)), ask = FALSE) # Keep only the remaining targets in the data store. tar_prune() }) }
List targets that tar_prune()
will remove.
List the targets that tar_prune()
will remove. Does not
actually remove any targets.
tar_prune_list( callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function), envir = parent.frame(), script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
tar_prune_list( callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function), envir = parent.frame(), script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
callr_function |
A function from |
callr_arguments |
A list of arguments to |
envir |
An environment, where to run the target R script
(default: The |
script |
Character of length 1, path to the
target script file. Defaults to |
store |
Character of length 1, path to the
|
See tar_prune()
for details.
If callr_function
is callr::r_bg
, the return value is
a handle to the callr
background process.
Otherwise, the return value is a character vector of target names
identifying targets that tar_prune()
will remove.
tar_prune
Other clean:
tar_delete()
,
tar_destroy()
,
tar_invalidate()
,
tar_prune()
,
tar_unversion()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1), tar_target(z, y1 + y2) ) }, ask = FALSE) tar_make() # Remove some targets from the pipeline. tar_script(list(tar_target(y1, 1 + 1)), ask = FALSE) # List targets that tar_prune() will remove. tar_prune_list() }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1), tar_target(z, y1 + y2) ) }, ask = FALSE) tar_make() # Remove some targets from the pipeline. tar_script(list(tar_target(y1, 1 + 1)), ask = FALSE) # List targets that tar_prune() will remove. tar_prune_list() }) }
Read a target's return value from its file in
_targets/objects/
. For file targets (i.e. format = "file"
)
the paths are returned.
tar_read()
expects an
unevaluated symbol for the name
argument, whereas tar_read_raw()
expects a character string.
tar_read( name, branches = NULL, meta = tar_meta(store = store), store = targets::tar_config_get("store") ) tar_read_raw( name, branches = NULL, meta = tar_meta(store = store), store = targets::tar_config_get("store") )
tar_read( name, branches = NULL, meta = tar_meta(store = store), store = targets::tar_config_get("store") ) tar_read_raw( name, branches = NULL, meta = tar_meta(store = store), store = targets::tar_config_get("store") )
name |
Name of the target to read.
|
branches |
Integer of indices of the branches to load if the target is a pattern. |
meta |
Data frame of metadata from |
store |
Character of length 1, path to the
|
The target's return value from its file in
_targets/objects/
, or the paths to the custom files and directories
if format = "file"
was set.
Some buckets in Amazon S3 or Google Cloud Storage are "versioned",
which means they track historical versions of each data object.
If you use targets
with cloud storage
(https://books.ropensci.org/targets/cloud-storage.html)
and versioning is turned on, then targets
will record each
version of each target in its metadata.
Functions like tar_read()
and tar_load()
load the version recorded in the local metadata,
which may not be the same as the "current" version of the
object in the bucket. Likewise, functions tar_delete()
and tar_destroy()
only remove
the version ID of each target as recorded in the local
metadata.
If you want to interact with the latest version of an object instead of the version ID recorded in the local metadata, then you will need to delete the object from the metadata.
Make sure your local copy of the metadata is current and
up to date. You may need to run tar_meta_download()
or
tar_meta_sync()
first.
Run tar_unversion()
to remove the recorded version IDs of
your targets in the local metadata.
With the version IDs gone from the local metadata,
functions like tar_read()
and tar_destroy()
will use the
latest version of each target data object.
Optional: to back up the local metadata file with the version IDs
deleted, use tar_meta_upload()
.
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
Other storage:
tar_format()
,
tar_load()
,
tar_load_everything()
,
tar_objects()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list(tar_target(x, 1 + 1)) }) tar_make() tar_read(x) tar_read_raw("x") }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list(tar_target(x, 1 + 1)) }) tar_make() tar_read(x) tar_read_raw("x") }) }
renv
Write package dependencies to a script file
(by default, named _targets_packages.R
in the root project directory).
Each package is written to a separate line
as a standard library()
call (e.g. library(package)
) so
renv
can identify them automatically.
tar_renv( extras = c("bslib", "crew", "gt", "markdown", "rstudioapi", "shiny", "shinybusy", "shinyWidgets", "visNetwork"), path = "_targets_packages.R", callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function), envir = parent.frame(), script = targets::tar_config_get("script") )
tar_renv( extras = c("bslib", "crew", "gt", "markdown", "rstudioapi", "shiny", "shinybusy", "shinyWidgets", "visNetwork"), path = "_targets_packages.R", callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function), envir = parent.frame(), script = targets::tar_config_get("script") )
extras |
Character vector of additional packages to declare as project dependencies. |
path |
Character of length 1, path to the script file to
populate with |
callr_function |
A function from |
callr_arguments |
A list of arguments to |
envir |
An environment, where to run the target R script
(default: The |
script |
Character of length 1, path to the
target script file. Defaults to |
This function gets called for its side-effect, which writes
package dependencies to a script for compatibility with renv
.
The generated file should not be edited by hand and will be
overwritten each time tar_renv()
is called.
The behavior of renv
is to create and manage a project-local R
library
and keep a record of project dependencies in a file called renv.lock
.
To identify dependencies, renv
crawls through code to find packages
explicitly mentioned using library()
, require()
, or ::
.
However, targets
manages packages in a way that hides dependencies
from renv.
tar_renv()
finds package dependencies that would be
otherwise hidden to renv
because they are declared using the targets
API. Thus, calling tar_renv
is only necessary if using
tar_option_set()
or tar_target()
to use specialized storage
formats or manage packages.
With the script written by tar_renv()
, renv
is able to crawl the
file to identify package dependencies (with renv::dependencies()
).
tar_renv()
only serves to make your targets
project compatible with
renv
, but it is still the user's responsibility to call renv::init()
and
renv::snapshot()
directly to initialize and manage a
project-local R
library. This allows your targets
pipeline to have
its own self-contained R
library separate from your standard R
library. See https://rstudio.github.io/renv/index.html for
more information.
Nothing, invisibly.
If you use renv
, then overhead from project initialization
could slow down tar_make()
and friends.
If you experience slowness, please make sure your renv
library
is on a fast file system.
(For example, slow network drives can severely reduce performance.)
In addition, you can disable the slowest renv
initialization checks.
After confirming at
https://rstudio.github.io/renv/reference/config.html
that you can safely disable these checks,
you can write lines RENV_CONFIG_RSPM_ENABLED=false
,
RENV_CONFIG_SANDBOX_ENABLED=false
,
and RENV_CONFIG_SYNCHRONIZED_CHECK=false
in your user-level .Renviron
file. If you disable the synchronization
check, remember to call renv::status()
periodically
to check the health of your renv
project library.
https://rstudio.github.io/renv/articles/renv.html
Other scripts:
tar_edit()
,
tar_github_actions()
,
tar_helper()
,
tar_script()
tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set(packages = c("tibble", "qs")) list() }, ask = FALSE) tar_renv() writeLines(readLines("_targets_packages.R")) }) tar_option_reset()
tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set(packages = c("tibble", "qs")) list() }, ask = FALSE) tar_renv() writeLines(readLines("_targets_packages.R")) }) tar_option_reset()
Define a custom storage repository that uses content-addressable storage (CAS).
tar_repository_cas( upload, download, exists = NULL, list = NULL, consistent = FALSE, substitute = base::list() )
tar_repository_cas( upload, download, exists = NULL, list = NULL, consistent = FALSE, substitute = base::list() )
upload |
A function with arguments To differentiate between
See the "Repository functions" section for more details. |
download |
A function with arguments Please be careful to avoid deleting the object at See the "Repository functions" section for more details. |
exists |
A function with a single argument The See the "Repository functions" section for more details. |
list |
Either The The See the "Repository functions" section for more details. |
consistent |
Logical. Set to A data storage system is said to have strong read-after-write consistency if a new object is fully available for reading as soon as the write operation finishes. Many modern cloud services like Amazon S3 and Google Cloud Storage have strong read-after-write consistency, meaning that if you upload an object with a PUT request, then a GET request immediately afterwards will retrieve the precise version of the object you just uploaded. Some storage systems do not have strong read-after-write consistency.
One example is network file systems (NFS). On a computing cluster,
if one node creates a file on an NFS, then there is a delay before
other nodes can access the new file. |
substitute |
Named list of values to be inserted into the
body of each custom function in place of symbols in the body.
For example, if
Please do not include temporary or sensitive information
such as authentication credentials.
If you do, then |
Normally, targets
organizes output data
based on target names. For example,
if a pipeline has a single target x
with default settings,
then tar_make()
saves the output data to the file
_targets/objects/x
. When the output of x
changes, tar_make()
overwrites _targets/objects/x
.
In other words, no matter how many changes happen to x
,
the data store always looks like this:
_targets/ meta/ meta objects/ x
By contrast, with content-addressable storage (CAS),
targets
organizes outputs based on the hashes of their contents.
The name of each output file is its hash, and the
metadata maps these hashes to target names. For example, suppose
target x
has repository = tar_repository_cas_local("my_cas")
.
When the output of x
changes, tar_make()
creates a new file
inside my_cas/
without overwriting or deleting any other files
in that folder. If you run tar_make()
three different times
with three different values of x
, then storage will look like this:
_targets/ meta/ meta my_cas/ 1fffeb09ad36e84a 68328d833e6361d3 798af464fb2f6b30
The next call to tar_read(x)
uses tar_meta(x)$data
to look up the current hash of x
. If tar_meta(x)$data
returns
"1fffeb09ad36e84a"
, then tar_read(x)
returns the data from
my_cas/1fffeb09ad36e84a
. Files my_cas/68328d833e6361d3
and
my_cas/798af464fb2f6b30
are left over from previous values of x
.
Because CAS accumulates historical data objects,
it is ideal for data versioning and collaboration.
If you commit the _targets/meta/meta
file to version control
alongside the source code,
then you can revert to a previous state of your pipeline with all your
targets up to date, and a colleague can leverage your hard-won
results using a fork of your code and metadata.
The downside of CAS is the cost of accumulating many data objects over time. Most pipelines that use CAS should have a garbage collection system or retention policy to remove data objects when they are no longer needed.
The tar_repository_cas()
function lets you create your own CAS system
for targets
. You can supply arbitrary custom methods to upload,
download, and check for the existence of data objects. Your custom
CAS system can exist locally on a shared file system or remotely
on the cloud (e.g. in an AWS S3 bucket).
See the "Repository functions" section and the documentation
of individual arguments for advice on how
to write your own methods.
The tar_repository_cas_local()
function has an example
CAS system based on a local folder on disk.
It uses tar_cas_u()
for uploads,
tar_cas_d()
for downloads, and
tar_cas_l()
for listing keys.
In tar_repository_cas()
, functions upload
, download
,
exists
, and list
must be completely pure and self-sufficient.
They must load or namespace all their own packages,
and they must not depend on any custom user-defined
functions or objects in the global environment of your pipeline.
targets
converts each function to and from text,
so it must not rely on any data in the closure.
This disqualifies functions produced by Vectorize()
,
for example.
upload
and download
can assume length(path)
is 1, but they should
account for the possibility that path
could be a directory. To
avoid supporting directories altogether, upload
could simply call an assertion:
targets::tar_assert_not_dir( path, msg = "This CAS upload method does not support directories." )
Otherwise, support for directories may require handling them as a
special case. For example, upload
and download
could copy
all the files in the given directory,
or they could manage the directory as a zip archive.
Some functions may need to be adapted and configured based on other
inputs. For example, you may want to define
upload = \(key, path) file.rename(path, file.path(folder, key))
but do not want to hard-code a value of folder
when you write the
underlying function. The substitute
argument handles this situation.
For example, if substitute
is list(folder = "my_folder")
,
then upload
will end up as
\(key, path) file.rename(path, file.path("my_folder", key))
.
Temporary or sensitive values such as authentication credentials
should not be injected
this way into the function body. Instead, pass them as environment
variables using tar_resources_repository_cas()
.
Other content-addressable storage:
tar_repository_cas_local()
,
tar_repository_cas_local_gc()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) repository <- tar_repository_cas( upload = function(key, path) { if (dir.exists(path)) { stop("This CAS repository does not support directory outputs.") } if (!file.exists("cas")) { dir.create("cas", recursive = TRUE) } file.rename(path, file.path("cas", key)) }, download = function(key, path) { file.copy(file.path("cas", key), path) }, exists = function(key) { file.exists(file.path("cas", key)) }, list = function(keys) { keys[file.exists(file.path("cas", keys))] }, consistent = FALSE ) write_file <- function(object) { writeLines(as.character(object), "file.txt") "file.txt" } list( tar_target(x, c(2L, 4L), repository = repository), tar_target( y, x, pattern = map(x), format = "qs", repository = repository ), tar_target(z, write_file(y), format = "file", repository = repository) ) }) tar_make() tar_read(y) tar_read(z) list.files("cas") tar_meta(any_of(c("x", "z")), fields = any_of("data")) }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) repository <- tar_repository_cas( upload = function(key, path) { if (dir.exists(path)) { stop("This CAS repository does not support directory outputs.") } if (!file.exists("cas")) { dir.create("cas", recursive = TRUE) } file.rename(path, file.path("cas", key)) }, download = function(key, path) { file.copy(file.path("cas", key), path) }, exists = function(key) { file.exists(file.path("cas", key)) }, list = function(keys) { keys[file.exists(file.path("cas", keys))] }, consistent = FALSE ) write_file <- function(object) { writeLines(as.character(object), "file.txt") "file.txt" } list( tar_target(x, c(2L, 4L), repository = repository), tar_target( y, x, pattern = map(x), format = "qs", repository = repository ), tar_target(z, write_file(y), format = "file", repository = repository) ) }) tar_make() tar_read(y) tar_read(z) list.files("cas") tar_meta(any_of(c("x", "z")), fields = any_of("data")) }) }
Local content-addressable storage (CAS) repository.
tar_repository_cas_local(path = NULL, consistent = FALSE)
tar_repository_cas_local(path = NULL, consistent = FALSE)
path |
Character string, file path to the CAS repository
where all the data object files will be stored. |
consistent |
Logical. Set to A data storage system is said to have strong read-after-write consistency if a new object is fully available for reading as soon as the write operation finishes. Many modern cloud services like Amazon S3 and Google Cloud Storage have strong read-after-write consistency, meaning that if you upload an object with a PUT request, then a GET request immediately afterwards will retrieve the precise version of the object you just uploaded. Some storage systems do not have strong read-after-write consistency.
One example is network file systems (NFS). On a computing cluster,
if one node creates a file on an NFS, then there is a delay before
other nodes can access the new file. |
Pass to the repository
argument of tar_target()
or
tar_option_set()
to use a local CAS system.
A character string from tar_repository_cas()
which may be
passed to the repository
argument of tar_target()
or
tar_option_set()
to use a local CAS system.
Normally, targets
organizes output data
based on target names. For example,
if a pipeline has a single target x
with default settings,
then tar_make()
saves the output data to the file
_targets/objects/x
. When the output of x
changes, tar_make()
overwrites _targets/objects/x
.
In other words, no matter how many changes happen to x
,
the data store always looks like this:
_targets/ meta/ meta objects/ x
By contrast, with content-addressable storage (CAS),
targets
organizes outputs based on the hashes of their contents.
The name of each output file is its hash, and the
metadata maps these hashes to target names. For example, suppose
target x
has repository = tar_repository_cas_local("my_cas")
.
When the output of x
changes, tar_make()
creates a new file
inside my_cas/
without overwriting or deleting any other files
in that folder. If you run tar_make()
three different times
with three different values of x
, then storage will look like this:
_targets/ meta/ meta my_cas/ 1fffeb09ad36e84a 68328d833e6361d3 798af464fb2f6b30
The next call to tar_read(x)
uses tar_meta(x)$data
to look up the current hash of x
. If tar_meta(x)$data
returns
"1fffeb09ad36e84a"
, then tar_read(x)
returns the data from
my_cas/1fffeb09ad36e84a
. Files my_cas/68328d833e6361d3
and
my_cas/798af464fb2f6b30
are left over from previous values of x
.
Because CAS accumulates historical data objects,
it is ideal for data versioning and collaboration.
If you commit the _targets/meta/meta
file to version control
alongside the source code,
then you can revert to a previous state of your pipeline with all your
targets up to date, and a colleague can leverage your hard-won
results using a fork of your code and metadata.
The downside of CAS is the cost of accumulating many data objects over time. Most pipelines that use CAS should have a garbage collection system or retention policy to remove data objects when they are no longer needed.
The tar_repository_cas()
function lets you create your own CAS system
for targets
. You can supply arbitrary custom methods to upload,
download, and check for the existence of data objects. Your custom
CAS system can exist locally on a shared file system or remotely
on the cloud (e.g. in an AWS S3 bucket).
See the "Repository functions" section and the documentation
of individual arguments for advice on how
to write your own methods.
The tar_repository_cas_local()
function has an example
CAS system based on a local folder on disk.
It uses tar_cas_u()
for uploads,
tar_cas_d()
for downloads, and
tar_cas_l()
for listing keys.
Other content-addressable storage:
tar_repository_cas()
,
tar_repository_cas_local_gc()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) repository <- tar_repository_cas_local("cas") write_file <- function(object) { writeLines(as.character(object), "file.txt") "file.txt" } list( tar_target(x, c(2L, 4L), repository = repository), tar_target( y, x, pattern = map(x), format = "qs", repository = repository ), tar_target(z, write_file(y), format = "file", repository = repository) ) }) tar_make() tar_read(y) tar_read(z) list.files("cas") tar_meta(any_of(c("x", "z")), fields = any_of("data")) }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) repository <- tar_repository_cas_local("cas") write_file <- function(object) { writeLines(as.character(object), "file.txt") "file.txt" } list( tar_target(x, c(2L, 4L), repository = repository), tar_target( y, x, pattern = map(x), format = "qs", repository = repository ), tar_target(z, write_file(y), format = "file", repository = repository) ) }) tar_make() tar_read(y) tar_read(z) list.files("cas") tar_meta(any_of(c("x", "z")), fields = any_of("data")) }) }
Garbage collection for a local content-addressable storage system.
tar_repository_cas_local_gc( path = NULL, store = targets::tar_config_get("store") )
tar_repository_cas_local_gc( path = NULL, store = targets::tar_config_get("store") )
path |
Character string, file path to the CAS repository
where all the data object files will be stored. |
store |
Character of length 1, path to the
|
Deletes all the files in the local CAS which are not in
tar_meta(targets_only = TRUE)$data
, including all locally saved
historical data of the pipeline. This clears disk space, but
at the expense of removing historical data and data from
other colleagues who worked on the same project.
NULL
(invisibly). Called for its side effects.
Removes files from the CAS repository at path
.
Normally, targets
organizes output data
based on target names. For example,
if a pipeline has a single target x
with default settings,
then tar_make()
saves the output data to the file
_targets/objects/x
. When the output of x
changes, tar_make()
overwrites _targets/objects/x
.
In other words, no matter how many changes happen to x
,
the data store always looks like this:
_targets/ meta/ meta objects/ x
By contrast, with content-addressable storage (CAS),
targets
organizes outputs based on the hashes of their contents.
The name of each output file is its hash, and the
metadata maps these hashes to target names. For example, suppose
target x
has repository = tar_repository_cas_local("my_cas")
.
When the output of x
changes, tar_make()
creates a new file
inside my_cas/
without overwriting or deleting any other files
in that folder. If you run tar_make()
three different times
with three different values of x
, then storage will look like this:
_targets/ meta/ meta my_cas/ 1fffeb09ad36e84a 68328d833e6361d3 798af464fb2f6b30
The next call to tar_read(x)
uses tar_meta(x)$data
to look up the current hash of x
. If tar_meta(x)$data
returns
"1fffeb09ad36e84a"
, then tar_read(x)
returns the data from
my_cas/1fffeb09ad36e84a
. Files my_cas/68328d833e6361d3
and
my_cas/798af464fb2f6b30
are left over from previous values of x
.
Because CAS accumulates historical data objects,
it is ideal for data versioning and collaboration.
If you commit the _targets/meta/meta
file to version control
alongside the source code,
then you can revert to a previous state of your pipeline with all your
targets up to date, and a colleague can leverage your hard-won
results using a fork of your code and metadata.
The downside of CAS is the cost of accumulating many data objects over time. Most pipelines that use CAS should have a garbage collection system or retention policy to remove data objects when they are no longer needed.
The tar_repository_cas()
function lets you create your own CAS system
for targets
. You can supply arbitrary custom methods to upload,
download, and check for the existence of data objects. Your custom
CAS system can exist locally on a shared file system or remotely
on the cloud (e.g. in an AWS S3 bucket).
See the "Repository functions" section and the documentation
of individual arguments for advice on how
to write your own methods.
The tar_repository_cas_local()
function has an example
CAS system based on a local folder on disk.
It uses tar_cas_u()
for uploads,
tar_cas_d()
for downloads, and
tar_cas_l()
for listing keys.
Other content-addressable storage:
tar_repository_cas()
,
tar_repository_cas_local()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set(seed = NA, repository = tar_repository_cas_local()) list(tar_target(x, sample.int(n = 9e9, size = 1))) }) for (index in seq_len(3)) tar_make(reporter = "silent") list.files("_targets/cas") tar_repository_cas_local_gc() list.files("_targets/cas") tar_meta(names = any_of("x"), fields = any_of("data")) }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set(seed = NA, repository = tar_repository_cas_local()) list(tar_target(x, sample.int(n = 9e9, size = 1))) }) for (index in seq_len(3)) tar_make(reporter = "silent") list.files("_targets/cas") tar_repository_cas_local_gc() list.files("_targets/cas") tar_meta(names = any_of("x"), fields = any_of("data")) }) }
targets
with reprex
Create a reproducible example of a targets
pipeline with the reprex
package.
tar_reprex(pipeline = tar_target(example_target, 1), run = tar_make(), ...)
tar_reprex(pipeline = tar_target(example_target, 1), run = tar_make(), ...)
pipeline |
R code for the target script file |
run |
R code to inspect and run the pipeline. |
... |
Named arguments passed to |
The best way to get help with an issue is to
create a reproducible example of the problem
and post it to https://github.com/ropensci/targets/discussions.
tar_reprex()
facilitates this process. It is like
reprex::reprex({targets::tar_script(...); tar_make()})
,
but more convenient.
A character vector of the rendered reprex, invisibly.
Other help:
targets-package
,
use_targets()
,
use_targets_rmd()
if (identical(Sys.getenv("TAR_INTERACTIVE_EXAMPLES"), "true")) { tar_reprex( pipeline = { list( tar_target(data, data.frame(x = sample.int(1e3))), tar_target(summary, mean(data$x, na.rm = TRUE)) ) }, run = { tar_visnetwork() tar_make() } ) }
if (identical(Sys.getenv("TAR_INTERACTIVE_EXAMPLES"), "true")) { tar_reprex( pipeline = { list( tar_target(data, data.frame(x = sample.int(1e3))), tar_target(summary, mean(data$x, na.rm = TRUE)) ) }, run = { tar_visnetwork() tar_make() } ) }
Create a resources
argument for tar_target()
or tar_option_set()
.
tar_resources( aws = tar_option_get("resources")$aws, clustermq = tar_option_get("resources")$clustermq, crew = tar_option_get("resources")$crew, custom_format = tar_option_get("resources")$custom_format, feather = tar_option_get("resources")$feather, fst = tar_option_get("resources")$fst, future = tar_option_get("resources")$future, gcp = tar_option_get("resources")$gcp, network = tar_option_get("resources")$network, parquet = tar_option_get("resources")$parquet, qs = tar_option_get("resources")$qs, repository_cas = tar_option_get("resources")$repository_cas, url = tar_option_get("resources")$url )
tar_resources( aws = tar_option_get("resources")$aws, clustermq = tar_option_get("resources")$clustermq, crew = tar_option_get("resources")$crew, custom_format = tar_option_get("resources")$custom_format, feather = tar_option_get("resources")$feather, fst = tar_option_get("resources")$fst, future = tar_option_get("resources")$future, gcp = tar_option_get("resources")$gcp, network = tar_option_get("resources")$network, parquet = tar_option_get("resources")$parquet, qs = tar_option_get("resources")$qs, repository_cas = tar_option_get("resources")$repository_cas, url = tar_option_get("resources")$url )
aws |
Output of function |
clustermq |
Output of function |
crew |
Output of function |
custom_format |
Output of function |
feather |
Output of function |
fst |
Output of function |
future |
Output of function |
gcp |
Output of function |
network |
Output of function |
parquet |
Output of function |
qs |
Output of function |
repository_cas |
Output of function |
url |
Output of function |
A list of objects of class "tar_resources"
with
non-default settings of various optional backends for data storage
and high-performance computing.
Functions tar_target()
and tar_option_set()
each take an optional resources
argument to supply
non-default settings of various optional backends for data storage
and high-performance computing. The tar_resources()
function
is a helper to supply those settings in the correct manner.
In targets
version 0.12.2 and above, resources are inherited one-by-one
in nested fashion from tar_option_get("resources")
.
For example, suppose you set
tar_option_set(resources = tar_resources(aws = my_aws))
,
where my_aws
equals tar_resources_aws(bucket = "x", prefix = "y")
.
Then, tar_target(data, get_data())
will have bucket "x"
and
prefix "y"
. In addition, if new_resources
equals
tar_resources(aws = tar_resources_aws(bucket = "z"))
, then
tar_target(data, get_data(), resources = new_resources)
will use the new bucket "z"
, but it will still use the prefix "y"
supplied through tar_option_set()
. (In targets
0.12.1 and below,
options like prefix
do not carry over from tar_option_set()
if you
supply non-default resources to tar_target()
.)
Other resources:
tar_resources_aws()
,
tar_resources_clustermq()
,
tar_resources_crew()
,
tar_resources_custom_format()
,
tar_resources_feather()
,
tar_resources_fst()
,
tar_resources_future()
,
tar_resources_gcp()
,
tar_resources_network()
,
tar_resources_parquet()
,
tar_resources_qs()
,
tar_resources_repository_cas()
,
tar_resources_url()
# Somewhere in your target script file (usually _targets.R): tar_target( name, command(), format = "qs", resources = tar_resources( qs = tar_resources_qs(preset = "fast"), future = tar_resources_future(resources = list(n_cores = 1)) ) )
# Somewhere in your target script file (usually _targets.R): tar_target( name, command(), format = "qs", resources = tar_resources( qs = tar_resources_qs(preset = "fast"), future = tar_resources_future(resources = list(n_cores = 1)) ) )
Create the aws
argument of tar_resources()
to specify optional settings to AWS for
tar_target(..., repository = "aws")
.
See the format
argument of tar_target()
for details.
tar_resources_aws( bucket = targets::tar_option_get("resources")$aws$bucket, prefix = targets::tar_option_get("resources")$aws$prefix, region = targets::tar_option_get("resources")$aws$region, endpoint = targets::tar_option_get("resources")$aws$endpoint, s3_force_path_style = targets::tar_option_get("resources")$aws$s3_force_path_style, part_size = targets::tar_option_get("resources")$aws$part_size, page_size = targets::tar_option_get("resources")$aws$page_size, max_tries = targets::tar_option_get("resources")$aws$max_tries, seconds_timeout = targets::tar_option_get("resources")$aws$seconds_timeout, close_connection = targets::tar_option_get("resources")$aws$close_connection, verbose = targets::tar_option_get("resources")$aws$verbose, ... )
tar_resources_aws( bucket = targets::tar_option_get("resources")$aws$bucket, prefix = targets::tar_option_get("resources")$aws$prefix, region = targets::tar_option_get("resources")$aws$region, endpoint = targets::tar_option_get("resources")$aws$endpoint, s3_force_path_style = targets::tar_option_get("resources")$aws$s3_force_path_style, part_size = targets::tar_option_get("resources")$aws$part_size, page_size = targets::tar_option_get("resources")$aws$page_size, max_tries = targets::tar_option_get("resources")$aws$max_tries, seconds_timeout = targets::tar_option_get("resources")$aws$seconds_timeout, close_connection = targets::tar_option_get("resources")$aws$close_connection, verbose = targets::tar_option_get("resources")$aws$verbose, ... )
bucket |
Character of length 1, name of an existing bucket to upload and download the return values of the affected targets during the pipeline. |
prefix |
Character of length 1, "directory path"
in the bucket where your target object and metadata will go.
Please supply an explicit prefix
unique to your |
region |
Character of length 1, AWS region containing the S3 bucket.
Set to |
endpoint |
Character of length 1, URL endpoint for S3 storage.
Defaults to the Amazon AWS endpoint if |
s3_force_path_style |
Logical of length 1, whether to use path-style addressing for S3 requests. |
part_size |
Positive numeric of length 1, number of bytes
for each part of a multipart upload. (Except the last part,
which is the remainder.) In a multipart upload, each part
must be at least 5 MB. The default value of the |
page_size |
Positive integer of length 1, number of items in each page for paginated HTTP requests such as listing objects. |
max_tries |
Positive integer of length 1, maximum number of attempts to access a network resource on AWS. |
seconds_timeout |
Positive numeric of length 1, number of seconds until an HTTP connection times out. |
close_connection |
Logical of length 1, whether to close HTTP connections immediately. |
verbose |
Logical of length 1, whether to print console messages when running computationally expensive operations such as listing objects in a large bucket. |
... |
Named arguments to functions in
|
See the cloud storage section of https://books.ropensci.org/targets/data.html for details and instructions.
Object of class "tar_resources_aws"
, to be supplied
to the aws
argument of tar_resources()
.
Functions tar_target()
and tar_option_set()
each take an optional resources
argument to supply
non-default settings of various optional backends for data storage
and high-performance computing. The tar_resources()
function
is a helper to supply those settings in the correct manner.
In targets
version 0.12.2 and above, resources are inherited one-by-one
in nested fashion from tar_option_get("resources")
.
For example, suppose you set
tar_option_set(resources = tar_resources(aws = my_aws))
,
where my_aws
equals tar_resources_aws(bucket = "x", prefix = "y")
.
Then, tar_target(data, get_data())
will have bucket "x"
and
prefix "y"
. In addition, if new_resources
equals
tar_resources(aws = tar_resources_aws(bucket = "z"))
, then
tar_target(data, get_data(), resources = new_resources)
will use the new bucket "z"
, but it will still use the prefix "y"
supplied through tar_option_set()
. (In targets
0.12.1 and below,
options like prefix
do not carry over from tar_option_set()
if you
supply non-default resources to tar_target()
.)
Other resources:
tar_resources()
,
tar_resources_clustermq()
,
tar_resources_crew()
,
tar_resources_custom_format()
,
tar_resources_feather()
,
tar_resources_fst()
,
tar_resources_future()
,
tar_resources_gcp()
,
tar_resources_network()
,
tar_resources_parquet()
,
tar_resources_qs()
,
tar_resources_repository_cas()
,
tar_resources_url()
# Somewhere in your target script file (usually _targets.R): if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_target( name, command(), format = "qs", repository = "aws", resources = tar_resources( aws = tar_resources_aws( bucket = "yourbucketname", prefix = "_targets" ), qs = tar_resources_qs(preset = "fast"), ) ) }
# Somewhere in your target script file (usually _targets.R): if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_target( name, command(), format = "qs", repository = "aws", resources = tar_resources( aws = tar_resources_aws( bucket = "yourbucketname", prefix = "_targets" ), qs = tar_resources_qs(preset = "fast"), ) ) }
clustermq
high-performance computing
Create the clustermq
argument of tar_resources()
to specify optional high-performance computing settings
for tar_make_clustermq()
.
For details, see the documentation of the clustermq
R package
and the corresponding argument names in this help file.
tar_resources_clustermq( template = targets::tar_option_get("resources")$clustermq$template )
tar_resources_clustermq( template = targets::tar_option_get("resources")$clustermq$template )
template |
Named list, |
clustermq
workers are persistent,
so there is not a one-to-one correspondence between workers and targets.
The clustermq
resources apply to the workers, not the targets.
So the correct way to assign clustermq
resources is through
tar_option_set()
, not tar_target()
. clustermq
resources
in individual tar_target()
calls will be ignored.
Object of class "tar_resources_clustermq"
, to be supplied
to the clustermq
argument of tar_resources()
.
Functions tar_target()
and tar_option_set()
each take an optional resources
argument to supply
non-default settings of various optional backends for data storage
and high-performance computing. The tar_resources()
function
is a helper to supply those settings in the correct manner.
In targets
version 0.12.2 and above, resources are inherited one-by-one
in nested fashion from tar_option_get("resources")
.
For example, suppose you set
tar_option_set(resources = tar_resources(aws = my_aws))
,
where my_aws
equals tar_resources_aws(bucket = "x", prefix = "y")
.
Then, tar_target(data, get_data())
will have bucket "x"
and
prefix "y"
. In addition, if new_resources
equals
tar_resources(aws = tar_resources_aws(bucket = "z"))
, then
tar_target(data, get_data(), resources = new_resources)
will use the new bucket "z"
, but it will still use the prefix "y"
supplied through tar_option_set()
. (In targets
0.12.1 and below,
options like prefix
do not carry over from tar_option_set()
if you
supply non-default resources to tar_target()
.)
Other resources:
tar_resources()
,
tar_resources_aws()
,
tar_resources_crew()
,
tar_resources_custom_format()
,
tar_resources_feather()
,
tar_resources_fst()
,
tar_resources_future()
,
tar_resources_gcp()
,
tar_resources_network()
,
tar_resources_parquet()
,
tar_resources_qs()
,
tar_resources_repository_cas()
,
tar_resources_url()
# Somewhere in your target script file (usually _targets.R): tar_target( name, command(), resources = tar_resources( clustermq = tar_resources_clustermq(template = list(n_cores = 2)) ) )
# Somewhere in your target script file (usually _targets.R): tar_target( name, command(), resources = tar_resources( clustermq = tar_resources_clustermq(template = list(n_cores = 2)) ) )
crew
high-performance computing
Create the crew
argument of tar_resources()
to specify optional target settings.
tar_resources_crew( controller = targets::tar_option_get("resources")$crew$controller, scale = NULL, seconds_timeout = targets::tar_option_get("resources")$crew$seconds_timeout )
tar_resources_crew( controller = targets::tar_option_get("resources")$crew$controller, scale = NULL, seconds_timeout = targets::tar_option_get("resources")$crew$seconds_timeout )
controller |
Character of length 1.
If |
scale |
Deprecated in version 1.3.0.9002 (2023-10-02). No longer necessary. |
seconds_timeout |
Positive numeric of length 1,
optional task timeout passed to the |
tar_resources_crew()
accepts
target-specific settings for integration with the
crew
R package. These settings are arguments to the push()
method of the controller or controller group
object which control things like
auto-scaling behavior and the controller to use in the case
of a controller group.
Object of class "tar_resources_crew"
, to be supplied
to the crew
argument of tar_resources()
.
Functions tar_target()
and tar_option_set()
each take an optional resources
argument to supply
non-default settings of various optional backends for data storage
and high-performance computing. The tar_resources()
function
is a helper to supply those settings in the correct manner.
In targets
version 0.12.2 and above, resources are inherited one-by-one
in nested fashion from tar_option_get("resources")
.
For example, suppose you set
tar_option_set(resources = tar_resources(aws = my_aws))
,
where my_aws
equals tar_resources_aws(bucket = "x", prefix = "y")
.
Then, tar_target(data, get_data())
will have bucket "x"
and
prefix "y"
. In addition, if new_resources
equals
tar_resources(aws = tar_resources_aws(bucket = "z"))
, then
tar_target(data, get_data(), resources = new_resources)
will use the new bucket "z"
, but it will still use the prefix "y"
supplied through tar_option_set()
. (In targets
0.12.1 and below,
options like prefix
do not carry over from tar_option_set()
if you
supply non-default resources to tar_target()
.)
Other resources:
tar_resources()
,
tar_resources_aws()
,
tar_resources_clustermq()
,
tar_resources_custom_format()
,
tar_resources_feather()
,
tar_resources_fst()
,
tar_resources_future()
,
tar_resources_gcp()
,
tar_resources_network()
,
tar_resources_parquet()
,
tar_resources_qs()
,
tar_resources_repository_cas()
,
tar_resources_url()
# Somewhere in your target script file (usually _targets.R): tar_target( name, command(), resources = tar_resources( crew = tar_resources_crew(seconds_timeout = 5) ) )
# Somewhere in your target script file (usually _targets.R): tar_target( name, command(), resources = tar_resources( crew = tar_resources_crew(seconds_timeout = 5) ) )
Create the custom_format
argument of tar_resources()
to specify optional target settings for custom storage formats.
tar_resources_custom_format( envvars = targets::tar_option_get("resources")$custom_format$envvars )
tar_resources_custom_format( envvars = targets::tar_option_get("resources")$custom_format$envvars )
envvars |
Named character vector of environment variables.
These environment variables are temporarily set just before each call to
the storage methods you define in |
tar_resources_custom_format()
accepts
target-specific settings to customize tar_format()
storage formats.
Object of class "tar_resources_custom_format"
, to be supplied
to the custom_format
argument of tar_resources()
.
Functions tar_target()
and tar_option_set()
each take an optional resources
argument to supply
non-default settings of various optional backends for data storage
and high-performance computing. The tar_resources()
function
is a helper to supply those settings in the correct manner.
In targets
version 0.12.2 and above, resources are inherited one-by-one
in nested fashion from tar_option_get("resources")
.
For example, suppose you set
tar_option_set(resources = tar_resources(aws = my_aws))
,
where my_aws
equals tar_resources_aws(bucket = "x", prefix = "y")
.
Then, tar_target(data, get_data())
will have bucket "x"
and
prefix "y"
. In addition, if new_resources
equals
tar_resources(aws = tar_resources_aws(bucket = "z"))
, then
tar_target(data, get_data(), resources = new_resources)
will use the new bucket "z"
, but it will still use the prefix "y"
supplied through tar_option_set()
. (In targets
0.12.1 and below,
options like prefix
do not carry over from tar_option_set()
if you
supply non-default resources to tar_target()
.)
Other resources:
tar_resources()
,
tar_resources_aws()
,
tar_resources_clustermq()
,
tar_resources_crew()
,
tar_resources_feather()
,
tar_resources_fst()
,
tar_resources_future()
,
tar_resources_gcp()
,
tar_resources_network()
,
tar_resources_parquet()
,
tar_resources_qs()
,
tar_resources_repository_cas()
,
tar_resources_url()
# Somewhere in your target script file (usually _targets.R): tar_target( name = target_name, command = data.frame(x = 1), format = tar_format( read = function(path) { readRDS(file = path) }, write = function(object, path) { version <- as.integer(Sys.getenv("SERIALIZATION", unset = "2")) saveRDS(object = object, file = path, version = version) } ), resources = tar_resources( custom_format = tar_resources_custom_format( envvars = c(SERIALIZATION = "3") ) ) )
# Somewhere in you target script file (usually _targets.R): tar_target( name = target_name, command = data.frame(x = 1), format = tar_format( read = function(path) { readRDS(file = path) }, write = function(object, path) { version <- as.integer(Sys.getenv("SERIALIZATION", unset = "2")) saveRDS(object = object, file = path, version = version) } ), resources = tar_resources( custom_format = tar_resources_custom_format( envvars = c(SERIALIZATION = "3") ) ) )
Create the feather argument of tar_resources()
to specify optional settings for feather data frame storage formats
powered by the arrow
R package.
See the format
argument of tar_target()
for details.
tar_resources_feather( compression = targets::tar_option_get("resources")$feather$compression, compression_level = targets::tar_option_get("resources")$feather$compression_level )
tar_resources_feather( compression = targets::tar_option_get("resources")$feather$compression, compression_level = targets::tar_option_get("resources")$feather$compression_level )
compression |
Character of length 1, |
compression_level |
Numeric of length 1, |
Object of class "tar_resources_feather"
, to be supplied
to the feather argument of tar_resources()
.
Functions tar_target()
and tar_option_set()
each take an optional resources
argument to supply
non-default settings of various optional backends for data storage
and high-performance computing. The tar_resources()
function
is a helper to supply those settings in the correct manner.
In targets
version 0.12.2 and above, resources are inherited one-by-one
in nested fashion from tar_option_get("resources")
.
For example, suppose you set
tar_option_set(resources = tar_resources(aws = my_aws))
,
where my_aws
equals tar_resources_aws(bucket = "x", prefix = "y")
.
Then, tar_target(data, get_data())
will have bucket "x"
and
prefix "y"
. In addition, if new_resources
equals
tar_resources(aws = tar_resources_aws(bucket = "z"))
, then
tar_target(data, get_data(), resources = new_resources)
will use the new bucket "z"
, but it will still use the prefix "y"
supplied through tar_option_set()
. (In targets
0.12.1 and below,
options like prefix
do not carry over from tar_option_set()
if you
supply non-default resources to tar_target()
.)
Other resources:
tar_resources()
,
tar_resources_aws()
,
tar_resources_clustermq()
,
tar_resources_crew()
,
tar_resources_custom_format()
,
tar_resources_fst()
,
tar_resources_future()
,
tar_resources_gcp()
,
tar_resources_network()
,
tar_resources_parquet()
,
tar_resources_qs()
,
tar_resources_repository_cas()
,
tar_resources_url()
# Somewhere in your target script file (usually _targets.R): tar_target( name, command(), format = "feather", resources = tar_resources( feather = tar_resources_feather(compression = "lz4") ) )
# Somewhere in your target script file (usually _targets.R): tar_target( name, command(), format = "feather", resources = tar_resources( feather = tar_resources_feather(compression = "lz4") ) )
fst
storage formats
Create the fst
argument of tar_resources()
to specify optional settings for big data frame storage formats
powered by the fst
R package.
See the format
argument of tar_target()
for details.
tar_resources_fst(compress = targets::tar_option_get("resources")$fst$compress)
tar_resources_fst(compress = targets::tar_option_get("resources")$fst$compress)
compress |
Numeric of length 1, |
Object of class "tar_resources_fst"
, to be supplied
to the fst
argument of tar_resources()
.
Functions tar_target()
and tar_option_set()
each take an optional resources
argument to supply
non-default settings of various optional backends for data storage
and high-performance computing. The tar_resources()
function
is a helper to supply those settings in the correct manner.
In targets
version 0.12.2 and above, resources are inherited one-by-one
in nested fashion from tar_option_get("resources")
.
For example, suppose you set
tar_option_set(resources = tar_resources(aws = my_aws))
,
where my_aws
equals tar_resources_aws(bucket = "x", prefix = "y")
.
Then, tar_target(data, get_data())
will have bucket "x"
and
prefix "y"
. In addition, if new_resources
equals
tar_resources(aws = tar_resources_aws(bucket = "z"))
, then
tar_target(data, get_data(), resources = new_resources)
will use the new bucket "z"
, but it will still use the prefix "y"
supplied through tar_option_set()
. (In targets
0.12.1 and below,
options like prefix
do not carry over from tar_option_set()
if you
supply non-default resources to tar_target()
.)
Other resources:
tar_resources()
,
tar_resources_aws()
,
tar_resources_clustermq()
,
tar_resources_crew()
,
tar_resources_custom_format()
,
tar_resources_feather()
,
tar_resources_future()
,
tar_resources_gcp()
,
tar_resources_network()
,
tar_resources_parquet()
,
tar_resources_qs()
,
tar_resources_repository_cas()
,
tar_resources_url()
# Somewhere in your target script file (usually _targets.R): tar_target( name, command(), format = "fst_tbl", resources = tar_resources( fst = tar_resources_fst(compress = 100) ) )
# Somewhere in your target script file (usually _targets.R): tar_target( name, command(), format = "fst_tbl", resources = tar_resources( fst = tar_resources_fst(compress = 100) ) )
future
high-performance computing
Create the future
argument of tar_resources()
to specify optional high-performance computing settings
for tar_make_future()
.
This is how to supply the resources
argument of future::future()
for targets
.
Resources supplied through
future::plan()
and future::tweak()
are completely ignored.
For details, see the documentation of the future
R package
and the corresponding argument names in this help file.
tar_resources_future( plan = NULL, resources = targets::tar_option_get("resources")$future$resources )
tar_resources_future( plan = NULL, resources = targets::tar_option_get("resources")$future$resources )
plan |
A |
resources |
Named list, |
Object of class "tar_resources_future"
, to be supplied
to the future
argument of tar_resources()
.
Functions tar_target()
and tar_option_set()
each take an optional resources
argument to supply
non-default settings of various optional backends for data storage
and high-performance computing. The tar_resources()
function
is a helper to supply those settings in the correct manner.
In targets
version 0.12.2 and above, resources are inherited one-by-one
in nested fashion from tar_option_get("resources")
.
For example, suppose you set
tar_option_set(resources = tar_resources(aws = my_aws))
,
where my_aws
equals tar_resources_aws(bucket = "x", prefix = "y")
.
Then, tar_target(data, get_data())
will have bucket "x"
and
prefix "y"
. In addition, if new_resources
equals
tar_resources(aws = tar_resources_aws(bucket = "z"))
, then
tar_target(data, get_data(), resources = new_resources)
will use the new bucket "z"
, but it will still use the prefix "y"
supplied through tar_option_set()
. (In targets
0.12.1 and below,
options like prefix
do not carry over from tar_option_set()
if you
supply non-default resources to tar_target()
.)
Other resources:
tar_resources()
,
tar_resources_aws()
,
tar_resources_clustermq()
,
tar_resources_crew()
,
tar_resources_custom_format()
,
tar_resources_feather()
,
tar_resources_fst()
,
tar_resources_gcp()
,
tar_resources_network()
,
tar_resources_parquet()
,
tar_resources_qs()
,
tar_resources_repository_cas()
,
tar_resources_url()
# Somewhere in your target script file (usually _targets.R): tar_target( name, command(), resources = tar_resources( future = tar_resources_future(resources = list(n_cores = 2)) ) )
# Somewhere in your target script file (usually _targets.R): tar_target( name, command(), resources = tar_resources( future = tar_resources_future(resources = list(n_cores = 2)) ) )
Create the gcp
argument of tar_resources()
to specify optional settings for Google Cloud Storage for
targets with tar_target(..., repository = "gcp")
.
See the format
argument of tar_target()
for details.
tar_resources_gcp( bucket = targets::tar_option_get("resources")$gcp$bucket, prefix = targets::tar_option_get("resources")$gcp$prefix, predefined_acl = targets::tar_option_get("resources")$gcp$predefined_acl, max_tries = targets::tar_option_get("resources")$gcp$max_tries, verbose = targets::tar_option_get("resources")$gcp$verbose )
tar_resources_gcp( bucket = targets::tar_option_get("resources")$gcp$bucket, prefix = targets::tar_option_get("resources")$gcp$prefix, predefined_acl = targets::tar_option_get("resources")$gcp$predefined_acl, max_tries = targets::tar_option_get("resources")$gcp$max_tries, verbose = targets::tar_option_get("resources")$gcp$verbose )
bucket |
Character of length 1, name of an existing bucket to upload and download the return values of the affected targets during the pipeline. |
prefix |
Character of length 1, "directory path"
in the bucket where your target object and metadata will go.
Please supply an explicit prefix
unique to your |
predefined_acl |
Character of length 1, user access
to the object. See |
max_tries |
Positive integer of length 1, number of tries accessing a network resource on GCP. |
verbose |
Logical of length 1, whether to print
extra messages like progress bars during uploads
and downloads. Defaults to |
See the cloud storage section of https://books.ropensci.org/targets/data.html for details and instructions.
Object of class "tar_resources_gcp"
, to be supplied
to the gcp
argument of tar_resources()
.
Functions tar_target()
and tar_option_set()
each take an optional resources
argument to supply
non-default settings of various optional backends for data storage
and high-performance computing. The tar_resources()
function
is a helper to supply those settings in the correct manner.
In targets
version 0.12.2 and above, resources are inherited one-by-one
in nested fashion from tar_option_get("resources")
.
For example, suppose you set
tar_option_set(resources = tar_resources(aws = my_aws))
,
where my_aws
equals tar_resources_aws(bucket = "x", prefix = "y")
.
Then, tar_target(data, get_data())
will have bucket "x"
and
prefix "y"
. In addition, if new_resources
equals
tar_resources(aws = tar_resources_aws(bucket = "z"))
, then
tar_target(data, get_data(), resources = new_resources)
will use the new bucket "z"
, but it will still use the prefix "y"
supplied through tar_option_set()
. (In targets
0.12.1 and below,
options like prefix
do not carry over from tar_option_set()
if you
supply non-default resources to tar_target()
.)
Other resources:
tar_resources()
,
tar_resources_aws()
,
tar_resources_clustermq()
,
tar_resources_crew()
,
tar_resources_custom_format()
,
tar_resources_feather()
,
tar_resources_fst()
,
tar_resources_future()
,
tar_resources_network()
,
tar_resources_parquet()
,
tar_resources_qs()
,
tar_resources_repository_cas()
,
tar_resources_url()
# Somewhere in your target script file (usually _targets.R): tar_target( name, command(), format = "qs", repository = "gcp", resources = tar_resources( gcp = tar_resources_gcp( bucket = "yourbucketname", prefix = "_targets" ), qs = tar_resources_qs(preset = "fast"), ) )
# Somewhere in your target script file (usually _targets.R): tar_target( name, command(), format = "qs", repository = "gcp", resources = tar_resources( gcp = tar_resources_gcp( bucket = "yourbucketname", prefix = "_targets" ), qs = tar_resources_qs(preset = "fast"), ) )
In high-performance computing on network file systems,
if storage = "worker"
in tar_target()
or tar_option_set()
, then
targets
waits for hashes to synchronize before continuing the pipeline.
These resources control the retry mechanism.
tar_resources_network( max_tries = targets::tar_option_get("resources")$network$max_tries, seconds_interval = targets::tar_option_get("resources")$network$seconds_interval, seconds_timeout = targets::tar_option_get("resources")$network$seconds_timeout, verbose = targets::tar_option_get("resources")$network$verbose )
tar_resources_network( max_tries = targets::tar_option_get("resources")$network$max_tries, seconds_interval = targets::tar_option_get("resources")$network$seconds_interval, seconds_timeout = targets::tar_option_get("resources")$network$seconds_timeout, verbose = targets::tar_option_get("resources")$network$verbose )
max_tries |
Positive integer of length 1. Max number of tries. |
seconds_interval |
Positive numeric of length 1, seconds between retries. |
seconds_timeout |
Positive numeric of length 1. Timeout length in seconds. |
verbose |
Logical of length 1, whether to print informative console messages. |
Object of class "tar_resources_network"
, to be supplied
to the network argument of tar_resources()
.
Functions tar_target()
and tar_option_set()
each take an optional resources
argument to supply
non-default settings of various optional backends for data storage
and high-performance computing. The tar_resources()
function
is a helper to supply those settings in the correct manner.
In targets
version 0.12.2 and above, resources are inherited one-by-one
in nested fashion from tar_option_get("resources")
.
For example, suppose you set
tar_option_set(resources = tar_resources(aws = my_aws))
,
where my_aws
equals tar_resources_aws(bucket = "x", prefix = "y")
.
Then, tar_target(data, get_data())
will have bucket "x"
and
prefix "y"
. In addition, if new_resources
equals
tar_resources(aws = tar_resources_aws(bucket = "z"))
, then
tar_target(data, get_data(), resources = new_resources)
will use the new bucket "z"
, but it will still use the prefix "y"
supplied through tar_option_set()
. (In targets
0.12.1 and below,
options like prefix
do not carry over from tar_option_set()
if you
supply non-default resources to tar_target()
.)
Other resources:
tar_resources()
,
tar_resources_aws()
,
tar_resources_clustermq()
,
tar_resources_crew()
,
tar_resources_custom_format()
,
tar_resources_feather()
,
tar_resources_fst()
,
tar_resources_future()
,
tar_resources_gcp()
,
tar_resources_parquet()
,
tar_resources_qs()
,
tar_resources_repository_cas()
,
tar_resources_url()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN # Somewhere in your target script file (usually _targets.R): tar_target( name = your_name, command = your_command(), storage = "worker", resources = tar_resources( network = tar_resources_network(max_tries = 3) ) ) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN # Somewhere in your target script file (usually _targets.R): tar_target( name = your_name, command = your_command(), storage = "worker", resources = tar_resources( network = tar_resources_network(max_tries = 3) ) ) }
Create the parquet
argument of tar_resources()
to specify optional settings for parquet data frame storage formats
powered by the arrow
R package.
See the format
argument of tar_target()
for details.
tar_resources_parquet( compression = targets::tar_option_get("resources")$parquet$compression, compression_level = targets::tar_option_get("resources")$parquet$compression_level )
tar_resources_parquet( compression = targets::tar_option_get("resources")$parquet$compression, compression_level = targets::tar_option_get("resources")$parquet$compression_level )
compression |
Character of length 1, |
compression_level |
Numeric of length 1, |
Object of class "tar_resources_parquet"
, to be supplied
to the parquet argument of tar_resources()
.
Functions tar_target()
and tar_option_set()
each take an optional resources
argument to supply
non-default settings of various optional backends for data storage
and high-performance computing. The tar_resources()
function
is a helper to supply those settings in the correct manner.
In targets
version 0.12.2 and above, resources are inherited one-by-one
in nested fashion from tar_option_get("resources")
.
For example, suppose you set
tar_option_set(resources = tar_resources(aws = my_aws))
,
where my_aws
equals tar_resources_aws(bucket = "x", prefix = "y")
.
Then, tar_target(data, get_data())
will have bucket "x"
and
prefix "y"
. In addition, if new_resources
equals
tar_resources(aws = tar_resources_aws(bucket = "z"))
, then
tar_target(data, get_data(), resources = new_resources)
will use the new bucket "z"
, but it will still use the prefix "y"
supplied through tar_option_set()
. (In targets
0.12.1 and below,
options like prefix
do not carry over from tar_option_set()
if you
supply non-default resources to tar_target()
.)
Other resources:
tar_resources()
,
tar_resources_aws()
,
tar_resources_clustermq()
,
tar_resources_crew()
,
tar_resources_custom_format()
,
tar_resources_feather()
,
tar_resources_fst()
,
tar_resources_future()
,
tar_resources_gcp()
,
tar_resources_network()
,
tar_resources_qs()
,
tar_resources_repository_cas()
,
tar_resources_url()
# Somewhere in your target script file (usually _targets.R): tar_target( name, command(), format = "parquet", resources = tar_resources( parquet = tar_resources_parquet(compression = "lz4") ) )
# Somewhere in your target script file (usually _targets.R): tar_target( name, command(), format = "parquet", resources = tar_resources( parquet = tar_resources_parquet(compression = "lz4") ) )
Create the qs
argument of tar_resources()
to specify optional settings for big data storage formats
powered by the qs
R package.
See the format
argument of tar_target()
for details.
tar_resources_qs(preset = targets::tar_option_get("resources")$qs$preset)
tar_resources_qs(preset = targets::tar_option_get("resources")$qs$preset)
preset |
Character of length 1, |
Object of class "tar_resources_qs"
, to be supplied
to the qs argument of tar_resources()
.
Functions tar_target()
and tar_option_set()
each take an optional resources
argument to supply
non-default settings of various optional backends for data storage
and high-performance computing. The tar_resources()
function
is a helper to supply those settings in the correct manner.
In targets
version 0.12.2 and above, resources are inherited one-by-one
in nested fashion from tar_option_get("resources")
.
For example, suppose you set
tar_option_set(resources = tar_resources(aws = my_aws))
,
where my_aws
equals tar_resources_aws(bucket = "x", prefix = "y")
.
Then, tar_target(data, get_data())
will have bucket "x"
and
prefix "y"
. In addition, if new_resources
equals
tar_resources(aws = tar_resources_aws(bucket = "z"))
, then
tar_target(data, get_data(), resources = new_resources)
will use the new bucket "z"
, but it will still use the prefix "y"
supplied through tar_option_set()
. (In targets
0.12.1 and below,
options like prefix
do not carry over from tar_option_set()
if you
supply non-default resources to tar_target()
.)
Other resources:
tar_resources()
,
tar_resources_aws()
,
tar_resources_clustermq()
,
tar_resources_crew()
,
tar_resources_custom_format()
,
tar_resources_feather()
,
tar_resources_fst()
,
tar_resources_future()
,
tar_resources_gcp()
,
tar_resources_network()
,
tar_resources_parquet()
,
tar_resources_repository_cas()
,
tar_resources_url()
# Somewhere in your target script file (usually _targets.R): tar_target( name, command(), format = "qs", resources = tar_resources( qs = tar_resources_qs(preset = "fast") ) )
# Somewhere in your target script file (usually _targets.R): tar_target( name, command(), format = "qs", resources = tar_resources( qs = tar_resources_qs(preset = "fast") ) )
Create the repository_cas
argument of tar_resources()
to specify optional target settings for custom storage repositories.
tar_resources_repository_cas( envvars = targets::tar_option_get("resources")$repository_cas$envvars )
tar_resources_repository_cas( envvars = targets::tar_option_get("resources")$repository_cas$envvars )
envvars |
Named character vector of environment variables.
These environment variables are temporarily set just before each call to
the storage methods you define in |
tar_resources_repository_cas()
accepts
target-specific settings to customize tar_repository_cas()
storage
repositories.
Object of class "tar_resources_repository_cas"
, to be supplied
to the repository_cas
argument of tar_resources()
.
Functions tar_target()
and tar_option_set()
each take an optional resources
argument to supply
non-default settings of various optional backends for data storage
and high-performance computing. The tar_resources()
function
is a helper to supply those settings in the correct manner.
In targets
version 0.12.2 and above, resources are inherited one-by-one
in nested fashion from tar_option_get("resources")
.
For example, suppose you set
tar_option_set(resources = tar_resources(aws = my_aws))
,
where my_aws
equals tar_resources_aws(bucket = "x", prefix = "y")
.
Then, tar_target(data, get_data())
will have bucket "x"
and
prefix "y"
. In addition, if new_resources
equals
tar_resources(aws = tar_resources_aws(bucket = "z"))
, then
tar_target(data, get_data(), resources = new_resources)
will use the new bucket "z"
, but it will still use the prefix "y"
supplied through tar_option_set()
. (In targets
0.12.1 and below,
options like prefix
do not carry over from tar_option_set()
if you
supply non-default resources to tar_target()
.)
Other resources:
tar_resources()
,
tar_resources_aws()
,
tar_resources_clustermq()
,
tar_resources_crew()
,
tar_resources_custom_format()
,
tar_resources_feather()
,
tar_resources_fst()
,
tar_resources_future()
,
tar_resources_gcp()
,
tar_resources_network()
,
tar_resources_parquet()
,
tar_resources_qs()
,
tar_resources_url()
# Somewhere in your target script file (usually _targets.R): tar_target( name = target_name, command = data.frame(x = 1), repository = tar_repository_cas( upload = function(key, path) { if (dir.exists(path)) { stop("This CAS repository does not support directory outputs.") } if (!file.exists("cas")) { dir.create("cas", recursive = TRUE) } file.copy(path, file.path("cas", key)) }, download = function(key, path) { file.copy(file.path("cas", key), path) }, exists = function(key) { file.exists(file.path("cas", key)) } ), resources = tar_resources( repository_cas = tar_resources_repository_cas( envvars = c(AUTHENTICATION_CREDENTIALS = "...") ) ) )
# Somewhere in your target script file (usually _targets.R): tar_target( name = target_name, command = data.frame(x = 1), repository = tar_repository_cas( upload = function(key, path) { if (dir.exists(path)) { stop("This CAS repository does not support directory outputs.") } if (!file.exists("cas")) { dir.create("cas", recursive = TRUE) } file.copy(path, file.path("cas", key)) }, download = function(key, path) { file.copy(file.path("cas", key), path) }, exists = function(key) { file.exists(file.path("cas", key)) } ), resources = tar_resources( repository_cas = tar_resources_repository_cas( envvars = c(AUTHENTICATION_CREDENTIALS = "...") ) ) )
Create the url
argument of tar_resources()
to specify optional settings for URL storage formats.
See the format
argument of tar_target()
for details.
tar_resources_url( handle = targets::tar_option_get("resources")$url$handle, max_tries = targets::tar_option_get("resources")$url$max_tries, seconds_interval = targets::tar_option_get("resources")$url$seconds_interval, seconds_timeout = targets::tar_option_get("resources")$url$seconds_interval )
tar_resources_url( handle = targets::tar_option_get("resources")$url$handle, max_tries = targets::tar_option_get("resources")$url$max_tries, seconds_interval = targets::tar_option_get("resources")$url$seconds_interval, seconds_timeout = targets::tar_option_get("resources")$url$seconds_interval )
handle |
Object returned by |
max_tries |
Positive integer of length 1, maximum number of tries to access a URL. |
seconds_interval |
Nonnegative numeric of length 1,
number of seconds to wait between individual retries
while attempting to connect to the URL.
Use |
seconds_timeout |
Nonnegative numeric of length 1,
number of seconds to wait before timing out while trying to
connect to the URL.
Use |
Object of class "tar_resources_url"
, to be supplied
to the url argument of tar_resources()
.
Functions tar_target()
and tar_option_set()
each take an optional resources
argument to supply
non-default settings of various optional backends for data storage
and high-performance computing. The tar_resources()
function
is a helper to supply those settings in the correct manner.
In targets
version 0.12.2 and above, resources are inherited one-by-one
in nested fashion from tar_option_get("resources")
.
For example, suppose you set
tar_option_set(resources = tar_resources(aws = my_aws))
,
where my_aws
equals tar_resources_aws(bucket = "x", prefix = "y")
.
Then, tar_target(data, get_data())
will have bucket "x"
and
prefix "y"
. In addition, if new_resources
equals
tar_resources(aws = tar_resources_aws(bucket = "z"))
, then
tar_target(data, get_data(), resources = new_resources)
will use the new bucket "z"
, but it will still use the prefix "y"
supplied through tar_option_set()
. (In targets
0.12.1 and below,
options like prefix
do not carry over from tar_option_set()
if you
supply non-default resources to tar_target()
.)
Other resources:
tar_resources()
,
tar_resources_aws()
,
tar_resources_clustermq()
,
tar_resources_crew()
,
tar_resources_custom_format()
,
tar_resources_feather()
,
tar_resources_fst()
,
tar_resources_future()
,
tar_resources_gcp()
,
tar_resources_network()
,
tar_resources_parquet()
,
tar_resources_qs()
,
tar_resources_repository_cas()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN # Somewhere in your target script file (usually _targets.R): tar_target( name, command(), format = "url", resources = tar_resources( url = tar_resources_url(handle = curl::new_handle()) ) ) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN # Somewhere in your target script file (usually _targets.R): tar_target( name, command(), format = "url", resources = tar_resources( url = tar_resources_url(handle = curl::new_handle()) ) ) }
The tar_script()
function is a convenient
way to create the required target script file (default: _targets.R
)
in the current working directory.
It always overwrites the existing target script,
and it requires you to be in the working directory
where you intend to write the file, so be careful.
See the "Target script" section for details.
tar_script( code = NULL, library_targets = TRUE, ask = NULL, script = targets::tar_config_get("script") )
tar_script( code = NULL, library_targets = TRUE, ask = NULL, script = targets::tar_config_get("script") )
code |
R code to write to the target script file.
If |
library_targets |
logical, whether to write a |
ask |
Logical, whether to ask before writing if the
target script file
already exists. If |
script |
Character of length 1, where to write
the target script file. Defaults to |
NULL
(invisibly).
Every targets
project requires a target script file.
The target script file is usually a file called _targets.R.
Functions tar_make()
and friends look for the target script
and run it to set up the pipeline just prior to the main task.
Every target script file should run the following
steps in the order below:
Package: load the targets
package. This step is automatically
inserted at the top of the target script file produced by
tar_script()
if library_targets
is TRUE
,
so you do not need to explicitly include it in code
.
Globals: load custom functions and global objects into memory.
Usually, this section is a bunch of calls to source()
that run
scripts defining user-defined functions. These functions support
the R commands of the targets.
Options: call tar_option_set()
to set defaults for targets-specific
settings such as the names of required packages. Even if you have no
specific options to set, it is still recommended to call
tar_option_set()
in order to register the proper environment.
Targets: define one or more target objects using tar_target()
.
Pipeline: call list()
to bring the targets from the Targets step
together in a pipeline object. Every target script file must return
a pipeline object, which usually means ending with a call to
list()
. In practice, the Targets and Pipeline steps can be combined together
in the same function call.
Other scripts:
tar_edit()
,
tar_github_actions()
,
tar_helper()
,
tar_renv()
tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script() # Writes an example target script file. # Writes a user-defined target script: tar_script({ library(targets) library(tarchetypes) x <- tar_target(x, 1 + 1) tar_option_set() list(x) }, ask = FALSE) writeLines(readLines("_targets.R")) })
tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script() # Writes an example target script file. # Writes a user-defined target script: tar_script({ library(targets) library(tarchetypes) x <- tar_target(x, 1 + 1) tar_option_set() list(x) }, ask = FALSE) writeLines(readLines("_targets.R")) })
Create a seed for a target.
tar_seed_create(name, global_seed = NULL)
tar_seed_create(name, global_seed = NULL)
name |
Character of length 1, target name. |
global_seed |
Integer of length 1, the overarching global
pipeline seed which governs the seeds of all the targets.
Set to |
Integer of length 1, the target seed.
A target's random number generator seed
is a deterministic function of its name and the global pipeline seed
from tar_option_get("seed")
. Consequently,
1. Each target runs with a reproducible seed so that different runs of the same pipeline in the same computing environment produce identical results. 2. No two targets in the same pipeline share the same seed. Even dynamic branches have different names and thus different seeds.
You can retrieve the seed of a completed target
with tar_meta(your_target, seed)
and run tar_seed_set()
on the result to locally
recreate the target's initial RNG state. tar_workspace()
does this automatically as part of recovering a workspace.
In theory, there is a risk that the pseudo-random number generator
streams of different targets will overlap and produce statistically
correlated results. (For a discussion of the motivating problem,
see Section 6: "Random-number generation" in the parallel
package vignette: vignette(topic = "parallel", package = "parallel")
.)
However, this risk is extremely small in practice, as shown by
L'Ecuyer et al. (2017) doi:10.1016/j.matcom.2016.05.005
under "A single RNG with a 'random' seed for each stream" (Section 4:
under "How to produce parallel streams and substreams").
targets
and tarchetypes
take the approach discussed in the
aforementioned section of the paper using the
secretbase
package by Charlie Gao (2024) doi:10.5281/zenodo.10553140.
To generate the 32-bit integer seed
argument of set.seed()
for each target, secretbase
generates a cryptographic hash using the
SHAKE256 extendable output function (XOF). secretbase
uses algorithms
from the Mbed TLS
C library.
Gao C (2024). secretbase
: Cryptographic Hash and
Extendable-Output Functions. R package version 0.1.0,
doi:10.5281/zenodo.10553140.
Pierre L'Ecuyer, David Munger, Boris Oreshkin, and Richard Simard (2017). Random numbers for parallel computers: Requirements and methods, with emphasis on GPUs. Mathematics and Computers in Simulation, 135, 3-17. doi:10.1016/j.matcom.2016.05.005.
Other pseudo-random number generation:
tar_seed_get()
,
tar_seed_set()
Get the random number generator seed of the target currently running.
tar_seed_get(default = 1L)
tar_seed_get(default = 1L)
default |
Integer, value to return if |
Integer of length 1. If invoked inside a targets
pipeline,
the return value is the seed of the target currently running,
which is a deterministic function of the target name. Otherwise,
the return value is default
.
A target's random number generator seed
is a deterministic function of its name and the global pipeline seed
from tar_option_get("seed")
. Consequently,
1. Each target runs with a reproducible seed so that different runs of the same pipeline in the same computing environment produce identical results. 2. No two targets in the same pipeline share the same seed. Even dynamic branches have different names and thus different seeds.
You can retrieve the seed of a completed target
with tar_meta(your_target, seed)
and run tar_seed_set()
on the result to locally
recreate the target's initial RNG state. tar_workspace()
does this automatically as part of recovering a workspace.
In theory, there is a risk that the pseudo-random number generator
streams of different targets will overlap and produce statistically
correlated results. (For a discussion of the motivating problem,
see Section 6: "Random-number generation" in the parallel
package vignette: vignette(topic = "parallel", package = "parallel")
.)
However, this risk is extremely small in practice, as shown by
L'Ecuyer et al. (2017) doi:10.1016/j.matcom.2016.05.005
under "A single RNG with a 'random' seed for each stream" (Section 4:
under "How to produce parallel streams and substreams").
targets
and tarchetypes
take the approach discussed in the
aforementioned section of the paper using the
secretbase
package by Charlie Gao (2024) doi:10.5281/zenodo.10553140.
To generate the 32-bit integer seed
argument of set.seed()
for each target, secretbase
generates a cryptographic hash using the
SHAKE256 extendable output function (XOF). secretbase
uses algorithms
from the Mbed TLS
C library.
Gao C (2024). secretbase
: Cryptographic Hash and
Extendable-Output Functions. R package version 0.1.0,
doi:10.5281/zenodo.10553140.
Pierre L'Ecuyer, David Munger, Boris Oreshkin, and Richard Simard (2017). Random numbers for parallel computers: Requirements and methods, with emphasis on GPUs. Mathematics and Computers in Simulation, 135, 3-17. doi:10.1016/j.matcom.2016.05.005.
Other pseudo-random number generation:
tar_seed_create()
,
tar_seed_set()
tar_seed_get() tar_seed_get(default = 123L) if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script(tar_target(returns_seed, tar_seed_get()), ask = FALSE) tar_make() tar_read(returns_seed) }) }
tar_seed_get() tar_seed_get(default = 123L) if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script(tar_target(returns_seed, tar_seed_get()), ask = FALSE) tar_make() tar_read(returns_seed) }) }
targets
generates its own target-specific seeds
using tar_seed_create()
. Use tar_seed_set()
to set one of
these seeds in R.
tar_seed_set(seed)
tar_seed_set(seed)
seed |
Integer of length 1, value of the seed to set
with |
tar_seed_set()
gives the user-supplied seed
to
set.seed()
and sets arguments kind = "default"
,
normal.kind = "default"
, and sample.kind = "default"
.
NULL
(invisibly).
A target's random number generator seed
is a deterministic function of its name and the global pipeline seed
from tar_option_get("seed")
. Consequently,
1. Each target runs with a reproducible seed so that different runs of the same pipeline in the same computing environment produce identical results. 2. No two targets in the same pipeline share the same seed. Even dynamic branches have different names and thus different seeds.
You can retrieve the seed of a completed target
with tar_meta(your_target, seed)
and run tar_seed_set()
on the result to locally
recreate the target's initial RNG state. tar_workspace()
does this automatically as part of recovering a workspace.
In theory, there is a risk that the pseudo-random number generator
streams of different targets will overlap and produce statistically
correlated results. (For a discussion of the motivating problem,
see Section 6: "Random-number generation" in the parallel
package vignette: vignette(topic = "parallel", package = "parallel")
.)
However, this risk is extremely small in practice, as shown by
L'Ecuyer et al. (2017) doi:10.1016/j.matcom.2016.05.005
under "A single RNG with a 'random' seed for each stream" (Section 4:
under "How to produce parallel streams and substreams").
targets
and tarchetypes
take the approach discussed in the
aforementioned section of the paper using the
secretbase
package by Charlie Gao (2024) doi:10.5281/zenodo.10553140.
To generate the 32-bit integer seed
argument of set.seed()
for each target, secretbase
generates a cryptographic hash using the
SHAKE256 extendable output function (XOF). secretbase
uses algorithms
from the Mbed TLS
C library.
Gao C (2024). secretbase
: Cryptographic Hash and
Extendable-Output Functions. R package version 0.1.0,
doi:10.5281/zenodo.10553140.
Pierre L'Ecuyer, David Munger, Boris Oreshkin, and Richard Simard (2017). Random numbers for parallel computers: Requirements and methods, with emphasis on GPUs. Mathematics and Computers in Simulation, 135, 3-17. doi:10.1016/j.matcom.2016.05.005.
Other pseudo-random number generation:
tar_seed_create()
,
tar_seed_get()
seed <- tar_seed_create("target_name") seed sample(10) tar_seed_set(seed) sample(10) tar_seed_set(seed) sample(10)
seed <- tar_seed_create("target_name") seed sample(10) tar_seed_set(seed) sample(10) tar_seed_set(seed) sample(10)
For each target, report which cues are activated.
Except for the never
cue, the target will rerun in tar_make()
if any cue is activated. The target is suppressed if the never
cue is TRUE
. See tar_cue()
for details.
tar_sitrep( names = NULL, fields = NULL, shortcut = targets::tar_config_get("shortcut"), reporter = targets::tar_config_get("reporter_outdated"), callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function, reporter), envir = parent.frame(), script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
tar_sitrep( names = NULL, fields = NULL, shortcut = targets::tar_config_get("shortcut"), reporter = targets::tar_config_get("reporter_outdated"), callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function, reporter), envir = parent.frame(), script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
names |
Optional, names of the targets. If supplied, |
fields |
Optional, names of columns/fields to select. If supplied,
|
shortcut |
Logical of length 1, how to interpret the |
reporter |
Character of length 1, name of the reporter to use. Controls how messages are printed as targets are checked. Choices:
|
callr_function |
A function from |
callr_arguments |
A list of arguments to |
envir |
An environment, where to run the target R script
(default: The |
script |
Character of length 1, path to the
target script file. Defaults to |
store |
Character of length 1, path to the
|
Caveats:
tar_cue()
allows you to change/suppress cues, so the return
value will depend on the settings you supply to tar_cue()
.
If a pattern tries to branch over a target that does not exist in storage, then the branches are omitted from the output.
tar_sitrep()
is myopic. It only considers what happens to the
immediate target and its immediate upstream dependencies,
and it makes no attempt to propagate invalidation downstream.
A data frame with one row per target/object and one column
per cue. Each element is a logical to indicate whether the cue
is activated for the target.
See the fields
argument in this help file for details.
Other inspect:
tar_deps()
,
tar_manifest()
,
tar_network()
,
tar_outdated()
,
tar_validate()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ list( tar_target(x, seq_len(2)), tar_target(y, 2 * x, pattern = map(x)) ) }, ask = FALSE) tar_make() tar_sitrep() tar_meta(starts_with("y_")) # see also any_of() }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ list( tar_target(x, seq_len(2)), tar_target(y, 2 * x, pattern = map(x)) ) }, ask = FALSE) tar_make() tar_sitrep() tar_meta(starts_with("y_")) # see also any_of() }) }
List targets whose progress is "skipped"
.
tar_skipped(names = NULL, store = targets::tar_config_get("store"))
tar_skipped(names = NULL, store = targets::tar_config_get("store"))
names |
Optional, names of the targets. If supplied, the
output is restricted to the selected targets.
The object supplied to |
store |
Character of length 1, path to the
|
A character vector of skipped targets.
Other progress:
tar_canceled()
,
tar_completed()
,
tar_dispatched()
,
tar_errored()
,
tar_poll()
,
tar_progress()
,
tar_progress_branches()
,
tar_progress_summary()
,
tar_watch()
,
tar_watch_server()
,
tar_watch_ui()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ list( tar_target(x, seq_len(2)), tar_target(y, 2 * x, pattern = map(x)) ) }, ask = FALSE) tar_make() tar_skipped() tar_skipped(starts_with("y_")) # see also any_of() }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ list( tar_target(x, seq_len(2)), tar_target(y, 2 * x, pattern = map(x)) ) }, ask = FALSE) tar_make() tar_skipped() tar_skipped(starts_with("y_")) # see also any_of() }) }
Run all the R scripts in a directory in the environment specified.
tar_source( files = "R", envir = targets::tar_option_get("envir"), change_directory = FALSE )
tar_source( files = "R", envir = targets::tar_option_get("envir"), change_directory = FALSE )
files |
Character vector of file and directory paths to look for R scripts to run. Paths must either be absolute paths or must be relative to the current working directory just before the function call. |
envir |
Environment to run the scripts. Defaults to
|
change_directory |
Logical, whether to temporarily change the working directory to the directory of each R script before running it. |
tar_source()
is a convenient way to load R scripts
in _targets.R
to make custom functions available to the pipeline.
tar_source()
recursively looks for files ending
in .R
or .r
, and it runs each with
eval(parse(text = readLines(script_file, warn = FALSE)), envir)
.
NULL
(invisibly)
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
Other utilities:
tar_active()
,
tar_backoff()
,
tar_call()
,
tar_cancel()
,
tar_definition()
,
tar_described_as()
,
tar_envir()
,
tar_format_get()
,
tar_group()
,
tar_name()
,
tar_path()
,
tar_path_script()
,
tar_path_script_support()
,
tar_path_store()
,
tar_path_target()
,
tar_store()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. # Running in tar_dir(), these files are written in tempdir(). dir.create("R") writeLines("f <- function(x) x + 1", file.path("R", "functions.R")) tar_script({ tar_source() list(tar_target(x, f(1))) }) tar_make() tar_read(x) # 2 }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. # Running in tar_dir(), these files are written in tempdir(). dir.create("R") writeLines("f <- function(x) x + 1", file.path("R", "functions.R")) tar_script({ tar_source() list(tar_target(x, f(1))) }) tar_make() tar_read(x) # 2 }) }
A target is a single step of computation in a pipeline. It runs an R command and returns a value. This value gets treated as an R object that can be used by the commands of targets downstream. Targets that are already up to date are skipped. See the user manual for more details.
tar_target()
defines a target using non-standard evaluation.
The name
argument is an unevaluated symbol,
and the command
and pattern
arguments are unevaluated expressions. Example:
tar_target(name = data, command = get_data())
.
tar_target_raw()
defines a target with standard evaluation.
The name
argument is a character string,
and the command
and pattern
arguments are evaluated expressions. Example:
tar_target_raw(name = "data", command = quote(get_data()))
.
tar_target_raw()
also has extra arguments deps
and string
for advanced customization.
tar_target( name, command, pattern = NULL, tidy_eval = targets::tar_option_get("tidy_eval"), packages = targets::tar_option_get("packages"), library = targets::tar_option_get("library"), format = targets::tar_option_get("format"), repository = targets::tar_option_get("repository"), iteration = targets::tar_option_get("iteration"), error = targets::tar_option_get("error"), memory = targets::tar_option_get("memory"), garbage_collection = isTRUE(targets::tar_option_get("garbage_collection")), deployment = targets::tar_option_get("deployment"), priority = targets::tar_option_get("priority"), resources = targets::tar_option_get("resources"), storage = targets::tar_option_get("storage"), retrieval = targets::tar_option_get("retrieval"), cue = targets::tar_option_get("cue"), description = targets::tar_option_get("description") ) tar_target_raw( name, command, pattern = NULL, packages = targets::tar_option_get("packages"), library = targets::tar_option_get("library"), deps = NULL, string = NULL, format = targets::tar_option_get("format"), repository = targets::tar_option_get("repository"), iteration = targets::tar_option_get("iteration"), error = targets::tar_option_get("error"), memory = targets::tar_option_get("memory"), garbage_collection = isTRUE(targets::tar_option_get("garbage_collection")), deployment = targets::tar_option_get("deployment"), priority = targets::tar_option_get("priority"), resources = targets::tar_option_get("resources"), storage = targets::tar_option_get("storage"), retrieval = targets::tar_option_get("retrieval"), cue = targets::tar_option_get("cue"), description = targets::tar_option_get("description") )
tar_target( name, command, pattern = NULL, tidy_eval = targets::tar_option_get("tidy_eval"), packages = targets::tar_option_get("packages"), library = targets::tar_option_get("library"), format = targets::tar_option_get("format"), repository = targets::tar_option_get("repository"), iteration = targets::tar_option_get("iteration"), error = targets::tar_option_get("error"), memory = targets::tar_option_get("memory"), garbage_collection = isTRUE(targets::tar_option_get("garbage_collection")), deployment = targets::tar_option_get("deployment"), priority = targets::tar_option_get("priority"), resources = targets::tar_option_get("resources"), storage = targets::tar_option_get("storage"), retrieval = targets::tar_option_get("retrieval"), cue = targets::tar_option_get("cue"), description = targets::tar_option_get("description") ) tar_target_raw( name, command, pattern = NULL, packages = targets::tar_option_get("packages"), library = targets::tar_option_get("library"), deps = NULL, string = NULL, format = targets::tar_option_get("format"), repository = targets::tar_option_get("repository"), iteration = targets::tar_option_get("iteration"), error = targets::tar_option_get("error"), memory = targets::tar_option_get("memory"), garbage_collection = isTRUE(targets::tar_option_get("garbage_collection")), deployment = targets::tar_option_get("deployment"), priority = targets::tar_option_get("priority"), resources = targets::tar_option_get("resources"), storage = targets::tar_option_get("storage"), retrieval = targets::tar_option_get("retrieval"), cue = targets::tar_option_get("cue"), description = targets::tar_option_get("description") )
name |
Symbol, name of the target.
In tar_target(), name is an unevaluated symbol; in tar_target_raw(), name is a character string. A target name must be a valid name for a symbol in R, and it
must not start with a dot. Subsequent targets
can refer to this name symbolically to induce a dependency relationship:
e.g. |
command |
R code to run the target.
In |
pattern |
Code to define a dynamic branching pattern for a target.
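In tar_target(), pattern is an unevaluated expression; in tar_target_raw(), pattern is an evaluated expression. To demonstrate dynamic branching patterns, suppose we have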
a pipeline with numeric vector targets |
tidy_eval |
Logical, whether to enable tidy evaluation
when interpreting |
packages |
Character vector of packages to load right before
the target runs or the output data is reloaded for
downstream targets. Use |
library |
Character vector of library paths to try
when loading |
format |
Optional storage format for the target's return value.
With the exception of |
repository |
Character of length 1, remote repository for target storage. Choices:
Note: if |
iteration |
Character of length 1, name of the iteration mode of the target. Choices:
|
error |
Character of length 1, what to do if the target stops and throws an error. Options:
|
memory |
Character of length 1, memory strategy.
If |
garbage_collection |
Logical: |
deployment |
Character of length 1. If |
priority |
Numeric of length 1 between 0 and 1. Controls which
targets get deployed first when multiple competing targets are ready
simultaneously. Targets with priorities closer to 1 get dispatched earlier
(and polled earlier in |
resources |
Object returned by |
storage |
Character of length 1, only relevant to
|
retrieval |
Character of length 1, only relevant to
|
cue |
An optional object from |
description |
Character of length 1, a custom free-form human-readable
text description of the target. Descriptions appear as target labels
in functions like |
deps |
Optional character vector of the adjacent upstream
dependencies of the target, including targets and global objects.
If |
string |
Optional string representation of the command.
Internally, the string gets hashed to check if the command changed
since last run, which helps |
A target object. Users should not modify these directly,
just feed them to list()
in your target script file
(default: _targets.R
).
Functions like tar_target()
produce target objects,
special objects with specialized sets of S3 classes.
Target objects represent skippable steps of the analysis pipeline
as described at https://books.ropensci.org/targets/.
Please read the walkthrough at
https://books.ropensci.org/targets/walkthrough.html
to understand the role of target objects in analysis pipelines.
For developers, https://wlandau.github.io/targetopia/contributing.html#target-factories explains target factories (functions like this one which generate targets) and the design specification at https://books.ropensci.org/targets-design/ details the structure and composition of target objects.
targets
has several built-in storage formats to control how return
values are saved and loaded from disk:
"rds"
: Default, uses saveRDS()
and readRDS()
. Should work for
most objects, but slow.
"auto"
: either "file"
or "qs"
, depending on the return value
of the target. If the return value is a character vector of
existing files (and/or directories), then the format becomes
"file"
before tar_make()
saves the target. Otherwise,
the format becomes "qs"
.
"qs"
: Uses qs::qsave()
and qs::qread()
. Should work for
most objects, much faster than "rds"
. Optionally set the
preset for qsave()
through tar_resources()
and tar_resources_qs()
.
"feather"
: Uses arrow::write_feather()
and
arrow::read_feather()
(version 2.0). Much faster than "rds"
,
but the value must be a data frame. Optionally set
compression
and compression_level
in arrow::write_feather()
through tar_resources()
and tar_resources_feather()
.
Requires the arrow
package (not installed by default).
"parquet"
: Uses arrow::write_parquet()
and
arrow::read_parquet()
(version 2.0). Much faster than "rds"
,
but the value must be a data frame. Optionally set
compression
and compression_level
in arrow::write_parquet()
through tar_resources()
and tar_resources_parquet()
.
Requires the arrow
package (not installed by default).
"fst"
: Uses fst::write_fst()
and fst::read_fst()
.
Much faster than "rds"
, but the value must be
a data frame. Optionally set the compression level for
fst::write_fst()
through tar_resources()
and tar_resources_fst()
.
Requires the fst
package (not installed by default).
"fst_dt"
: Same as "fst"
, but the value is a data.table
.
Deep copies are made as appropriate in order to protect
against the global effects of in-place modification.
Optionally set the compression level the same way as for "fst"
.
"fst_tbl"
: Same as "fst"
, but the value is a tibble
.
Optionally set the compression level the same way as for "fst"
.
"keras"
: superseded by tar_format()
and incompatible
with error = "null"
(in tar_target()
or tar_option_set()
).
Uses keras::save_model_hdf5()
and
keras::load_model_hdf5()
. The value must be a Keras model.
Requires the keras
package (not installed by default).
"torch"
: superseded by tar_format()
and incompatible
with error = "null"
(in tar_target()
or tar_option_set()
).
Uses torch::torch_save()
and torch::torch_load()
.
The value must be an object from the torch
package
such as a tensor or neural network module.
Requires the torch
package (not installed by default).
"file"
: A dynamic file. To use this format,
the target needs to manually identify or save some data
and return a character vector of paths
to the data (must be a single file path if repository
is not "local"
). (These paths must be existing files
and nonempty directories.)
Then, targets
automatically checks those files and cues
the appropriate run/skip decisions if those files are out of date.
Those paths must point to files or directories,
and they must not contain characters |
or *
.
All the files and directories you return must actually exist,
or else targets
will throw an error. (And if storage
is "worker"
,
targets
will first stall out trying to wait for the file
to arrive over a network file system.)
If the target does not create any files, the return value should be
character(0)
.
If repository
is not "local"
and format
is "file"
,
then the character vector returned by the target must be of length 1
and point to a single file. (Directories and vectors of multiple
file paths are not supported for dynamic files on the cloud.)
That output file is uploaded to the cloud and tracked for changes
where it exists in the cloud. The local file is deleted after
the target runs.
"url"
: A dynamic input URL. For this storage format,
repository
is implicitly "local"
,
URL format is like format = "file"
except the return value of the target is a URL that already exists
and serves as input data for downstream targets. Optionally
supply a custom curl
handle through
tar_resources()
and tar_resources_url()
.
In new_handle()
, nobody = TRUE
is important because it
ensures targets
just downloads the metadata instead of
the entire data file when it checks time stamps and hashes.
The data file at the URL needs to have an ETag or a Last-Modified
time stamp, or else the target will throw an error because
it cannot track the data. Also, use extreme caution when
trying to use format = "url"
to track uploads. You must be absolutely
certain the ETag and Last-Modified time stamp are fully updated
and available by the time the target's command finishes running.
targets
makes no attempt to wait for the web server.
A custom format can be supplied with tar_format()
. For this choice,
it is the user's responsibility to provide methods for (un)serialization
and (un)marshaling the return value of the target.
The formats starting with "aws_"
are deprecated as of 2022-03-13
(targets
version > 0.10.0). For cloud storage integration, use the
repository
argument instead.
Formats "rds"
, "file"
, and "url"
are general-purpose formats
that belong in the targets
package itself.
Going forward, any additional formats should be implemented with
tar_format()
in third-party packages like tarchetypes
and geotargets
(for example: tarchetypes::tar_format_nanoparquet()
).
Formats "qs"
, "fst"
, etc. are legacy formats from before the
existence of tar_format()
, and they will continue to remain in
targets
without deprecation.
Other targets:
tar_cue()
# Defining targets does not run them. data <- tar_target(target_name, get_data(), packages = "tidyverse") analysis <- tar_target(analysis, analyze(x), pattern = map(x)) # In a pipeline: if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(name = x, command = 1 + 1), tar_target_raw(name = "y", command = quote(x + y)) ) }) tar_make() tar_read(x) }) # Tidy evaluation tar_option_set(envir = environment()) n_rows <- 30L data <- tar_target(target_name, get_data(!!n_rows)) print(data) # Disable tidy evaluation: data <- tar_target(target_name, get_data(!!n_rows), tidy_eval = FALSE) print(data) tar_option_reset() }
# Defining targets does not run them. data <- tar_target(target_name, get_data(), packages = "tidyverse") analysis <- tar_target(analysis, analyze(x), pattern = map(x)) # In a pipeline: if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list( tar_target(name = x, command = 1 + 1), tar_target_raw(name = "y", command = quote(x + y)) ) }) tar_make() tar_read(x) }) # Tidy evaluation tar_option_set(envir = environment()) n_rows <- 30L data <- tar_target(target_name, get_data(!!n_rows)) print(data) # Disable tidy evaluation: data <- tar_target(target_name, get_data(!!n_rows), tidy_eval = FALSE) print(data) tar_option_reset() }
Runs a test_that()
unit test inside a temporary
directory to avoid writing to the user's file space.
This helps ensure compliance with CRAN policies.
Also isolates tar_option_set()
options and environment variables specific to targets
and skips the test on Solaris.
Useful for writing tests for
targetopia packages
(extensions to targets
tailored to specific use cases).
tar_test(label, code)
tar_test(label, code)
label |
Character of length 1, label for the test. |
code |
User-defined code for the test. |
NULL
(invisibly).
Other utilities to extend targets:
tar_assert
,
tar_condition
,
tar_language
tar_test("example test", { testing_variable_cafecfcb <- "only defined inside tar_test()" file.create("only_exists_in_tar_test") }) exists("testing_variable_cafecfcb") file.exists("only_exists_in_tar_test")
tar_test("example test", { testing_variable_cafecfcb <- "only defined inside tar_test()" file.create("only_exists_in_tar_test") }) exists("testing_variable_cafecfcb") file.exists("only_exists_in_tar_test")
Get the timestamp associated with a target's
last successful run.
tar_timestamp()
expects the name
argument to be an unevaluated
symbol, whereas tar_timestamp_raw()
expects name
to be a character string.
tar_timestamp( name = NULL, format = NULL, tz = NULL, parse = NULL, store = targets::tar_config_get("store") ) tar_timestamp_raw( name = NULL, format = NULL, tz = NULL, parse = NULL, store = targets::tar_config_get("store") )
tar_timestamp( name = NULL, format = NULL, tz = NULL, parse = NULL, store = targets::tar_config_get("store") ) tar_timestamp_raw( name = NULL, format = NULL, tz = NULL, parse = NULL, store = targets::tar_config_get("store") )
name |
Name of the target. If
|
format |
Deprecated in |
tz |
Deprecated in |
parse |
Deprecated in |
store |
Character string, directory path to the data store of the pipeline. |
tar_timestamp()
checks the metadata in _targets/meta/meta
,
not the actual returned data of the target.
The timestamp depends on the storage format of the target.
If storage is local, e.g. formats like "rds"
and "file"
,
then the time stamp is the latest modification time
of the target data files at the time the target
last successfully ran. For non-local storage as with
repository = "aws"
and format = "url"
, targets
chooses instead
to simply record the time the target last successfully ran.
If the target is not recorded in the metadata
or cannot be parsed correctly, then
tar_timestamp()
returns a POSIXct
object at 1970-01-01 UTC
.
Other time:
tar_newer()
,
tar_older()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list(tar_target(x, 1)) }, ask = FALSE) tar_make() # Get the timestamp. tar_timestamp(x) # We can use the timestamp to cancel the target # if it already ran within the last hour. # Be sure to set `cue = tar_cue(mode = "always")` # if you want the target to always check the timestamp. tar_script({ list( tar_target( x, tar_cancel((Sys.time() - tar_timestamp()) < 3600), cue = tar_cue(mode = "always") ) )}, ask = FALSE) tar_make() }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) list(tar_target(x, 1)) }, ask = FALSE) tar_make() # Get the timestamp. tar_timestamp(x) # We can use the timestamp to cancel the target # if it already ran within the last hour. # Be sure to set `cue = tar_cue(mode = "always")` # if you want the target to always check the timestamp. tar_script({ list( tar_target( x, tar_cancel((Sys.time() - tar_timestamp()) < 3600), cue = tar_cue(mode = "always") ) )}, ask = FALSE) tar_make() }) }
Run one piece of code if Target Markdown interactive mode is turned on and another piece of code otherwise.
tar_toggle(interactive, noninteractive)
tar_toggle(interactive, noninteractive)
interactive |
R code to run if Target Markdown interactive mode is activated. |
noninteractive |
R code to run if Target Markdown interactive mode is not activated. |
Visit <books.ropensci.org/targets/literate-programming.html> to learn about Target Markdown and interactive mode.
If Target Markdown interactive mode is not turned on,
the function returns the result of running the code.
Otherwise, the function invisibly returns NULL
.
Other Target Markdown:
tar_engine_knitr()
,
tar_interactive()
,
tar_noninteractive()
tar_toggle( message("In interactive mode."), message("Not in interactive mode.") )
tar_toggle( message("In interactive mode."), message("Not in interactive mode.") )
Return the saved traceback of a target.
Assumes the target errored out in a previous run of the pipeline
with workspaces enabled for that target.
See tar_workspace()
for details.
tar_traceback( name, envir = NULL, packages = NULL, source = NULL, characters = NULL, store = targets::tar_config_get("store") )
tar_traceback( name, envir = NULL, packages = NULL, source = NULL, characters = NULL, store = targets::tar_config_get("store") )
name |
Symbol, name of the target whose workspace to read. |
envir |
Deprecated in |
packages |
Logical, whether to load the required packages of the target. |
source |
Logical, whether to run the target script file
(default: |
characters |
Deprecated in |
store |
Character of length 1, path to the
|
Character vector, the traceback of a failed target if it exists.
Other debug:
tar_load_globals()
,
tar_workspace()
,
tar_workspaces()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tmp <- sample(1) tar_script({ library(targets) library(tarchetypes) tar_option_set(workspace_on_error = TRUE) list( tar_target(x, "loaded"), tar_target(y, stop(x)) ) }, ask = FALSE) try(tar_make()) tar_traceback(y, characters = 60) }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tmp <- sample(1) tar_script({ library(targets) library(tarchetypes) tar_option_set(workspace_on_error = TRUE) list( tar_target(x, "loaded"), tar_target(y, stop(x)) ) }, ask = FALSE) try(tar_make()) tar_traceback(y, characters = 60) }) }
Remove target script helper files (default: _targets_r/
)
that were created by Target Markdown.
tar_unscript(script = targets::tar_config_get("script"))
tar_unscript(script = targets::tar_config_get("script"))
script |
Character of length 1, path to the
target script file. Defaults to |
Target Markdown code chunks create R scripts in a folder
called _targets_r/
in order to aid the automatically supplied
_targets.R
file. Over time, the number of script files
starts to build up, and targets
has no way of automatically
removing helper script files that are no longer necessary.
To keep your pipeline up to date
with the code chunks in the Target Markdown document(s),
it is good practice to call tar_unscript()
at the beginning
of your first Target Markdown document. That way,
extraneous/discarded targets are automatically
removed from the pipeline when the document starts to render.
If the target script is at some alternative path,
e.g. custom/script.R
, the helper scripts are in custom/script_r/
.
tar_unscript()
works on the helper scripts as long as your
project configuration settings identify the correct
target script.
NULL
(invisibly).
tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_unscript() })
tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_unscript() })
Delete version IDs from local metadata.
tar_unversion( names = tidyselect::everything(), store = targets::tar_config_get("store") )
tar_unversion( names = tidyselect::everything(), store = targets::tar_config_get("store") )
names |
Tidyselect expression to identify the targets whose
version IDs should be dropped.
The object supplied to |
store |
Character of length 1, path to the
|
NULL
(invisibly).
Some buckets in Amazon S3 or Google Cloud Storage are "versioned",
which means they track historical versions of each data object.
If you use targets
with cloud storage
(https://books.ropensci.org/targets/cloud-storage.html)
and versioning is turned on, then targets
will record each
version of each target in its metadata.
Functions like tar_read()
and tar_load()
load the version recorded in the local metadata,
which may not be the same as the "current" version of the
object in the bucket. Likewise, functions tar_delete()
and tar_destroy()
only remove
the version ID of each target as recorded in the local
metadata.
If you want to interact with the latest version of an object instead of the version ID recorded in the local metadata, then you will need to delete the recorded version ID from the local metadata.
Make sure your local copy of the metadata is current and
up to date. You may need to run tar_meta_download()
or
tar_meta_sync()
first.
Run tar_unversion()
to remove the recorded version IDs of
your targets in the local metadata.
With the version IDs gone from the local metadata,
functions like tar_read()
and tar_destroy()
will use the
latest version of each target data object.
Optional: to back up the local metadata file with the version IDs
deleted, use tar_meta_upload()
.
Other clean:
tar_delete()
,
tar_destroy()
,
tar_invalidate()
,
tar_prune()
,
tar_prune_list()
Inspect the pipeline for issues and throw an error or warning if a problem is detected.
tar_validate( callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function), envir = parent.frame(), script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
tar_validate( callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function), envir = parent.frame(), script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
callr_function |
A function from |
callr_arguments |
A list of arguments to |
envir |
An environment, where to run the target R script
(default: The |
script |
Character of length 1, path to the
target script file. Defaults to |
store |
Character of length 1, path to the
|
NULL
except if callr_function = callr::r_bg()
, in which case
a handle to the callr
background process is returned. Either way,
the value is invisibly returned.
Other inspect:
tar_deps()
,
tar_manifest()
,
tar_network()
,
tar_outdated()
,
tar_sitrep()
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script(list(tar_target(x, 1 + 1)), ask = FALSE) tar_validate() }) }
if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script(list(tar_target(x, 1 + 1)), ask = FALSE) tar_validate() }) }
Visualize the pipeline dependency graph with a visNetwork
HTML widget.
tar_visnetwork( targets_only = FALSE, names = NULL, shortcut = FALSE, allow = NULL, exclude = ".Random.seed", outdated = TRUE, label = targets::tar_config_get("label"), label_width = targets::tar_config_get("label_width"), level_separation = targets::tar_config_get("level_separation"), degree_from = 1L, degree_to = 1L, zoom_speed = 1, physics = FALSE, reporter = targets::tar_config_get("reporter_outdated"), seconds_reporter = targets::tar_config_get("seconds_reporter"), callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function), envir = parent.frame(), script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
tar_visnetwork( targets_only = FALSE, names = NULL, shortcut = FALSE, allow = NULL, exclude = ".Random.seed", outdated = TRUE, label = targets::tar_config_get("label"), label_width = targets::tar_config_get("label_width"), level_separation = targets::tar_config_get("level_separation"), degree_from = 1L, degree_to = 1L, zoom_speed = 1, physics = FALSE, reporter = targets::tar_config_get("reporter_outdated"), seconds_reporter = targets::tar_config_get("seconds_reporter"), callr_function = callr::r, callr_arguments = targets::tar_callr_args_default(callr_function), envir = parent.frame(), script = targets::tar_config_get("script"), store = targets::tar_config_get("store") )
targets_only |
Logical, whether to restrict the output to just targets
( |
names |
Names of targets. The graph visualization will operate
only on these targets (and unless |
shortcut |
Logical of length 1, how to interpret the |
allow |
Optional, define the set of allowable vertices in the graph.
Unlike |
exclude |
Optional, define the set of vertices to exclude from the graph.
Unlike |
outdated |
Logical, whether to show colors to distinguish outdated
targets from up-to-date targets. (Global functions and objects
still show these colors.) Looking for outdated targets
takes a lot of time for large pipelines with lots of branches,
and setting |
label |
Character vector of one or more aesthetics to add to the
vertex labels. Can contain |
label_width |
Positive numeric of length 1, maximum width (in number of characters) of the node labels. |
level_separation |
Numeric of length 1,
|
degree_from |
Integer of length 1. When you click on a node,
the graph highlights a neighborhood of that node. |
degree_to |
Integer of length 1. When you click on a node,
the graph highlights a neighborhood of that node. |
zoom_speed |
Positive numeric of length 1, scaling factor on the zoom speed. Above 1 zooms faster than default, below 1 zooms slower than default. |
physics |
Logical of length 1, whether to implement interactive physics in the graph, e.g. edge elasticity. |
reporter |
Character of length 1, name of the reporter to use. Controls how messages are printed as targets are checked. Choices:
|
seconds_reporter |
Positive numeric of length 1 with the minimum number of seconds between times when the reporter prints progress messages to the R console. |
callr_function |
A function from |
callr_arguments |
A list of arguments to |
envir |
An environment, where to run the target R script
(default: The |
script |
Character of length 1, path to the
target script file. Defaults to |
store |
Character of length 1, path to the
|
A visNetwork
HTML widget object.
The dependency graph of a pipeline is a directed acyclic graph (DAG)
where each node indicates a target or global object and each directed
edge indicates where a downstream node depends on an upstream node.
The DAG is not always a tree, but it never contains a cycle because
no target is allowed to directly or indirectly depend on itself.
The dependency graph should show a natural progression of work from
left to right. targets
uses static code analysis to create the graph,
so the order of tar_target()
calls in the _targets.R
file
does not matter. However, targets does not support self-referential
loops or other cycles. For more information on the dependency graph,
please read
https://books.ropensci.org/targets/targets.html#dependencies.
Several functions like tar_make()
, tar_read()
, tar_load()
,
tar_meta()
, and tar_progress()
read or modify
the local data store of the pipeline.
The local data store is in flux while a pipeline is running,
and depending on how distributed computing or cloud computing is set up,
not all targets can even reach it. So please do not call these
functions from inside a target as part of a running
pipeline. The only exception is literate programming
target factories in the tarchetypes
package such as tar_render()
and tar_quarto()
.
Other visualize:
tar_glimpse()
,
tar_mermaid()
if (identical(Sys.getenv("TAR_INTERACTIVE_EXAMPLES"), "true")) { tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set() list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1), tar_target(z, y1 + y2, description = "sum of two other sums") ) }) tar_visnetwork() tar_visnetwork(allow = starts_with("y")) # see also any_of() }) }
if (identical(Sys.getenv("TAR_INTERACTIVE_EXAMPLES"), "true")) { tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) tar_option_set() list( tar_target(y1, 1 + 1), tar_target(y2, 1 + 1), tar_target(z, y1 + y2, description = "sum of two other sums") ) }) tar_visnetwork() tar_visnetwork(allow = starts_with("y")) # see also any_of() }) }
Launches a background process with a Shiny app
that calls tar_visnetwork()
every few seconds.
To embed this app in other apps, use the Shiny module
in tar_watch_ui()
and tar_watch_server()
.
tar_watch( seconds = 10, seconds_min = 1, seconds_max = 60, seconds_step = 1, targets_only = FALSE, exclude = ".Random.seed", outdated = FALSE, label = NULL, level_separation = 150, degree_from = 1L, degree_to = 1L, config = Sys.getenv("TAR_CONFIG", "_targets.yaml"), project = Sys.getenv("TAR_PROJECT", "main"), height = "650px", display = "summary", displays = c("summary", "branches", "progress", "graph", "about"), background = TRUE, browse = TRUE, host = getOption("shiny.host", "127.0.0.1"), port = getOption("shiny.port", targets::tar_random_port()), verbose = TRUE, supervise = TRUE, poll_connection = TRUE, stdout = "|", stderr = "|", title = "", theme = bslib::bs_theme(), spinner = TRUE )
tar_watch( seconds = 10, seconds_min = 1, seconds_max = 60, seconds_step = 1, targets_only = FALSE, exclude = ".Random.seed", outdated = FALSE, label = NULL, level_separation = 150, degree_from = 1L, degree_to = 1L, config = Sys.getenv("TAR_CONFIG", "_targets.yaml"), project = Sys.getenv("TAR_PROJECT", "main"), height = "650px", display = "summary", displays = c("summary", "branches", "progress", "graph", "about"), background = TRUE, browse = TRUE, host = getOption("shiny.host", "127.0.0.1"), port = getOption("shiny.port", targets::tar_random_port()), verbose = TRUE, supervise = TRUE, poll_connection = TRUE, stdout = "|", stderr = "|", title = "", theme = bslib::bs_theme(), spinner = TRUE )
seconds |
Numeric of length 1, default number of seconds between refreshes of the graph. Can be changed in the app controls. |
seconds_min |
Numeric of length 1, lower bound of |
seconds_max |
Numeric of length 1, upper bound of |
seconds_step |
Numeric of length 1, step size of |
targets_only |
Logical, whether to restrict the output to just targets
( |
exclude |
Character vector of nodes to omit from the graph. |
outdated |
Logical, whether to show colors to distinguish outdated
targets from up-to-date targets. (Global functions and objects
still show these colors.) Looking for outdated targets
takes a lot of time for large pipelines with lots of branches,
and setting |
label |
Label argument to |
level_separation |
Numeric of length 1,
|
degree_from |
Integer of length 1. When you click on a node,
the graph highlights a neighborhood of that node. |
degree_to |
Integer of length 1. When you click on a node,
the graph highlights a neighborhood of that node. |
config |
Character of length 1, file path of the YAML
configuration file with |
project |
Character of length 1, name of the current
|
height |
Character of length 1,
height of the |
display |
Character of length 1, which display to show first. |
displays |
Character vector of choices for the display.
Elements can be any of
|
background |
Logical, whether to run the app in a background process so you can still use the R console while the app is running. |
browse |
Whether to open the app in a browser when the app is ready.
Only relevant if |
host |
Character of length 1, IPv4 address to listen on.
Only relevant if |
port |
Positive integer of length 1, TCP port to listen on.
Only relevant if |
verbose |
Whether to print a spinner and informative messages.
Only relevant if |
supervise |
Whether to register the process with a supervisor. If |
poll_connection |
Whether to have a control connection to the process. This is used to transmit messages from the subprocess to the main process. |
stdout |
The name of the file the standard output of
the child R process will be written to.
If the child process runs with the |
stderr |
The name of the file the standard error of
the child R process will be written to.
In particular |
title |
Character of length 1, title of the UI. |
theme |
A call to |
spinner |
|
The controls of the app are in the left panel.
The seconds
control is the number of seconds between
refreshes of the graph, and the other settings match
the arguments of tar_visnetwork()
.
A handle to the callr::r_bg()
background process running the app.
Other progress:
tar_canceled()
,
tar_completed()
,
tar_dispatched()
,
tar_errored()
,
tar_poll()
,
tar_progress()
,
tar_progress_branches()
,
tar_progress_summary()
,
tar_skipped()
,
tar_watch_server()
,
tar_watch_ui()
if (identical(Sys.getenv("TAR_INTERACTIVE_EXAMPLES"), "true")) { tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) sleep_run <- function(...) { Sys.sleep(10) } list( tar_target(settings, sleep_run()), tar_target(data1, sleep_run(settings)), tar_target(data2, sleep_run(settings)) ) }, ask = FALSE) # Launch the app in a background process. tar_watch(seconds = 10, outdated = FALSE, targets_only = TRUE) # Run the pipeline. tar_make() }) }
if (identical(Sys.getenv("TAR_INTERACTIVE_EXAMPLES"), "true")) { tar_dir({ # tar_dir() runs code from a temp dir for CRAN. tar_script({ library(targets) library(tarchetypes) sleep_run <- function(...) { Sys.sleep(10) } list( tar_target(settings, sleep_run()), tar_target(data1, sleep_run(settings)), tar_target(data2, sleep_run(settings)) ) }, ask = FALSE) # Launch the app in a background process. tar_watch(seconds = 10, outdated = FALSE, targets_only = TRUE) # Run the pipeline. tar_make() }) }
Use tar_watch_ui()
and tar_watch_server()
to include tar_watch()
as a Shiny module in an app.
tar_watch_server( id, height = "650px", exclude = ".Random.seed", config = Sys.getenv("TAR_CONFIG", "_targets.yaml"), project = Sys.getenv("TAR_PROJECT", "main") )
tar_watch_server( id, height = "650px", exclude = ".Random.seed", config = Sys.getenv("TAR_CONFIG", "_targets.yaml"), project = Sys.getenv("TAR_PROJECT", "main") )
id |
Character of length 1, ID corresponding to the UI function of the module. |
height |
Character of length 1,
height of the |
exclude |
Character vector of nodes to omit from the graph. |
config |
Character of length 1, file path of the YAML
configuration file with |
project |
Character of length 1, name of the current
|
A Shiny module server.
Other progress:
tar_canceled()
,
tar_completed()
,
tar_dispatched()
,
tar_errored()
,
tar_poll()
,
tar_progress()
,
tar_progress_branches()
,
tar_progress_summary()
,
tar_skipped()
,
tar_watch()
,
tar_watch_ui()
Use tar_watch_ui()
and tar_watch_server()
to include tar_watch()
as a Shiny module in an app.
tar_watch_ui( id, label = "tar_watch_label", seconds = 10, seconds_min = 1, seconds_max = 60, seconds_step = 1, targets_only = FALSE, outdated = FALSE, label_tar_visnetwork = NULL, level_separation = 150, degree_from = 1L, degree_to = 1L, height = "650px", display = "summary", displays = c("summary", "branches", "progress", "graph", "about"), title = "", theme = bslib::bs_theme(), spinner = FALSE )
tar_watch_ui( id, label = "tar_watch_label", seconds = 10, seconds_min = 1, seconds_max = 60, seconds_step = 1, targets_only = FALSE, outdated = FALSE, label_tar_visnetwork = NULL, level_separation = 150, degree_from = 1L, degree_to = 1L, height = "650px", display = "summary", displays = c("summary", "branches", "progress", "graph", "about"), title = "",<