Format the occurrence dataset for usage in itsdm
Source:R/format_observation.R
format_observation.Rd
This function focuses on formatting the dataset while keeping it as close to the original as possible. Users can therefore modify the data as they wish before passing it to this function.
Usage
format_observation(
obs_df,
eval_df = NULL,
split_perc = 0.3,
seed = 123,
obs_crs = 4326,
eval_crs = 4326,
x_col = "x",
y_col = "y",
obs_col = "observation",
obs_type = "presence_only"
)
Arguments
- obs_df
(`data.frame`). The `data.frame`-style table that includes the x and y coordinates and the observations of the training dataset. This parameter is required because it is the training dataset. Note: it only accepts a `data.frame`, to reduce the risk of column name mismatches between `data.frame` and other formats such as `tibble`.
- eval_df
(`data.frame` or `NULL`) The `data.frame`-style table that includes the x and y coordinates and the observations of the evaluation dataset. Note: it only accepts a `data.frame`, to reduce the risk of column name mismatches between `data.frame` and other formats such as `tibble`.
- split_perc
(`numeric`) A `numeric` between 0 and 1 giving the proportion of the data used to evaluate the models. Only required if `eval_df` is `NULL`.
- seed
(`integer`) The seed used to split the training and evaluation sets. The default value is `123`. Only required if `eval_df` is `NULL`.
- obs_crs
(`integer`, `numeric`, `character`, or `crs`) The EPSG code, CRS string, or `sf::crs` object of the coordinate system of the training dataset. It corresponds to `x_col` and `y_col` in `obs_df`.
- eval_crs
(`integer`, `numeric`, `character`, or `crs`) The EPSG code, CRS string, or `sf::crs` object of the coordinate system of the evaluation dataset. Only required if `eval_df` is not `NULL`. It corresponds to `x_col` and `y_col` in `eval_df`, if any.
- x_col
(`character`) The name of the column that holds the x coordinate in `obs_df`, and in `eval_df` if it is not `NULL`.
- y_col
(`character`) The name of the column that holds the y coordinate in `obs_df`, and in `eval_df` if it is not `NULL`.
- obs_col
(`character`) The name of the column that holds the observations in `obs_df`, and in `eval_df` if it is not `NULL`.
- obs_type
(`character`) The type of observation to format to. Must be one of `c("presence_only", "presence_absence")`. Note that if "presence_only" is set, the absences in `obs_df` will be deleted. This only affects `obs_df`; `eval_df` keeps its original type, whether it is an independent dataset or is split from `obs_df`.
Value
(`FormatOccurrence`) A list of
- obs (`sf`) the formatted points of observations. The observation column is named "observation".
- obs_type (`character`) the type of the observations, presence_only or presence_absence.
- has_eval (`logical`) whether an evaluation dataset is set or generated.
- eval (`sf`) the formatted points of observations for evaluation, if any. The observation column is named "observation".
- eval_type (`character`) the type of the observations for evaluation, presence_only or presence_absence.
Examples
library(dplyr)
library(itsdm)
data("occ_virtual_species")
# Example 1: separate training and evaluation tables, presence-absence.
# The column names and observation type are set first; the two tables are
# then taken from `occ_virtual_species` according to its `usage` column.
obs_type <- "presence_absence"
obs_col <- "observation"
y_col <- "y"
x_col <- "x"
eval_df <- filter(occ_virtual_species, usage == "eval")
obs_df <- filter(occ_virtual_species, usage == "train")
obs <- format_observation(
  obs_df = obs_df,
  eval_df = eval_df,
  x_col = x_col,
  y_col = y_col,
  obs_col = obs_col,
  obs_type = obs_type
)
# Example 2: separate training and evaluation tables, presence-only.
# Same inputs as a presence-absence run; only `obs_type` differs.
obs_type <- "presence_only"
obs_col <- "observation"
y_col <- "y"
x_col <- "x"
eval_df <- filter(occ_virtual_species, usage == "eval")
obs_df <- filter(occ_virtual_species, usage == "train")
obs <- format_observation(
  obs_df = obs_df,
  eval_df = eval_df,
  x_col = x_col,
  y_col = y_col,
  obs_col = obs_col,
  obs_type = obs_type
)
# Example 3: training and evaluation tables declared with different CRSs,
# presence-only.
obs_type <- "presence_only"
obs_col <- "observation"
y_col <- "y"
x_col <- "x"
obs_crs <- 4326
# The evaluation CRS is a made-up one, used only so that the two CRS
# arguments differ in this example.
eval_crs <- 20935
eval_df <- filter(occ_virtual_species, usage == "eval")
obs_df <- filter(occ_virtual_species, usage == "train")
obs <- format_observation(
  obs_df = obs_df,
  eval_df = eval_df,
  obs_crs = obs_crs,
  eval_crs = eval_crs,
  x_col = x_col,
  y_col = y_col,
  obs_col = obs_col,
  obs_type = obs_type
)
# Example 4: one table split into training and evaluation parts,
# presence-absence. No `eval_df` is supplied; `split_perc` sets the share
# of the data used for evaluation and `seed` is the seed for the split.
obs_df <- occ_virtual_species
split_perc <- 0.5
seed <- 123
obs_crs <- 4326
x_col <- "x"
y_col <- "y"
obs_col <- "observation"
obs_type <- "presence_absence"
# `seed` and `obs_crs` are passed explicitly so that editing the values
# above actually changes the call (they equal the documented defaults).
obs <- format_observation(
  obs_df = obs_df,
  split_perc = split_perc,
  seed = seed,
  obs_crs = obs_crs,
  x_col = x_col,
  y_col = y_col,
  obs_col = obs_col,
  obs_type = obs_type
)
# Example 5: training data only, presence-only, no evaluation set.
# `eval_df` is `NULL` and `split_perc` is 0, so no evaluation table is
# supplied and no share of the data is requested for evaluation.
obs_df <- occ_virtual_species
eval_df <- NULL
split_perc <- 0
seed <- 123
obs_crs <- 4326
x_col <- "x"
y_col <- "y"
obs_col <- "observation"
obs_type <- "presence_only"
# `seed` and `obs_crs` are passed explicitly so that editing the values
# above actually changes the call (they equal the documented defaults).
obs <- format_observation(
  obs_df = obs_df,
  eval_df = eval_df,
  split_perc = split_perc,
  seed = seed,
  obs_crs = obs_crs,
  x_col = x_col,
  y_col = y_col,
  obs_col = obs_col,
  obs_type = obs_type
)