DataManager

`DataManager`

Provide functionality for handling data processing and workflows.

Source code in pySWATPlus/data_manager.py

class DataManager:

    '''
    Provide functionality for handling data processing and workflows.
    '''

    def simulated_timeseries_df(
        self,
        sim_file: str | pathlib.Path,
        has_units: bool,
        begin_date: typing.Optional[str] = None,
        end_date: typing.Optional[str] = None,
        ref_day: typing.Optional[int] = None,
        ref_month: typing.Optional[int] = None,
        apply_filter: typing.Optional[dict[str, list[typing.Any]]] = None,
        usecols: typing.Optional[list[str]] = None,
        json_file: typing.Optional[str | pathlib.Path] = None
    ) -> pandas.DataFrame:
        '''
        Extract data from a simulation output file and return a time series `DataFrame`.
        A new `date` column is constructed using `datetime.date` objects from the `yr`, `mon`, and `day` columns.

        Args:
            sim_file (str | pathlib.Path): Path to the input file containing time series data generated by
                the method [`run_swat`](https://swat-model.github.io/pySWATPlus/api/txtinout_reader/#pySWATPlus.TxtinoutReader.run_swat).
                The file must contain `yr`, `mon`, and `day` columns.

            has_units (bool): If `True`, the third line of the input file contains column units.

            begin_date (str): Start date in `DD-Mon-YYYY` format (e.g., '01-Jan-2012'), inclusive.
                If `None` (default), the earliest available date is used.

            end_date (str): End date in `DD-Mon-YYYY` format (e.g., '31-Dec-2015'), inclusive.
                If `None` (default), the latest available date is used.

            ref_day (int): Reference day for monthly and yearly time series after filtering by `begin_date` and `end_date`.
                For example, `2012-01-31` and `2012-02-29` become `2012-01-15` and `2012-02-15` when `ref_day=15`.
                If `None` (default), the last day of the month or year is used, obtained from simulation.
                Not applicable to daily time series files (ending with `_day`).

            ref_month (int): Reference month for yearly time series after filtering by `begin_date` and `end_date`. For example,
                `2012-12-31` and `2013-12-31` become `2012-06-15` and `2013-06-15` when `ref_day=15` and `ref_month=6`.
                If `None` (default), the last month of the year is used, obtained from simulation.
                Not applicable to monthly time series files (ending with `_mon`).

            apply_filter (dict[str, list[Any]]): Dictionary mapping column names to lists of values for row filtering.
                If `None` (default), no filtering is applied.

            usecols (list[str]): Column names to include in the output. If `None` (default), all columns are used.

            json_file (str | pathlib.Path): Path to save the output `DataFrame` as a JSON file.
                If `None` (default), the DataFrame is not saved.

        Returns:
            Time series `DataFrame` with a new `date` column.

        Raises:
            ValueError: If a required time column is missing, if `ref_day` or `ref_month` is not applicable
                to the file or yields an invalid calendar date, if no rows remain after filtering,
                or if a column named in `apply_filter` or `usecols` is not found in the file.
            TypeError: If a value in `apply_filter` is not a list.
        '''

        # Check input variables type
        # Kept as the first statement so that `locals()` holds only the call arguments
        validators._variable_origin_static_type(
            vars_types=typing.get_type_hints(
                obj=self.simulated_timeseries_df
            ),
            vars_values=locals()
        )

        # Absolute file path
        sim_file = pathlib.Path(sim_file).resolve()

        # DataFrame from input file; the first line is skipped, and the third line too when it holds units
        df = utils._df_extract(
            input_file=sim_file,
            skiprows=[0, 2] if has_units else [0]
        )

        # Columns available in the input file (captured before the date column is added)
        df_cols = list(df.columns)

        # Create date column
        date_col = 'date'
        time_cols = ['yr', 'mon', 'day']
        missing_cols = [
            col for col in time_cols if col not in df_cols
        ]
        if missing_cols:
            raise ValueError(
                f'Missing required time series columns "{missing_cols}" in file "{sim_file.name}"'
            )
        df[date_col] = pandas.to_datetime(
            df[time_cols].rename(columns={'yr': 'year', 'mon': 'month'})
        ).dt.date

        # Filter DataFrame by date (both bounds inclusive); the defaults are the earliest
        # and latest dates in the file, independent of the row order
        begin_dt = utils._date_str_to_object(begin_date) if begin_date is not None else df[date_col].min()
        end_dt = utils._date_str_to_object(end_date) if end_date is not None else df[date_col].max()
        df = df.loc[df[date_col].between(begin_dt, end_dt)]

        # Collect the date components to overwrite, rejecting files where they do not apply
        replace_parts = {}
        if ref_day is not None:
            if sim_file.stem.endswith(('_day', '_subday')):
                raise ValueError(
                    f'Parameter "ref_day" is not applicable for daily or sub-daily time series in file "{sim_file.name}" '
                    f'because it would assign the same day to all records within a month.'
                )
            replace_parts['day'] = ref_day
        if ref_month is not None:
            # NOTE(review): daily files are not rejected here although the parameter is documented
            # for yearly time series only; confirm whether that is intended
            if sim_file.stem.endswith('_mon'):
                raise ValueError(
                    f'Parameter "ref_month" is not applicable for monthly time series in file "{sim_file.name}" '
                    f'because it would assign the same month to all records within a year.'
                )
            replace_parts['month'] = ref_month

        # Check if filtering by date removed all rows
        if df.empty:
            raise ValueError(
                f'No data found between "{begin_date}" and "{end_date}" in file "{sim_file.name}"'
            )

        # Fix reference day and month in a single replacement, so that only the final
        # day-month combination has to be a valid calendar date
        if replace_parts:
            try:
                df[date_col] = df[date_col].apply(
                    lambda x: x.replace(**replace_parts)
                )
            except ValueError as err:
                raise ValueError(
                    f'Cannot apply ref_day "{ref_day}" and ref_month "{ref_month}" '
                    f'to the dates in file "{sim_file.name}": {err}'
                ) from err

        # Filter rows by dictionary criteria
        if apply_filter is not None:
            for col, val in apply_filter.items():
                if col not in df_cols:
                    raise ValueError(
                        f'Column "{col}" in apply_filter was not found in file "{sim_file.name}"'
                    )
                if not isinstance(val, list):
                    raise TypeError(
                        f'Column "{col}" in apply_filter for file "{sim_file.name}" must be a list, '
                        f'but got type "{type(val).__name__}"'
                    )
                df = df.loc[df[col].isin(val)]
                # Check if filtering removed all rows
                if df.empty:
                    raise ValueError(
                        f'Filtering by column "{col}" with values "{val}" returned no rows in "{sim_file.name}"'
                    )

        # Reset DataFrame index
        df = df.reset_index(
            drop=True
        )

        # Finalize columns for DataFrame; the date column always comes first
        if usecols is None:
            retain_cols = [date_col] + df_cols
        else:
            for col in usecols:
                if col not in df_cols:
                    raise ValueError(
                        f'Column "{col}" specified in "usecols" was not found in file "{sim_file.name}"'
                    )
            retain_cols = [date_col] + usecols

        # Output DataFrame
        df = df[retain_cols]

        # Save DataFrame
        if json_file is not None:
            json_file = pathlib.Path(json_file).resolve()
            # Raise error for invalid JSON file extension
            validators._json_extension(
                json_file=json_file
            )
            # Write a copy with dates formatted as strings; the returned DataFrame keeps date objects
            json_df = df.copy()
            json_df[date_col] = json_df[date_col].apply(lambda x: x.strftime('%d-%b-%Y'))
            json_df.to_json(
                path_or_buf=json_file,
                orient='records',
                indent=4
            )

        return df

    def hru_stats_from_daily_simulation(
        self,
        sim_file: str | pathlib.Path,
        has_units: bool,
        gis_id: int,
        sim_col: str,
        output_dir: typing.Optional[str | pathlib.Path] = None
    ) -> dict[str, pandas.DataFrame]:
        '''
        Compute monthly and yearly statistical summaries for a Hydrological Response Unit (HRU) from daily simulation time series data.

        The method returns a dictionary containing two keys, `monthly` and `yearly`, whose values are `DataFrame` objects.
        Each `DataFrame` includes the following columns:

            - `date`: The first day of the corresponding month or year.
            - `min`: Minimum simulated value within the time window.
            - `max`: Maximum simulated value within the time window.
            - `mean`: Mean simulated value within the time window.
            - `std`: Standard deviation of simulated values within the time window.

        The statistics are computed using daily values between the first and last dates (both inclusive)
        of each month or year. The `date` column represents the first day of the corresponding period
        (e.g., 01-Jan-2012, 01-Feb-2012 for monthly; 01-Jan-2012 for yearly).

        If the first or last record in the input file does not align exactly with the start or end
        of a month or year, the statistics are computed for the available portion of that period.
        In such cases, the `date` column represents the first available date for that partial period.

        Args:
            sim_file (str | pathlib.Path): Path to the input file containing time series data generated by
                the method [`run_swat`](https://swat-model.github.io/pySWATPlus/api/txtinout_reader/#pySWATPlus.TxtinoutReader.run_swat).
                The file must contain `yr`, `mon`, and `day` columns.

            has_units (bool): If `True`, the third line of the input file contains column units.

            gis_id (int): Unique identifier for the Hydrological Response Unit (HRU) found in the `gis_id` column.

            sim_col (str): Name of the column containing simulated values.

            output_dir (str | pathlib.Path): Directory path to save the computed results as the following two JSON files.
                If `None` (default), the results are not saved.

                    - `statistics_monthly.json`: Contains the monthly statistical `DataFrame` (with `date` formatted as `DD-Mon-YYYY`).
                    - `statistics_yearly.json`: Contains the yearly statistical `DataFrame` (with `date` formatted as `DD-Mon-YYYY`).

        Returns:
            Dictionary with two keys:

                - `monthly`: `DataFrame` containing monthly statistics, with `date` as `datetime.date` objects.
                - `yearly`: `DataFrame` containing yearly statistics, with `date` as `datetime.date` objects.

        Raises:
            ValueError: If the stem of `sim_file` does not end with `_day`.
        '''

        # Check input variables type
        # Kept as the first statement so that `locals()` holds only the call arguments
        validators._variable_origin_static_type(
            vars_types=typing.get_type_hints(
                obj=self.hru_stats_from_daily_simulation
            ),
            vars_values=locals()
        )

        # Check input file contains daily time series data
        sim_file = pathlib.Path(sim_file).resolve()
        if not sim_file.stem.endswith('_day'):
            raise ValueError(
                f'Statistical summary applies only to daily time series files ending with "_day"; received file name "{sim_file.stem}"'
            )

        # Validate directory path
        if output_dir is not None:
            validators._dir_path(
                input_dir=pathlib.Path(output_dir).resolve()
            )

        # Simulated DataFrame restricted to the requested HRU and value column
        df = self.simulated_timeseries_df(
            sim_file=sim_file,
            has_units=has_units,
            apply_filter={
                'gis_id': [gis_id]
            },
            usecols=[sim_col]
        )

        # Frequency abbreviations (pandas offset aliases for month start and year start)
        freq_abb = {
            'monthly': 'MS',
            'yearly': 'YS'
        }

        # Date column
        date_col = 'date'

        # First and last dates of the record; identical for every frequency
        start_date = df[date_col].min()
        end_date = df[date_col].max()
        # Exclusive upper bound of the final window; the day after the last record
        # is used so that the last record itself is included in the statistics
        stop_date = (pandas.Timestamp(end_date) + pandas.Timedelta(days=1)).date()

        output = {}
        # Iterate frequency
        for freq, abb in freq_abb.items():
            # Period start dates that fall within the record
            period_starts = pandas.date_range(
                start=start_date,
                end=end_date,
                freq=abb
            )
            # Window boundaries: first record, later period starts, and the exclusive stop date
            bounds = [start_date]
            bounds.extend(
                day.date() for day in period_starts if day.date() > start_date
            )
            bounds.append(stop_date)
            # Statistics of each half-open window [win_start, win_stop)
            records = []
            for win_start, win_stop in zip(bounds[:-1], bounds[1:]):
                in_window = (df[date_col] >= win_start) & (df[date_col] < win_stop)
                win_values = df.loc[in_window, sim_col]
                records.append(
                    {
                        date_col: win_start,
                        'max': win_values.max(),
                        'min': win_values.min(),
                        'mean': win_values.mean(),
                        'std': win_values.std()
                    }
                )
            # Frequency DataFrame
            freq_df = pandas.DataFrame(
                records,
                columns=[date_col, 'max', 'min', 'mean', 'std']
            )
            # Insert the DataFrame in the output dictionary
            output[freq] = freq_df
            # Save the DataFrame
            if output_dir is not None:
                save_file = pathlib.Path(output_dir).resolve() / f'statistics_{freq}.json'
                # Write a copy with dates formatted as strings; the returned DataFrame keeps date objects
                json_df = freq_df.copy()
                json_df[date_col] = json_df[date_col].apply(lambda x: x.strftime('%d-%b-%Y'))
                json_df.to_json(
                    path_or_buf=save_file,
                    orient='records',
                    indent=4
                )

        return output

    def read_sensitive_dfs(
        self,
        sensim_file: str | pathlib.Path,
        df_name: str,
        add_problem: bool = False,
        add_sample: bool = False
    ) -> dict[str, typing.Any]:
        '''
        Read sensitivity simulation data generated by the method
        [`simulation_by_sample_parameters`](https://swat-model.github.io/pySWATPlus/api/sensitivity_analyzer/#pySWATPlus.SensitivityAnalyzer.simulation_by_sample_parameters),
        and return a dictionary mapping each scenario integer to its corresponding `DataFrame`.

        The returned dictionary may include the following keys:

        - `scenario` (default): A mapping between each scenario integer and its corresponding DataFrame.
        - `problem` (optional): The problem definition.
        - `sample` (optional): The sample list used in the sensitivity simulation.

        Args:
            sensim_file (str | pathlib.Path): Path to the `sensitivity_simulation.json` file generated by `simulation_by_sample_parameters`.

            df_name (str): Name of the `DataFrame` within `sensitivity_simulation.json`.

            add_problem (bool): If `True`, includes the problem definition in the output dictionary under the `problem` key. Defaults to `False`.

            add_sample (bool): If `True`, includes the sample list used in the simulation under the `sample` key. Defaults to `False`.

        Returns:
            A dictionary with the following keys:

                - `scenario` (default): A mapping between each scenario integer and its corresponding DataFrame.
                - `problem` (optional):  The definition dictionary passed to sampling.
                - `sample` (optional): The sample list used in the sensitivity simulation.
        '''

        # Check input variables type
        # Kept as the first statement so that `locals()` holds only the call arguments
        validators._variable_origin_static_type(
            vars_types=typing.get_type_hints(
                obj=self.read_sensitive_dfs
            ),
            vars_values=locals()
        )

        # Absolute file path
        sensim_file = pathlib.Path(sensim_file).resolve()

        # Sensitivity output data
        output = utils._sensitivity_output_retrieval(
            sensim_file=sensim_file,
            df_name=df_name,
            add_problem=add_problem,
            add_sample=add_sample
        )

        return output

`hru_stats_from_daily_simulation(sim_file: str | pathlib.Path, has_units: bool, gis_id: int, sim_col: str, output_dir: typing.Optional[str | pathlib.Path] = None) -> dict[str, pandas.DataFrame]`

Compute monthly and yearly statistical summaries for a Hydrological Response Unit (HRU) from daily simulation time series data.

The method returns a dictionary containing two keys, monthly and yearly, whose values are DataFrame objects. Each DataFrame includes the following columns:

- `date`: The first day of the corresponding month or year.
- `min`: Minimum simulated value within the time window.
- `max`: Maximum simulated value within the time window.
- `mean`: Mean simulated value within the time window.
- `std`: Standard deviation of simulated values within the time window.

The statistics are computed using daily values between the first and last dates (both inclusive) of each month or year. The date column represents the first day of the corresponding period (e.g., 01-Jan-2012, 01-Feb-2012 for monthly; 01-Jan-2012 for yearly).

If the first or last record in the input file does not align exactly with the start or end of a month or year, the statistics are computed for the available portion of that period. In such cases, the date column represents the first available date for that partial period.

Parameters:

Name	Type	Description	Default
`sim_file`	`str \| Path`	Path to the input file containing time series data generated by the method `run_swat`. The file must contain `yr`, `mon`, and `day` columns.	required
`has_units`	`bool`	If `True`, the third line of the input file contains column units.	required
`gis_id`	`int`	Unique identifier for the Hydrological Response Unit (HRU) found in the `gis_id` column.	required
`sim_col`	`str`	Name of the column containing simulated values.	required
`output_dir`	`str \| Path`	Directory path to save the computed results as the following two JSON files. If `None` (default), the results are not saved. - `statistics_monthly.json`: Contains the monthly statistical `DataFrame` (with `date` formatted as `DD-Mon-YYYY`). - `statistics_yearly.json`: Contains the yearly statistical `DataFrame` (with `date` formatted as `DD-Mon-YYYY`).	`None`

Returns:

Type	Description
`dict[str, DataFrame]`	Dictionary with two keys: `monthly`: `DataFrame` containing monthly statistics, with `date` as `datetime.date` objects. `yearly`: `DataFrame` containing yearly statistics, with `date` as `datetime.date` objects.

Source code in pySWATPlus/data_manager.py

def hru_stats_from_daily_simulation(
    self,
    sim_file: str | pathlib.Path,
    has_units: bool,
    gis_id: int,
    sim_col: str,
    output_dir: typing.Optional[str | pathlib.Path] = None
) -> dict[str, pandas.DataFrame]:
    '''
    Compute monthly and yearly statistical summaries for a Hydrological Response Unit (HRU) from daily simulation time series data.

    The method returns a dictionary containing two keys, `monthly` and `yearly`, whose values are `DataFrame` objects.
    Each `DataFrame` includes the following columns:

        - `date`: The first day of the corresponding month or year.
        - `min`: Minimum simulated value within the time window.
        - `max`: Maximum simulated value within the time window.
        - `mean`: Mean simulated value within the time window.
        - `std`: Standard deviation of simulated values within the time window.

    The statistics are computed using daily values between the first and last dates (both inclusive)
    of each month or year. The `date` column represents the first day of the corresponding period
    (e.g., 01-Jan-2012, 01-Feb-2012 for monthly; 01-Jan-2012 for yearly).

    If the first or last record in the input file does not align exactly with the start or end
    of a month or year, the statistics are computed for the available portion of that period.
    In such cases, the `date` column represents the first available date for that partial period.

    Args:
        sim_file (str | pathlib.Path): Path to the input file containing time series data generated by
            the method [`run_swat`](https://swat-model.github.io/pySWATPlus/api/txtinout_reader/#pySWATPlus.TxtinoutReader.run_swat).
            The file must contain `yr`, `mon`, and `day` columns.

        has_units (bool): If `True`, the third line of the input file contains column units.

        gis_id (int): Unique identifier for the Hydrological Response Unit (HRU) found in the `gis_id` column.

        sim_col (str): Name of the column containing simulated values.

        output_dir (str | pathlib.Path): Directory path to save the computed results as the following two JSON files.
            If `None` (default), the results are not saved.

                - `statistics_monthly.json`: Contains the monthly statistical `DataFrame` (with `date` formatted as `DD-Mon-YYYY`).
                - `statistics_yearly.json`: Contains the yearly statistical `DataFrame` (with `date` formatted as `DD-Mon-YYYY`).

    Returns:
        Dictionary with two keys:

            - `monthly`: `DataFrame` containing monthly statistics, with `date` as `datetime.date` objects.
            - `yearly`: `DataFrame` containing yearly statistics, with `date` as `datetime.date` objects.

    Raises:
        ValueError: If the stem of `sim_file` does not end with `_day`.
    '''

    # Check input variables type
    # Kept as the first statement so that `locals()` holds only the call arguments
    validators._variable_origin_static_type(
        vars_types=typing.get_type_hints(
            obj=self.hru_stats_from_daily_simulation
        ),
        vars_values=locals()
    )

    # Check input file contains daily time series data
    sim_file = pathlib.Path(sim_file).resolve()
    if not sim_file.stem.endswith('_day'):
        raise ValueError(
            f'Statistical summary applies only to daily time series files ending with "_day"; received file name "{sim_file.stem}"'
        )

    # Validate directory path
    if output_dir is not None:
        validators._dir_path(
            input_dir=pathlib.Path(output_dir).resolve()
        )

    # Simulated DataFrame restricted to the requested HRU and value column
    df = self.simulated_timeseries_df(
        sim_file=sim_file,
        has_units=has_units,
        apply_filter={
            'gis_id': [gis_id]
        },
        usecols=[sim_col]
    )

    # Frequency abbreviations (pandas offset aliases for month start and year start)
    freq_abb = {
        'monthly': 'MS',
        'yearly': 'YS'
    }

    # Date column
    date_col = 'date'

    # First and last dates of the record; identical for every frequency
    start_date = df[date_col].min()
    end_date = df[date_col].max()
    # Exclusive upper bound of the final window; the day after the last record
    # is used so that the last record itself is included in the statistics
    stop_date = (pandas.Timestamp(end_date) + pandas.Timedelta(days=1)).date()

    output = {}
    # Iterate frequency
    for freq, abb in freq_abb.items():
        # Period start dates that fall within the record
        period_starts = pandas.date_range(
            start=start_date,
            end=end_date,
            freq=abb
        )
        # Window boundaries: first record, later period starts, and the exclusive stop date
        bounds = [start_date]
        bounds.extend(
            day.date() for day in period_starts if day.date() > start_date
        )
        bounds.append(stop_date)
        # Statistics of each half-open window [win_start, win_stop)
        records = []
        for win_start, win_stop in zip(bounds[:-1], bounds[1:]):
            in_window = (df[date_col] >= win_start) & (df[date_col] < win_stop)
            win_values = df.loc[in_window, sim_col]
            records.append(
                {
                    date_col: win_start,
                    'max': win_values.max(),
                    'min': win_values.min(),
                    'mean': win_values.mean(),
                    'std': win_values.std()
                }
            )
        # Frequency DataFrame
        freq_df = pandas.DataFrame(
            records,
            columns=[date_col, 'max', 'min', 'mean', 'std']
        )
        # Insert the DataFrame in the output dictionary
        output[freq] = freq_df
        # Save the DataFrame
        if output_dir is not None:
            save_file = pathlib.Path(output_dir).resolve() / f'statistics_{freq}.json'
            # Write a copy with dates formatted as strings; the returned DataFrame keeps date objects
            json_df = freq_df.copy()
            json_df[date_col] = json_df[date_col].apply(lambda x: x.strftime('%d-%b-%Y'))
            json_df.to_json(
                path_or_buf=save_file,
                orient='records',
                indent=4
            )

    return output

`read_sensitive_dfs(sensim_file: str | pathlib.Path, df_name: str, add_problem: bool = False, add_sample: bool = False) -> dict[str, typing.Any]`

Read sensitivity simulation data generated by the method simulation_by_sample_parameters, and return a dictionary mapping each scenario integer to its corresponding DataFrame.

The returned dictionary may include the following keys: - scenario (default): A mapping between each scenario integer and its corresponding DataFrame. - problem (optional): The problem definition. - sample (optional): The sample list used in the sensitivity simulation.

Parameters:

Name	Type	Description	Default
`sensim_file`	`str \| Path`	Path to the `sensitivity_simulation.json` file generated by `simulation_by_sample_parameters`.	required
`df_name`	`str`	Name of the `DataFrame` within `sensitivity_simulation.json`.	required
`add_problem`	`bool`	If `True`, includes the problem definition in the output dictionary under the `problem` key. Defaults to `False`.	`False`
`add_sample`	`bool`	If `True`, includes the sample list used in the simulation under the `sample` key. Defaults to `False`.	`False`

Returns:

Type	Description
`dict[str, Any]`	A dictionary with the following keys: `scenario` (default): A mapping between each scenario integer and its corresponding DataFrame. `problem` (optional): The definition dictionary passed to sampling. `sample` (optional): The sample list used in the sensitivity simulation.

Source code in pySWATPlus/data_manager.py

def read_sensitive_dfs(
    self,
    sensim_file: str | pathlib.Path,
    df_name: str,
    add_problem: bool = False,
    add_sample: bool = False
) -> dict[str, typing.Any]:
    '''
    Load the sensitivity simulation results written by the method
    [`simulation_by_sample_parameters`](https://swat-model.github.io/pySWATPlus/api/sensitivity_analyzer/#pySWATPlus.SensitivityAnalyzer.simulation_by_sample_parameters)
    and return them as a dictionary.

    The returned dictionary may include the following keys:

    - `scenario` (default): A mapping between each scenario integer and its corresponding DataFrame.
    - `problem` (optional): The problem definition.
    - `sample` (optional): The sample list used in the sensitivity simulation.

    Args:
        sensim_file (str | pathlib.Path): Path to the `sensitivity_simulation.json` file generated by `simulation_by_sample_parameters`.

        df_name (str): Name of the `DataFrame` within `sensitivity_simulation.json`.

        add_problem (bool): If `True`, the problem definition is added to the output under the `problem` key. Defaults to `False`.

        add_sample (bool): If `True`, the sample list used in the simulation is added to the output under the `sample` key. Defaults to `False`.

    Returns:
        A dictionary with the following keys:

            - `scenario` (default): A mapping between each scenario integer and its corresponding DataFrame.
            - `problem` (optional): The definition dictionary passed to sampling.
            - `sample` (optional): The sample list used in the sensitivity simulation.
    '''

    # Validate argument types against the method annotations
    # Kept as the first statement so that `locals()` holds only the call arguments
    validators._variable_origin_static_type(
        vars_types=typing.get_type_hints(obj=self.read_sensitive_dfs),
        vars_values=locals()
    )

    # Resolve to an absolute path, then delegate the retrieval to the helper
    sensim_file = pathlib.Path(sensim_file).resolve()

    return utils._sensitivity_output_retrieval(
        sensim_file=sensim_file,
        df_name=df_name,
        add_problem=add_problem,
        add_sample=add_sample
    )

`simulated_timeseries_df(sim_file: str | pathlib.Path, has_units: bool, begin_date: typing.Optional[str] = None, end_date: typing.Optional[str] = None, ref_day: typing.Optional[int] = None, ref_month: typing.Optional[int] = None, apply_filter: typing.Optional[dict[str, list[typing.Any]]] = None, usecols: typing.Optional[list[str]] = None, json_file: typing.Optional[str | pathlib.Path] = None) -> pandas.DataFrame`

Extract data from a simulation output file and return a time series DataFrame. A new date column is constructed using datetime.date objects from the yr, mon, and day columns.

Parameters:

Name	Type	Description	Default
`sim_file`	`str \| Path`	Path to the input file containing time series data generated by the method `run_swat`. The file must contain `yr`, `mon`, and `day` columns.	required
`has_units`	`bool`	If `True`, the third line of the input file contains column units.	required
`begin_date`	`str`	Start date in `DD-Mon-YYYY` format (e.g., '01-Jan-2012'), inclusive. If `None` (default), the earliest available date is used.	`None`
`end_date`	`str`	End date in `DD-Mon-YYYY` format (e.g., '31-Dec-2015'), inclusive. If `None` (default), the latest available date is used.	`None`
`ref_day`	`int`	Reference day for monthly and yearly time series after filtering by `begin_date` and `end_date`. For example, `2012-01-31` and `2012-02-29` become `2012-01-15` and `2012-02-15` when `ref_day=15`. If `None` (default), the last day of the month or year is used, obtained from simulation. Not applicable to daily time series files (ending with `_day`).	`None`
`ref_month`	`int`	Reference month for yearly time series after filtering by `begin_date` and `end_date`. For example, `2012-12-31` and `2013-12-31` become `2012-06-15` and `2013-06-15` when `ref_day=15` and `ref_month=6`. If `None` (default), the last month of the year is used, obtained from simulation. Not applicable to monthly time series files (ending with `_mon`).	`None`
`apply_filter`	`dict[str, list[Any]]`	Dictionary mapping column names to lists of values for row filtering. If `None` (default), no filtering is applied.	`None`
`usecols`	`list[str]`	Column names to include in the output. If `None` (default), all columns are used.	`None`
`json_file`	`str \| Path`	Path to save the output `DataFrame` as a JSON file. If `None` (default), the DataFrame is not saved.	`None`

Returns:

Type	Description
`DataFrame`	Time series `DataFrame` with a new `date` column.

Source code in pySWATPlus/data_manager.py

def simulated_timeseries_df(
    self,
    sim_file: str | pathlib.Path,
    has_units: bool,
    begin_date: typing.Optional[str] = None,
    end_date: typing.Optional[str] = None,
    ref_day: typing.Optional[int] = None,
    ref_month: typing.Optional[int] = None,
    apply_filter: typing.Optional[dict[str, list[typing.Any]]] = None,
    usecols: typing.Optional[list[str]] = None,
    json_file: typing.Optional[str | pathlib.Path] = None
) -> pandas.DataFrame:
    '''
    Extract data from a simulation output file and return a time series `DataFrame`.
    A new `date` column is constructed using `datetime.date` objects from the `yr`, `mon`, and `day` columns.

    Args:
        sim_file (str | pathlib.Path): Path to the input file containing time series data generated by
            the method [`run_swat`](https://swat-model.github.io/pySWATPlus/api/txtinout_reader/#pySWATPlus.TxtinoutReader.run_swat).
            The file must contain `yr`, `mon`, and `day` columns.

        has_units (bool): If `True`, the third line of the input file contains column units.

        begin_date (str): Start date in `DD-Mon-YYYY` format (e.g., '01-Jan-2012'), inclusive.
            If `None` (default), the date of the first record in the file is used.

        end_date (str): End date in `DD-Mon-YYYY` format (e.g., '31-Dec-2015'), inclusive.
            If `None` (default), the date of the last record in the file is used.

        ref_day (int): Reference day (1-31) for monthly and yearly time series after filtering by `begin_date` and `end_date`.
            For example, `2012-01-31` and `2012-02-29` become `2012-01-15` and `2012-02-15` when `ref_day=15`.
            A day that does not exist in the target month is clamped to the last day of that month
            (e.g., `ref_day=31` gives `2012-02-29`).
            If `None` (default), the day obtained from simulation is used.
            Not applicable to daily or sub-daily time series files (ending with `_day` or `_subday`).

        ref_month (int): Reference month (1-12) for yearly time series after filtering by `begin_date` and `end_date`.
            For example, `2012-12-31` and `2013-12-31` become `2012-06-15` and `2013-06-15` when `ref_day=15` and `ref_month=6`.
            When `ref_day` is `None`, the simulated day is kept and clamped to the last day of the reference month
            (e.g., `2012-12-31` becomes `2012-06-30` when `ref_month=6`).
            If `None` (default), the month obtained from simulation is used.
            Not applicable to monthly, daily, or sub-daily time series files
            (ending with `_mon`, `_day`, or `_subday`).

        apply_filter (dict[str, list[Any]]): Dictionary mapping column names to lists of values for row filtering.
            If `None` (default), no filtering is applied.

        usecols (list[str]): Column names to include in the output. If `None` (default), all columns are used.

        json_file (str | pathlib.Path): Path to save the output `DataFrame` as a JSON file.
            If `None` (default), the DataFrame is not saved.

    Returns:
        Time series `DataFrame` with a new `date` column.

    Raises:
        ValueError: If a required time column is missing, `ref_day` or `ref_month` is out of range or
            not applicable to the file, no rows remain after date or `apply_filter` filtering,
            or a column named in `apply_filter` or `usecols` is not present in the file.
        TypeError: If a value in `apply_filter` is not a list.
    '''

    # Check input variables type
    # NOTE: locals() is evaluated here, while only the call arguments are bound;
    # do not define any local name (including imports) above this call
    validators._variable_origin_static_type(
        vars_types=typing.get_type_hints(
            obj=self.simulated_timeseries_df
        ),
        vars_values=locals()
    )

    # Absolute file path
    sim_file = pathlib.Path(sim_file).resolve()

    # DataFrame from input file
    skiprows = [0, 2] if has_units else [0]
    df = utils._df_extract(
        input_file=sim_file,
        skiprows=skiprows
    )

    # DataFrame columns (captured before the date column is added)
    df_cols = list(df.columns)

    # Create date column
    date_col = 'date'
    time_cols = ['yr', 'mon', 'day']
    missing_cols = [
        col for col in time_cols if col not in df_cols
    ]
    if missing_cols:
        raise ValueError(
            f'Missing required time series columns "{missing_cols}" in file "{sim_file.name}"'
        )
    df[date_col] = pandas.to_datetime(
        df[time_cols].rename(columns={'yr': 'year', 'mon': 'month'})
    ).dt.date

    # Filter DataFrame by date
    begin_dt = utils._date_str_to_object(begin_date) if begin_date is not None else df[date_col].iloc[0]
    end_dt = utils._date_str_to_object(end_date) if end_date is not None else df[date_col].iloc[-1]
    df = df.loc[df[date_col].between(begin_dt, end_dt)].reset_index(drop=True)

    # Daily and sub-daily files cannot take a reference day or month
    is_daily = sim_file.stem.endswith(('_day', '_subday'))

    # Validate reference day
    if ref_day is not None:
        if is_daily:
            raise ValueError(
                f'Parameter "ref_day" is not applicable for daily or sub-daily time series in file "{sim_file.name}" '
                f'because it would assign the same day to all records within a month.'
            )
        if not 1 <= ref_day <= 31:
            raise ValueError(
                f'Parameter "ref_day" must be between 1 and 31, but got "{ref_day}"'
            )

    # Validate reference month
    if ref_month is not None:
        if sim_file.stem.endswith('_mon'):
            raise ValueError(
                f'Parameter "ref_month" is not applicable for monthly time series in file "{sim_file.name}" '
                f'because it would assign the same month to all records within a year.'
            )
        if is_daily:
            raise ValueError(
                f'Parameter "ref_month" is not applicable for daily or sub-daily time series in file "{sim_file.name}" '
                f'because it would assign the same month to all records within a year.'
            )
        if not 1 <= ref_month <= 12:
            raise ValueError(
                f'Parameter "ref_month" must be between 1 and 12, but got "{ref_month}"'
            )

    # Fix reference day and month in a single pass, so that no invalid
    # intermediate date (e.g., June 31) is ever constructed
    if ref_day is not None or ref_month is not None:
        import calendar

        def _to_reference_date(x: typing.Any) -> typing.Any:
            month = x.month if ref_month is None else ref_month
            day = x.day if ref_day is None else ref_day
            # Clamp the day to the length of the target month (handles 30-day months and February)
            last_day = calendar.monthrange(x.year, month)[1]
            return x.replace(month=month, day=min(day, last_day))

        df[date_col] = df[date_col].apply(_to_reference_date)

    # Check if filtering by date removed all rows
    if df.empty:
        raise ValueError(
            f'No data found between "{begin_date}" and "{end_date}" in file "{sim_file.name}"'
        )

    # Filter rows by dictionary criteria
    if apply_filter is not None:
        for col, val in apply_filter.items():
            if col not in df_cols:
                raise ValueError(
                    f'Column "{col}" in apply_filter was not found in file "{sim_file.name}"'
                )
            if not isinstance(val, list):
                raise TypeError(
                    f'Column "{col}" in apply_filter for file "{sim_file.name}" must be a list, '
                    f'but got type "{type(val).__name__}"'
                )
            df = df.loc[df[col].isin(val)]
            # Check if filtering removed all rows
            if df.empty:
                raise ValueError(
                    f'Filtering by column "{col}" with values "{val}" returned no rows in "{sim_file.name}"'
                )

    # Reset DataFrame index
    df = df.reset_index(
        drop=True
    )

    # Finalize columns for DataFrame; the date column always comes first
    if usecols is None:
        retain_cols = [date_col] + df_cols
    else:
        for col in usecols:
            if col not in df_cols:
                raise ValueError(
                    f'Column "{col}" specified in "usecols" was not found in file "{sim_file.name}"'
                )
        retain_cols = [date_col] + usecols

    # Output DataFrame
    df = df[retain_cols]

    # Save DataFrame
    if json_file is not None:
        json_file = pathlib.Path(json_file).resolve()
        # Raise error for invalid JSON file extension
        validators._json_extension(
            json_file=json_file
        )
        # Write a copy to the JSON file so the returned DataFrame keeps date objects
        copy_df = df.copy()
        copy_df[date_col] = copy_df[date_col].apply(lambda x: x.strftime('%d-%b-%Y'))
        copy_df.to_json(
            path_or_buf=json_file,
            orient='records',
            indent=4
        )

    return df