PerformanceMetrics

Provide functionality to evaluate model performance by comparing simulated and observed values.

Source code in pySWATPlus/performance_metrics.py
class PerformanceMetrics:
    '''
    Provide functionality to evaluate model performance by comparing simulated and observed values.
    '''

    @property
    def indicator_names(
        self
    ) -> dict[str, str]:
        '''
        Returns a dictionary of available performance indicators. The keys are
        commonly used abbreviations, and the values are the corresponding full
        indicator names. The computed indicator values are treated as performance
        metrics of the model. Available abbreviations:

        - `NSE`: Nash–Sutcliffe Efficiency
        - `KGE`: Kling–Gupta Efficiency
        - `MSE`: Mean Squared Error
        - `RMSE`: Root Mean Squared Error
        - `PBIAS`: Percent Bias
        - `MARE`: Mean Absolute Relative Error
        '''

        abbr_name = {
            'NSE': 'Nash-Sutcliffe Efficiency',
            'KGE': 'Kling-Gupta Efficiency',
            'MSE': 'Mean Squared Error',
            'RMSE': 'Root Mean Squared Error',
            'PBIAS': 'Percent Bias',
            'MARE': 'Mean Absolute Relative Error'
        }

        return abbr_name

    def compute_nse(
        self,
        df: pandas.DataFrame,
        sim_col: str,
        obs_col: str
    ) -> float:
        '''
        Calculate the [`Nash-Sutcliffe Efficiency`](https://doi.org/10.1016/0022-1694(70)90255-6)
        metric between simulated and observed values.

        Args:
            df (pandas.DataFrame): DataFrame containing at least two columns with simulated and observed values.

            sim_col (str): Name of the column containing simulated values.

            obs_col (str): Name of the column containing observed values.

        Returns:
            Computed value of the `Nash-Sutcliffe Efficiency` metric.
        '''

        # Check input variables type
        validators._variable_origin_static_type(
            vars_types=typing.get_type_hints(
                obj=self.compute_nse
            ),
            vars_values=locals()
        )

        # Simulation values
        sim_arr = df[sim_col].astype(float)

        # Observed values
        obs_arr = df[obs_col].astype(float)

        # Output
        numerator = ((sim_arr - obs_arr).pow(2)).sum()
        denominator = ((obs_arr - obs_arr.mean()).pow(2)).sum()
        output = float(1 - numerator / denominator)

        return output

    def compute_kge(
        self,
        df: pandas.DataFrame,
        sim_col: str,
        obs_col: str
    ) -> float:
        '''
        Calculate the [`Kling-Gupta Efficiency`](https://doi.org/10.1016/j.jhydrol.2009.08.003)
        metric between simulated and observed values.

        Args:
            df (pandas.DataFrame): DataFrame containing at least two columns with simulated and observed values.

            sim_col (str): Name of the column containing simulated values.

            obs_col (str): Name of the column containing observed values.

        Returns:
            Computed value of the `Kling-Gupta Efficiency` metric.
        '''

        # Check input variables type
        validators._variable_origin_static_type(
            vars_types=typing.get_type_hints(
                obj=self.compute_kge
            ),
            vars_values=locals()
        )

        # Simulation values
        sim_arr = df[sim_col].astype(float)

        # Observed values
        obs_arr = df[obs_col].astype(float)

        # Pearson correlation coefficient (r)
        r = sim_arr.corr(obs_arr)

        # Variability of prediction errors
        alpha = sim_arr.std() / obs_arr.std()

        # Bias
        beta = sim_arr.mean() / obs_arr.mean()

        # Output
        output = float(1 - pow(pow(r - 1, 2) + pow(alpha - 1, 2) + pow(beta - 1, 2), 0.5))

        return output

    def compute_mse(
        self,
        df: pandas.DataFrame,
        sim_col: str,
        obs_col: str
    ) -> float:
        '''
        Calculate the `Mean Squared Error` metric between simulated and observed values.

        Args:
            df (pandas.DataFrame): DataFrame containing at least two columns with simulated and observed values.

            sim_col (str): Name of the column containing simulated values.

            obs_col (str): Name of the column containing observed values.

        Returns:
            Computed value of the `Mean Squared Error` metric.
        '''

        # Check input variables type
        validators._variable_origin_static_type(
            vars_types=typing.get_type_hints(
                obj=self.compute_mse
            ),
            vars_values=locals()
        )

        # Simulation values
        sim_arr = df[sim_col].astype(float)

        # Observed values
        obs_arr = df[obs_col].astype(float)

        # Output
        output = float(((sim_arr - obs_arr).pow(2)).mean())

        return output

    def compute_rmse(
        self,
        df: pandas.DataFrame,
        sim_col: str,
        obs_col: str
    ) -> float:
        '''
        Calculate the `Root Mean Squared Error` metric between simulated and observed values.

        Args:
            df (pandas.DataFrame): DataFrame containing at least two columns with simulated and observed values.

            sim_col (str): Name of the column containing simulated values.

            obs_col (str): Name of the column containing observed values.

        Returns:
            Computed value of the `Root Mean Squared Error` metric.
        '''

        # Check input variables type
        validators._variable_origin_static_type(
            vars_types=typing.get_type_hints(
                obj=self.compute_rmse
            ),
            vars_values=locals()
        )

        # Compute the MSE value
        mse_value = self.compute_mse(
            df=df,
            sim_col=sim_col,
            obs_col=obs_col
        )

        # Output
        output = float(pow(mse_value, 0.5))

        return output

    def compute_pbias(
        self,
        df: pandas.DataFrame,
        sim_col: str,
        obs_col: str
    ) -> float:
        '''
        Calculate the `Percent Bias` metric between simulated and observed values.

        Args:
            df (pandas.DataFrame): DataFrame containing at least two columns with simulated and observed values.

            sim_col (str): Name of the column containing simulated values.

            obs_col (str): Name of the column containing observed values.

        Returns:
            Computed value of the `Percent Bias` metric.
        '''

        # Check input variables type
        validators._variable_origin_static_type(
            vars_types=typing.get_type_hints(
                obj=self.compute_pbias
            ),
            vars_values=locals()
        )

        # Simulation values
        sim_arr = df[sim_col].astype(float)

        # Observed values
        obs_arr = df[obs_col].astype(float)

        # Output
        output = float(100 * (sim_arr - obs_arr).sum() / obs_arr.sum())

        return output

    def compute_mare(
        self,
        df: pandas.DataFrame,
        sim_col: str,
        obs_col: str
    ) -> float:
        '''
        Calculate the `Mean Absolute Relative Error` metric between simulated and observed values.

        Args:
            df (pandas.DataFrame): DataFrame containing at least two columns with simulated and observed values.

            sim_col (str): Name of the column containing simulated values.

            obs_col (str): Name of the column containing observed values.

        Returns:
            Computed value of the `Mean Absolute Relative Error` metric.
        '''

        # Check input variables type
        validators._variable_origin_static_type(
            vars_types=typing.get_type_hints(
                obj=self.compute_mare
            ),
            vars_values=locals()
        )

        # Simulation values
        sim_arr = df[sim_col].astype(float)

        # Observed values
        obs_arr = df[obs_col].astype(float)

        # Output
        output = float(((obs_arr - sim_arr) / obs_arr).abs().mean())

        return output

    def _validate_indicator_abbr(
        self,
        indicators: list[str]
    ) -> None:
        '''
        Validate each indicator abbreviation in the provided list.
        '''

        abbr_indicator = self.indicator_names
        for ind in indicators:
            if ind not in abbr_indicator:
                raise ValueError(
                    f'Invalid indicator "{ind}"; expected names are {list(abbr_indicator.keys())}'
                )

        return None

    def compute_from_abbr(
        self,
        df: pandas.DataFrame,
        sim_col: str,
        obs_col: str,
        indicator: str
    ) -> float:
        '''
        Compute the performance metric between simulated and observed values based on the provided indicator abbreviation.

        Args:
            df (pandas.DataFrame): DataFrame containing at least two columns with simulated and observed values.

            sim_col (str): Name of the column containing simulated values.

            obs_col (str): Name of the column containing observed values.

            indicator (str): Abbreviation of the indicator, selected from the available options in the property
                [`indicator_names`](https://swat-model.github.io/pySWATPlus/api/performance_metrics/#pySWATPlus.PerformanceMetrics.indicator_names).

        Returns:
            Computed metric value corresponding to the specified indicator.
        '''

        # Check input variables type
        validators._variable_origin_static_type(
            vars_types=typing.get_type_hints(
                obj=self.compute_from_abbr
            ),
            vars_values=locals()
        )

        # Validate abbreviation of indicators
        self._validate_indicator_abbr(
            indicators=[indicator]
        )

        # Method from indicator abbreviation
        indicator_method = getattr(
            self,
            f'compute_{indicator.lower()}'
        )

        # Indicator value
        indicator_value = indicator_method(
            df=df,
            sim_col=sim_col,
            obs_col=obs_col
        )

        return float(indicator_value)

    def scenario_indicators(
        self,
        sensim_file: str | pathlib.Path,
        df_name: str,
        sim_col: str,
        obs_file: str | pathlib.Path,
        date_format: str,
        obs_col: str,
        indicators: list[str],
        json_file: typing.Optional[str | pathlib.Path] = None
    ) -> dict[str, typing.Any]:
        '''
        Compute performance metrics for sample scenarios obtained using the method
        [`simulation_by_sample_parameters`](https://swat-model.github.io/pySWATPlus/api/sensitivity_analyzer/#pySWATPlus.SensitivityAnalyzer.simulation_by_sample_parameters).

        Before computing the indicators, simulated and observed values are normalized using the formula
        `(v - min_o) / (max_o - min_o)`, where `min_o` and `max_o` represent the minimum and maximum of observed values, respectively.
        All negative and `None` observed values are removed before computing `min_o` and `max_o` to prevent errors during normalization.

        The method returns a dictionary with two keys:

        - `problem`: The definition dictionary passed to sampling.
        - `indicator`: A `DataFrame` containing the `Scenario` column and one column per indicator,
          with scenario indices and corresponding indicator values.

        Args:
            sensim_file (str | pathlib.Path): Path to the `sensitivity_simulation.json` file produced by `simulation_by_sobol_sample`.

            df_name (str): Name of the `DataFrame` within `sensitivity_simulation.json` from which to compute scenario indicators.

            sim_col (str): Name of the column in `df_name` containing simulated values.

            obs_file (str | pathlib.Path): Path to the CSV file containing observed data. The file must include a
                `date` column (used to merge simulated and observed data) and use a comma as the separator.

            date_format (str): Date format of the `date` column in `obs_file`, used to parse `datetime.date` objects from date strings.

            obs_col (str): Name of the column in `obs_file` containing observed data.

            indicators (list[str]): List of indicator abbreviations selected from the available options in the property
                [`indicator_names`](https://swat-model.github.io/pySWATPlus/api/performance_metrics/#pySWATPlus.PerformanceMetrics.indicator_names).

            json_file (str | pathlib.Path, optional): Path to a JSON file for saving the output `DataFrame` containing indicator values.
                If `None` (default), the `DataFrame` is not saved.

        Returns:
            Dictionary with two keys, `problem` and `indicator`, and their corresponding values.
        '''

        # Check input variables type
        validators._variable_origin_static_type(
            vars_types=typing.get_type_hints(
                obj=self.scenario_indicators
            ),
            vars_values=locals()
        )

        # Validate abbreviation of indicators
        self._validate_indicator_abbr(
            indicators=indicators
        )

        # Observed DataFrame
        obs_df = utils._df_observe(
            obs_file=pathlib.Path(obs_file).resolve(),
            date_format=date_format,
            obs_col=obs_col
        )
        obs_df.columns = ['date', 'obs']

        # Retrieve sensitivity output
        sensitivity_sim = utils._sensitivity_output_retrieval(
            sensim_file=pathlib.Path(sensim_file).resolve(),
            df_name=df_name,
            add_problem=True,
            add_sample=False
        )

        # Empty DataFrame to store scenario indicators
        ind_df = pandas.DataFrame(
            columns=indicators
        )

        # Iterate scenarios
        for key, df in sensitivity_sim['scenario'].items():
            df = df[['date', sim_col]]
            df.columns = ['date', 'sim']
            # Merge scenario DataFrame with observed DataFrame
            merge_df = df.merge(
                right=obs_df.copy(),
                how='inner',
                on='date'
            )
            # Normalized DataFrame
            norm_df = utils._df_normalize(
                df=merge_df[['sim', 'obs']],
                norm_col='obs'
            )
            # Iterate indicators
            for indicator in indicators:
                # Store indicator value in DataFrame
                ind_val = PerformanceMetrics().compute_from_abbr(
                    df=norm_df,
                    sim_col='sim',
                    obs_col='obs',
                    indicator=indicator
                )
                ind_df.loc[key, indicator] = ind_val

        # Reset index to scenario column
        scnro_col = 'Scenario'
        ind_df = ind_df.reset_index(
            names=[scnro_col]
        )
        ind_df[scnro_col] = ind_df[scnro_col].astype(int)

        # Save DataFrame
        if json_file is not None:
            json_file = pathlib.Path(json_file).resolve()
            # Raise error for invalid JSON file extension
            validators._json_extension(
                json_file=json_file
            )
            # Write DataFrame to the JSON file
            ind_df.to_json(
                path_or_buf=json_file,
                orient='records',
                indent=4
            )

        # Output dictionary
        output = {
            'problem': sensitivity_sim['problem'],
            'indicator': ind_df
        }

        return output

    def indicator_from_file(
        self,
        sim_file: str | pathlib.Path,
        sim_col: str,
        extract_sim: dict[str, typing.Any],
        obs_file: str | pathlib.Path,
        date_format: str,
        obs_col: str,
        indicators: list[str]
    ) -> dict[str, float]:

        '''
        Compute performance metrics for simulated values obtained using the method
        [`run_swat`](https://swat-model.github.io/pySWATPlus/api/txtinout_reader/#pySWATPlus.TxtinoutReader.run_swat).

        Before computing the indicators, simulated and observed values are normalized using the formula `(v - min_o) / (max_o - min_o)`,
        where `min_o` and `max_o` represent the minimum and maximum of observed values, respectively. All negative and `None` observed values
        are removed before computing `min_o` and `max_o` to prevent errors during normalization.

        Args:
            sim_file (str | pathlib.Path): Path to the simulation file generated using the method
                [`run_swat`](https://swat-model.github.io/pySWATPlus/api/txtinout_reader/#pySWATPlus.TxtinoutReader.run_swat).

            sim_col (str): Name of the column in `sim_file` containing simulated data.

            extract_sim (dict[str, typing.Any]): Dictionary specifying input variable configurations for
                [`simulated_timeseries_df`](https://swat-model.github.io/pySWATPlus/api/data_manager/#pySWATPlus.DataManager.simulated_timeseries_df)
                to extract data from `sim_file`. Each key corresponds to an input variable name, and its value is the parameter supplied to the method.

                - *Required key:* `has_units`.
                - *Optional keys:* `begin_date`, `end_date`, `ref_day`, `ref_month`, and `apply_filter`.

                !!! note
                    The optional key `usecols` should **not** be included in `extract_sim`. Although no error will be raised,
                    it will be ignored because the `sim_col` argument is internally used to define the `usecols` list.

            obs_file (str | pathlib.Path): Path to the CSV file containing observed data. The file must include a
                `date` column (used to merge simulated and observed data) and use a comma as the separator.

            date_format (str): Date format of the `date` column in `obs_file`, used to parse `datetime.date` objects from date strings.

            obs_col (str): Name of the column in `obs_file` containing observed data.

            indicators (list[str]): List of indicator abbreviations selected from the available options in the property
                [`indicator_names`](https://swat-model.github.io/pySWATPlus/api/performance_metrics/#pySWATPlus.PerformanceMetrics.indicator_names).

        Returns:
            Dictionary where each key is an indicator abbreviation, and value is the corresponding performance metric.
        '''

        # Check input variables type
        validators._variable_origin_static_type(
            vars_types=typing.get_type_hints(
                obj=self.indicator_from_file
            ),
            vars_values=locals()
        )

        # Validate abbreviation of indicators
        self._validate_indicator_abbr(
            indicators=indicators
        )

        # Simulated DataFrame
        extract_sim['usecols'] = [sim_col]
        sim_df = DataManager().simulated_timeseries_df(
            sim_file=pathlib.Path(sim_file).resolve(),
            **extract_sim
        )
        sim_df.columns = ['date', 'sim']

        # Observed DataFrame
        obs_df = utils._df_observe(
            obs_file=pathlib.Path(obs_file).resolve(),
            date_format=date_format,
            obs_col=obs_col
        )
        obs_df.columns = ['date', 'obs']

        # Merge simulated and observed DataFrames
        merge_df = sim_df.merge(
            right=obs_df,
            how='inner',
            on='date'
        )

        # Normalized DataFrame
        norm_df = utils._df_normalize(
            df=merge_df[['sim', 'obs']],
            norm_col='obs'
        )

        # Empty dictionary to store indicator values
        ind_dict = {}

        # Iterate indicator
        for ind in indicators:
            # Store indicator value in dictionary
            ind_val = PerformanceMetrics().compute_from_abbr(
                df=norm_df,
                sim_col='sim',
                obs_col='obs',
                indicator=ind
            )
            ind_dict[ind] = ind_val

        return ind_dict

indicator_names: dict[str, str] property

Returns a dictionary of available performance indicators. The keys are commonly used abbreviations, and the values are the corresponding full indicator names. The computed indicator values are treated as performance metrics of the model. Available abbreviations:

  • NSE: Nash–Sutcliffe Efficiency
  • KGE: Kling–Gupta Efficiency
  • MSE: Mean Squared Error
  • RMSE: Root Mean Squared Error
  • PBIAS: Percent Bias
  • MARE: Mean Absolute Relative Error
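
For orientation, a minimal sketch of querying this property, assuming `PerformanceMetrics` is importable from the package root as the links on this page suggest:

```python
from pySWATPlus import PerformanceMetrics

# Print each supported abbreviation with its full indicator name
for abbr, name in PerformanceMetrics().indicator_names.items():
    print(f'{abbr}: {name}')
```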

compute_from_abbr(df: pandas.DataFrame, sim_col: str, obs_col: str, indicator: str) -> float

Compute the performance metric between simulated and observed values based on the provided indicator abbreviation.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `df` | `DataFrame` | DataFrame containing at least two columns with simulated and observed values. | required |
| `sim_col` | `str` | Name of the column containing simulated values. | required |
| `obs_col` | `str` | Name of the column containing observed values. | required |
| `indicator` | `str` | Abbreviation of the indicator, selected from the available options in the property indicator_names. | required |

Returns:

| Type | Description |
| --- | --- |
| `float` | Computed metric value corresponding to the specified indicator. |
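
A minimal usage sketch with toy data; the column names `sim` and `obs` here are arbitrary placeholders:

```python
import pandas

from pySWATPlus import PerformanceMetrics

# Toy DataFrame with simulated and observed values
df = pandas.DataFrame({
    'sim': [1.1, 1.9, 3.2, 3.8],
    'obs': [1.0, 2.0, 3.0, 4.0]
})

# Dispatches internally to compute_nse() based on the abbreviation
nse = PerformanceMetrics().compute_from_abbr(
    df=df,
    sim_col='sim',
    obs_col='obs',
    indicator='NSE'
)
print(nse)
```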

compute_kge(df: pandas.DataFrame, sim_col: str, obs_col: str) -> float

Calculate the Kling-Gupta Efficiency metric between simulated and observed values.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `df` | `DataFrame` | DataFrame containing at least two columns with simulated and observed values. | required |
| `sim_col` | `str` | Name of the column containing simulated values. | required |
| `obs_col` | `str` | Name of the column containing observed values. | required |

Returns:

| Type | Description |
| --- | --- |
| `float` | Computed value of the Kling-Gupta Efficiency metric. |
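
Per the source above, the returned value is KGE = 1 - sqrt((r - 1)^2 + (alpha - 1)^2 + (beta - 1)^2), where r is the Pearson correlation, alpha the ratio of standard deviations, and beta the ratio of means. A hand-check sketch with toy data:

```python
import pandas

from pySWATPlus import PerformanceMetrics

df = pandas.DataFrame({
    'sim': [1.1, 1.9, 3.2, 3.8],
    'obs': [1.0, 2.0, 3.0, 4.0]
})

kge = PerformanceMetrics().compute_kge(df=df, sim_col='sim', obs_col='obs')

# Reproduce the three components used by the method
r = df['sim'].corr(df['obs'])               # Pearson correlation
alpha = df['sim'].std() / df['obs'].std()   # variability ratio
beta = df['sim'].mean() / df['obs'].mean()  # bias ratio
expected = 1 - ((r - 1)**2 + (alpha - 1)**2 + (beta - 1)**2)**0.5
assert abs(kge - expected) < 1e-12
```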

compute_mare(df: pandas.DataFrame, sim_col: str, obs_col: str) -> float

Calculate the Mean Absolute Relative Error metric between simulated and observed values.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `df` | `DataFrame` | DataFrame containing at least two columns with simulated and observed values. | required |
| `sim_col` | `str` | Name of the column containing simulated values. | required |
| `obs_col` | `str` | Name of the column containing observed values. | required |

Returns:

| Type | Description |
| --- | --- |
| `float` | Computed value of the Mean Absolute Relative Error metric. |
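
A minimal sketch with toy data; as the source above shows, the metric is the mean of |obs - sim| / obs, so observed values must be nonzero:

```python
import pandas

from pySWATPlus import PerformanceMetrics

df = pandas.DataFrame({
    'sim': [1.1, 1.9, 3.2, 3.8],
    'obs': [1.0, 2.0, 3.0, 4.0]
})

mare = PerformanceMetrics().compute_mare(df=df, sim_col='sim', obs_col='obs')

# Hand-check: mean of absolute relative errors
expected = ((df['obs'] - df['sim']) / df['obs']).abs().mean()
assert abs(mare - expected) < 1e-12
```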

compute_mse(df: pandas.DataFrame, sim_col: str, obs_col: str) -> float

Calculate the Mean Squared Error metric between simulated and observed values.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `df` | `DataFrame` | DataFrame containing at least two columns with simulated and observed values. | required |
| `sim_col` | `str` | Name of the column containing simulated values. | required |
| `obs_col` | `str` | Name of the column containing observed values. | required |

Returns:

| Type | Description |
| --- | --- |
| `float` | Computed value of the Mean Squared Error metric. |
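
A minimal sketch with toy data; a perfect simulation yields 0, and larger values indicate larger squared deviations:

```python
import pandas

from pySWATPlus import PerformanceMetrics

df = pandas.DataFrame({
    'sim': [1.1, 1.9, 3.2, 3.8],
    'obs': [1.0, 2.0, 3.0, 4.0]
})

# Mean of squared differences between simulated and observed values
mse = PerformanceMetrics().compute_mse(df=df, sim_col='sim', obs_col='obs')
print(mse)  # approximately 0.025 for this toy data
```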

compute_nse(df: pandas.DataFrame, sim_col: str, obs_col: str) -> float

Calculate the Nash-Sutcliffe Efficiency metric between simulated and observed values.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `df` | `DataFrame` | DataFrame containing at least two columns with simulated and observed values. | required |
| `sim_col` | `str` | Name of the column containing simulated values. | required |
| `obs_col` | `str` | Name of the column containing observed values. | required |

Returns:

| Type | Description |
| --- | --- |
| `float` | Computed value of the Nash-Sutcliffe Efficiency metric. |
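
A minimal sketch with toy data; NSE equals 1 for a perfect match, 0 when the model predicts no better than the observed mean, and is negative when it does worse:

```python
import pandas

from pySWATPlus import PerformanceMetrics

df = pandas.DataFrame({
    'sim': [1.1, 1.9, 3.2, 3.8],
    'obs': [1.0, 2.0, 3.0, 4.0]
})

nse = PerformanceMetrics().compute_nse(df=df, sim_col='sim', obs_col='obs')

# Hand-check: 1 - sum((sim - obs)^2) / sum((obs - mean(obs))^2)
num = ((df['sim'] - df['obs'])**2).sum()
den = ((df['obs'] - df['obs'].mean())**2).sum()
assert abs(nse - (1 - num / den)) < 1e-12
```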

compute_pbias(df: pandas.DataFrame, sim_col: str, obs_col: str) -> float

Calculate the Percent Bias metric between simulated and observed values.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `df` | `DataFrame` | DataFrame containing at least two columns with simulated and observed values. | required |
| `sim_col` | `str` | Name of the column containing simulated values. | required |
| `obs_col` | `str` | Name of the column containing observed values. | required |

Returns:

| Type | Description |
| --- | --- |
| `float` | Computed value of the Percent Bias metric. |
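
A minimal sketch with toy data. With the sign convention used in the source above, 100 * sum(sim - obs) / sum(obs), a positive value indicates overall overestimation and a negative value underestimation:

```python
import pandas

from pySWATPlus import PerformanceMetrics

df = pandas.DataFrame({
    'sim': [1.2, 2.2, 3.2, 4.2],  # uniformly overestimates
    'obs': [1.0, 2.0, 3.0, 4.0]
})

pbias = PerformanceMetrics().compute_pbias(df=df, sim_col='sim', obs_col='obs')
print(pbias)  # approximately 8.0: simulated values exceed observed by 8% overall
```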

compute_rmse(df: pandas.DataFrame, sim_col: str, obs_col: str) -> float

Calculate the Root Mean Squared Error metric between simulated and observed values.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `df` | `DataFrame` | DataFrame containing at least two columns with simulated and observed values. | required |
| `sim_col` | `str` | Name of the column containing simulated values. | required |
| `obs_col` | `str` | Name of the column containing observed values. | required |

Returns:

| Type | Description |
| --- | --- |
| `float` | Computed value of the Root Mean Squared Error metric. |
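
A minimal sketch with toy data confirming that, per the source above, RMSE is the square root of the MSE and is expressed in the same units as the data:

```python
import pandas

from pySWATPlus import PerformanceMetrics

df = pandas.DataFrame({
    'sim': [1.1, 1.9, 3.2, 3.8],
    'obs': [1.0, 2.0, 3.0, 4.0]
})

pm = PerformanceMetrics()
rmse = pm.compute_rmse(df=df, sim_col='sim', obs_col='obs')
mse = pm.compute_mse(df=df, sim_col='sim', obs_col='obs')

# RMSE is the square root of MSE
assert abs(rmse - mse**0.5) < 1e-12
```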

indicator_from_file(sim_file: str | pathlib.Path, sim_col: str, extract_sim: dict[str, typing.Any], obs_file: str | pathlib.Path, date_format: str, obs_col: str, indicators: list[str]) -> dict[str, float]

Compute performance metrics for simulated values obtained using the method run_swat.

Before computing the indicators, simulated and observed values are normalized using the formula (v - min_o) / (max_o - min_o), where min_o and max_o represent the minimum and maximum of observed values, respectively. All negative and None observed values are removed before computing min_o and max_o to prevent errors during normalization.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `sim_file` | `str \| Path` | Path to the simulation file generated using the method run_swat. | required |
| `sim_col` | `str` | Name of the column in sim_file containing simulated data. | required |
| `extract_sim` | `dict[str, Any]` | Dictionary specifying input variable configurations for simulated_timeseries_df to extract data from sim_file. Each key corresponds to an input variable name, and its value is the parameter supplied to the method. Required key: has_units. Optional keys: begin_date, end_date, ref_day, ref_month, and apply_filter. Note: the optional key usecols should not be included; although no error is raised, it is ignored because the sim_col argument is used internally to define the usecols list. | required |
| `obs_file` | `str \| Path` | Path to the CSV file containing observed data. The file must include a date column (used to merge simulated and observed data) and use a comma as the separator. | required |
| `date_format` | `str` | Date format of the date column in obs_file, used to parse datetime.date objects from date strings. | required |
| `obs_col` | `str` | Name of the column in obs_file containing observed data. | required |
| `indicators` | `list[str]` | List of indicator abbreviations selected from the available options in the property indicator_names. | required |

Returns:

| Type | Description |
| --- | --- |
| `dict[str, float]` | Dictionary where each key is an indicator abbreviation, and value is the corresponding performance metric. |
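
A minimal sketch; every path and column name below is a hypothetical placeholder, not a value mandated by the API:

```python
from pySWATPlus import PerformanceMetrics

# Hypothetical paths and column names for illustration only
ind_dict = PerformanceMetrics().indicator_from_file(
    sim_file='TxtInOut/channel_sd_day.txt',  # file produced by run_swat
    sim_col='flo_out',
    extract_sim={'has_units': True},         # required key; optional keys may be added
    obs_file='data/observed.csv',            # CSV with a 'date' column
    date_format='%Y-%m-%d',
    obs_col='discharge',
    indicators=['NSE', 'KGE', 'RMSE']
)
print(ind_dict)  # e.g. {'NSE': ..., 'KGE': ..., 'RMSE': ...}
```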

scenario_indicators(sensim_file: str | pathlib.Path, df_name: str, sim_col: str, obs_file: str | pathlib.Path, date_format: str, obs_col: str, indicators: list[str], json_file: typing.Optional[str | pathlib.Path] = None) -> dict[str, typing.Any]

Compute performance metrics for sample scenarios obtained using the method simulation_by_sample_parameters.

Before computing the indicators, simulated and observed values are normalized using the formula (v - min_o) / (max_o - min_o), where min_o and max_o represent the minimum and maximum of observed values, respectively. All negative and None observed values are removed before computing min_o and max_o to prevent errors during normalization.

The method returns a dictionary with two keys:

  • problem: The definition dictionary passed to sampling.
  • indicator: A DataFrame containing the Scenario column and one column per indicator, with scenario indices and corresponding indicator values.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `sensim_file` | `str \| Path` | Path to the sensitivity_simulation.json file produced by simulation_by_sobol_sample. | required |
| `df_name` | `str` | Name of the DataFrame within sensitivity_simulation.json from which to compute scenario indicators. | required |
| `sim_col` | `str` | Name of the column in df_name containing simulated values. | required |
| `obs_file` | `str \| Path` | Path to the CSV file containing observed data. The file must include a date column (used to merge simulated and observed data) and use a comma as the separator. | required |
| `date_format` | `str` | Date format of the date column in obs_file, used to parse datetime.date objects from date strings. | required |
| `obs_col` | `str` | Name of the column in obs_file containing observed data. | required |
| `indicators` | `list[str]` | List of indicator abbreviations selected from the available options in the property indicator_names. | required |
| `json_file` | `str \| Path` | Path to a JSON file for saving the output DataFrame containing indicator values. If None (default), the DataFrame is not saved. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `dict[str, Any]` | Dictionary with two keys, problem and indicator, and their corresponding values. |
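
A minimal sketch; the file names, DataFrame name, and column names below are hypothetical placeholders:

```python
from pySWATPlus import PerformanceMetrics

# Hypothetical inputs for illustration only
output = PerformanceMetrics().scenario_indicators(
    sensim_file='sensitivity_simulation.json',  # produced by the sampling run
    df_name='channel_sd_day',
    sim_col='flo_out',
    obs_file='data/observed.csv',
    date_format='%Y-%m-%d',
    obs_col='discharge',
    indicators=['NSE', 'KGE'],
    json_file='scenario_indicators.json'        # optional; None (default) skips saving
)

print(output['problem'])    # sampling problem definition
print(output['indicator'])  # DataFrame with Scenario column and one column per indicator
```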
