PerformanceMetrics

Provide functionality to compute errors between simulated and observed values.

Source code in pySWATPlus/performance_metrics.py
class PerformanceMetrics:
    '''
    Provide functionality to compute errors between simulated and observed values.
    '''

    @property
    def indicator_names(
        self
    ) -> dict[str, str]:
        '''
        Return a dictionary of available indicators. Keys are the commonly used abbreviations,
        and values are the corresponding full indicator names.
        '''

        abbr_name = {
            'NSE': 'Nash-Sutcliffe Efficiency',
            'KGE': 'Kling-Gupta Efficiency',
            'MSE': 'Mean Squared Error',
            'RMSE': 'Root Mean Squared Error',
            'PBIAS': 'Percent Bias',
            'MARE': 'Mean Absolute Relative Error'
        }

        return abbr_name

    def compute_nse(
        self,
        df: pandas.DataFrame,
        sim_col: str,
        obs_col: str
    ) -> float:
        '''
        Calculate the [`Nash-Sutcliffe Efficiency`](https://doi.org/10.1016/0022-1694(70)90255-6)
        metric between simulated and observed values.

        Args:
            df (pandas.DataFrame): DataFrame containing at least two columns with simulated and observed values.

            sim_col (str): Name of the column containing simulated values.

            obs_col (str): Name of the column containing observed values.
        '''

        # Check input variables type
        validators._variable_origin_static_type(
            vars_types=typing.get_type_hints(
                obj=self.compute_nse
            ),
            vars_values=locals()
        )

        # Simulation values
        sim_arr = df[sim_col].astype(float)

        # Observed values
        obs_arr = df[obs_col].astype(float)

        # Output
        numerator = ((sim_arr - obs_arr).pow(2)).sum()
        denominator = ((obs_arr - obs_arr.mean()).pow(2)).sum()
        output = float(1 - numerator / denominator)

        return output

    def compute_kge(
        self,
        df: pandas.DataFrame,
        sim_col: str,
        obs_col: str
    ) -> float:
        '''
        Calculate the [`Kling-Gupta Efficiency`](https://doi.org/10.1016/j.jhydrol.2009.08.003)
        metric between simulated and observed values.

        Args:
            df (pandas.DataFrame): DataFrame containing at least two columns with simulated and observed values.

            sim_col (str): Name of the column containing simulated values.

            obs_col (str): Name of the column containing observed values.
        '''

        # Check input variables type
        validators._variable_origin_static_type(
            vars_types=typing.get_type_hints(
                obj=self.compute_kge
            ),
            vars_values=locals()
        )

        # Simulation values
        sim_arr = df[sim_col].astype(float)

        # Observed values
        obs_arr = df[obs_col].astype(float)

        # Pearson correlation coefficient (r)
        r = sim_arr.corr(obs_arr)

        # Variability of prediction errors
        alpha = sim_arr.std() / obs_arr.std()

        # Bias
        beta = sim_arr.mean() / obs_arr.mean()

        # Output
        output = float(1 - pow(pow(r - 1, 2) + pow(alpha - 1, 2) + pow(beta - 1, 2), 0.5))

        return output

    def compute_mse(
        self,
        df: pandas.DataFrame,
        sim_col: str,
        obs_col: str
    ) -> float:
        '''
        Calculate the `Mean Squared Error` metric between simulated and observed values.

        Args:
            df (pandas.DataFrame): DataFrame containing at least two columns with simulated and observed values.

            sim_col (str): Name of the column containing simulated values.

            obs_col (str): Name of the column containing observed values.
        '''

        # Check input variables type
        validators._variable_origin_static_type(
            vars_types=typing.get_type_hints(
                obj=self.compute_mse
            ),
            vars_values=locals()
        )

        # Simulation values
        sim_arr = df[sim_col].astype(float)

        # Observed values
        obs_arr = df[obs_col].astype(float)

        # Output
        output = float(((sim_arr - obs_arr).pow(2)).mean())

        return output

    def compute_rmse(
        self,
        df: pandas.DataFrame,
        sim_col: str,
        obs_col: str
    ) -> float:
        '''
        Calculate the `Root Mean Squared Error` metric between simulated and observed values.

        Args:
            df (pandas.DataFrame): DataFrame containing at least two columns with simulated and observed values.

            sim_col (str): Name of the column containing simulated values.

            obs_col (str): Name of the column containing observed values.
        '''

        # Check input variables type
        validators._variable_origin_static_type(
            vars_types=typing.get_type_hints(
                obj=self.compute_rmse
            ),
            vars_values=locals()
        )

        # Compute MSE value
        mse_value = self.compute_mse(
            df=df,
            sim_col=sim_col,
            obs_col=obs_col
        )

        # Output
        output = float(pow(mse_value, 0.5))

        return output

    def compute_pbias(
        self,
        df: pandas.DataFrame,
        sim_col: str,
        obs_col: str
    ) -> float:
        '''
        Calculate the `Percent Bias` metric between simulated and observed values.

        Args:
            df (pandas.DataFrame): DataFrame containing at least two columns with simulated and observed values.

            sim_col (str): Name of the column containing simulated values.

            obs_col (str): Name of the column containing observed values.
        '''

        # Check input variables type
        validators._variable_origin_static_type(
            vars_types=typing.get_type_hints(
                obj=self.compute_pbias
            ),
            vars_values=locals()
        )

        # Simulation values
        sim_arr = df[sim_col].astype(float)

        # Observed values
        obs_arr = df[obs_col].astype(float)

        # Output
        output = float(100 * (sim_arr - obs_arr).sum() / obs_arr.sum())

        return output

    def compute_mare(
        self,
        df: pandas.DataFrame,
        sim_col: str,
        obs_col: str
    ) -> float:
        '''
        Calculate the `Mean Absolute Relative Error` metric between simulated and observed values.

        Args:
            df (pandas.DataFrame): DataFrame containing at least two columns with simulated and observed values.

            sim_col (str): Name of the column containing simulated values.

            obs_col (str): Name of the column containing observed values.
        '''

        # Check input variables type
        validators._variable_origin_static_type(
            vars_types=typing.get_type_hints(
                obj=self.compute_mare
            ),
            vars_values=locals()
        )

        # Simulation values
        sim_arr = df[sim_col].astype(float)

        # Observed values
        obs_arr = df[obs_col].astype(float)

        # Output
        output = float(((obs_arr - sim_arr) / obs_arr).abs().mean())

        return output

    def scenario_indicators(
        self,
        sensim_file: str | pathlib.Path,
        df_name: str,
        sim_col: str,
        obs_file: str | pathlib.Path,
        date_format: str,
        obs_col: str,
        indicators: list[str],
        json_file: typing.Optional[str | pathlib.Path] = None
    ) -> dict[str, typing.Any]:
        '''
        Compute performance indicators for sample scenarios obtained using the method
        [`simulation_by_sample_parameters`](https://swat-model.github.io/pySWATPlus/api/sensitivity_analyzer/#pySWATPlus.SensitivityAnalyzer.simulation_by_sample_parameters).

        Before computing the indicators, simulated and observed values are normalized using the formula `(v - min_o) / (max_o - min_o)`,
        where `min_o` and `max_o` represent the minimum and maximum of observed values, respectively.

        The method returns a dictionary with two keys:

        - `problem`: The definition dictionary passed to sampling.
        - `indicator`: A `DataFrame` containing the `Scenario` column and one column per indicator,
          with scenario indices and corresponding indicator values.

        Note:
            All negative and `None` observed values are removed before computing `min_o` and `max_o` to prevent errors during normalization.

        Args:
            sensim_file (str | pathlib.Path): Path to the `sensitivity_simulation.json` file produced by `simulation_by_sobol_sample`.

            df_name (str): Name of the `DataFrame` within `sensitivity_simulation.json` from which to compute scenario indicators.

            sim_col (str): Name of the column in `df_name` containing simulated values.

            obs_file (str | pathlib.Path): Path to the CSV file containing observed data. The file must include a
                `date` column (used to merge simulated and observed data) and use a comma as the separator.

            date_format (str): Date format of the `date` column in `obs_file`, used to parse `datetime.date` objects from date strings.

            obs_col (str): Name of the column in `obs_file` containing observed data.

            indicators (list[str]): List of performance indicators to compute. Available options:

                - `NSE`: Nash–Sutcliffe Efficiency
                - `KGE`: Kling–Gupta Efficiency
                - `MSE`: Mean Squared Error
                - `RMSE`: Root Mean Squared Error
                - `PBIAS`: Percent Bias
                - `MARE`: Mean Absolute Relative Error

            json_file (str | pathlib.Path, optional): Path to a JSON file for saving the output `DataFrame` containing indicator values.
                If `None` (default), the `DataFrame` is not saved.

        Returns:
            Dictionary with two keys, `problem` and `indicator`, and their corresponding values.
        '''

        # Check input variables type
        validators._variable_origin_static_type(
            vars_types=typing.get_type_hints(
                obj=self.scenario_indicators
            ),
            vars_values=locals()
        )

        # Check valid name of metric
        abbr_indicator = self.indicator_names
        for indicator in indicators:
            if indicator not in abbr_indicator:
                raise ValueError(
                    f'Invalid name "{indicator}" in "indicators" list; expected names are {list(abbr_indicator.keys())}'
                )

        # Observed DataFrame
        obs_df = utils._df_observe(
            obs_file=pathlib.Path(obs_file).resolve(),
            date_format=date_format,
            obs_col=obs_col
        )
        obs_df.columns = ['date', 'obs']

        # Retrieve sensitivity output
        sensitivity_sim = utils._sensitivity_output_retrieval(
            sensim_file=pathlib.Path(sensim_file).resolve(),
            df_name=df_name,
            add_problem=True,
            add_sample=False
        )

        # Empty DataFrame to store scenario indicators
        ind_df = pandas.DataFrame(
            columns=indicators
        )

        # Iterate scenarios
        for key, df in sensitivity_sim['scenario'].items():
            df = df[['date', sim_col]].copy()  # copy to avoid SettingWithCopyWarning on column rename
            df.columns = ['date', 'sim']
            # Merge scenario DataFrame with observed DataFrame
            merge_df = df.merge(
                right=obs_df.copy(),
                how='inner',
                on='date'
            )
            # Normalized DataFrame
            norm_df = utils._df_normalize(
                df=merge_df[['sim', 'obs']],
                norm_col='obs'
            )
            # Iterate indicators
            for indicator in indicators:
                # Method from indicator abbreviation
                indicator_method = getattr(
                    self,
                    f'compute_{indicator.lower()}'
                )
                # Indicator value
                key_indicator = indicator_method(
                    df=norm_df,
                    sim_col='sim',
                    obs_col='obs'
                )
                # Store indicator value in DataFrame
                ind_df.loc[key, indicator] = key_indicator

        # Reset index to scenario column
        scnro_col = 'Scenario'
        ind_df = ind_df.reset_index(
            names=[scnro_col]
        )
        ind_df[scnro_col] = ind_df[scnro_col].astype(int)

        # Save DataFrame
        if json_file is not None:
            json_file = pathlib.Path(json_file).resolve()
            # Raise error for invalid JSON file extension
            validators._json_extension(
                json_file=json_file
            )
            # Write DataFrame to the JSON file
            ind_df.to_json(
                path_or_buf=json_file,
                orient='records',
                indent=4
            )

        # Output dictionary
        output = {
            'problem': sensitivity_sim['problem'],
            'indicator': ind_df
        }

        return output

indicator_names: dict[str, str] property

Return a dictionary of available indicators. Keys are the commonly used abbreviations, and values are the corresponding full indicator names.
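
A minimal usage sketch (assuming `PerformanceMetrics` is importable directly from the `pySWATPlus` package):

```python
from pySWATPlus import PerformanceMetrics

metrics = PerformanceMetrics()

# Abbreviation -> full indicator name
for abbr, name in metrics.indicator_names.items():
    print(f'{abbr}: {name}')
# NSE: Nash-Sutcliffe Efficiency
# KGE: Kling-Gupta Efficiency
# ...
```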

compute_kge(df: pandas.DataFrame, sim_col: str, obs_col: str) -> float

Calculate the [`Kling-Gupta Efficiency`](https://doi.org/10.1016/j.jhydrol.2009.08.003) metric between simulated and observed values.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `df` | `DataFrame` | DataFrame containing at least two columns with simulated and observed values. | *required* |
| `sim_col` | `str` | Name of the column containing simulated values. | *required* |
| `obs_col` | `str` | Name of the column containing observed values. | *required* |
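
From the source above, `KGE = 1 - √((r - 1)² + (α - 1)² + (β - 1)²)`, where `r` is the Pearson correlation between simulated and observed values, `α` the ratio of their standard deviations, and `β` the ratio of their means. A minimal sketch on toy data (column names are illustrative):

```python
import pandas

from pySWATPlus import PerformanceMetrics

# Toy data: a near-perfect simulation of the observed series
df = pandas.DataFrame({
    'sim': [1.1, 2.0, 2.9, 4.2],
    'obs': [1.0, 2.0, 3.0, 4.0]
})

# KGE approaches 1 as r, alpha, and beta all approach 1
kge = PerformanceMetrics().compute_kge(df=df, sim_col='sim', obs_col='obs')
print(kge)
```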

compute_mare(df: pandas.DataFrame, sim_col: str, obs_col: str) -> float

Calculate the `Mean Absolute Relative Error` metric between simulated and observed values.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `df` | `DataFrame` | DataFrame containing at least two columns with simulated and observed values. | *required* |
| `sim_col` | `str` | Name of the column containing simulated values. | *required* |
| `obs_col` | `str` | Name of the column containing observed values. | *required* |
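
MARE averages the observation-relative absolute error, `mean(|obs - sim| / obs)`, so observed values of zero would cause division by zero. A toy sketch (illustrative data):

```python
import pandas

from pySWATPlus import PerformanceMetrics

df = pandas.DataFrame({'sim': [9.0, 21.0], 'obs': [10.0, 20.0]})

# MARE = mean(|10 - 9| / 10, |20 - 21| / 20) = mean(0.1, 0.05) = 0.075
mare = PerformanceMetrics().compute_mare(df=df, sim_col='sim', obs_col='obs')
print(mare)  # 0.075
```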
Source code in pySWATPlus/performance_metrics.py
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
def compute_mare(
    self,
    df: pandas.DataFrame,
    sim_col: str,
    obs_col: str
) -> float:
    '''
    Calculate the `Mean Absolute Relative Error` metric between simulated and observed values

    Args:
        df (pandas.DataFrame): DataFrame containing at least two columns with simulated and observed values.

        sim_col (str): Name of the column containing simulated values.

        obs_col (str): Name of the column containing observed values.
    '''

    # Check input variables type
    validators._variable_origin_static_type(
        vars_types=typing.get_type_hints(
            obj=self.compute_mare
        ),
        vars_values=locals()
    )

    # Simulation values
    sim_arr = df[sim_col].astype(float)

    # Observed values
    obs_arr = df[obs_col].astype(float)

    # Output
    output = float(((obs_arr - sim_arr) / obs_arr).abs().mean())

    return output

compute_mse(df: pandas.DataFrame, sim_col: str, obs_col: str) -> float

Calculate the `Mean Squared Error` metric between simulated and observed values.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `df` | `DataFrame` | DataFrame containing at least two columns with simulated and observed values. | *required* |
| `sim_col` | `str` | Name of the column containing simulated values. | *required* |
| `obs_col` | `str` | Name of the column containing observed values. | *required* |
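
MSE is the mean of squared differences, `mean((sim - obs)²)`. A short worked sketch (illustrative data):

```python
import pandas

from pySWATPlus import PerformanceMetrics

df = pandas.DataFrame({'sim': [1.0, 2.0, 4.0], 'obs': [1.0, 2.0, 3.0]})

# MSE = (0² + 0² + 1²) / 3 ≈ 0.333
mse = PerformanceMetrics().compute_mse(df=df, sim_col='sim', obs_col='obs')
print(round(mse, 3))  # 0.333
```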
Source code in pySWATPlus/performance_metrics.py
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
def compute_mse(
    self,
    df: pandas.DataFrame,
    sim_col: str,
    obs_col: str
) -> float:
    '''
    Calculate the `Mean Squared Error` metric between simulated and observed values

    Args:
        df (pandas.DataFrame): DataFrame containing at least two columns with simulated and observed values.

        sim_col (str): Name of the column containing simulated values.

        obs_col (str): Name of the column containing observed values.
    '''

    # Check input variables type
    validators._variable_origin_static_type(
        vars_types=typing.get_type_hints(
            obj=self.compute_mse
        ),
        vars_values=locals()
    )

    # Simulation values
    sim_arr = df[sim_col].astype(float)

    # Observed values
    obs_arr = df[obs_col].astype(float)

    # Output
    output = float(((sim_arr - obs_arr).pow(2)).mean())

    return output

compute_nse(df: pandas.DataFrame, sim_col: str, obs_col: str) -> float

Calculate the [`Nash-Sutcliffe Efficiency`](https://doi.org/10.1016/0022-1694(70)90255-6) metric between simulated and observed values.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `df` | `DataFrame` | DataFrame containing at least two columns with simulated and observed values. | *required* |
| `sim_col` | `str` | Name of the column containing simulated values. | *required* |
| `obs_col` | `str` | Name of the column containing observed values. | *required* |
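
NSE compares squared model errors to the variance of the observations: `NSE = 1 - Σ(sim - obs)² / Σ(obs - mean(obs))²`. A value of 1 indicates a perfect fit; 0 means the model is no better than predicting the observed mean. A minimal sketch (illustrative data):

```python
import pandas

from pySWATPlus import PerformanceMetrics

pm = PerformanceMetrics()

# Perfect agreement yields NSE = 1
df = pandas.DataFrame({'sim': [1.0, 2.0, 3.0], 'obs': [1.0, 2.0, 3.0]})
print(pm.compute_nse(df=df, sim_col='sim', obs_col='obs'))  # 1.0
```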
Source code in pySWATPlus/performance_metrics.py
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def compute_nse(
    self,
    df: pandas.DataFrame,
    sim_col: str,
    obs_col: str
) -> float:
    '''
    Calculate the [`Nash-Sutcliffe Efficiency`](https://doi.org/10.1016/0022-1694(70)90255-6)
    metric between simulated and observed values

    Args:
        df (pandas.DataFrame): DataFrame containing at least two columns with simulated and observed values.

        sim_col (str): Name of the column containing simulated values.

        obs_col (str): Name of the column containing observed values.
    '''

    # Check input variables type
    validators._variable_origin_static_type(
        vars_types=typing.get_type_hints(
            obj=self.compute_nse
        ),
        vars_values=locals()
    )

    # Simulation values
    sim_arr = df[sim_col].astype(float)

    # Observed values
    obs_arr = df[obs_col].astype(float)

    # Output
    numerator = ((sim_arr - obs_arr).pow(2)).sum()
    denominator = ((obs_arr - obs_arr.mean()).pow(2)).sum()
    output = float(1 - numerator / denominator)

    return output

compute_pbias(df: pandas.DataFrame, sim_col: str, obs_col: str) -> float

Calculate the `Percent Bias` metric between simulated and observed values.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `df` | `DataFrame` | DataFrame containing at least two columns with simulated and observed values. | *required* |
| `sim_col` | `str` | Name of the column containing simulated values. | *required* |
| `obs_col` | `str` | Name of the column containing observed values. | *required* |
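
In this implementation PBIAS is `100 · Σ(sim - obs) / Σ(obs)`, so positive values indicate overestimation (some texts define it with the opposite sign). A toy sketch (illustrative data):

```python
import pandas

from pySWATPlus import PerformanceMetrics

df = pandas.DataFrame({'sim': [11.0, 22.0], 'obs': [10.0, 20.0]})

# PBIAS = 100 * (1 + 2) / (10 + 20) = 10.0 (simulation overestimates by 10%)
pbias = PerformanceMetrics().compute_pbias(df=df, sim_col='sim', obs_col='obs')
print(pbias)  # 10.0
```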
Source code in pySWATPlus/performance_metrics.py
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
def compute_pbias(
    self,
    df: pandas.DataFrame,
    sim_col: str,
    obs_col: str
) -> float:
    '''
    Calculate the `Percent Bias` metric between simulated and observed values.

    Args:
        df (pandas.DataFrame): DataFrame containing at least two columns with simulated and observed values.

        sim_col (str): Name of the column containing simulated values.

        obs_col (str): Name of the column containing observed values.
    '''

    # Check input variables type
    validators._variable_origin_static_type(
        vars_types=typing.get_type_hints(
            obj=self.compute_pbias
        ),
        vars_values=locals()
    )

    # Simulation values
    sim_arr = df[sim_col].astype(float)

    # Observed values
    obs_arr = df[obs_col].astype(float)

    # Output
    output = float(100 * (sim_arr - obs_arr).sum() / obs_arr.sum())

    return output

compute_rmse(df: pandas.DataFrame, sim_col: str, obs_col: str) -> float

Calculate the `Root Mean Squared Error` metric between simulated and observed values.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `df` | `DataFrame` | DataFrame containing at least two columns with simulated and observed values. | *required* |
| `sim_col` | `str` | Name of the column containing simulated values. | *required* |
| `obs_col` | `str` | Name of the column containing observed values. | *required* |
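
RMSE is the square root of MSE, which restores the units of the data. A brief sketch (illustrative data):

```python
import pandas

from pySWATPlus import PerformanceMetrics

df = pandas.DataFrame({'sim': [1.0, 5.0], 'obs': [2.0, 2.0]})

# MSE = (1 + 9) / 2 = 5, so RMSE = √5 ≈ 2.236
rmse = PerformanceMetrics().compute_rmse(df=df, sim_col='sim', obs_col='obs')
print(round(rmse, 3))  # 2.236
```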
Source code in pySWATPlus/performance_metrics.py
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
def compute_rmse(
    self,
    df: pandas.DataFrame,
    sim_col: str,
    obs_col: str
) -> float:
    '''
    Calculate the `Root Mean Squared Error` metric between simulated and observed values.

    Args:
        df (pandas.DataFrame): DataFrame containing at least two columns with simulated and observed values.

        sim_col (str): Name of the column containing simulated values.

        obs_col (str): Name of the column containing observed values.
    '''

    # Check input variables type
    validators._variable_origin_static_type(
        vars_types=typing.get_type_hints(
            obj=self.compute_rmse
        ),
        vars_values=locals()
    )

    # computer MSE error
    mse_value = self.compute_mse(
        df=df,
        sim_col=sim_col,
        obs_col=obs_col
    )

    # Output
    output = float(pow(mse_value, 0.5))

    return output

scenario_indicators(sensim_file: str | pathlib.Path, df_name: str, sim_col: str, obs_file: str | pathlib.Path, date_format: str, obs_col: str, indicators: list[str], json_file: typing.Optional[str | pathlib.Path] = None) -> dict[str, typing.Any]

Compute performance indicators for sample scenarios obtained using the method [`simulation_by_sample_parameters`](https://swat-model.github.io/pySWATPlus/api/sensitivity_analyzer/#pySWATPlus.SensitivityAnalyzer.simulation_by_sample_parameters).

Before computing the indicators, simulated and observed values are normalized using the formula `(v - min_o) / (max_o - min_o)`, where `min_o` and `max_o` represent the minimum and maximum of observed values, respectively.

The method returns a dictionary with two keys:

- `problem`: The definition dictionary passed to sampling.
- `indicator`: A `DataFrame` containing the `Scenario` column and one column per indicator, with scenario indices and corresponding indicator values.

Note: All negative and `None` observed values are removed before computing `min_o` and `max_o` to prevent errors during normalization.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `sensim_file` | `str \| Path` | Path to the `sensitivity_simulation.json` file produced by `simulation_by_sobol_sample`. | *required* |
| `df_name` | `str` | Name of the `DataFrame` within `sensitivity_simulation.json` from which to compute scenario indicators. | *required* |
| `sim_col` | `str` | Name of the column in `df_name` containing simulated values. | *required* |
| `obs_file` | `str \| Path` | Path to the CSV file containing observed data. The file must include a `date` column (used to merge simulated and observed data) and use a comma as the separator. | *required* |
| `date_format` | `str` | Date format of the `date` column in `obs_file`, used to parse `datetime.date` objects from date strings. | *required* |
| `obs_col` | `str` | Name of the column in `obs_file` containing observed data. | *required* |
| `indicators` | `list[str]` | List of performance indicators to compute. Available options: `NSE` (Nash–Sutcliffe Efficiency), `KGE` (Kling–Gupta Efficiency), `MSE` (Mean Squared Error), `RMSE` (Root Mean Squared Error), `PBIAS` (Percent Bias), `MARE` (Mean Absolute Relative Error). | *required* |
| `json_file` | `str \| Path` | Path to a JSON file for saving the output `DataFrame` containing indicator values. If `None` (default), the `DataFrame` is not saved. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `dict[str, Any]` | Dictionary with two keys, `problem` and `indicator`, and their corresponding values. |
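
A hedged end-to-end sketch; the file paths, `df_name`, and column names below are illustrative placeholders, not values taken from this page:

```python
from pySWATPlus import PerformanceMetrics

metrics = PerformanceMetrics()

# All paths and names below are hypothetical placeholders
result = metrics.scenario_indicators(
    sensim_file='sensitivity_simulation.json',  # output of simulation_by_sample_parameters
    df_name='channel_sd_day',                   # hypothetical DataFrame name
    sim_col='flo_out',                          # hypothetical simulated column
    obs_file='observed_flow.csv',               # hypothetical observed CSV with a 'date' column
    date_format='%Y-%m-%d',
    obs_col='flow',                             # hypothetical observed column
    indicators=['NSE', 'KGE', 'RMSE'],
    json_file='scenario_indicators.json'        # optional; omit to skip saving
)

print(result['indicator'].head())  # 'Scenario' column plus one column per indicator
```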
