From 88c07266c5d5e5578defc6fe96659ec2fd023793 Mon Sep 17 00:00:00 2001 From: Llorenc Lledo Date: Wed, 6 Dec 2023 14:01:08 +0100 Subject: [PATCH 01/16] Added function _weighted_cov_corr and modified cov and corr to call it if parameter weights is not None --- xarray/core/computation.py | 80 +++++++++++++++++++++++++++++++++++--- 1 file changed, 75 insertions(+), 5 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index ed2c733d4ca..b188e060289 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1281,7 +1281,11 @@ def apply_ufunc( def cov( - da_a: T_DataArray, da_b: T_DataArray, dim: Dims = None, ddof: int = 1 + da_a: T_DataArray, + da_b: T_DataArray, + dim: Dims = None, + ddof: int = 1, + weights: T_DataArray = None, ) -> T_DataArray: """ Compute covariance between two DataArray objects along a shared dimension. @@ -1297,6 +1301,8 @@ def cov( ddof : int, default: 1 If ddof=1, covariance is normalized by N-1, giving an unbiased estimate, else normalization is by N. + weights : DataArray, default: None + Array of weights. Returns ------- @@ -1358,11 +1364,22 @@ def cov( "Only xr.DataArray is supported." f"Given {[type(arr) for arr in [da_a, da_b]]}." ) - - return _cov_corr(da_a, da_b, dim=dim, ddof=ddof, method="cov") + if weights is not None: + if not isinstance(weights, DataArray): + raise TypeError( + "Only xr.DataArray is supported." + f"Given {type(weights)}." + ) + return _weighted_cov_corr(da_a, da_b, weights=weights, dim=dim, ddof=ddof, method="cov") + else + return _cov_corr(da_a, da_b, dim=dim, ddof=ddof, method="cov") -def corr(da_a: T_DataArray, da_b: T_DataArray, dim: Dims = None) -> T_DataArray: +def corr(da_a: T_DataArray, + da_b: T_DataArray, + dim: Dims = None, + weights: T_DataArray = None, +) -> T_DataArray: """ Compute the Pearson correlation coefficient between two DataArray objects along a shared dimension. @@ -1375,6 +1392,8 @@ def corr(da_a: T_DataArray, da_b: T_DataArray, dim: Dims = None) -> T_DataArray: Array to compute. dim : str, iterable of hashable, "..." or None, optional The dimension along which the correlation will be computed + weights : DataArray, default: None + Array of weights. Returns ------- @@ -1437,7 +1456,15 @@ def corr(da_a: T_DataArray, da_b: T_DataArray, dim: Dims = None) -> T_DataArray: f"Given {[type(arr) for arr in [da_a, da_b]]}." ) - return _cov_corr(da_a, da_b, dim=dim, method="corr") + if weights is not None: + if not isinstance(weights, DataArray): + raise TypeError( + "Only xr.DataArray is supported." + f"Given {type(weights)}." + ) + return _weighted_cov_corr(da_a, da_b, weights=weights, dim=dim, method="corr") + else + return _cov_corr(da_a, da_b, dim=dim, method="corr") def _cov_corr( @@ -1481,6 +1508,49 @@ def _cov_corr( corr = cov / (da_a_std * da_b_std) return corr +def _weighted_cov_corr( + da_a: T_DataArray, + da_b: T_DataArray, + weights: T_DataArray, + dim: Dims = None, + ddof: int = 0, + method: Literal["cov", "corr", None] = None, +) -> T_DataArray: + """ + Internal method for weighted xr.cov() and xr.corr(), extending + _cov_corr() functionality. + """ + # 1. Broadcast the two arrays + da_a, da_b = align(da_a, da_b, join="inner", copy=False) + + # 2. Ignore the nans + valid_values = da_a.notnull() & da_b.notnull() + da_a = da_a.where(valid_values) + da_b = da_b.where(valid_values) + + # 3. Detrend along the given dim + demeaned_da_a = da_a - da_a.weighted(weights).mean(dim=dim) + demeaned_da_b = da_b - da_b.weighted(weights).mean(dim=dim) + + # 4. Compute covariance along the given dim + # N.B. `skipna=True` is required or auto-covariance is computed incorrectly. E.g. + # Try xr.cov(da,da) for da = xr.DataArray([[1, 2], [1, np.nan]], dims=["x", "time"]) + cov = (demeaned_da_a.conj() * demeaned_da_b).weighted(weights).mean( + dim=dim, skipna=True, min_count=1 + ) + + if method == "cov": + # Adjust covariance for degrees of freedom + valid_count = valid_values.sum(dim) + adjust = valid_count / (valid_count - ddof) + return cov * adjust + + else: + # Compute std and corr + da_a_std = da_a.weighted(weights).std(dim=dim) + da_b_std = da_b.weighted(weights).std(dim=dim) + corr = cov / (da_a_std * da_b_std) + return corr def cross( a: DataArray | Variable, b: DataArray | Variable, *, dim: Hashable From 0d2f548d0bf220d46b3202a314c6b19486b13901 Mon Sep 17 00:00:00 2001 From: Llorenc Lledo Date: Wed, 6 Dec 2023 16:55:00 +0100 Subject: [PATCH 02/16] Correct two indentation errors --- xarray/core/computation.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index b188e060289..b6b9f1dc1c2 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1366,10 +1366,10 @@ def cov( ) if weights is not None: if not isinstance(weights, DataArray): - raise TypeError( - "Only xr.DataArray is supported." - f"Given {type(weights)}." - ) + raise TypeError( + "Only xr.DataArray is supported." + f"Given {type(weights)}." + ) return _weighted_cov_corr(da_a, da_b, weights=weights, dim=dim, ddof=ddof, method="cov") else return _cov_corr(da_a, da_b, dim=dim, ddof=ddof, method="cov") @@ -1458,10 +1458,10 @@ def corr(da_a: T_DataArray, if weights is not None: if not isinstance(weights, DataArray): - raise TypeError( - "Only xr.DataArray is supported." - f"Given {type(weights)}." - ) + raise TypeError( + "Only xr.DataArray is supported." + f"Given {type(weights)}." + ) return _weighted_cov_corr(da_a, da_b, weights=weights, dim=dim, method="corr") else return _cov_corr(da_a, da_b, dim=dim, method="corr") From 31302a741b700e81d3ee2271edee7c3976e689aa Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 6 Dec 2023 16:00:24 +0000 Subject: [PATCH 03/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/computation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index b6b9f1dc1c2..f4d3c0113dc 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1376,8 +1376,8 @@ def cov( def corr(da_a: T_DataArray, - da_b: T_DataArray, - dim: Dims = None, + da_b: T_DataArray, + dim: Dims = None, weights: T_DataArray = None, ) -> T_DataArray: """ From d801d4ae5dd21fff78a0fb1f075f910bd5033efc Mon Sep 17 00:00:00 2001 From: Llorenc Lledo Date: Wed, 6 Dec 2023 17:07:31 +0100 Subject: [PATCH 04/16] Stupid typo --- xarray/core/computation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index f4d3c0113dc..5c41b546a04 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1371,7 +1371,7 @@ def cov( f"Given {type(weights)}." ) return _weighted_cov_corr(da_a, da_b, weights=weights, dim=dim, ddof=ddof, method="cov") - else + else: return _cov_corr(da_a, da_b, dim=dim, ddof=ddof, method="cov") @@ -1463,7 +1463,7 @@ def corr(da_a: T_DataArray, f"Given {type(weights)}." ) return _weighted_cov_corr(da_a, da_b, weights=weights, dim=dim, method="corr") - else + else: return _cov_corr(da_a, da_b, dim=dim, method="corr") From 042bb281f73efeae6a1f4275a0fb195c4212c45e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 6 Dec 2023 16:08:18 +0000 Subject: [PATCH 05/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/computation.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 5c41b546a04..7d629cf3938 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1366,19 +1366,19 @@ def cov( ) if weights is not None: if not isinstance(weights, DataArray): - raise TypeError( - "Only xr.DataArray is supported." - f"Given {type(weights)}." - ) - return _weighted_cov_corr(da_a, da_b, weights=weights, dim=dim, ddof=ddof, method="cov") + raise TypeError("Only xr.DataArray is supported." f"Given {type(weights)}.") + return _weighted_cov_corr( + da_a, da_b, weights=weights, dim=dim, ddof=ddof, method="cov" + ) else: return _cov_corr(da_a, da_b, dim=dim, ddof=ddof, method="cov") -def corr(da_a: T_DataArray, - da_b: T_DataArray, - dim: Dims = None, - weights: T_DataArray = None, +def corr( + da_a: T_DataArray, + da_b: T_DataArray, + dim: Dims = None, + weights: T_DataArray = None, ) -> T_DataArray: """ Compute the Pearson correlation coefficient between @@ -1458,10 +1458,7 @@ def corr(da_a: T_DataArray, if weights is not None: if not isinstance(weights, DataArray): - raise TypeError( - "Only xr.DataArray is supported." - f"Given {type(weights)}." - ) + raise TypeError("Only xr.DataArray is supported." f"Given {type(weights)}.") return _weighted_cov_corr(da_a, da_b, weights=weights, dim=dim, method="corr") else: return _cov_corr(da_a, da_b, dim=dim, method="corr") @@ -1508,6 +1505,7 @@ def _cov_corr( corr = cov / (da_a_std * da_b_std) return corr + def _weighted_cov_corr( da_a: T_DataArray, da_b: T_DataArray, @@ -1535,8 +1533,10 @@ def _weighted_cov_corr( # 4. Compute covariance along the given dim # N.B. `skipna=True` is required or auto-covariance is computed incorrectly. E.g. # Try xr.cov(da,da) for da = xr.DataArray([[1, 2], [1, np.nan]], dims=["x", "time"]) - cov = (demeaned_da_a.conj() * demeaned_da_b).weighted(weights).mean( - dim=dim, skipna=True, min_count=1 + cov = ( + (demeaned_da_a.conj() * demeaned_da_b) + .weighted(weights) + .mean(dim=dim, skipna=True, min_count=1) ) if method == "cov": @@ -1552,6 +1552,7 @@ def _weighted_cov_corr( corr = cov / (da_a_std * da_b_std) return corr + def cross( a: DataArray | Variable, b: DataArray | Variable, *, dim: Hashable ) -> DataArray | Variable: From fcf86322f84d27fa5d85e474ce045b81006024da Mon Sep 17 00:00:00 2001 From: Llorenc Lledo Date: Wed, 6 Dec 2023 17:17:02 +0100 Subject: [PATCH 06/16] Remove the min_count argument from mean --- xarray/core/computation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 5c41b546a04..0a777e05c91 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1536,7 +1536,7 @@ def _weighted_cov_corr( # N.B. `skipna=True` is required or auto-covariance is computed incorrectly. E.g. # Try xr.cov(da,da) for da = xr.DataArray([[1, 2], [1, np.nan]], dims=["x", "time"]) cov = (demeaned_da_a.conj() * demeaned_da_b).weighted(weights).mean( - dim=dim, skipna=True, min_count=1 + dim=dim, skipna=True ) if method == "cov": From d35fa9713f6a5075de6c5baa9c5e3a70e9cd45f6 Mon Sep 17 00:00:00 2001 From: Llorenc Lledo Date: Thu, 7 Dec 2023 17:31:09 +0100 Subject: [PATCH 07/16] Unified the code for weighted and unweighted _cov_corr --- xarray/core/computation.py | 50 ++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 9c6112331a0..2d0b2b7c1bb 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1367,11 +1367,7 @@ def cov( if weights is not None: if not isinstance(weights, DataArray): raise TypeError("Only xr.DataArray is supported." f"Given {type(weights)}.") - return _weighted_cov_corr( - da_a, da_b, weights=weights, dim=dim, ddof=ddof, method="cov" - ) - else: - return _cov_corr(da_a, da_b, dim=dim, ddof=ddof, method="cov") + return _cov_corr(da_a, da_b, weights=weights, dim=dim, ddof=ddof, method="cov") def corr( @@ -1455,16 +1451,13 @@ def corr( "Only xr.DataArray is supported." f"Given {[type(arr) for arr in [da_a, da_b]]}." ) - if weights is not None: if not isinstance(weights, DataArray): raise TypeError("Only xr.DataArray is supported." f"Given {type(weights)}.") - return _weighted_cov_corr(da_a, da_b, weights=weights, dim=dim, method="corr") - else: - return _cov_corr(da_a, da_b, dim=dim, method="corr") + return _cov_corr(da_a, da_b, weights=weights, dim=dim, method="corr") -def _cov_corr( +def old_cov_corr( da_a: T_DataArray, da_b: T_DataArray, dim: Dims = None, @@ -1506,17 +1499,17 @@ def _cov_corr( return corr -def _weighted_cov_corr( +def _cov_corr( da_a: T_DataArray, da_b: T_DataArray, - weights: T_DataArray, + weights: T_DataArray = None, dim: Dims = None, ddof: int = 0, method: Literal["cov", "corr", None] = None, ) -> T_DataArray: """ - Internal method for weighted xr.cov() and xr.corr(), extending - _cov_corr() functionality. + Internal method for xr.cov() and xr.corr() so only have to + sanitize the input arrays once and we don't repeat code. """ # 1. Broadcast the two arrays da_a, da_b = align(da_a, da_b, join="inner", copy=False) @@ -1527,17 +1520,24 @@ def _weighted_cov_corr( da_b = da_b.where(valid_values) # 3. Detrend along the given dim - demeaned_da_a = da_a - da_a.weighted(weights).mean(dim=dim) - demeaned_da_b = da_b - da_b.weighted(weights).mean(dim=dim) + if weights is not None: + demeaned_da_a = da_a - da_a.weighted(weights).mean(dim=dim) + demeaned_da_b = da_b - da_b.weighted(weights).mean(dim=dim) + else: + demeaned_da_a = da_a - da_a.mean(dim=dim) + demeaned_da_b = da_b - da_b.mean(dim=dim) # 4. Compute covariance along the given dim # N.B. `skipna=True` is required or auto-covariance is computed incorrectly. E.g. # Try xr.cov(da,da) for da = xr.DataArray([[1, 2], [1, np.nan]], dims=["x", "time"]) - cov = ( - (demeaned_da_a.conj() * demeaned_da_b) - .weighted(weights) - .mean(dim=dim, skipna=True) - ) + if weights is not None: + cov = ( + (demeaned_da_a.conj() * demeaned_da_b) + .weighted(weights) + .mean(dim=dim, skipna=True) + ) + else: + cov = (demeaned_da_a.conj() * demeaned_da_b).mean(dim=dim, skipna=True) if method == "cov": # Adjust covariance for degrees of freedom @@ -1547,8 +1547,12 @@ def _weighted_cov_corr( else: # Compute std and corr - da_a_std = da_a.weighted(weights).std(dim=dim) - da_b_std = da_b.weighted(weights).std(dim=dim) + if weights is not None: + da_a_std = da_a.weighted(weights).std(dim=dim) + da_b_std = da_b.weighted(weights).std(dim=dim) + else: + da_a_std = da_a.std(dim=dim) + da_b_std = da_b.std(dim=dim) corr = cov / (da_a_std * da_b_std) return corr From 7c646a778ce0b90540d39a746ee4fda6e0301e95 Mon Sep 17 00:00:00 2001 From: Llorenc Lledo Date: Fri, 8 Dec 2023 11:01:24 +0100 Subject: [PATCH 08/16] Remove old _cov_corr function after checking that new version produces same results when weights=None or weights=xr.DataArray(1) --- xarray/core/computation.py | 42 -------------------------------------- 1 file changed, 42 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 2d0b2b7c1bb..adc435b39aa 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1457,48 +1457,6 @@ def corr( return _cov_corr(da_a, da_b, weights=weights, dim=dim, method="corr") -def old_cov_corr( - da_a: T_DataArray, - da_b: T_DataArray, - dim: Dims = None, - ddof: int = 0, - method: Literal["cov", "corr", None] = None, -) -> T_DataArray: - """ - Internal method for xr.cov() and xr.corr() so only have to - sanitize the input arrays once and we don't repeat code. - """ - # 1. Broadcast the two arrays - da_a, da_b = align(da_a, da_b, join="inner", copy=False) - - # 2. Ignore the nans - valid_values = da_a.notnull() & da_b.notnull() - da_a = da_a.where(valid_values) - da_b = da_b.where(valid_values) - valid_count = valid_values.sum(dim) - ddof - - # 3. Detrend along the given dim - demeaned_da_a = da_a - da_a.mean(dim=dim) - demeaned_da_b = da_b - da_b.mean(dim=dim) - - # 4. Compute covariance along the given dim - # N.B. `skipna=True` is required or auto-covariance is computed incorrectly. E.g. - # Try xr.cov(da,da) for da = xr.DataArray([[1, 2], [1, np.nan]], dims=["x", "time"]) - cov = (demeaned_da_a.conj() * demeaned_da_b).sum( - dim=dim, skipna=True, min_count=1 - ) / (valid_count) - - if method == "cov": - return cov - - else: - # compute std + corr - da_a_std = da_a.std(dim=dim) - da_b_std = da_b.std(dim=dim) - corr = cov / (da_a_std * da_b_std) - return corr - - def _cov_corr( da_a: T_DataArray, da_b: T_DataArray, From 20936e667931872c31fc6d5a3f9b7cab171c98cc Mon Sep 17 00:00:00 2001 From: Llorenc Lledo Date: Fri, 8 Dec 2023 11:38:03 +0100 Subject: [PATCH 09/16] Added examples that use weights for cov and corr --- xarray/core/computation.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index adc435b39aa..730a7e9b2e0 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1356,6 +1356,23 @@ def cov( array([ 0.2 , -0.5 , 1.69333333]) Coordinates: * space (space) >> weights = DataArray( + ... [4, 2, 1], + ... dims=("space"), + ... coords=[ + ... ("space", ["IA", "IL", "IN"]), + ... ], + ... ) + >>> weights + + array([4, 2, 1]) + Coordinates: + * space (space) >> xr.cov(da_a, da_b, dim="space", weights=weights) + + array([-4.69346939, -4.49632653, -3.37959184]) + Coordinates: + * time (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 """ from xarray.core.dataarray import DataArray @@ -1443,6 +1460,23 @@ def corr( array([ 1., -1., 1.]) Coordinates: * space (space) >> weights = DataArray( + ... [4, 2, 1], + ... dims=("space"), + ... coords=[ + ... ("space", ["IA", "IL", "IN"]), + ... ], + ... ) + >>> weights + + array([4, 2, 1]) + Coordinates: + * space (space) >> xr.corr(da_a, da_b, dim="space", weights=weights) + + array([-0.50240504, -0.83215028, -0.99057446]) + Coordinates: + * time (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 """ from xarray.core.dataarray import DataArray From 3ce7d6e4ad56097ab3c8edd7edcd1fa8d27cdbd3 Mon Sep 17 00:00:00 2001 From: Llorenc Lledo Date: Fri, 8 Dec 2023 19:43:25 +0100 Subject: [PATCH 10/16] Added two tests for weighted correlation and covariance --- xarray/tests/test_computation.py | 91 ++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 396507652c6..73d7c71f86d 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1775,6 +1775,97 @@ def test_complex_cov() -> None: assert abs(actual.item()) == 2 +@pytest.mark.parametrize("weighted", [True, False]) +def test_bilinear_cov_corr(weighted: bool) -> None: + # Test the bilinear properties of covariance and correlation + da = xr.DataArray( + np.random.random((3, 21, 4)), + coords={"time": pd.date_range("2000-01-01", freq="1D", periods=21)}, + dims=("a", "time", "x"), + ) + db = xr.DataArray( + np.random.random((3, 21, 4)), + coords={"time": pd.date_range("2000-01-01", freq="1D", periods=21)}, + dims=("a", "time", "x"), + ) + dc = xr.DataArray( + np.random.random((3, 21, 4)), + coords={"time": pd.date_range("2000-01-01", freq="1D", periods=21)}, + dims=("a", "time", "x"), + ) + if weighted: + weights = xr.DataArray( + np.abs(np.random.random(4)), + dims=("x"), + ) + else: + weights = None + k = np.random.random(1)[0] + + # Test covariance properties + assert_allclose( + xr.cov(da + k, db, weights=weights), xr.cov(da, db, weights=weights) + ) + assert_allclose( + xr.cov(da, db + k, weights=weights), xr.cov(da, db, weights=weights) + ) + assert_allclose( + xr.cov(da + dc, db, weights=weights), + xr.cov(da, db, weights=weights) + xr.cov(dc, db, weights=weights), + ) + assert_allclose( + xr.cov(da, db + dc, weights=weights), + xr.cov(da, db, weights=weights) + xr.cov(da, dc, weights=weights), + ) + assert_allclose( + xr.cov(k * da, db, weights=weights), k * xr.cov(da, db, weights=weights) + ) + assert_allclose( + xr.cov(da, k * db, weights=weights), k * xr.cov(da, db, weights=weights) + ) + + # Test correlation properties + assert_allclose( + xr.corr(da + k, db, weights=weights), xr.corr(da, db, weights=weights) + ) + assert_allclose( + xr.corr(da, db + k, weights=weights), xr.corr(da, db, weights=weights) + ) + assert_allclose( + xr.corr(k * da, db, weights=weights), xr.corr(da, db, weights=weights) + ) + assert_allclose( + xr.corr(da, k * db, weights=weights), xr.corr(da, db, weights=weights) + ) + + +def test_equally_weighted_cov_corr() -> None: + # Test that equal weights for all values produces same results as weights=None + da = xr.DataArray( + np.random.random((3, 21, 4)), + coords={"time": pd.date_range("2000-01-01", freq="1D", periods=21)}, + dims=("a", "time", "x"), + ) + db = xr.DataArray( + np.random.random((3, 21, 4)), + coords={"time": pd.date_range("2000-01-01", freq="1D", periods=21)}, + dims=("a", "time", "x"), + ) + # + assert_allclose( + xr.cov(da, db, weights=None), xr.cov(da, db, weights=xr.DataArray(1)) + ) + assert_allclose( + xr.cov(da, db, weights=None), xr.cov(da, db, weights=xr.DataArray(2)) + ) + assert_allclose( + xr.corr(da, db, weights=None), xr.corr(da, db, weights=xr.DataArray(1)) + ) + assert_allclose( + xr.corr(da, db, weights=None), xr.corr(da, db, weights=xr.DataArray(2)) + ) + + @requires_dask def test_vectorize_dask_new_output_dims() -> None: # regression test for GH3574 From f7bc41a66e8adec6ea06871b50b587554fff7c10 Mon Sep 17 00:00:00 2001 From: Llorenc Lledo Date: Fri, 8 Dec 2023 20:42:12 +0100 Subject: [PATCH 11/16] Fix error in mypy, allow None as weights type. --- xarray/core/computation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 730a7e9b2e0..e1cdc5e16d5 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1285,7 +1285,7 @@ def cov( da_b: T_DataArray, dim: Dims = None, ddof: int = 1, - weights: T_DataArray = None, + weights: T_DataArray | None = None, ) -> T_DataArray: """ Compute covariance between two DataArray objects along a shared dimension. @@ -1391,7 +1391,7 @@ def corr( da_a: T_DataArray, da_b: T_DataArray, dim: Dims = None, - weights: T_DataArray = None, + weights: T_DataArray | None = None, ) -> T_DataArray: """ Compute the Pearson correlation coefficient between @@ -1494,7 +1494,7 @@ def corr( def _cov_corr( da_a: T_DataArray, da_b: T_DataArray, - weights: T_DataArray = None, + weights: T_DataArray | None = None, dim: Dims = None, ddof: int = 0, method: Literal["cov", "corr", None] = None, From d61c1abb95c9f217e86292de50519fad0c31bf6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lloren=C3=A7=20Lled=C3=B3?= <89184300+lluritu@users.noreply.github.com> Date: Mon, 11 Dec 2023 13:38:04 +0100 Subject: [PATCH 12/16] Update xarray/core/computation.py Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- xarray/core/computation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index e1cdc5e16d5..9d4862504c9 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1301,7 +1301,7 @@ def cov( ddof : int, default: 1 If ddof=1, covariance is normalized by N-1, giving an unbiased estimate, else normalization is by N. - weights : DataArray, default: None + weights : DataArray, optional Array of weights. Returns From db1c5c0e39bfd699daf77259b37c10506cdb8fde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lloren=C3=A7=20Lled=C3=B3?= <89184300+lluritu@users.noreply.github.com> Date: Mon, 11 Dec 2023 13:38:16 +0100 Subject: [PATCH 13/16] Update xarray/core/computation.py Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- xarray/core/computation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 9d4862504c9..f582465a203 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1405,7 +1405,7 @@ def corr( Array to compute. dim : str, iterable of hashable, "..." or None, optional The dimension along which the correlation will be computed - weights : DataArray, default: None + weights : DataArray, optional Array of weights. Returns From b5694a115e85142a94be9148433430af4ce44cbd Mon Sep 17 00:00:00 2001 From: Llorenc Lledo Date: Mon, 11 Dec 2023 13:51:10 +0100 Subject: [PATCH 14/16] Info on new options for cov and corr in whatsnew --- doc/whats-new.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 7e99bc6a14e..1f78c4459dd 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -23,7 +23,8 @@ v2023.12.1 (unreleased) New Features ~~~~~~~~~~~~ - +- :py:meth:`xr.cov` and :py:meth:`xr.corr` now support using weights (:issue:`8527`, :pull:`7392`). + By `Llorenç Lledó `_. Breaking changes ~~~~~~~~~~~~~~~~ From f63acfa50d86989d5394a82749ae861e4d11d239 Mon Sep 17 00:00:00 2001 From: Llorenc Lledo Date: Mon, 11 Dec 2023 13:52:42 +0100 Subject: [PATCH 15/16] Info on new options for cov and corr in whatsnew --- doc/whats-new.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1f78c4459dd..0274f5822db 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -23,6 +23,7 @@ v2023.12.1 (unreleased) New Features ~~~~~~~~~~~~ + - :py:meth:`xr.cov` and :py:meth:`xr.corr` now support using weights (:issue:`8527`, :pull:`7392`). By `Llorenç Lledó `_. From 71cfac8941f885d42b2ba5699279f0e82fad284e Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Fri, 8 Dec 2023 12:10:45 -0800 Subject: [PATCH 16/16] Fix typing --- xarray/core/computation.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index f582465a203..c6c7ef97e42 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -9,7 +9,7 @@ import warnings from collections import Counter from collections.abc import Hashable, Iterable, Iterator, Mapping, Sequence, Set -from typing import TYPE_CHECKING, Any, Callable, Literal, TypeVar, Union, overload +from typing import TYPE_CHECKING, Any, Callable, Literal, TypeVar, Union, cast, overload import numpy as np @@ -1535,7 +1535,10 @@ def _cov_corr( # Adjust covariance for degrees of freedom valid_count = valid_values.sum(dim) adjust = valid_count / (valid_count - ddof) - return cov * adjust + # I think the cast is required because of `T_DataArray` + `T_Xarray` (would be + # the same with `T_DatasetOrArray`) + # https://github.com/pydata/xarray/pull/8384#issuecomment-1784228026 + return cast(T_DataArray, cov * adjust) else: # Compute std and corr @@ -1546,7 +1549,7 @@ def _cov_corr( da_a_std = da_a.std(dim=dim) da_b_std = da_b.std(dim=dim) corr = cov / (da_a_std * da_b_std) - return corr + return cast(T_DataArray, corr) def cross(