Skip to content

Commit 60471c2

Browse files
committed
Merge branch 'issue-59652' of https://github.com/eicchen/pandas into issue-59652
2 parents e20b045 + 0823950 commit 60471c2

File tree

2 files changed

+91
-33
lines changed

2 files changed

+91
-33
lines changed

pandas/_libs/algos.pyx

Lines changed: 49 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,8 @@ def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None):
345345
float64_t[:, ::1] result
346346
uint8_t[:, :] mask
347347
int64_t nobs = 0
348-
float64_t vx, vy, dx, dy, meanx, meany, divisor, ssqdmx, ssqdmy, covxy, val
348+
float64_t xx, xy, meanx, meany, divisor, number, v1sq, v2sq, val
349+
float64_t* vx, vy
349350

350351
N, K = (<object>mat).shape
351352
if minp is None:
@@ -357,40 +358,55 @@ def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None):
357358
mask = np.isfinite(mat).view(np.uint8)
358359

359360
with nogil:
360-
for xi in range(K):
361-
for yi in range(xi + 1):
361+
# for xi in range(K):
362+
# for yi in range(xi + 1):
362363
# Welford's method for the variance-calculation
363364
# https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
364-
nobs = ssqdmx = ssqdmy = covxy = meanx = meany = 0
365-
for i in range(N):
366-
if mask[i, xi] and mask[i, yi]:
367-
vx = mat[i, xi]
368-
vy = mat[i, yi]
369-
nobs += 1
370-
dx = vx - meanx
371-
dy = vy - meany
372-
meanx += 1. / nobs * dx
373-
meany += 1. / nobs * dy
374-
ssqdmx += (vx - meanx) * dx
375-
ssqdmy += (vy - meany) * dy
376-
covxy += (vx - meanx) * dy
377-
378-
if nobs < minpv:
379-
result[xi, yi] = result[yi, xi] = NaN
380-
else:
381-
divisor = (nobs - 1.0) if cov else sqrt(ssqdmx * ssqdmy)
382-
383-
# clip `covxy / divisor` to ensure coeff is within bounds
384-
if divisor != 0:
385-
val = covxy / divisor
386-
if not cov:
387-
if val > 1.0:
388-
val = 1.0
389-
elif val < -1.0:
390-
val = -1.0
391-
result[xi, yi] = result[yi, xi] = val
392-
else:
393-
result[xi, yi] = result[yi, xi] = NaN
365+
nobs = v1sq = v2sq = covxy = meanx = meany = 0
366+
for i in range(N):
367+
vx = ptr(mat[i,:])
368+
vy = mat[i,:]
369+
meanx = vx.mean()
370+
meany = vy.mean()
371+
for j in range(vx):
372+
xx = (vx[j]- meanx)
373+
yy = (vy[j]- meany)
374+
nobs += 1
375+
number += xx*yy
376+
v1sq += xx*xx
377+
v2sq += yy*yy
378+
val = number/sqrt(v1sq * v2sq)
379+
print(val)
380+
381+
# for i in range(N):
382+
# if mask[i, xi] and mask[i, yi]:
383+
# vx = mat[i, xi]
384+
# vy = mat[i, yi]
385+
# nobs += 1
386+
# dx = vx - meanx
387+
# dy = vy - meany
388+
# meanx += 1. / nobs * dx
389+
# meany += 1. / nobs * dy
390+
# ssqdmx += (vx - meanx) * dx
391+
# ssqdmy += (vy - meany) * dy
392+
# covxy += (vx - meanx) * dy
393+
394+
# if nobs < minpv:
395+
# result[xi, yi] = result[yi, xi] = NaN
396+
# else:esult[xi, yi] = result[yi, xi] = val
397+
# divisor = (nobs - 1.0) if cov else sqrt(ssqdmx * ssqdmy)
398+
399+
# # clip `covxy / divisor` to ensure coeff is within bounds
400+
# if divisor != 0:
401+
# val = covxy / divisor
402+
# if not cov:
403+
# if val > 1.0:
404+
# val = 1.0
405+
# elif val < -1.0:
406+
# val = -1.0
407+
# result[xi, yi] = result[yi, xi] = val
408+
# else:
409+
# result[xi, yi] = result[yi, xi] = NaN
394410

395411
return result.base
396412

test.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import pandas as pd
2+
3+
values = [
4+
{"col1": 30.0, "col2": 116.80000305175781},
5+
{"col1": None, "col2": None},
6+
{"col1": None, "col2": None},
7+
{"col1": None, "col2": None},
8+
{"col1": None, "col2": None},
9+
{"col1": None, "col2": None},
10+
{"col1": None, "col2": None},
11+
{"col1": None, "col2": None},
12+
{"col1": None, "col2": None},
13+
{"col1": None, "col2": None},
14+
{"col1": None, "col2": None},
15+
{"col1": None, "col2": None},
16+
{"col1": None, "col2": None},
17+
{"col1": 30.100000381469727, "col2": 116.8000030517578},
18+
{"col1": None, "col2": None},
19+
{"col1": None, "col2": None},
20+
]
21+
22+
data = pd.DataFrame(values)
23+
print(data.corr(method="pearson"))
24+
25+
# def corr_coef(X,Y):
26+
# x1 = np.array(X)
27+
# y1 = np.array(Y)
28+
# x_m=x1.mean()
29+
# y_m=y1.mean()
30+
# number=0
31+
# v1sq=0
32+
# v2sq=0
33+
# for i in range(len(x1)):
34+
# xx = (x1[i]-x_m)
35+
# yy = (y1[i]-y_m)
36+
# number+=xx*yy
37+
# v1sq+=xx*xx
38+
# v2sq+=yy*yy
39+
# return(number/(math.sqrt(v1sq*v2sq)))
40+
41+
data = data.dropna()
42+
# print(corr_coef(data.iloc[:,0],data.iloc[:,1]))

0 commit comments

Comments
 (0)