Merge branch 'issue-59652' of https://github.com/eicchen/pandas into issue-59652

eicchen · eicchen · commit 60471c27de42 · 2025-10-19T02:45:42.000-05:00
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
@@ -345,7 +345,8 @@ def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None):
         float64_t[:, ::1] result
         uint8_t[:, :] mask
         int64_t nobs = 0
-        float64_t vx, vy, dx, dy, meanx, meany, divisor, ssqdmx, ssqdmy, covxy, val
+        float64_t xx, xy, meanx, meany, divisor, number, v1sq, v2sq, val
+        float64_t* vx, vy
 
     N, K = (<object>mat).shape
     if minp is None:
@@ -357,40 +358,55 @@ def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None):
     mask = np.isfinite(mat).view(np.uint8)
 
     with nogil:
-        for xi in range(K):
-            for yi in range(xi + 1):
+        # for xi in range(K):
+        #     for yi in range(xi + 1):
                 # Welford's method for the variance-calculation
                 # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
-                nobs = ssqdmx = ssqdmy = covxy = meanx = meany = 0
-                for i in range(N):
-                    if mask[i, xi] and mask[i, yi]:
-                        vx = mat[i, xi]
-                        vy = mat[i, yi]
-                        nobs += 1
-                        dx = vx - meanx
-                        dy = vy - meany
-                        meanx += 1. / nobs * dx
-                        meany += 1. / nobs * dy
-                        ssqdmx += (vx - meanx) * dx
-                        ssqdmy += (vy - meany) * dy
-                        covxy += (vx - meanx) * dy
-
-                if nobs < minpv:
-                    result[xi, yi] = result[yi, xi] = NaN
-                else:
-                    divisor = (nobs - 1.0) if cov else sqrt(ssqdmx * ssqdmy)
-
-                    # clip `covxy / divisor` to ensure coeff is within bounds
-                    if divisor != 0:
-                        val = covxy / divisor
-                        if not cov:
-                            if val > 1.0:
-                                val = 1.0
-                            elif val < -1.0:
-                                val = -1.0
-                        result[xi, yi] = result[yi, xi] = val
-                    else:
-                        result[xi, yi] = result[yi, xi] = NaN
+        nobs = v1sq = v2sq = covxy = meanx = meany = 0
+        for i in range(N):
+            vx = ptr(mat[i,:])
+            vy = mat[i,:]
+            meanx = vx.mean()
+            meany = vy.mean()
+            for j in range(vx):
+                xx = (vx[j]- meanx)
+                yy = (vy[j]- meany)
+                nobs += 1
+                number += xx*yy
+                v1sq += xx*xx
+                v2sq += yy*yy
+            val = number/sqrt(v1sq * v2sq)
+            print(val)
+
+        # for i in range(N):
+        #     if mask[i, xi] and mask[i, yi]:
+        #         vx = mat[i, xi]
+        #         vy = mat[i, yi]
+        #         nobs += 1
+        #         dx = vx - meanx
+        #         dy = vy - meany
+        #         meanx += 1. / nobs * dx
+        #         meany += 1. / nobs * dy
+        #         ssqdmx += (vx - meanx) * dx
+        #         ssqdmy += (vy - meany) * dy
+        #         covxy += (vx - meanx) * dy
+
+        # if nobs < minpv:
+        #     result[xi, yi] = result[yi, xi] = NaN
+        # else:
+        #     divisor = (nobs - 1.0) if cov else sqrt(ssqdmx * ssqdmy)
+
+        #     # clip `covxy / divisor` to ensure coeff is within bounds
+        #     if divisor != 0:
+        #         val = covxy / divisor
+        #         if not cov:
+        #             if val > 1.0:
+        #                 val = 1.0
+        #             elif val < -1.0:
+        #                 val = -1.0
+        #         result[xi, yi] = result[yi, xi] = val
+        #     else:
+        #         result[xi, yi] = result[yi, xi] = NaN
 
     return result.base
 
diff --git a/test.py b/test.py
@@ -0,0 +1,42 @@
+import pandas as pd
+
+values = [  # only rows 1 and 14 carry numbers; every other row is None/None
+    {"col1": 30.0, "col2": 116.80000305175781},
+    {"col1": None, "col2": None},
+    {"col1": None, "col2": None},
+    {"col1": None, "col2": None},
+    {"col1": None, "col2": None},
+    {"col1": None, "col2": None},
+    {"col1": None, "col2": None},
+    {"col1": None, "col2": None},
+    {"col1": None, "col2": None},
+    {"col1": None, "col2": None},
+    {"col1": None, "col2": None},
+    {"col1": None, "col2": None},
+    {"col1": None, "col2": None},
+    {"col1": 30.100000381469727, "col2": 116.8000030517578},
+    {"col1": None, "col2": None},
+    {"col1": None, "col2": None},
+]
+
+data = pd.DataFrame(values)
+print(data.corr(method="pearson"))
+# Hand-written Pearson correlation, kept commented out as a reference to compare against:
+# def corr_coef(X,Y):
+#     x1 = np.array(X)
+#     y1 = np.array(Y)
+#     x_m=x1.mean()
+#     y_m=y1.mean()
+#     number=0
+#     v1sq=0
+#     v2sq=0
+#     for i in range(len(x1)):
+#         xx = (x1[i]-x_m)
+#         yy = (y1[i]-y_m)
+#         number+=xx*yy
+#         v1sq+=xx*xx
+#         v2sq+=yy*yy
+#     return(number/(math.sqrt(v1sq*v2sq)))
+# NOTE(review): re-enabling the helper above also requires importing `numpy as np` and `math`.
+data = data.dropna()
+# print(corr_coef(data.iloc[:,0],data.iloc[:,1]))