Skip to content

Commit 3a09c42

Browse files
authored
Merge pull request #509 from sjsrey/encoding
Remove sklearn dependency
2 parents ad1afcb + caf385e commit 3a09c42

File tree

2 files changed

+62
-3
lines changed

2 files changed

+62
-3
lines changed

libpysal/weights/tests/test_weights.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
import unittest
55
import pytest
6-
from ..weights import W, WSP
6+
from ..weights import W, WSP, _LabelEncoder
77
from .. import util
88
from ..util import WSP2W, lat2W
99
from ..contiguity import Rook
@@ -724,6 +724,15 @@ def test_from_WSP(self):
724724
self.assertEqual(w.n, 100)
725725
self.assertEqual(w.pct_nonzero, 4.62)
726726

727+
def test_LabelEncoder(self):
    """Check the fitted classes and the integer codes of ``_LabelEncoder``."""
    # The same label sequence is used for fitting and for transforming.
    states = ["NY", "CA", "NY", "CA", "TX", "TX"]

    encoder = _LabelEncoder()
    encoder.fit(states)

    # Fitted classes are the distinct labels in sorted order.
    expected_classes = np.array(["CA", "NY", "TX"])
    np.testing.assert_equal(encoder.classes_, expected_classes)

    # Each label is encoded as its position within the fitted classes.
    expected_codes = np.array([1, 0, 1, 0, 2, 2])
    np.testing.assert_equal(encoder.transform(states), expected_codes)
735+
727736

728737
if __name__ == "__main__":
729738
unittest.main()

libpysal/weights/weights.py

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@
1010
import numpy as np
1111
import scipy.sparse
1212
from scipy.sparse.csgraph import connected_components
13-
from sklearn import preprocessing
1413
from collections import defaultdict
1514

15+
1616
# from .util import full, WSP2W resolve import cycle by
1717
# forcing these into methods
1818
from . import adjtools
@@ -21,6 +21,56 @@
2121
__all__ = ["W", "WSP"]
2222

2323

24+
class _LabelEncoder(object):
25+
"""Encode labels with values between 0 and n_classes-1.
26+
27+
Attributes
28+
----------
29+
classes_: array of shape [n_classes]
30+
Class labels for each index.
31+
32+
Examples
33+
--------
34+
>>> le = _LabelEncoder()
35+
>>> le.fit(["NY", "CA", "NY", "CA", "TX", "TX"])
36+
>>> le.classes_
37+
array(['CA', 'NY', 'TX'])
38+
>>> le.transform(["NY", "CA", "NY", "CA", "TX", "TX"])
39+
array([1, 0, 1, 0, 2, 2])
40+
"""
41+
42+
def fit(self, y):
43+
"""Fit label encoder.
44+
45+
Parameters
46+
----------
47+
y : list
48+
list of labels
49+
50+
Returns
51+
-------
52+
self : instance of self.
53+
Fitted label encoder.
54+
"""
55+
self.classes_ = np.unique(y)
56+
return self
57+
58+
def transform(self, y):
59+
"""Transform labels to normalized encoding.
60+
61+
Parameters
62+
----------
63+
y : list
64+
list of labels
65+
66+
Returns
67+
-------
68+
y : array
69+
array of normalized labels.
70+
"""
71+
return np.searchsorted(self.classes_, y)
72+
73+
2474
class W(object):
2575
"""
2676
Spatial weights class. Class attributes are described by their
@@ -505,7 +555,7 @@ def to_sparse(self, fmt="coo"):
505555
data = adj_list.weight
506556
row = adj_list.focal
507557
col = adj_list.neighbor
508-
le = preprocessing.LabelEncoder()
558+
le = _LabelEncoder()
509559
le.fit(row)
510560
row = le.transform(row)
511561
col = le.transform(col)

0 commit comments

Comments
 (0)