Skip to content
This repository was archived by the owner on Nov 1, 2024. It is now read-only.

Commit bf43812

Browse files
wenleix authored and facebook-github-bot committed
Sync from GitHub to fb internal (#449)
Summary: Pull Request resolved: #449
Reviewed By: bearzx
Differential Revision: D37906148
Pulled By: wenleix
fbshipit-source-id: 86f9352799aec7434c0833cf927c1b56818965dd
1 parent 4bc7a7e commit bf43812

File tree

10 files changed

+98
-12
lines changed

10 files changed

+98
-12
lines changed

.github/workflows/deploy-doc.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ jobs:
2121

2222
- name: Install TorchArrow
2323
run: |
24-
pip install --pre torcharrow -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
24+
pip install torcharrow
2525
2626
- name: Build the docs
2727
run: |

.github/workflows/nightly-wheel.yml

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,22 @@ on:
99
AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY:
1010
required: true
1111

12+
# Allows you to run this workflow manually from the Actions tab
13+
workflow_dispatch:
14+
1215
jobs:
1316
linux-container:
1417
runs-on: ubuntu-latest
15-
container: prestocpp/velox-sse-velox-torcharrow:kpai-20220615
18+
container: prestocpp/velox-sse-velox-torcharrow:kpai-20220711
1619
strategy:
1720
fail-fast: false
1821
matrix:
1922
python-version:
2023
- 3.7
2124
- 3.8
2225
- 3.9
26+
- "3.10"
2327
steps:
24-
- name: Print CPU info
25-
run: cat /proc/cpuinfo
26-
2728
- name: Check out source repository
2829
uses: actions/checkout@v2
2930
with:
@@ -55,6 +56,29 @@ jobs:
5556
~/.local/bin/aws s3 cp "$pkg" "$S3_PATH" --acl public-read
5657
done
5758
59+
# We only run this part on ubuntu wheel build for python 3.7,
60+
# since we only need to deploy the doc once.
61+
- name: Install TorchArrow and build docs
62+
if: matrix.python-version == '3.7'
63+
run: |
64+
source /opt/conda/etc/profile.d/conda.sh
65+
conda activate env${{ matrix.python-version }}
66+
pip install fixed_dist/*.whl
67+
cd ./docs
68+
pip install -r requirements.txt --user
69+
PATH=${PATH}:~/.local/bin
70+
make html
71+
cd ..
72+
73+
- name: Deploy Docs on Push
74+
if: matrix.python-version == '3.7'
75+
uses: JamesIves/github-pages-deploy-action@v4.2.5
76+
with:
77+
token: ${{ secrets.GITHUB_TOKEN }}
78+
branch: gh-pages # The branch the action should deploy to.
79+
folder: docs/build/html # The folder the action should deploy.
80+
target-folder: main
81+
5882
macos-container:
5983
runs-on: macos-latest
6084
strategy:
@@ -64,6 +88,7 @@ jobs:
6488
- 3.7
6589
- 3.8
6690
- 3.9
91+
- "3.10"
6792
steps:
6893
- name: Setup Python ${{ matrix.python-version }}
6994
uses: actions/setup-python@v2

.github/workflows/rc-wheel-build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ on:
66
jobs:
77
linux-container:
88
runs-on: ubuntu-latest
9-
container: prestocpp/velox-sse-velox-torcharrow:kpai-20220524
9+
container: prestocpp/velox-sse-velox-torcharrow:kpai-20220711
1010
strategy:
1111
fail-fast: false
1212
matrix:

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,16 @@ conda create --name torcharrow python=3.7
2424
conda activate torcharrow
2525
```
2626

27+
### Version Compatibility
28+
29+
The following table lists the corresponding `torch` and `torcharrow` versions and the supported Python versions.
30+
31+
| `torch` | `torcharrow` | `python` |
32+
| ------------------ | ------------------ | ----------------- |
33+
| `main` / `nightly` | `main` / `nightly` | `>=3.7`, `<=3.10` |
34+
| `1.13.0` | `0.2.0` | `>=3.7`, `<=3.10` |
35+
36+
2737
### Colab
2838

2939
Follow the instructions [in this Colab notebook](https://colab.research.google.com/drive/1S0ldwN7qNM37E4WZnnAEnzn1DWnAQ6Vt)

csrc/velox/velox

Submodule velox updated 235 files

torcharrow/idataframe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -425,7 +425,7 @@ def any(self):
425425
# column alternating
426426
@trace
427427
@expression
428-
def drop(self, columns: List[str]):
428+
def drop(self, columns: Union[str, List[str]]):
429429
"""
430430
Returns DataFrame without the removed columns.
431431
"""

torcharrow/test/test_dataframe.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -833,6 +833,24 @@ def base_test_describe_dataframe(self):
833833
],
834834
)
835835

836+
def base_test_drop_by_str_as_columns(self):
837+
df = ta.dataframe(device=self.device)
838+
df["aa"] = [1, 2, 3]
839+
df["ab"] = [11, 22, 33]
840+
df["ac"] = [111, 222, 333]
841+
self.assertEqual(list(df.drop("aa")), [(11, 111), (22, 222), (33, 333)])
842+
self.assertEqual(list(df.drop("ab")), [(1, 111), (2, 222), (3, 333)])
843+
self.assertEqual(list(df.drop("ac")), [(1, 11), (2, 22), (3, 33)])
844+
845+
def base_test_drop_by_list_of_str_as_columns(self):
846+
df = ta.dataframe(device=self.device)
847+
df["aa"] = [1, 2, 3]
848+
df["ab"] = [11, 22, 33]
849+
df["ac"] = [111, 222, 333]
850+
self.assertEqual(list(df.drop(["aa", "ab"])), [(111,), (222,), (333,)])
851+
self.assertEqual(list(df.drop(["aa", "ac"])), [(11,), (22,), (33,)])
852+
self.assertEqual(list(df.drop(["ab", "ac"])), [(1,), (2,), (3,)])
853+
836854
def base_test_drop_keep_rename_reorder_pipe(self):
837855
df = ta.dataframe(device=self.device)
838856
df["a"] = [1, 2, 3]
@@ -895,6 +913,18 @@ def base_test_locals_and_me_equivalence(self):
895913
)
896914
self.assertEqual(list(df.select("*", d=me["a"] + me["b"])), list(gf))
897915

916+
def base_test_groupby_str(self):
917+
df = ta.dataframe(
918+
{"a": [1, 1, 2], "b": [1, 2, 3], "c": [2, 2, 1]}, device=self.device
919+
)
920+
self.assertEqual(list(df.groupby("a").size), [(1, 2), (2, 1)])
921+
922+
def base_test_groupby_list_of_str(self):
923+
df = ta.dataframe(
924+
{"a": [1, 1, 2], "b": [1, 2, 3], "c": [2, 2, 1]}, device=self.device
925+
)
926+
self.assertEqual(list(df.groupby(["a"]).size), [(1, 2), (2, 1)])
927+
898928
def base_test_groupby_size_pipe(self):
899929
df = ta.dataframe(
900930
{"a": [1, 1, 2], "b": [1, 2, 3], "c": [2, 2, 1]}, device=self.device

torcharrow/test/test_dataframe_cpu.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,12 @@ def test_isin2(self):
6464
def test_describe_dataframe(self):
6565
return self.base_test_describe_dataframe()
6666

67+
def test_drop_by_str_as_columns(self):
68+
return self.base_test_drop_by_str_as_columns()
69+
70+
def test_drop_by_list_of_str_as_columns(self):
71+
return self.base_test_drop_by_list_of_str_as_columns()
72+
6773
def test_drop_keep_rename_reorder_pipe(self):
6874
return self.base_test_drop_keep_rename_reorder_pipe()
6975

@@ -73,6 +79,12 @@ def test_me_on_str(self):
7379
def test_locals_and_me_equivalence(self):
7480
return self.base_test_locals_and_me_equivalence()
7581

82+
def test_groupby_str(self):
83+
return self.base_test_groupby_str()
84+
85+
def test_groupby_list_of_str(self):
86+
return self.base_test_groupby_list_of_str()
87+
7688
def test_groupby_size_pipe(self):
7789
return self.base_test_groupby_size_pipe()
7890

torcharrow/velox_rt/dataframe_cpu.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,9 @@ def append(self, values: Iterable[Union[None, dict, tuple]]):
308308
return self
309309

310310
def _check_columns(self, columns: Iterable[str]):
311+
if isinstance(columns, str):
312+
raise TypeError(f"columns should be Iterable of str but not str")
313+
311314
valid_names = {f.name for f in self.dtype.fields}
312315
for n in columns:
313316
if n not in valid_names:
@@ -1574,11 +1577,13 @@ def drop_null(self, how="any"):
15741577
@expression
15751578
def drop_duplicates(
15761579
self,
1577-
subset: Optional[List[str]] = None,
1580+
subset: Optional[Union[str, List[str]]] = None,
15781581
keep="first",
15791582
):
15801583
self._prototype_support_warning("drop_duplicates")
15811584

1585+
if isinstance(subset, str):
1586+
subset = [subset]
15821587
columns = subset if subset is not None else self.columns
15831588
self._check_columns(columns)
15841589

@@ -1834,7 +1839,9 @@ def describe(
18341839

18351840
@trace
18361841
@expression
1837-
def drop(self, columns: List[str]):
1842+
def drop(self, columns: Union[str, List[str]]):
1843+
if isinstance(columns, str):
1844+
columns = [columns]
18381845
self._check_columns(columns)
18391846
return self._fromdata(
18401847
{
@@ -2082,7 +2089,7 @@ def pipe(self, func, *args, **kwargs):
20822089
@expression
20832090
def groupby(
20842091
self,
2085-
by: List[str],
2092+
by: Union[str, List[str]],
20862093
sort=False,
20872094
drop_null=True,
20882095
):
@@ -2158,6 +2165,8 @@ def groupby(
21582165
# TODO implement
21592166
assert not sort
21602167
assert drop_null
2168+
if isinstance(by, str):
2169+
by = [by]
21612170
self._check_columns(by)
21622171

21632172
key_columns = by

version.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.0.5a0
1+
0.2.0a0

0 commit comments

Comments
 (0)