Skip to content

Commit 30e7046

Browse files
authored
Merge pull request #172 from meta-pytorch/env-hub-job
Add automated Hugging Face collection manager
2 parents 1af82ff + bfe622f commit 30e7046

File tree

4 files changed

+787
-0
lines changed

4 files changed

+787
-0
lines changed
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Runs scripts/manage_hf_collection.py on a schedule, on changes to the script
# or this workflow, and on demand.
name: Update HF Collection

on:
  # Run nightly at midnight UTC
  schedule:
    - cron: '0 0 * * *'

  # Trigger when the management script or this workflow file is updated
  push:
    branches:
      - main
    paths:
      - 'scripts/manage_hf_collection.py'
      - '.github/workflows/manage-hf-collection.yml'

  # Allow manual triggering
  workflow_dispatch:

jobs:
  update-collection:
    runs-on: ubuntu-latest
    permissions:
      contents: read

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install dependencies
        # The requirement specifier must be quoted. Unquoted, the shell parses
        # `>=0.20.0` as an output redirection to a file named "=0.20.0" and pip
        # installs huggingface-hub without any version constraint.
        run: |
          pip install "huggingface-hub>=0.20.0"

      - name: Run collection manager
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          python scripts/manage_hf_collection.py

      - name: Summary
        if: success()
        run: |
          echo "## Collection Updated ✅" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "Successfully ran the collection manager." >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "**Details:**" >> "$GITHUB_STEP_SUMMARY"
          echo "- Scheduled to run daily at midnight UTC" >> "$GITHUB_STEP_SUMMARY"
          echo "- Task: Discover and add openenv-tagged Docker Spaces" >> "$GITHUB_STEP_SUMMARY"
          echo "- Collection: [openenv/environment-hub](https://huggingface.co/collections/openenv/environment-hub-68f16377abea1ea114fa0743)" >> "$GITHUB_STEP_SUMMARY"

scripts/manage_hf_collection.py

Lines changed: 296 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,296 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Hugging Face Collection Manager for OpenEnv
4+
5+
This script automatically discovers Docker Spaces tagged with 'openenv' on Hugging Face
6+
and adds them to the Environment Hub collection if they're not already present.
7+
8+
Usage:
9+
python scripts/manage_hf_collection.py [--dry-run] [--verbose]
10+
11+
Environment Variables:
12+
HF_TOKEN: Required. Your Hugging Face API token with write access to collections.
13+
"""
14+
15+
import argparse
16+
import logging
17+
import os
18+
import sys
19+
from typing import Set, List
20+
from huggingface_hub import HfApi, list_spaces
21+
from huggingface_hub.utils import HfHubHTTPError
22+
23+
24+
# --- Script configuration -------------------------------------------------
# Slug of the collection that is read and extended by this script.
COLLECTION_SLUG = "openenv/environment-hub-68f16377abea1ea114fa0743"
# Term used both for the Hub search and for the per-space tag check.
TAG_FILTER = "openenv"
# SDK type of the Spaces this script targets.
SPACE_TYPE = "docker"

# --- Logging --------------------------------------------------------------
# Timestamped, level-prefixed records at INFO and above by default.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)
36+
37+
38+
def setup_api() -> HfApi:
    """
    Build a Hugging Face API client from the HF_TOKEN environment variable.

    The token is checked immediately with a ``whoami()`` call so that a bad
    token aborts the run before any other request is made.

    Returns:
        HfApi: Client constructed with the token from the environment

    Raises:
        SystemExit: If HF_TOKEN is unset/empty or the ``whoami()`` check fails
    """
    token = os.environ.get("HF_TOKEN")

    if not token:
        for message in (
            "HF_TOKEN environment variable is not set!",
            "Please set it with: export HF_TOKEN=your_token_here",
        ):
            logger.error(message)
        sys.exit(1)

    logger.info("Authenticating with Hugging Face...")
    client = HfApi(token=token)

    try:
        identity = client.whoami()
        logger.info(f"✓ Authenticated as: {identity['name']}")
    except Exception as exc:
        # Any failure here (network, invalid token, unexpected payload) is fatal.
        logger.error(f"Failed to authenticate with Hugging Face: {exc}")
        sys.exit(1)

    return client
66+
67+
68+
def get_collection_spaces(api: HfApi) -> Set[str]:
    """
    Fetch the collection and return the IDs of the Spaces it contains.

    Items of any other type in the collection are ignored.

    Args:
        api: Authenticated HfApi client

    Returns:
        Set of space IDs (in format "owner/space-name")

    Raises:
        SystemExit: If the collection cannot be fetched for any reason
    """
    logger.info(f"Fetching current collection: {COLLECTION_SLUG}")

    try:
        collection = api.get_collection(COLLECTION_SLUG)

        # Keep only the entries that are Spaces.
        members = {
            entry.item_id
            for entry in collection.items
            if entry.item_type == "space"
        }

        logger.info(f"✓ Found {len(members)} spaces in collection")
        return members

    except HfHubHTTPError as exc:
        # A 404 gets a dedicated hint; every other HTTP error is reported as-is.
        if exc.response.status_code == 404:
            logger.error(f"Collection not found: {COLLECTION_SLUG}")
            logger.error("Please ensure the collection exists and you have access to it")
        else:
            logger.error(f"Error fetching collection: {exc}")
        sys.exit(1)
    except Exception as exc:
        logger.error(f"Unexpected error fetching collection: {exc}")
        sys.exit(1)
102+
103+
104+
def discover_openenv_spaces(api: HfApi) -> List[str]:
    """
    Find Docker Spaces that carry the 'openenv' tag and are not in a runtime error.

    Candidates come from a Hub search for TAG_FILTER. Each candidate's full
    info is then fetched, and the space is kept only when:
      * its SDK equals SPACE_TYPE,
      * TAG_FILTER is one of its tags, and
      * its runtime stage is not "RUNTIME_ERROR" (a space with no runtime
        information is kept, since it is not known to be broken).

    Spaces whose info cannot be fetched are logged and skipped.

    Args:
        api: Authenticated HfApi client

    Returns:
        List of space IDs (in format "owner/space-name"), in search order

    Raises:
        SystemExit: If listing the candidate spaces fails
    """
    logger.info(f"Searching for Docker Spaces with tag '{TAG_FILTER}'...")

    try:
        # List candidates through the client that was passed in, so the same
        # credentials are used for listing and for the per-space lookups.
        # NOTE(review): `search=` is a text search, not a tag query; spaces
        # that have the tag but do not match the search term may be missed.
        # Consider `filter=TAG_FILTER` - confirm against the Hub API docs.
        candidates = api.list_spaces(
            search=TAG_FILTER,
            full=False,
            sort="trending_score",
            direction=-1,
        )

        docker_spaces_with_tag = []
        for space in candidates:
            # Only the fetch is guarded: a failure here really is "could not
            # fetch", whereas errors in the checks below must not be
            # misreported as fetch failures.
            try:
                space_info = api.space_info(space.id)
            except Exception as e:
                logger.warning(f"Could not fetch info for space {space.id}: {e}")
                continue

            # Any of these may be missing or None on the returned info object.
            sdk = getattr(space_info, 'sdk', None)
            tags = getattr(space_info, 'tags', None) or []
            runtime = getattr(space_info, 'runtime', None)
            stage = getattr(runtime, 'stage', None)

            if sdk == SPACE_TYPE and TAG_FILTER in tags and stage != "RUNTIME_ERROR":
                docker_spaces_with_tag.append(space.id)

        logger.info(f"✓ Discovered {len(docker_spaces_with_tag)} Docker spaces with tag '{TAG_FILTER}'")

        return docker_spaces_with_tag

    except Exception as e:
        logger.error(f"Error discovering spaces: {e}")
        sys.exit(1)
149+
150+
151+
def add_spaces_to_collection(
    api: HfApi,
    space_ids: List[str],
    dry_run: bool = False
) -> int:
    """
    Add each given Space to the Environment Hub collection.

    Failures for individual spaces are logged and counted but do not stop
    the loop. A 409 response is treated as "already present": it is logged
    as a warning and counted neither as added nor as failed.

    Args:
        api: Authenticated HfApi client
        space_ids: List of space IDs to add
        dry_run: If True, only log what would be added without calling the API

    Returns:
        Number of spaces added (or that would be added in dry-run mode)
    """
    if not space_ids:
        logger.info("No new spaces to add")
        return 0

    successes = 0
    failures = 0

    for sid in space_ids:
        if dry_run:
            logger.info(f"[DRY RUN] Would add space: {sid}")
            successes += 1
            continue

        try:
            logger.info(f"Adding space to collection: {sid}")
            api.add_collection_item(
                collection_slug=COLLECTION_SLUG,
                item_id=sid,
                item_type="space",
            )
            logger.info(f"✓ Successfully added: {sid}")
            successes += 1
        except HfHubHTTPError as err:
            if err.response.status_code == 409:
                # Space already in collection (race condition)
                logger.warning(f"Space already in collection: {sid}")
            else:
                logger.error(f"Failed to add {sid}: {err}")
                failures += 1
        except Exception as err:
            logger.error(f"Unexpected error adding {sid}: {err}")
            failures += 1

    if failures > 0:
        logger.warning(f"Failed to add {failures} spaces")

    return successes
203+
204+
205+
def main():
    """
    Command-line entry point.

    Parses --dry-run / --verbose, authenticates, reads the current collection,
    discovers candidate spaces, and adds the ones that are not yet in the
    collection (or only reports them in dry-run mode).
    """
    parser = argparse.ArgumentParser(
        description="Manage Hugging Face Environment Hub collection for OpenEnv spaces",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Run in dry-run mode to preview changes
python scripts/manage_hf_collection.py --dry-run --verbose

# Run for real to add spaces to collection
python scripts/manage_hf_collection.py

# View verbose output
python scripts/manage_hf_collection.py --verbose

Environment Variables:
HF_TOKEN: Required. Your Hugging Face API token.
""",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Preview changes without modifying the collection",
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Enable verbose logging output",
    )
    args = parser.parse_args()

    banner = "=" * 60

    # --verbose lowers this module's logger to DEBUG.
    if args.verbose:
        logger.setLevel(logging.DEBUG)
        logger.debug("Verbose logging enabled")

    if args.dry_run:
        logger.info(banner)
        logger.info("DRY RUN MODE - No changes will be made")
        logger.info(banner)

    # Step 1: authenticated client
    api = setup_api()

    # Step 2: what the collection already holds
    current_spaces = get_collection_spaces(api)
    logger.debug("Current spaces in collection: %s", sorted(current_spaces))

    # Step 3: what exists on the Hub
    discovered_spaces = discover_openenv_spaces(api)
    logger.debug("Discovered spaces: %s", sorted(discovered_spaces))

    # Step 4: keep discovery order, drop anything already in the collection
    new_spaces = [sid for sid in discovered_spaces if sid not in current_spaces]

    logger.info(banner)
    logger.info("Summary:")
    logger.info(f"  Total spaces in collection: {len(current_spaces)}")
    logger.info(f"  Total spaces discovered: {len(discovered_spaces)}")
    logger.info(f"  New spaces to add: {len(new_spaces)}")
    logger.info(banner)

    if new_spaces:
        logger.info("New spaces found:")
        for sid in new_spaces:
            logger.info(f"  - {sid}")

    # Step 5: apply (or simulate) the additions
    added_count = add_spaces_to_collection(api, new_spaces, dry_run=args.dry_run)

    # Final summary
    logger.info(banner)
    if args.dry_run:
        logger.info(f"[DRY RUN] Would add {added_count} new spaces to collection")
    else:
        logger.info(f"✓ Successfully added {added_count} new spaces to collection")
    logger.info(banner)

    logger.info(f"Collection URL: https://huggingface.co/collections/{COLLECTION_SLUG}")


if __name__ == "__main__":
    main()
296+

tests/scripts/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
"""
2+
Tests for scripts in the scripts/ directory.
3+
"""
4+

0 commit comments

Comments
 (0)