Skip to content

Commit f9c1090

Browse files
committed
download second page
1 parent 2d916d1 commit f9c1090

File tree

2 files changed

+4
-2
lines changed

2 files changed

+4
-2
lines changed

2025/national-jukebox/download_first_page.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@
27 27   DATA_DIR = pathlib.Path(__file__).parent / "data"
28 28
29 29
30    - target_url = "https://www.loc.gov/collections/national-jukebox/?sb=date_desc&c=100"
   30 + # target_url = "https://www.loc.gov/collections/national-jukebox/?sb=date_desc&c=100"
   31 + target_url = "https://www.loc.gov/collections/national-jukebox/?c=100&sb=date_desc&sp=2"
31 32   item_urls = list_urls.get_national_jukebox_song_detail_urls(target_url)
32 33
33 34
2025/national-jukebox/download_mp3s.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ def download_mp3(base_url):
42 42   jukebox_path = DATA_DIR / "jukebox.jsonl"
43 43   jukebox = pandas.read_json(jukebox_path, lines=True, orient="records")
44 44
45    - for _, row in jukebox.iterrows():
   45 + # for _, row in jukebox.iterrows():
   46 + for _, row in jukebox.iloc[100:].iterrows():
46 47       jukebox_id = row["URL"].split("/")[-2]
47 48       mp3_path = (DATA_DIR / jukebox_id).with_suffix(".mp3")
48 49       if mp3_path.exists():

0 commit comments

Comments
 (0)