Skip to content

Commit 044516f

Browse files
Clean and organize run index code (#1090)
* Create entrypoint for cli and api (#1067) * Add cli and api entrypoints for update index * Semver * Update docs * Run tests on feature branch main * Better /main handling in tests * Clean and organize run index code * Ruff fix * Pyright fix * Format fixes * Pyright fix * Format * Fix integ tests * Fix ruff * Reorganize and clean up
1 parent 2d45ece commit 044516f

20 files changed

+742
-488
lines changed

.github/workflows/python-ci.yml

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
name: Python CI
22
on:
33
push:
4-
branches: [main]
4+
branches:
5+
- "**/main" # Matches branches like feature/main
6+
- "main" # Matches the main branch
57
pull_request:
6-
branches: [main]
8+
branches:
9+
- "**/main"
10+
- "main"
711

812
permissions:
913
contents: read
@@ -72,4 +76,4 @@ jobs:
7276
7377
- name: Unit Test
7478
run: |
75-
poetry run poe test_unit
79+
poetry run poe test_unit

.github/workflows/python-integration-tests.yml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
name: Python Integration Tests
22
on:
33
push:
4-
branches: [main]
4+
branches:
5+
- "**/main" # Matches branches like feature/main
6+
- "main" # Matches the main branch
57
pull_request:
6-
branches: [main]
8+
branches:
9+
- "**/main"
10+
- "main"
711

812
permissions:
913
contents: read

.github/workflows/python-notebook-tests.yml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
name: Python Notebook Tests
22
on:
33
push:
4-
branches: [main]
4+
branches:
5+
- "**/main" # Matches branches like feature/main
6+
- "main" # Matches the main branch
57
pull_request:
6-
branches: [main]
8+
branches:
9+
- "**/main"
10+
- "main"
711

812
permissions:
913
contents: read
@@ -64,7 +68,6 @@ jobs:
6468
poetry run python -m pip install gensim
6569
poetry install
6670
67-
6871
- name: Notebook Test
6972
run: |
7073
poetry run poe test_notebook

.github/workflows/python-smoke-tests.yml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
name: Python Smoke Tests
22
on:
33
push:
4-
branches: [main]
4+
branches:
5+
- "**/main" # Matches branches like feature/main
6+
- "main" # Matches the main branch
57
pull_request:
6-
branches: [main]
8+
branches:
9+
- "**/main"
10+
- "main"
711

812
permissions:
913
contents: read

.github/workflows/spellcheck.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ on:
44
branches: [main]
55
pull_request:
66
paths:
7-
- '**/*'
7+
- "**/*"
88
jobs:
99
spellcheck:
1010
runs-on: ubuntu-latest
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"type": "patch",
3+
"description": "Add entrypoints for incremental indexing"
4+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"type": "patch",
3+
"description": "Clean up and organize run index code"
4+
}

graphrag/index/__main__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,12 +68,25 @@
6868
help="Skip any preflight validation. Useful when running no LLM steps.",
6969
action="store_true",
7070
)
71+
parser.add_argument(
72+
"--update-index",
73+
help="Update a given index run id, leveraging previous outputs and applying new indexes.",
74+
# Only required if config is not defined
75+
required=False,
76+
default=None,
77+
type=str,
78+
)
7179
args = parser.parse_args()
7280

81+
if args.resume and args.update_index:
82+
msg = "Cannot resume and update a run at the same time."
83+
raise ValueError(msg)
84+
7385
index_cli(
7486
root_dir=args.root,
7587
verbose=args.verbose or False,
7688
resume=args.resume,
89+
update_index_id=args.update_index,
7790
memprofile=args.memprofile or False,
7891
nocache=args.nocache or False,
7992
reporter=args.reporter,

graphrag/index/api.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88
Backwards compatibility is not guaranteed at this time.
99
"""
1010

11-
from pathlib import Path
12-
1311
from graphrag.config import CacheType, GraphRagConfig
1412

1513
from .cache.noop_pipeline_cache import NoopPipelineCache
@@ -24,8 +22,10 @@
2422

2523
async def build_index(
2624
config: GraphRagConfig,
27-
run_id: str,
28-
memory_profile: bool,
25+
run_id: str = "",
26+
is_resume_run: bool = False,
27+
is_update_run: bool = False,
28+
memory_profile: bool = False,
2929
progress_reporter: ProgressReporter | None = None,
3030
emit: list[str] | None = None,
3131
) -> list[PipelineRunResult]:
@@ -37,6 +37,10 @@ async def build_index(
3737
The configuration.
3838
run_id : str
3939
The run id. Creates an output directory with this name.
40+
is_resume_run : bool default=False
41+
Whether to resume a previous index run.
42+
is_update_run : bool default=False
43+
Whether to update a previous index run.
4044
memory_profile : bool
4145
Whether to enable memory profiling.
4246
progress_reporter : ProgressReporter | None default=None
@@ -50,7 +54,10 @@ async def build_index(
5054
list[PipelineRunResult]
5155
The list of pipeline run results
5256
"""
53-
resume = Path(config.storage.base_dir).exists()
57+
if is_resume_run and is_update_run:
58+
msg = "Cannot resume and update a run at the same time."
59+
raise ValueError(msg)
60+
5461
pipeline_config = create_pipeline_config(config)
5562
pipeline_cache = (
5663
NoopPipelineCache() if config.cache.type == CacheType.none is None else None
@@ -63,7 +70,8 @@ async def build_index(
6370
cache=pipeline_cache,
6471
progress_reporter=progress_reporter,
6572
emit=([TableEmitterType(e) for e in emit] if emit is not None else None),
66-
is_resume_run=resume,
73+
is_resume_run=is_resume_run,
74+
is_update_run=is_update_run,
6775
):
6876
outputs.append(output)
6977
if progress_reporter:

graphrag/index/cli.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ def index_cli(
101101
init: bool,
102102
verbose: bool,
103103
resume: str | None,
104+
update_index_id: str | None,
104105
memprofile: bool,
105106
nocache: bool,
106107
reporter: str | None,
@@ -112,7 +113,7 @@ def index_cli(
112113
"""Run the pipeline with the given config."""
113114
progress_reporter = load_progress_reporter(reporter or "rich")
114115
info, error, success = _logger(progress_reporter)
115-
run_id = resume or time.strftime("%Y%m%d-%H%M%S")
116+
run_id = resume or update_index_id or time.strftime("%Y%m%d-%H%M%S")
116117

117118
if init:
118119
_initialize_project_at(root_dir, progress_reporter)
@@ -152,11 +153,13 @@ def index_cli(
152153

153154
outputs = asyncio.run(
154155
build_index(
155-
config,
156-
run_id,
157-
memprofile,
158-
progress_reporter,
159-
pipeline_emit,
156+
config=config,
157+
run_id=run_id,
158+
is_resume_run=bool(resume),
159+
is_update_run=bool(update_index_id),
160+
memory_profile=memprofile,
161+
progress_reporter=progress_reporter,
162+
emit=pipeline_emit,
160163
)
161164
)
162165
encountered_errors = any(

0 commit comments

Comments
 (0)