Skip to content

Commit 20c1202

Browse files
Feat/update cli (#1376)
* Add update cli option with default storage * Semver * Semver * Pyright * Format
1 parent baa261c commit 20c1202

File tree

5 files changed

+138
-2
lines changed

5 files changed

+138
-2
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"type": "patch",
3+
"description": "Add update cli entrypoint for incremental indexing"
4+
}

.vscode/settings.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"**/.yarn": true,
44
"**/.pnp.*": true
55
},
6+
"editor.formatOnSave": false,
67
"eslint.nodePath": ".yarn/sdks",
78
"typescript.tsdk": ".yarn/sdks/typescript/lib",
89
"typescript.enablePromptUseWorkspaceTsdk": true,

graphrag/cli/index.py

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,11 +79,76 @@ def index_cli(
7979
output_dir: Path | None,
8080
):
8181
"""Run the pipeline with the given config."""
82+
config = load_config(root_dir, config_filepath)
83+
84+
_run_index(
85+
config=config,
86+
verbose=verbose,
87+
resume=resume,
88+
memprofile=memprofile,
89+
cache=cache,
90+
reporter=reporter,
91+
emit=emit,
92+
dry_run=dry_run,
93+
skip_validation=skip_validation,
94+
output_dir=output_dir,
95+
)
96+
97+
98+
def update_cli(
99+
root_dir: Path,
100+
verbose: bool,
101+
memprofile: bool,
102+
cache: bool,
103+
reporter: ReporterType,
104+
config_filepath: Path | None,
105+
emit: list[TableEmitterType],
106+
skip_validation: bool,
107+
output_dir: Path | None,
108+
):
109+
"""Run the incremental update pipeline with the given config."""
110+
config = load_config(root_dir, config_filepath)
111+
112+
# Check if update storage exists; if not, configure it with default values
113+
if not config.update_index_storage:
114+
from graphrag.config.defaults import STORAGE_TYPE, UPDATE_STORAGE_BASE_DIR
115+
from graphrag.config.models.storage_config import StorageConfig
116+
117+
config.update_index_storage = StorageConfig(
118+
type=STORAGE_TYPE,
119+
base_dir=UPDATE_STORAGE_BASE_DIR,
120+
)
121+
122+
_run_index(
123+
config=config,
124+
verbose=verbose,
125+
resume=False,
126+
memprofile=memprofile,
127+
cache=cache,
128+
reporter=reporter,
129+
emit=emit,
130+
dry_run=False,
131+
skip_validation=skip_validation,
132+
output_dir=output_dir,
133+
)
134+
135+
136+
def _run_index(
137+
config,
138+
verbose,
139+
resume,
140+
memprofile,
141+
cache,
142+
reporter,
143+
emit,
144+
dry_run,
145+
skip_validation,
146+
output_dir,
147+
):
82148
progress_reporter = create_progress_reporter(reporter)
83149
info, error, success = _logger(progress_reporter)
84150
run_id = resume or time.strftime("%Y%m%d-%H%M%S")
85151

86-
config = load_config(root_dir, config_filepath)
87152
config.storage.base_dir = str(output_dir) if output_dir else config.storage.base_dir
88153
config.reporting.base_dir = (
89154
str(output_dir) if output_dir else config.reporting.base_dir

graphrag/cli/main.py

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from graphrag.prompt_tune.generator import MAX_TOKEN_COUNT
1717
from graphrag.prompt_tune.loader import MIN_CHUNK_SIZE
1818

19-
from .index import index_cli
19+
from .index import index_cli, update_cli
2020
from .initialize import initialize_project_at
2121
from .prompt_tune import prompt_tune
2222
from .query import run_drift_search, run_global_search, run_local_search
@@ -129,6 +129,71 @@ def _index_cli(
129129
)
130130

131131

132+
@app.command("update")
133+
def _update_cli(
134+
config: Annotated[
135+
Path | None,
136+
typer.Option(
137+
help="The configuration to use.", exists=True, file_okay=True, readable=True
138+
),
139+
] = None,
140+
root: Annotated[
141+
Path,
142+
typer.Option(
143+
help="The project root directory.",
144+
exists=True,
145+
dir_okay=True,
146+
writable=True,
147+
resolve_path=True,
148+
),
149+
] = Path(), # set default to current directory
150+
verbose: Annotated[
151+
bool, typer.Option(help="Run the indexing pipeline with verbose logging")
152+
] = False,
153+
memprofile: Annotated[
154+
bool, typer.Option(help="Run the indexing pipeline with memory profiling")
155+
] = False,
156+
reporter: Annotated[
157+
ReporterType, typer.Option(help="The progress reporter to use.")
158+
] = ReporterType.RICH,
159+
emit: Annotated[
160+
str, typer.Option(help="The data formats to emit, comma-separated.")
161+
] = TableEmitterType.Parquet.value,
162+
cache: Annotated[bool, typer.Option(help="Use LLM cache.")] = True,
163+
skip_validation: Annotated[
164+
bool,
165+
typer.Option(
166+
help="Skip any preflight validation. Useful when running no LLM steps."
167+
),
168+
] = False,
169+
output: Annotated[
170+
Path | None,
171+
typer.Option(
172+
help="Indexing pipeline output directory. Overrides storage.base_dir in the configuration file.",
173+
dir_okay=True,
174+
writable=True,
175+
resolve_path=True,
176+
),
177+
] = None,
178+
):
179+
"""
180+
Update an existing knowledge graph index.
181+
182+
Applies a default storage configuration (if not provided by config), saving the new index to the local file system in the `update_output` folder.
183+
"""
184+
update_cli(
185+
root_dir=root,
186+
verbose=verbose,
187+
memprofile=memprofile,
188+
cache=cache,
189+
reporter=ReporterType(reporter),
190+
config_filepath=config,
191+
emit=[TableEmitterType(value.strip()) for value in emit.split(",")],
192+
skip_validation=skip_validation,
193+
output_dir=output,
194+
)
195+
196+
132197
@app.command("prompt-tune")
133198
def _prompt_tune_cli(
134199
root: Annotated[

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ test_smoke = "pytest ./tests/smoke"
139139
test_notebook = "pytest ./tests/notebook"
140140
test_verbs = "pytest ./tests/verbs"
141141
index = "python -m graphrag index"
142+
update = "python -m graphrag update"
142143
init = "python -m graphrag init"
143144
query = "python -m graphrag query"
144145
prompt_tune = "python -m graphrag prompt-tune"

0 commit comments

Comments
 (0)