Skip to content

Commit e01652a

Browse files
committed
proxied through sdk
1 parent 29a7681 commit e01652a

File tree

18 files changed

+599
-435
lines changed

18 files changed

+599
-435
lines changed

poetry.lock

Lines changed: 17 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ grpcio = "^1.70.0"
3030
grpcio-status = "^1.60.1"
3131
pandas = "^2.2.0"
3232
ipykernel = "^6.29.5"
33+
markdown = "^3.7"
3334

3435
[build-system]
3536
requires = ["poetry-core>=2.0.0,<3.0.0"]

src/source_msgraph/async_interator.py

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ def __init__(self, async_gen: AsyncGenerator[Any, None]):
1414
1515
Args:
1616
async_gen (AsyncGenerator): The async generator yielding results.
17-
max_pages (int): Maximum allowed pages to fetch (prevents infinite loops).
1817
"""
1918
self.async_gen = async_gen
2019
self.loop = self._get_event_loop()
@@ -56,3 +55,68 @@ def __iter__(self) -> Iterator:
5655
"""Returns the synchronous iterator."""
5756
return self.iterator
5857

58+
59+
import asyncio
60+
from typing import AsyncGenerator, Iterator, Any
61+
62+
class AsyncToSyncIteratorV2:
    """
    Converts an async generator into a synchronous iterator.

    The generator is drained eagerly when the wrapper is constructed and its
    items are buffered in a list; iteration then replays that buffer. Nothing
    is streamed, so memory use grows with the number of items produced.
    """

    def __init__(self, async_gen: AsyncGenerator[Any, None]):
        """
        Initializes the iterator by consuming an async generator synchronously.

        Args:
            async_gen (AsyncGenerator): The async generator yielding results.
        """
        self.async_gen = async_gen
        self.iterator = self._to_iterator()

    def _to_iterator(self) -> Iterator:
        """
        Drains the async generator with a strategy that fits the calling thread.

        Returns:
            Iterator: An iterator over the buffered results.
        """
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            # No event loop is running in this thread (plain script), so
            # asyncio.run can own the loop for the duration of the drain.
            return iter(asyncio.run(self._collect_results()))
        # A loop is already running in this thread (e.g. inside Jupyter).
        return self._sync_generator(loop)

    def _sync_generator(self, loop: asyncio.AbstractEventLoop) -> Iterator:
        """
        Drains the async generator while the calling thread already runs a loop.

        A running loop cannot be re-entered with ``run_until_complete`` and
        ``asyncio.run`` refuses to start inside one, so the generator is
        consumed on a private event loop in a worker thread. The calling thread
        blocks until the drain has finished; exceptions raised by the generator
        are re-raised here.

        NOTE(review): the generator therefore runs on a different loop than the
        caller's. Confirm it does not depend on objects bound to the caller's loop.

        Args:
            loop: The loop running in the calling thread. Kept so the method
                signature stays unchanged; it is not used to drive the generator.

        Returns:
            Iterator: An iterator over the buffered results.
        """
        from concurrent.futures import ThreadPoolExecutor

        with ThreadPoolExecutor(max_workers=1) as pool:
            results = pool.submit(asyncio.run, self._collect_results()).result()
        return iter(results)

    async def _collect_results(self):
        """Collects async generator results into a list (safe for asyncio.run)."""
        return [item async for item in self.async_gen]

    def __iter__(self) -> Iterator:
        """Returns the synchronous iterator."""
        return self.iterator

    def __next__(self) -> Any:
        """Returns the next item from the iterator."""
        return next(self.iterator)

src/source_msgraph/client.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
from msgraph import GraphServiceClient
2+
from kiota_abstractions.base_request_configuration import RequestConfiguration
3+
from msgraph.generated.models.o_data_errors.o_data_error import ODataError
4+
from azure.identity import ClientSecretCredential
5+
from source_msgraph.async_interator import AsyncToSyncIterator, AsyncToSyncIteratorV2
6+
from source_msgraph.models import ConnectorOptions
7+
from source_msgraph.utils import get_python_schema, to_json, to_pyspark_schema
8+
9+
class GraphClient:
    """Pages through one configured Microsoft Graph resource via ``GraphServiceClient``."""

    def __init__(self, options: ConnectorOptions):
        """
        Initializes the fetcher with the Graph client and the connector options.

        :param options: Connector options.
        """
        credentials = ClientSecretCredential(options.tenant_id, options.client_id, options.client_secret)
        self.graph_client = GraphServiceClient(credentials=credentials)
        self.options: ConnectorOptions = options

    async def fetch_data(self):
        """
        Fetches data from Microsoft Graph using the dynamically built request.

        Yields every item of every page, following ``odata_next_link`` until a
        page carries none.

        :raises ValueError: If the query-parameter class cannot be instantiated without arguments.
        :raises AttributeError: If a configured query parameter is not an attribute of that class.
        :raises Exception: If the Graph API reports an ``ODataError``.
        """
        import logging

        logger = logging.getLogger(__name__)
        resource = self.options.resource
        query_parameters_cls = resource.get_query_parameters_cls()

        # Stays None when the resource exposes no query-parameter class, so the
        # request below is still well defined instead of hitting an unbound name.
        request_configuration = None

        if query_parameters_cls:
            try:
                query_parameters_instance = query_parameters_cls()  # Ensure it can be instantiated without arguments
            except TypeError as e:
                raise ValueError(f"Failed to instantiate {query_parameters_cls.__name__}: {e}") from e

            if resource.query_params:
                for k, v in resource.query_params.items():
                    # Option keys may arrive with a URL-encoded "$" prefix.
                    k = k.removeprefix("%24")
                    if hasattr(query_parameters_instance, k):
                        setattr(query_parameters_instance, k, v)  # Set attributes dynamically
                    else:
                        raise AttributeError(f"{query_parameters_cls.__name__} has no attribute '{k}'")

            request_configuration = RequestConfiguration(
                query_parameters=query_parameters_instance
            )

        try:
            builder = resource.get_request_builder_cls()(self.graph_client.request_adapter, resource.resource_params)
            items = await builder.get(request_configuration=request_configuration)
            while items is not None:
                logger.debug("Page fetched....")
                # A page may come back without a value collection.
                for item in items.value or []:
                    yield item
                if not items.odata_next_link:
                    break
                items = await builder.with_url(items.odata_next_link).get()

        except ODataError as e:
            # The error payload is optional; fall back to the exception text.
            message = e.error.message if e.error else str(e)
            raise Exception(f"Graph API Error: {message}") from e
60+
61+
62+
def iter_records(options: ConnectorOptions):
    """
    Builds a synchronous iterator over the records of the configured resource.

    A ``GraphClient`` is created from ``options`` and the async generator
    returned by its ``fetch_data`` is wrapped in ``AsyncToSyncIterator``.

    :param options: Connector options containing authentication credentials and resource details.
    :return: A synchronous iterator over the fetched data.
    """
    return AsyncToSyncIterator(GraphClient(options).fetch_data())
74+
75+
import json
76+
from typing import Dict, Any
77+
from dataclasses import asdict
78+
79+
def get_resource_schema(options: ConnectorOptions) -> tuple[Any, Any]:
    """
    Derives the schema of a Microsoft Graph API resource from its first record.

    :param options: Connector options containing authentication credentials and resource details.
    :return: A ``(record, schema)`` pair: the first record converted with ``to_json``,
        and the result of ``to_pyspark_schema(get_python_schema(record))``.
    :raises ValueError: If the resource yields no records.
    """
    records = AsyncToSyncIteratorV2(GraphClient(options).fetch_data())

    # The default keeps next() from raising StopIteration on an empty result,
    # so "no records" is detected by the None check alone.
    record = next(records, None)
    if record is None:
        raise ValueError(f"No records found for resource: {options.resource.resource_name}")

    record = to_json(record)
    schema = to_pyspark_schema(get_python_schema(record))
    return record, schema
101+
102+
# Example usage
103+
# options = ConnectorOptions(...)
104+
# record, schema = get_resource_schema(options)
105+
# print(json.dumps(schema, indent=2))

src/source_msgraph/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Base generated package for Microsoft Graph SDK
2+
MSGRAPH_SDK_PACKAGE = "msgraph.generated"
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
import os
2+
from urllib.parse import unquote
3+
from source_msgraph.models import BaseResource
4+
from source_msgraph.resources import RESOURCE_CONFIGS
5+
6+
def generate_markdown(resource: BaseResource) -> str:
    """
    Generates markdown documentation for a given BaseResource.

    The document contains an overview, a table of resource (path) parameters,
    a table of query parameters and a PySpark usage example. Parameter names
    are URL-decoded in the tables; the example uses them as configured.

    :param resource: Resource whose ``name``, ``resource_params`` and
        ``query_params`` are documented. Either mapping may be ``None`` or empty.
    :return: The markdown document as a single string.
    """
    display_name = resource.name.lower()
    # Treat a missing mapping like an empty one so both the tables and the
    # example below handle None the same way.
    resource_params = resource.resource_params or {}
    query_params = resource.query_params or {}
    table_header = [
        "| Parameter | Type | Required | Description |",
        "|-----------|------|----------|-------------|",
    ]

    md_content = [f"# {resource.name.capitalize()} Resource", ""]
    md_content.append(f"**Resource Name:** `{display_name}`")

    md_content.append("\n## Overview")
    md_content.append(f"The `{resource.name}` resource provides a structured way to interact with Microsoft Graph API.")
    md_content.append("This resource supports operations such as retrieval and filtering of data.")

    md_content.append("\n## Resource Parameters")
    if resource_params:
        md_content.extend(table_header)
        md_content.extend(
            f"| `{unquote(param)}` | `str` | ✅ | Required path parameter for resource access. |"
            for param in resource_params
        )
    else:
        md_content.append(f"> No parameters required for `{display_name}` resource.")

    md_content.append("\n## Query Parameters")
    if query_params:
        md_content.extend(table_header)
        md_content.extend(
            f"| `{unquote(param)}` | `str` | ❌ | Optional query parameter to refine the API request. |"
            for param in query_params
        )
    else:
        # Single ">" so this notice renders like the resource-parameter one.
        md_content.append(f"> No query parameters are required for `{display_name}` resource.")

    # Blank lines keep the rule from attaching to the preceding table or quote.
    md_content.append("\n---\n")

    md_content.append(
        "Tip: Refer to the [Microsoft Graph API](https://learn.microsoft.com/graph/api/overview) "
        "documentation if you don't see a field. This can usually be resolved by providing the `expand` option."
    )

    md_content.append("\n## Example Usage")
    md_content.append("```python")
    md_content.append("from source_msgraph.source import MSGraphDataSource")
    md_content.append("spark.dataSource.register(MSGraphDataSource)")
    md_content.append("")
    md_content.append("# Read data using Microsoft Graph")
    # The chained calls are wrapped in parentheses so the snippet is valid Python.
    md_content.append("df = (")
    md_content.append('    spark.read.format("msgraph")')
    md_content.append('    .option("tenant_id", tenant_id)')
    md_content.append('    .option("client_id", client_id)')
    md_content.append('    .option("client_secret", client_secret)')
    md_content.append(f'    .option("resource", "{resource.name}")')
    for param in resource_params:
        md_content.append(f'    .option("{param}", "<value>")')
    for param in query_params:
        md_content.append(f'    .option("{param}", "<value>")')
    md_content.append('    .schema("id string, eTag string")')
    md_content.append("    .load()")
    md_content.append(")")
    md_content.append("")
    md_content.append("df.show()")
    md_content.append("```")

    return "\n".join(md_content)
63+
64+
def generate_docs(output_dir: str = "docs"):
    """
    Writes one markdown file per entry of ``RESOURCE_CONFIGS`` into ``output_dir``.

    The directory is created when missing. Each file is named after the
    resource, and its path is printed once the file has been written.

    :param output_dir: Directory that receives the generated ``.md`` files.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Only these keys of each config entry are forwarded to BaseResource.
    forwarded_keys = ("name", "resource_name", "request_builder_module")

    for entry in RESOURCE_CONFIGS:
        resource = BaseResource(**{key: entry[key] for key in forwarded_keys})

        document = generate_markdown(resource)
        target = os.path.join(output_dir, f"{resource.name}.md")
        with open(target, "w", encoding="utf-8") as handle:
            handle.write(document)
        print(f"Generated documentation: {target}")


if __name__ == "__main__":
    generate_docs()

src/source_msgraph/graph.py

Lines changed: 0 additions & 84 deletions
This file was deleted.

0 commit comments

Comments
 (0)