Skip to content

Commit 0edebbe

Browse files
author
Baz
authored
fix: (CDK) (HttpRequester) - Make the HttpRequester.path optional (#370)
1 parent 40e5002 commit 0edebbe

File tree

12 files changed

+227
-45
lines changed

12 files changed

+227
-45
lines changed

airbyte_cdk/sources/declarative/declarative_component_schema.yaml

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1794,7 +1794,6 @@ definitions:
17941794
type: object
17951795
required:
17961796
- type
1797-
- path
17981797
- url_base
17991798
properties:
18001799
type:
@@ -1806,9 +1805,18 @@ definitions:
18061805
type: string
18071806
interpolation_context:
18081807
- config
1808+
- next_page_token
1809+
- stream_interval
1810+
- stream_partition
1811+
- stream_slice
1812+
- creation_response
1813+
- polling_response
1814+
- download_target
18091815
examples:
18101816
- "https://connect.squareup.com/v2"
1811-
- "{{ config['base_url'] or 'https://app.posthog.com'}}/api/"
1817+
- "{{ config['base_url'] or 'https://app.posthog.com'}}/api"
1818+
- "https://connect.squareup.com/v2/quotes/{{ stream_partition['id'] }}/quote_line_groups"
1819+
- "https://example.com/api/v1/resource/{{ next_page_token['id'] }}"
18121820
path:
18131821
title: URL Path
18141822
description: Path to the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.

airbyte_cdk/sources/declarative/models/declarative_component_schema.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -939,7 +939,7 @@ class MinMaxDatetime(BaseModel):
939939
)
940940
datetime_format: Optional[str] = Field(
941941
"",
942-
description='Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`\n * **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`\n * **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date representation - `08/16/1988`\n * **%X**: Time representation - `21:30:00`\n * **%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n',
942+
description='Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`\n * **%_ms**: Millisecond (zero-padded to 3 digits) - `000`, `001`, ..., `999`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`\n * **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`\n * **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date representation - `08/16/1988`\n * **%X**: Time representation - `21:30:00`\n * **%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n',
943943
examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s"],
944944
title="Datetime Format",
945945
)
@@ -1545,7 +1545,7 @@ class DatetimeBasedCursor(BaseModel):
15451545
)
15461546
datetime_format: str = Field(
15471547
...,
1548-
description="The datetime format used to format the datetime values that are sent in outgoing requests to the API. Use placeholders starting with \"%\" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp (milliseconds) - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * **%%**: Literal '%' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n",
1548+
description="The datetime format used to format the datetime values that are sent in outgoing requests to the API. Use placeholders starting with \"%\" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp (milliseconds) - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%_ms**: Millisecond (zero-padded to 3 digits) - `000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * **%%**: Literal '%' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n",
15491549
examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s", "%ms", "%s_as_float"],
15501550
title="Outgoing Datetime Format",
15511551
)
@@ -2072,12 +2072,14 @@ class HttpRequester(BaseModel):
20722072
description="Base URL of the API source. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.",
20732073
examples=[
20742074
"https://connect.squareup.com/v2",
2075-
"{{ config['base_url'] or 'https://app.posthog.com'}}/api/",
2075+
"{{ config['base_url'] or 'https://app.posthog.com'}}/api",
2076+
"https://connect.squareup.com/v2/quotes/{{ stream_partition['id'] }}/quote_line_groups",
2077+
"https://example.com/api/v1/resource/{{ next_page_token['id'] }}",
20762078
],
20772079
title="API Base URL",
20782080
)
2079-
path: str = Field(
2080-
...,
2081+
path: Optional[str] = Field(
2082+
None,
20812083
description="Path to the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.",
20822084
examples=[
20832085
"/products",

airbyte_cdk/sources/declarative/requesters/http_requester.py

Lines changed: 48 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@
2525
from airbyte_cdk.sources.streams.call_rate import APIBudget
2626
from airbyte_cdk.sources.streams.http import HttpClient
2727
from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler
28-
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
29-
from airbyte_cdk.utils.mapping_helpers import combine_mappings
28+
from airbyte_cdk.sources.types import Config, EmptyString, StreamSlice, StreamState
29+
from airbyte_cdk.utils.mapping_helpers import combine_mappings, get_interpolation_context
3030

3131

3232
@dataclass
@@ -49,9 +49,10 @@ class HttpRequester(Requester):
4949

5050
name: str
5151
url_base: Union[InterpolatedString, str]
52-
path: Union[InterpolatedString, str]
5352
config: Config
5453
parameters: InitVar[Mapping[str, Any]]
54+
55+
path: Optional[Union[InterpolatedString, str]] = None
5556
authenticator: Optional[DeclarativeAuthenticator] = None
5657
http_method: Union[str, HttpMethod] = HttpMethod.GET
5758
request_options_provider: Optional[InterpolatedRequestOptionsProvider] = None
@@ -66,7 +67,9 @@ class HttpRequester(Requester):
6667

6768
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
6869
self._url_base = InterpolatedString.create(self.url_base, parameters=parameters)
69-
self._path = InterpolatedString.create(self.path, parameters=parameters)
70+
self._path = InterpolatedString.create(
71+
self.path if self.path else EmptyString, parameters=parameters
72+
)
7073
if self.request_options_provider is None:
7174
self._request_options_provider = InterpolatedRequestOptionsProvider(
7275
config=self.config, parameters=parameters
@@ -112,27 +115,33 @@ def exit_on_rate_limit(self, value: bool) -> None:
112115
def get_authenticator(self) -> DeclarativeAuthenticator:
113116
return self._authenticator
114117

115-
def get_url_base(self) -> str:
116-
return os.path.join(self._url_base.eval(self.config), "")
118+
def get_url_base(
119+
self,
120+
*,
121+
stream_state: Optional[StreamState] = None,
122+
stream_slice: Optional[StreamSlice] = None,
123+
next_page_token: Optional[Mapping[str, Any]] = None,
124+
) -> str:
125+
interpolation_context = get_interpolation_context(
126+
stream_state=stream_state,
127+
stream_slice=stream_slice,
128+
next_page_token=next_page_token,
129+
)
130+
return os.path.join(self._url_base.eval(self.config, **interpolation_context), EmptyString)
117131

118132
def get_path(
119133
self,
120134
*,
121-
stream_state: Optional[StreamState],
122-
stream_slice: Optional[StreamSlice],
123-
next_page_token: Optional[Mapping[str, Any]],
135+
stream_state: Optional[StreamState] = None,
136+
stream_slice: Optional[StreamSlice] = None,
137+
next_page_token: Optional[Mapping[str, Any]] = None,
124138
) -> str:
125-
kwargs = {
126-
"stream_slice": stream_slice,
127-
"next_page_token": next_page_token,
128-
# update the interpolation context with extra fields, if passed.
129-
**(
130-
stream_slice.extra_fields
131-
if stream_slice is not None and hasattr(stream_slice, "extra_fields")
132-
else {}
133-
),
134-
}
135-
path = str(self._path.eval(self.config, **kwargs))
139+
interpolation_context = get_interpolation_context(
140+
stream_state=stream_state,
141+
stream_slice=stream_slice,
142+
next_page_token=next_page_token,
143+
)
144+
path = str(self._path.eval(self.config, **interpolation_context))
136145
return path.lstrip("/")
137146

138147
def get_method(self) -> HttpMethod:
@@ -330,7 +339,20 @@ def _request_body_json(
330339

331340
@classmethod
332341
def _join_url(cls, url_base: str, path: str) -> str:
333-
return urljoin(url_base, path)
342+
"""
343+
Joins a base URL with a given path and returns the resulting URL with any trailing slash removed.
344+
345+
This method ensures that there are no duplicate slashes when concatenating the base URL and the path,
346+
which is useful when the full URL is provided from an interpolation context.
347+
348+
Args:
349+
url_base (str): The base URL to which the path will be appended.
350+
path (str): The path to join with the base URL.
351+
352+
Returns:
353+
str: The concatenated URL with the trailing slash (if any) removed.
354+
"""
355+
return urljoin(url_base, path).rstrip("/")
334356

335357
def send_request(
336358
self,
@@ -347,7 +369,11 @@ def send_request(
347369
request, response = self._http_client.send_request(
348370
http_method=self.get_method().value,
349371
url=self._join_url(
350-
self.get_url_base(),
372+
self.get_url_base(
373+
stream_state=stream_state,
374+
stream_slice=stream_slice,
375+
next_page_token=next_page_token,
376+
),
351377
path
352378
or self.get_path(
353379
stream_state=stream_state,

airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
2626
from airbyte_cdk.utils.mapping_helpers import (
2727
_validate_component_request_option_paths,
28+
get_interpolation_context,
2829
)
2930

3031

@@ -150,11 +151,22 @@ def next_page_token(
150151
else:
151152
return None
152153

153-
def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
154+
def path(
155+
self,
156+
next_page_token: Optional[Mapping[str, Any]],
157+
stream_state: Optional[Mapping[str, Any]] = None,
158+
stream_slice: Optional[StreamSlice] = None,
159+
) -> Optional[str]:
154160
token = next_page_token.get("next_page_token") if next_page_token else None
155161
if token and self.page_token_option and isinstance(self.page_token_option, RequestPath):
162+
# make additional interpolation context
163+
interpolation_context = get_interpolation_context(
164+
stream_state=stream_state,
165+
stream_slice=stream_slice,
166+
next_page_token=next_page_token,
167+
)
156168
# Replace url base to only return the path
157-
return str(token).replace(self.url_base.eval(self.config), "") # type: ignore # url_base is casted to a InterpolatedString in __post_init__
169+
return str(token).replace(self.url_base.eval(self.config, **interpolation_context), "") # type: ignore # url_base is cast to an InterpolatedString in __post_init__
158170
else:
159171
return None
160172

@@ -258,8 +270,17 @@ def next_page_token(
258270
response, last_page_size, last_record, last_page_token_value
259271
)
260272

261-
def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
262-
return self._decorated.path(next_page_token)
273+
def path(
274+
self,
275+
next_page_token: Optional[Mapping[str, Any]],
276+
stream_state: Optional[Mapping[str, Any]] = None,
277+
stream_slice: Optional[StreamSlice] = None,
278+
) -> Optional[str]:
279+
return self._decorated.path(
280+
next_page_token=next_page_token,
281+
stream_state=stream_state,
282+
stream_slice=stream_slice,
283+
)
263284

264285
def get_request_params(
265286
self,

airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,12 @@ class NoPagination(Paginator):
1919

2020
parameters: InitVar[Mapping[str, Any]]
2121

22-
def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
22+
def path(
23+
self,
24+
next_page_token: Optional[Mapping[str, Any]],
25+
stream_state: Optional[Mapping[str, Any]] = None,
26+
stream_slice: Optional[StreamSlice] = None,
27+
) -> Optional[str]:
2328
return None
2429

2530
def get_request_params(

airbyte_cdk/sources/declarative/requesters/paginators/paginator.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import (
1212
RequestOptionsProvider,
1313
)
14-
from airbyte_cdk.sources.types import Record
14+
from airbyte_cdk.sources.types import Record, StreamSlice
1515

1616

1717
@dataclass
@@ -49,7 +49,12 @@ def next_page_token(
4949
pass
5050

5151
@abstractmethod
52-
def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
52+
def path(
53+
self,
54+
next_page_token: Optional[Mapping[str, Any]],
55+
stream_state: Optional[Mapping[str, Any]] = None,
56+
stream_slice: Optional[StreamSlice] = None,
57+
) -> Optional[str]:
5358
"""
5459
Returns the URL path to hit to fetch the next page of records
5560

airbyte_cdk/sources/declarative/requesters/requester.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,13 @@ def get_authenticator(self) -> DeclarativeAuthenticator:
3535
pass
3636

3737
@abstractmethod
38-
def get_url_base(self) -> str:
38+
def get_url_base(
39+
self,
40+
*,
41+
stream_state: Optional[StreamState],
42+
stream_slice: Optional[StreamSlice],
43+
next_page_token: Optional[Mapping[str, Any]],
44+
) -> str:
3945
"""
4046
:return: URL base for the API endpoint, e.g. if you want to hit https://myapi.com/v1/some_entity then this should return "https://myapi.com/v1/"
4147
"""

0 commit comments

Comments
 (0)