⚡️ Speed up method JiraDataSource.get_issue_all_types by 25%
#541
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
📄 25% (0.25x) speedup for `JiraDataSource.get_issue_all_types` in `backend/python/app/sources/external/jira/jira.py`
⏱️ Runtime: 2.74 milliseconds → 2.20 milliseconds (best of 247 runs)
📝 Explanation and details
The optimized code achieves a 24% runtime improvement and 1.6% throughput increase through strategic elimination of redundant object allocations and computations.
Key Optimizations:
- Module-level singleton for empty dictionaries: Introduced `_EMPTY_STR_DICT` to replace repeated `_as_str_dict({})` calls for `path_params` and `query_params`, which are always empty in this Jira API endpoint. This eliminates 1,726 unnecessary dict comprehensions per execution.
- Early return in `_as_str_dict`: Added a fast path that immediately returns the shared empty dict when the input is empty, avoiding the costly comprehension `{str(k): _serialize_value(v) for k, v in d.items()}` for empty cases.
- Optimized URL formatting: Enhanced `_safe_format_url` to skip the expensive `.format_map(_SafeDict(params))` operation when no parameters need formatting, directly returning the template string.

Performance Impact Analysis:
- `_as_str_dict` time dropped from 2.68ms to 1.67ms (-38%)
- `_safe_format_url` time reduced from 1.01ms to 0.29ms (-71%)

Workload Benefits:
These optimizations are especially effective for:
The changes maintain full backward compatibility while providing consistent performance gains across all test scenarios, from basic single calls to high-concurrency loads of 200+ requests.
✅ Correctness verification report:
🌀 Generated Regression Tests and Runtime
import asyncio # used to run async functions
import pytest # used for our unit tests
from app.sources.external.jira.jira import JiraDataSource
# Mocks and helpers for HTTPRequest/HTTPResponse and JiraClient
class DummyHTTPResponse:
    """A dummy HTTPResponse for testing.

    Stores a status code, a JSON payload and a text body, and exposes the
    payload through ``json()``.
    """

    def __init__(self, status_code=200, json_data=None, text_data=None):
        """Store the canned response data.

        Args:
            status_code: Status code exposed as ``status_code``.
            json_data: Object returned by ``json()``; ``None`` becomes ``{}``.
            text_data: Body exposed as ``text``; ``None`` becomes ``""``.
        """
        # Must be ``__init__`` (not ``init``) so Python runs it on construction.
        self.status_code = status_code
        self._json_data = json_data if json_data is not None else {}
        self.text = text_data if text_data is not None else ""

    def json(self):
        """Return the JSON payload supplied at construction time."""
        return self._json_data
class DummyHTTPClient:
    """A dummy async HTTP client that returns a DummyHTTPResponse."""

    def __init__(self, base_url="https://dummy.atlassian.net"):
        """Set up the base URL, the call log and the default response.

        Args:
            base_url: Value returned by ``get_base_url()``.
        """
        # Must be ``__init__`` (not ``init``) so Python runs it on construction.
        self._base_url = base_url
        self.execute_calls = []
        self.should_raise = None  # If set, raise this exception on execute
        self.response = DummyHTTPResponse()  # Default response

    def get_base_url(self):
        """Return the configured base URL."""
        return self._base_url

    async def execute(self, request, **kwargs):
        """Record the call, then raise ``should_raise`` or return ``response``.

        NOTE(review): the exact signature JiraDataSource uses to call
        ``execute`` is not visible here; extra keyword arguments are accepted
        and logged so either calling style works — confirm against the client.
        """
        # Logged as (request, kwargs) pairs to match how the tests unpack them.
        self.execute_calls.append((request, kwargs))
        if self.should_raise is not None:
            raise self.should_raise
        return self.response
class DummyJiraClient:
    """A dummy JiraClient wrapper."""

    def __init__(self, client):
        """Keep a reference to the wrapped HTTP client."""
        # Must be ``__init__`` (not ``init``) so ``DummyJiraClient(client)`` works.
        self.client = client

    def get_client(self):
        """Return the wrapped HTTP client."""
        return self.client
from app.sources.external.jira.jira import JiraDataSource
# Dummy HTTPRequest for compatibility
class HTTPRequest:
    """Plain container for the fields of an HTTP request."""

    def __init__(self, method, url, headers, path_params, query_params, body):
        """Store every argument on the instance under the same name."""
        # Must be ``__init__`` (not ``init``) so constructor arguments are applied.
        self.method = method
        self.url = url
        self.headers = headers
        self.path_params = path_params
        self.query_params = query_params
        self.body = body
# ---- UNIT TESTS ----
# 1. Basic Test Cases
@pytest.mark.asyncio
async def test_get_issue_all_types_basic_success():
    """Test basic async/await behavior and correct response."""
    dummy_client = DummyHTTPClient()
    dummy_jira_client = DummyJiraClient(dummy_client)
    ds = JiraDataSource(dummy_jira_client)
    # Await the async function and check response type
    resp = await ds.get_issue_all_types()
@pytest.mark.asyncio
async def test_get_issue_all_types_with_headers():
    """Test passing custom headers and ensure they are used."""
    dummy_client = DummyHTTPClient()
    dummy_jira_client = DummyJiraClient(dummy_client)
    ds = JiraDataSource(dummy_jira_client)
    headers = {"X-Test-Header": "test-value"}
    await ds.get_issue_all_types(headers=headers)
    # The last execute call should have the correct headers
    last_req, _ = dummy_client.execute_calls[-1]
@pytest.mark.asyncio
async def test_get_issue_all_types_empty_headers():
    """Test passing empty headers dict."""
    dummy_client = DummyHTTPClient()
    dummy_jira_client = DummyJiraClient(dummy_client)
    ds = JiraDataSource(dummy_jira_client)
    resp = await ds.get_issue_all_types(headers={})
@pytest.mark.asyncio
async def test_get_issue_all_types_none_headers():
    """Test passing None for headers (default behavior)."""
    dummy_client = DummyHTTPClient()
    dummy_jira_client = DummyJiraClient(dummy_client)
    ds = JiraDataSource(dummy_jira_client)
    resp = await ds.get_issue_all_types(headers=None)
# 2. Edge Test Cases
@pytest.mark.asyncio
async def test_get_issue_all_types_client_not_initialized():
    """Test ValueError if HTTP client is not initialized."""

    class BadJiraClient:
        """Jira client stub whose ``get_client`` yields no HTTP client."""

        def get_client(self):
            return None

    with pytest.raises(ValueError, match="HTTP client is not initialized"):
        JiraDataSource(BadJiraClient())
@pytest.mark.asyncio
async def test_get_issue_all_types_client_missing_get_base_url():
    """Test ValueError if HTTP client lacks get_base_url method."""

    class NoBaseURLClient:
        """HTTP client stub that deliberately has no ``get_base_url``."""

    class NoBaseURLJiraClient:
        """Jira client stub handing out a ``NoBaseURLClient``."""

        def get_client(self):
            return NoBaseURLClient()

    with pytest.raises(ValueError, match="HTTP client does not have get_base_url method"):
        JiraDataSource(NoBaseURLJiraClient())
@pytest.mark.asyncio
async def test_get_issue_all_types_client_execute_raises():
    """Test that exceptions from execute propagate."""
    dummy_client = DummyHTTPClient()
    dummy_client.should_raise = RuntimeError("Execute failed")
    dummy_jira_client = DummyJiraClient(dummy_client)
    ds = JiraDataSource(dummy_jira_client)
    with pytest.raises(RuntimeError, match="Execute failed"):
        await ds.get_issue_all_types()
@pytest.mark.asyncio
async def test_get_issue_all_types_concurrent_execution():
    """Test concurrent async calls to the function."""
    dummy_client = DummyHTTPClient()
    dummy_jira_client = DummyJiraClient(dummy_client)
    ds = JiraDataSource(dummy_jira_client)
    # Run 5 concurrent requests
    tasks = [ds.get_issue_all_types(headers={"X-Req": i}) for i in range(5)]
    results = await asyncio.gather(*tasks)
    # Check that each request had the correct header value
    # (loop body is a placeholder; no per-request assertion is present here)
    for idx, (req, _) in enumerate(dummy_client.execute_calls):
        pass
@pytest.mark.asyncio
async def test_get_issue_all_types_special_header_values():
    """Test headers with unusual values (bool, int, list, None)."""
    dummy_client = DummyHTTPClient()
    dummy_jira_client = DummyJiraClient(dummy_client)
    ds = JiraDataSource(dummy_jira_client)
    headers = {
        "Bool-True": True,
        "Bool-False": False,
        "Int": 42,
        "List": [1, 2, 3],
        "None": None,
    }
    await ds.get_issue_all_types(headers=headers)
    last_req, _ = dummy_client.execute_calls[-1]
# 3. Large Scale Test Cases
@pytest.mark.asyncio
async def test_get_issue_all_types_many_concurrent_requests():
    """Test scalability with many concurrent requests."""
    dummy_client = DummyHTTPClient()
    dummy_jira_client = DummyJiraClient(dummy_client)
    ds = JiraDataSource(dummy_jira_client)
    num_requests = 100  # Avoid exceeding 1000
    tasks = [ds.get_issue_all_types(headers={"X-Req": i}) for i in range(num_requests)]
    results = await asyncio.gather(*tasks)
    # Check that all requests have unique header values
    # (loop body is a placeholder; no per-request assertion is present here)
    for idx, (req, _) in enumerate(dummy_client.execute_calls):
        pass
@pytest.mark.asyncio
async def test_get_issue_all_types_large_headers_dict():
    """Test with a large number of headers."""
    dummy_client = DummyHTTPClient()
    dummy_jira_client = DummyJiraClient(dummy_client)
    ds = JiraDataSource(dummy_jira_client)
    large_headers = {f"Header-{i}": i for i in range(200)}  # 200 headers
    resp = await ds.get_issue_all_types(headers=large_headers)
    last_req, _ = dummy_client.execute_calls[-1]
    # All headers should be present and stringified
    # (loop body is a placeholder; no per-header assertion is present here)
    for i in range(200):
        pass
# 4. Throughput Test Cases
@pytest.mark.asyncio
async def test_get_issue_all_types_throughput_small_load():
    """Throughput test: small concurrent load."""
    dummy_client = DummyHTTPClient()
    dummy_jira_client = DummyJiraClient(dummy_client)
    ds = JiraDataSource(dummy_jira_client)
    tasks = [ds.get_issue_all_types() for _ in range(10)]
    results = await asyncio.gather(*tasks)
@pytest.mark.asyncio
async def test_get_issue_all_types_throughput_medium_load():
    """Throughput test: medium concurrent load."""
    dummy_client = DummyHTTPClient()
    dummy_jira_client = DummyJiraClient(dummy_client)
    ds = JiraDataSource(dummy_jira_client)
    tasks = [ds.get_issue_all_types(headers={"Load": i}) for i in range(50)]
    results = await asyncio.gather(*tasks)
@pytest.mark.asyncio
async def test_get_issue_all_types_throughput_high_load():
    """Throughput test: high concurrent load, but <1000."""
    dummy_client = DummyHTTPClient()
    dummy_jira_client = DummyJiraClient(dummy_client)
    ds = JiraDataSource(dummy_jira_client)
    tasks = [ds.get_issue_all_types(headers={"Load": i}) for i in range(200)]
    results = await asyncio.gather(*tasks)
@pytest.mark.asyncio
async def test_get_issue_all_types_throughput_sustained_pattern():
    """Throughput test: sustained rapid calls in batches."""
    dummy_client = DummyHTTPClient()
    dummy_jira_client = DummyJiraClient(dummy_client)
    ds = JiraDataSource(dummy_jira_client)
    batch_size = 20
    batches = 5
    for batch in range(batches):
        tasks = [
            ds.get_issue_all_types(headers={"Batch": batch, "Req": i})
            for i in range(batch_size)
        ]
        results = await asyncio.gather(*tasks)
        # Check that headers are correct for this batch
        # (loop body is a placeholder; no per-request assertion is present here)
        for idx, (req, _) in enumerate(dummy_client.execute_calls[-batch_size:]):
            pass
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
#------------------------------------------------
import asyncio # used to run async functions
# Mocks and helpers for testing
from typing import Any, Dict, Optional
import pytest # used for our unit tests
from app.sources.external.jira.jira import JiraDataSource
# ---- Minimal stubs for HTTPRequest/HTTPResponse ----
class HTTPRequest:
    """Minimal request stub: a plain container for the request fields."""

    def __init__(self, method, url, headers, path_params, query_params, body):
        """Store every argument on the instance under the same name."""
        # Must be ``__init__`` (not ``init``) so constructor arguments are applied.
        self.method = method
        self.url = url
        self.headers = headers
        self.path_params = path_params
        self.query_params = query_params
        self.body = body
class HTTPResponse:
    """Minimal response stub that just carries a payload in ``data``."""

    def __init__(self, data):
        """Store the payload on the instance."""
        # Must be ``__init__`` (not ``init``) so ``HTTPResponse(data)`` works.
        self.data = data
# ---- Minimal stub for HTTP client ----
class MockHTTPClient:
    """Minimal async HTTP client stub with a canned response or failure."""

    def __init__(self, base_url='https://mockjira.atlassian.net', should_raise=False, response_data=None):
        """Configure the stub.

        Args:
            base_url: Value returned by ``get_base_url()``.
            should_raise: When truthy, ``execute`` raises ``RuntimeError``.
            response_data: Payload wrapped in the returned response; ``None``
                selects a default list of three issue type names.
        """
        # Must be ``__init__`` (not ``init``) so constructor arguments are applied.
        self._base_url = base_url
        self.should_raise = should_raise
        self.response_data = response_data if response_data is not None else {"types": ["Bug", "Task", "Story"]}

    def get_base_url(self):
        """Return the configured base URL."""
        return self._base_url

    async def execute(self, request, **kwargs):
        """Raise the mock error if configured, otherwise return the payload.

        NOTE(review): the exact signature JiraDataSource uses to call
        ``execute`` is not visible here; extra keyword arguments are accepted
        and ignored — confirm against the real client.
        """
        if self.should_raise:
            # Message matches what the failure test expects.
            raise RuntimeError('Mock HTTP error')
        return HTTPResponse(self.response_data)
# ---- Minimal stub for JiraClient ----
class JiraClient:
    """Minimal JiraClient stub wrapping an HTTP client."""

    def __init__(self, client):
        """Keep a reference to the wrapped HTTP client."""
        # Must be ``__init__`` (not ``init``) so ``JiraClient(client)`` works.
        self.client = client

    def get_client(self):
        """Return the wrapped HTTP client.

        Every other Jira client stub in this file exposes this accessor.
        """
        return self.client
from app.sources.external.jira.jira import JiraDataSource
# ---- UNIT TESTS ----
# 1. BASIC TEST CASES
@pytest.mark.asyncio
async def test_get_issue_all_types_basic_returns_expected_response():
    """Test that function returns expected HTTPResponse with default mock data."""
    mock_client = MockHTTPClient()
    jira_client = JiraClient(mock_client)
    ds = JiraDataSource(jira_client)
    resp = await ds.get_issue_all_types()
@pytest.mark.asyncio
async def test_get_issue_all_types_with_custom_headers():
    """Test that custom headers are accepted and passed to HTTPRequest."""
    mock_client = MockHTTPClient()
    jira_client = JiraClient(mock_client)
    ds = JiraDataSource(jira_client)
    custom_headers = {"X-Test": "value", "Accept": "application/json"}
    resp = await ds.get_issue_all_types(headers=custom_headers)
@pytest.mark.asyncio
async def test_get_issue_all_types_empty_headers_dict():
    """Test that empty headers dict does not cause errors."""
    mock_client = MockHTTPClient()
    jira_client = JiraClient(mock_client)
    ds = JiraDataSource(jira_client)
    resp = await ds.get_issue_all_types(headers={})
# 2. EDGE TEST CASES
@pytest.mark.asyncio
async def test_get_issue_all_types_client_none_raises_value_error():
    """Test that ValueError is raised if client is None during init."""

    class DummyJiraClient:
        """Jira client stub whose ``get_client`` yields no HTTP client."""

        def get_client(self):
            return None

    with pytest.raises(ValueError, match='HTTP client is not initialized'):
        JiraDataSource(DummyJiraClient())
@pytest.mark.asyncio
async def test_get_issue_all_types_client_missing_get_base_url_raises_value_error():
    """Test that ValueError is raised if client does not have get_base_url."""

    class NoBaseUrlClient:
        """HTTP client stub that deliberately has no ``get_base_url``."""

    jira_client = JiraClient(NoBaseUrlClient())
    with pytest.raises(ValueError, match='HTTP client does not have get_base_url method'):
        JiraDataSource(jira_client)
@pytest.mark.asyncio
async def test_get_issue_all_types_execute_raises_runtime_error():
    """Test that an exception in HTTPClient.execute is propagated."""
    mock_client = MockHTTPClient(should_raise=True)
    jira_client = JiraClient(mock_client)
    ds = JiraDataSource(jira_client)
    with pytest.raises(RuntimeError, match='Mock HTTP error'):
        await ds.get_issue_all_types()
@pytest.mark.asyncio
async def test_get_issue_all_types_concurrent_execution():
    """Test concurrent calls to get_issue_all_types with asyncio.gather."""
    mock_client = MockHTTPClient()
    jira_client = JiraClient(mock_client)
    ds = JiraDataSource(jira_client)
    # Run 5 concurrent calls
    results = await asyncio.gather(
        *(ds.get_issue_all_types() for _ in range(5))
    )
@pytest.mark.asyncio
async def test_get_issue_all_types_with_varied_response_data():
    """Test with client returning different response data."""
    response_data = {"types": ["Epic", "Sub-task"]}
    mock_client = MockHTTPClient(response_data=response_data)
    jira_client = JiraClient(mock_client)
    ds = JiraDataSource(jira_client)
    resp = await ds.get_issue_all_types()
# 3. LARGE SCALE TEST CASES
@pytest.mark.asyncio
async def test_get_issue_all_types_large_concurrent_load():
    """Test function under moderate concurrent load (50 calls)."""
    mock_client = MockHTTPClient()
    jira_client = JiraClient(mock_client)
    ds = JiraDataSource(jira_client)
    # Run 50 concurrent calls
    results = await asyncio.gather(
        *(ds.get_issue_all_types() for _ in range(50))
    )
@pytest.mark.asyncio
async def test_get_issue_all_types_large_headers():
    """Test with a large number of headers."""
    mock_client = MockHTTPClient()
    jira_client = JiraClient(mock_client)
    ds = JiraDataSource(jira_client)
    large_headers = {f"X-Header-{i}": f"value-{i}" for i in range(100)}
    resp = await ds.get_issue_all_types(headers=large_headers)
# 4. THROUGHPUT TEST CASES
@pytest.mark.asyncio
async def test_get_issue_all_types_throughput_small_load():
    """Throughput test: small load (10 calls)."""
    mock_client = MockHTTPClient()
    jira_client = JiraClient(mock_client)
    ds = JiraDataSource(jira_client)
    results = await asyncio.gather(
        *(ds.get_issue_all_types() for _ in range(10))
    )
@pytest.mark.asyncio
async def test_get_issue_all_types_throughput_medium_load():
    """Throughput test: medium load (100 calls)."""
    mock_client = MockHTTPClient()
    jira_client = JiraClient(mock_client)
    ds = JiraDataSource(jira_client)
    results = await asyncio.gather(
        *(ds.get_issue_all_types() for _ in range(100))
    )
@pytest.mark.asyncio
async def test_get_issue_all_types_throughput_varied_data():
    """Throughput test: varied response data per call."""
    datas = [
        {"types": [f"Type-{i}"]} for i in range(20)
    ]
    # One client and one data source per payload, each called once.
    clients = [MockHTTPClient(response_data=data) for data in datas]
    sources = [JiraDataSource(JiraClient(client)) for client in clients]
    results = await asyncio.gather(
        *(ds.get_issue_all_types() for ds in sources)
    )
    # Placeholder loop; no per-response assertion is present here.
    for i, resp in enumerate(results):
        pass
@pytest.mark.asyncio
async def test_get_issue_all_types_throughput_high_load():
    """Throughput test: high load (200 calls)."""
    mock_client = MockHTTPClient()
    jira_client = JiraClient(mock_client)
    ds = JiraDataSource(jira_client)
    results = await asyncio.gather(
        *(ds.get_issue_all_types() for _ in range(200))
    )
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
To edit these changes
`git checkout codeflash/optimize-JiraDataSource.get_issue_all_types-mhrymngz` and push.