From eaa105d3f2f3f510d64e65a1e3538efb640a8971 Mon Sep 17 00:00:00 2001 From: David Kopp Date: Mon, 22 Sep 2025 22:20:33 +0200 Subject: [PATCH 01/27] Initial implementation of using dynamic grid carbon intensity --- config.yml.example | 8 +- docker/structure.sql | 4 +- lib/carbon_intensity.py | 147 +++++++++ lib/phase_stats.py | 122 ++++++- lib/scenario_runner.py | 8 +- lib/user.py | 7 +- .../2025_09_22_dynamic_carbon_intensity.sql | 34 ++ tests/lib/test_carbon_intensity.py | 305 ++++++++++++++++++ tools/phase_stats.py | 2 +- 9 files changed, 618 insertions(+), 19 deletions(-) create mode 100644 lib/carbon_intensity.py create mode 100644 migrations/2025_09_22_dynamic_carbon_intensity.sql create mode 100644 tests/lib/test_carbon_intensity.py diff --git a/config.yml.example b/config.yml.example index bd2aafcdf..90637ee41 100644 --- a/config.yml.example +++ b/config.yml.example @@ -10,6 +10,11 @@ redis: host: green-coding-redis-container port: 6379 +elephant: + host: localhost + port: 8000 + protocol: http + smtp: server: SMTP_SERVER sender: SMTP_SENDER @@ -237,8 +242,7 @@ sci: # The default is the value for a developer machine (Pro Laptop - https://dataviz.boavizta.org/terminalimpact) TE: 181000 # I is the Carbon Intensity at the location of this machine - # The value can either be a number in gCO2e/kWh or a carbon intensity provider that fetches this number dynamically - # https://docs.green-coding.io/docs/measuring/carbon-intensity-providers/carbon-intensity-providers-overview/ (TODO) + # This is a static value in gCO2e/kWh. For dynamic carbon intensity, see frontend user settings (Documentation: https://docs.green-coding.io/docs/measuring/carbon/grid-carbon-intensity/). 
# For fixed world-wide values get the number from https://ember-climate.org/insights/research/global-electricity-review-2025/ # The number worldwide for 2024 is 473 # The number 334 that comes as default is for Germany from 2024 and comes from https://app.electricitymaps.com/zone/DE/all/yearly diff --git a/docker/structure.sql b/docker/structure.sql index 31c14112c..9ebdb71a9 100644 --- a/docker/structure.sql +++ b/docker/structure.sql @@ -54,7 +54,9 @@ VALUES ( "measurement.post_test_sleep", "measurement.phase_transition_time", "measurement.wait_time_dependencies", - "measurement.skip_volume_inspect" + "measurement.skip_volume_inspect", + "measurement.use_dynamic_carbon_intensity", + "measurement.carbon_intensity_location" ] }, "api": { diff --git a/lib/carbon_intensity.py b/lib/carbon_intensity.py new file mode 100644 index 000000000..ec8bec3d5 --- /dev/null +++ b/lib/carbon_intensity.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import requests +from datetime import datetime +from typing import List, Dict, Any +from lib import error_helpers +from lib.global_config import GlobalConfig + + +class CarbonIntensityServiceError(Exception): + """Raised when carbon intensity service request fails.""" + +class CarbonIntensityDataError(Exception): + """Raised when carbon intensity service returns invalid data.""" + +class CarbonIntensityClient: + def __init__(self, base_url: str = None): + """ + Initialize carbon intensity client for Elephant service. + + Args: + base_url: Base URL of the Elephant service. 
If None, reads from config.yml + """ + if base_url is None: + config = GlobalConfig().config + elephant_config = config.get('elephant', {}) + protocol = elephant_config.get('protocol', 'http') + host = elephant_config.get('host', 'localhost') + port = elephant_config.get('port', 8000) + base_url = f"{protocol}://{host}:{port}" + + self.base_url = base_url.rstrip('/') + + def get_carbon_intensity_history(self, location: str, start_time: str, end_time: str) -> List[Dict[str, Any]]: + """ + Fetch carbon intensity history from Elephant service. + + Args: + location: Location code (e.g., "DE", "ES-IB-MA") + start_time: Start time in ISO 8601 format (e.g., "2025-09-22T10:50:00Z") + end_time: End time in ISO 8601 format (e.g., "2025-09-22T10:55:00Z") + + Returns: + List of carbon intensity data points: + [{"location": "DE", "time": "2025-09-22T10:00:00Z", "carbon_intensity": 185.0}, ...] + + Raises: + Exception: On any service error, network issue, or invalid response + """ + url = f"{self.base_url}/carbon-intensity/history" + params = { + 'location': location, + 'startTime': start_time, + 'endTime': end_time, + 'interpolate': 'true' + } + + try: + response = requests.get(url, params=params, timeout=30) + response.raise_for_status() + + data = response.json() + + if not isinstance(data, list): + raise ValueError(f"Expected list response from carbon intensity service, got {type(data)}") + + for item in data: + if not all(key in item for key in ['location', 'time', 'carbon_intensity']): + raise ValueError(f"Invalid carbon intensity data format: missing required fields in {item}") + + return data + + except requests.exceptions.RequestException as e: + error_helpers.log_error(f"Carbon intensity service request failed: {e}") + raise CarbonIntensityServiceError(f"Failed to fetch carbon intensity data: {e}") from e + except (ValueError, KeyError) as e: + error_helpers.log_error(f"Invalid carbon intensity service response: {e}") + raise CarbonIntensityDataError(f"Invalid response 
from carbon intensity service: {e}") from e + + +def microseconds_to_iso8601(timestamp_us: int) -> str: + """ + Convert microsecond timestamp to ISO 8601 format. + + Args: + timestamp_us: Timestamp in microseconds since epoch + + Returns: + ISO 8601 formatted timestamp string (e.g., "2025-09-22T10:50:00Z") + """ + timestamp_seconds = timestamp_us / 1_000_000 + dt = datetime.utcfromtimestamp(timestamp_seconds) + return dt.strftime('%Y-%m-%dT%H:%M:%SZ') + + +def interpolate_carbon_intensity(timestamp_us: int, carbon_data: List[Dict[str, Any]]) -> float: + """ + Interpolate carbon intensity value for a specific timestamp. + + Args: + timestamp_us: Target timestamp in microseconds + carbon_data: List of carbon intensity data points from service + + Returns: + Interpolated carbon intensity value in gCO2e/kWh + + Raises: + ValueError: If carbon_data is empty or timestamp is outside range + """ + if not carbon_data: + raise ValueError("No carbon intensity data available for interpolation") + + target_time = datetime.utcfromtimestamp(timestamp_us / 1_000_000).replace(tzinfo=None) + + # Convert carbon data times to datetime objects for comparison + data_points = [] + for item in carbon_data: + item_time = datetime.fromisoformat(item['time'].replace('Z', '+00:00')).replace(tzinfo=None) + data_points.append((item_time, float(item['carbon_intensity']))) + + # Sort by time + data_points.sort(key=lambda x: x[0]) + + # Check if target is before first or after last data point + if target_time <= data_points[0][0]: + return data_points[0][1] + if target_time >= data_points[-1][0]: + return data_points[-1][1] + + # Find surrounding data points for interpolation + for i in range(len(data_points) - 1): + time1, value1 = data_points[i] + time2, value2 = data_points[i + 1] + + if time1 <= target_time <= time2: + # Linear interpolation + time_diff = (time2 - time1).total_seconds() + if time_diff == 0: + return value1 + + target_diff = (target_time - time1).total_seconds() + ratio = 
target_diff / time_diff + + return value1 + (value2 - value1) * ratio + + raise ValueError(f"Could not interpolate carbon intensity for timestamp {target_time}") diff --git a/lib/phase_stats.py b/lib/phase_stats.py index 4421bd975..a46515692 100644 --- a/lib/phase_stats.py +++ b/lib/phase_stats.py @@ -10,6 +10,13 @@ from lib.db import DB from lib import error_helpers +from lib.carbon_intensity import ( + CarbonIntensityClient, + CarbonIntensityServiceError, + CarbonIntensityDataError, + microseconds_to_iso8601, + interpolate_carbon_intensity +) def reconstruct_runtime_phase(run_id, runtime_phase_idx): # First we create averages for all types. This includes means and totals @@ -66,12 +73,71 @@ def generate_csv_line(run_id, metric, detail_name, phase_name, value, value_type # else '' resolves to NULL return f"{run_id},{metric},{detail_name},{phase_name},{round(value)},{value_type},{round(max_value) if max_value is not None else ''},{round(min_value) if min_value is not None else ''},{round(sampling_rate_avg) if sampling_rate_avg is not None else ''},{round(sampling_rate_max) if sampling_rate_max is not None else ''},{round(sampling_rate_95p) if sampling_rate_95p is not None else ''},{unit},NOW()\n" -def build_and_store_phase_stats(run_id, sci=None): +def get_carbon_intensity_for_timestamp(timestamp_us, sci, carbon_intensity_data): + """ + Get carbon intensity value for a specific timestamp. + Uses dynamic data if available, otherwise falls back to static value. 
+ + Args: + timestamp_us: Timestamp in microseconds + sci: SCI configuration dict with static 'I' value + carbon_intensity_data: Dynamic carbon intensity data (None for static mode) + + Returns: + Carbon intensity value in gCO2e/kWh + + Raises: + ValueError: If no carbon intensity data is available + """ + if carbon_intensity_data: + # Dynamic mode: interpolate from time series data + return interpolate_carbon_intensity(timestamp_us, carbon_intensity_data) + else: + # Static mode: use configured value + if sci.get('I') is None: + raise ValueError("No carbon intensity value available (static 'I' value missing)") + return Decimal(sci['I']) + + +def build_and_store_phase_stats(run_id, sci=None, measurement_config=None): if not sci: sci = {} + if not measurement_config: + measurement_config = {} software_carbon_intensity_global = {} + # Check for dynamic carbon intensity configuration + capabilities = measurement_config.get('capabilities', {}) + measurement_capabilities = capabilities.get('measurement', {}) + use_dynamic_carbon_intensity = measurement_capabilities.get('use_dynamic_carbon_intensity', False) + carbon_intensity_location = measurement_capabilities.get('carbon_intensity_location') + + # For dynamic carbon intensity, fetch time series data + carbon_intensity_data = None + if use_dynamic_carbon_intensity: + if not carbon_intensity_location: + raise ValueError("carbon_intensity_location is required when use_dynamic_carbon_intensity is True") + + # Get run start and end times + run_query = """ + SELECT start_measurement, end_measurement + FROM runs + WHERE id = %s + """ + run_data = DB().fetch_one(run_query, (run_id,)) + if not run_data or not run_data[0] or not run_data[1]: + raise ValueError(f"Run {run_id} does not have valid start_measurement and end_measurement times") + + start_time_us, end_time_us = run_data + start_time_iso = microseconds_to_iso8601(start_time_us) + end_time_iso = microseconds_to_iso8601(end_time_us) + + carbon_client = 
CarbonIntensityClient() + carbon_intensity_data = carbon_client.get_carbon_intensity_history( + carbon_intensity_location, start_time_iso, end_time_iso + ) + query = """ SELECT id, metric, unit, detail_name, sampling_rate_configured FROM measurement_metrics @@ -239,13 +305,29 @@ def build_and_store_phase_stats(run_id, sci=None): power_min = (min_value * 10**3) / (duration / value_count) csv_buffer.write(generate_csv_line(run_id, f"{metric.replace('_energy_', '_power_')}", detail_name, f"{idx:03}_{phase['name']}", power_avg, 'MEAN', power_max, power_min, sampling_rate_avg, sampling_rate_max, sampling_rate_95p, 'mW')) - if sci.get('I', None) is not None: - value_carbon_ug = (value_sum / 3_600_000) * Decimal(sci['I']) - - csv_buffer.write(generate_csv_line(run_id, f"{metric.replace('_energy_', '_carbon_')}", detail_name, f"{idx:03}_{phase['name']}", value_carbon_ug, 'TOTAL', None, None, sampling_rate_avg, sampling_rate_max, sampling_rate_95p, 'ug')) - - if '[' not in phase['name'] and metric.endswith('_machine'): # only for runtime sub phases to not double count ... needs refactor ... 
see comment at beginning of file - software_carbon_intensity_global['machine_carbon_ug'] = software_carbon_intensity_global.get('machine_carbon_ug', 0) + value_carbon_ug + # Calculate carbon values (static or dynamic) + try: + if use_dynamic_carbon_intensity and carbon_intensity_data: + # Dynamic carbon intensity: calculate based on measurement timing + # For simplicity, use phase midpoint timestamp for now + # TODO: Implement proper time-weighted carbon calculation per measurement + phase_midpoint_us = (phase['start'] + phase['end']) // 2 + carbon_intensity_value = Decimal(get_carbon_intensity_for_timestamp(phase_midpoint_us, sci, carbon_intensity_data)) + elif sci.get('I', None) is not None: + # Static carbon intensity + carbon_intensity_value = Decimal(sci['I']) + else: + carbon_intensity_value = None + + if carbon_intensity_value is not None: + value_carbon_ug = (value_sum / 3_600_000) * carbon_intensity_value + csv_buffer.write(generate_csv_line(run_id, f"{metric.replace('_energy_', '_carbon_')}", detail_name, f"{idx:03}_{phase['name']}", value_carbon_ug, 'TOTAL', None, None, sampling_rate_avg, sampling_rate_max, sampling_rate_95p, 'ug')) + + if '[' not in phase['name'] and metric.endswith('_machine'): # only for runtime sub phases to not double count ... needs refactor ... 
see comment at beginning of file + software_carbon_intensity_global['machine_carbon_ug'] = software_carbon_intensity_global.get('machine_carbon_ug', 0) + value_carbon_ug + except (CarbonIntensityServiceError, CarbonIntensityDataError, ValueError) as e: + error_helpers.log_error(f"Failed to calculate carbon intensity for energy metric: {e}", run_id=run_id) + # Continue processing without carbon calculation if metric.endswith('_machine'): @@ -262,7 +344,11 @@ def build_and_store_phase_stats(run_id, sci=None): # after going through detail metrics, create cumulated ones if network_bytes_total: - if sci.get('N', None) is not None and sci.get('I', None) is not None: + # Check if we can calculate network energy and carbon + has_network_factor = sci.get('N', None) is not None + has_carbon_intensity = (use_dynamic_carbon_intensity and carbon_intensity_data) or sci.get('I', None) is not None + + if has_network_factor and has_carbon_intensity: # build the network energy by using a formula: https://www.green-coding.io/co2-formulas/ # pylint: disable=invalid-name network_io_in_kWh = Decimal(sum(network_bytes_total)) / 1_000_000_000 * Decimal(sci['N']) @@ -273,9 +359,21 @@ def build_and_store_phase_stats(run_id, sci=None): network_io_power_in_mW = (network_io_in_kWh * Decimal('3600000') / Decimal(duration_in_s) * Decimal('1000')) csv_buffer.write(generate_csv_line(run_id, 'network_power_formula_global', '[FORMULA]', f"{idx:03}_{phase['name']}", network_io_power_in_mW, 'TOTAL', None, None, None, None, None, 'mW')) - # co2 calculations - network_io_carbon_in_ug = network_io_in_kWh * Decimal(sci['I']) * 1_000_000 - csv_buffer.write(generate_csv_line(run_id, 'network_carbon_formula_global', '[FORMULA]', f"{idx:03}_{phase['name']}", network_io_carbon_in_ug, 'TOTAL', None, None, None, None, None, 'ug')) + # co2 calculations (static or dynamic) + try: + if use_dynamic_carbon_intensity and carbon_intensity_data: + # Dynamic carbon intensity for network + phase_midpoint_us = 
(phase['start'] + phase['end']) // 2 + carbon_intensity_value = Decimal(get_carbon_intensity_for_timestamp(phase_midpoint_us, sci, carbon_intensity_data)) + else: + # Static carbon intensity + carbon_intensity_value = Decimal(sci['I']) + + network_io_carbon_in_ug = network_io_in_kWh * carbon_intensity_value * 1_000_000 + csv_buffer.write(generate_csv_line(run_id, 'network_carbon_formula_global', '[FORMULA]', f"{idx:03}_{phase['name']}", network_io_carbon_in_ug, 'TOTAL', None, None, None, None, None, 'ug')) + except Exception as e: # pylint: disable=broad-except + error_helpers.log_error(f"Failed to calculate network carbon intensity: {e}", run_id=run_id) + network_io_carbon_in_ug = 0 else: error_helpers.log_error('Cannot calculate the total network energy consumption. SCI values I and N are missing in the config.', run_id=run_id) network_io_carbon_in_ug = 0 diff --git a/lib/scenario_runner.py b/lib/scenario_runner.py index d5d21d661..39aca26c8 100644 --- a/lib/scenario_runner.py +++ b/lib/scenario_runner.py @@ -2079,7 +2079,13 @@ def _process_phase_stats(self): # get all the metrics from the measurements table grouped by metric # loop over them issuing separate queries to the DB from tools.phase_stats import build_and_store_phase_stats # pylint: disable=import-outside-toplevel - build_and_store_phase_stats(self._run_id, self._sci) + + # Get measurement_config from database to support dynamic carbon intensity + measurement_config_query = "SELECT measurement_config FROM runs WHERE id = %s" + measurement_config_result = DB().fetch_one(measurement_config_query, (self._run_id,)) + measurement_config = measurement_config_result[0] if measurement_config_result else {} + + build_and_store_phase_stats(self._run_id, self._sci, measurement_config) def _post_process(self, index): try: diff --git a/lib/user.py b/lib/user.py index 28fc3ec27..4e98205b9 100644 --- a/lib/user.py +++ b/lib/user.py @@ -1,6 +1,5 @@ import json import hashlib -import uuid from lib.secure_variable 
import SecureVariable from lib.db import DB @@ -65,7 +64,7 @@ def change_setting(self, name, value): raise ValueError(f"You cannot change this setting: {name}") match name: - case 'measurement.dev_no_optimizations' | 'measurement.dev_no_sleeps' | 'measurement.phase_padding' | 'measurement.skip_volume_inspect': + case 'measurement.dev_no_optimizations' | 'measurement.dev_no_sleeps' | 'measurement.phase_padding' | 'measurement.skip_volume_inspect' | 'measurement.use_dynamic_carbon_intensity': if not isinstance(value, bool): raise ValueError(f'The setting {name} must be boolean') case 'measurement.flow_process_duration' | 'measurement.total_duration': @@ -86,6 +85,10 @@ def change_setting(self, name, value): if not (isinstance(value, int) or value.isdigit()) or int(value) <= 0 or int(value) > 86400: raise ValueError(f'The setting {name} must be between 1 and 86400') value = int(value) + case 'measurement.carbon_intensity_location': + if not isinstance(value, str) or not value.strip(): + raise ValueError(f'The setting {name} must be a non-empty string (electricity grid zone code)') + value = value.strip() case _: raise ValueError(f'The setting {name} is unknown') diff --git a/migrations/2025_09_22_dynamic_carbon_intensity.sql b/migrations/2025_09_22_dynamic_carbon_intensity.sql new file mode 100644 index 000000000..772306cb6 --- /dev/null +++ b/migrations/2025_09_22_dynamic_carbon_intensity.sql @@ -0,0 +1,34 @@ +-- Migration: Add dynamic carbon intensity capabilities to existing users +-- Date: 2025-09-22 +-- Description: Adds measurement.use_dynamic_carbon_intensity and measurement.carbon_intensity_location +-- to user updateable_settings and sets default values + +-- Add new settings to updateable_settings for all users (excluding system user 0) +UPDATE users +SET capabilities = jsonb_set( + capabilities, + '{user,updateable_settings}', + ( + COALESCE(capabilities->'user'->'updateable_settings', '[]'::jsonb) || + '["measurement.use_dynamic_carbon_intensity", 
"measurement.carbon_intensity_location"]'::jsonb + ), + true +) WHERE id != 0; + +-- Set default value for use_dynamic_carbon_intensity (disabled by default) +UPDATE users +SET capabilities = jsonb_set( + capabilities, + '{measurement,use_dynamic_carbon_intensity}', + 'false', + true +) WHERE id != 0; + +-- Set default value for carbon_intensity_location (empty string, will be validated when dynamic is enabled) +UPDATE users +SET capabilities = jsonb_set( + capabilities, + '{measurement,carbon_intensity_location}', + '""', + true +) WHERE id != 0; diff --git a/tests/lib/test_carbon_intensity.py b/tests/lib/test_carbon_intensity.py new file mode 100644 index 000000000..fac20ccb7 --- /dev/null +++ b/tests/lib/test_carbon_intensity.py @@ -0,0 +1,305 @@ +import calendar +import os +import pytest +import requests +from unittest.mock import Mock, patch +from datetime import datetime +from decimal import Decimal + +GMT_ROOT_DIR = os.path.dirname(os.path.abspath(__file__))+'/../../' + +from tests import test_functions as Tests +from lib.db import DB +from lib.carbon_intensity import ( + CarbonIntensityClient, + CarbonIntensityServiceError, + CarbonIntensityDataError, + microseconds_to_iso8601, + interpolate_carbon_intensity +) +from lib.phase_stats import build_and_store_phase_stats, get_carbon_intensity_for_timestamp + + +class TestCarbonIntensityClient: + + @patch('lib.carbon_intensity.GlobalConfig') + def test_config_based_initialization(self, mock_global_config): + # Test that client reads URL from config when not provided + mock_config = Mock() + mock_config.config = { + 'elephant': { + 'protocol': 'https', + 'host': 'example.com', + 'port': 9000 + } + } + mock_global_config.return_value = mock_config + + client = CarbonIntensityClient() + assert client.base_url == "https://example.com:9000" + + @patch('lib.carbon_intensity.GlobalConfig') + def test_config_based_initialization_defaults(self, mock_global_config): + # Test that client uses defaults when config is empty 
+ mock_config = Mock() + mock_config.config = {} + mock_global_config.return_value = mock_config + + client = CarbonIntensityClient() + assert client.base_url == "http://localhost:8000" + + def test_microseconds_to_iso8601(self): + # Test timestamp conversion + timestamp_us = 1727003400000000 # Some timestamp + result = microseconds_to_iso8601(timestamp_us) + # Just verify format is correct ISO 8601 + assert len(result) == 20 + assert result.endswith('Z') + assert 'T' in result + # Verify it's a valid timestamp that can be parsed back + parsed = datetime.fromisoformat(result.replace('Z', '+00:00')) + assert parsed is not None + + def test_interpolate_carbon_intensity_single_point(self): + # Test with single data point + carbon_data = [ + {"location": "DE", "time": "2024-09-22T10:00:00Z", "carbon_intensity": 185.0} + ] + timestamp_us = 1727003400000000 # 2024-09-22T10:50:00Z + result = interpolate_carbon_intensity(timestamp_us, carbon_data) + assert result == 185.0 + + def test_interpolate_carbon_intensity_between_points(self): + # Test interpolation between two points + carbon_data = [ + {"location": "DE", "time": "2024-09-22T10:00:00Z", "carbon_intensity": 180.0}, + {"location": "DE", "time": "2024-09-22T11:00:00Z", "carbon_intensity": 200.0} + ] + # Calculate correct timestamp for 10:30:00 UTC + mid_time = datetime(2024, 9, 22, 10, 30, 0) # UTC time + timestamp_us = int(calendar.timegm(mid_time.timetuple()) * 1_000_000) + + result = interpolate_carbon_intensity(timestamp_us, carbon_data) + assert result == 190.0 # Linear interpolation: 180 + (200-180) * 0.5 + + def test_interpolate_carbon_intensity_before_range(self): + # Test with timestamp before data range + carbon_data = [ + {"location": "DE", "time": "2024-09-22T11:00:00Z", "carbon_intensity": 185.0} + ] + timestamp_us = 1727001600000000 # 2024-09-22T10:20:00Z (before 11:00) + result = interpolate_carbon_intensity(timestamp_us, carbon_data) + assert result == 185.0 # Should return first value + + def 
test_interpolate_carbon_intensity_after_range(self): + # Test with timestamp after data range + carbon_data = [ + {"location": "DE", "time": "2024-09-22T10:00:00Z", "carbon_intensity": 185.0} + ] + timestamp_us = 1727007000000000 # 2024-09-22T11:50:00Z (after 10:00) + result = interpolate_carbon_intensity(timestamp_us, carbon_data) + assert result == 185.0 # Should return last value + + def test_interpolate_carbon_intensity_empty_data(self): + # Test with empty data + with pytest.raises(ValueError, match="No carbon intensity data available"): + interpolate_carbon_intensity(1727003400000000, []) + + @patch('lib.carbon_intensity.requests.get') + def test_carbon_intensity_client_success(self, mock_get): + # Test successful API call + mock_response = Mock() + mock_response.raise_for_status.return_value = None + mock_response.json.return_value = [ + {"location": "DE", "time": "2024-09-22T10:00:00Z", "carbon_intensity": 185.0}, + {"location": "DE", "time": "2024-09-22T11:00:00Z", "carbon_intensity": 183.0} + ] + mock_get.return_value = mock_response + + client = CarbonIntensityClient("http://localhost:8000") + result = client.get_carbon_intensity_history("DE", "2024-09-22T10:50:00Z", "2024-09-22T10:55:00Z") + + assert len(result) == 2 + assert result[0]['carbon_intensity'] == 185.0 + assert result[1]['carbon_intensity'] == 183.0 + + mock_get.assert_called_once_with( + "http://localhost:8000/carbon-intensity/history", + params={ + 'location': 'DE', + 'startTime': '2024-09-22T10:50:00Z', + 'endTime': '2024-09-22T10:55:00Z', + 'interpolate': 'true' + }, + timeout=30 + ) + + @patch('lib.carbon_intensity.requests.get') + def test_carbon_intensity_client_network_error(self, mock_get): + # Test network error handling + mock_get.side_effect = requests.exceptions.RequestException("Network error") + + client = CarbonIntensityClient("http://localhost:8000") + with pytest.raises(CarbonIntensityServiceError, match="Failed to fetch carbon intensity data"): + 
client.get_carbon_intensity_history("DE", "2024-09-22T10:50:00Z", "2024-09-22T10:55:00Z") + + @patch('lib.carbon_intensity.requests.get') + def test_carbon_intensity_client_invalid_response(self, mock_get): + # Test invalid response handling + mock_response = Mock() + mock_response.raise_for_status.return_value = None + mock_response.json.return_value = {"invalid": "response"} + mock_get.return_value = mock_response + + client = CarbonIntensityClient("http://localhost:8000") + with pytest.raises(CarbonIntensityDataError, match="Invalid response from carbon intensity service"): + client.get_carbon_intensity_history("DE", "2024-09-22T10:50:00Z", "2024-09-22T10:55:00Z") + + +class TestGetCarbonIntensityForTimestamp: + + def test_static_mode_with_value(self): + # Test static mode with I value + sci = {'I': 334} + result = get_carbon_intensity_for_timestamp(1727003400000000, sci, None) + assert result == Decimal('334') + + def test_static_mode_missing_value(self): + # Test static mode without I value + sci = {} + with pytest.raises(ValueError, match="No carbon intensity value available"): + get_carbon_intensity_for_timestamp(1727003400000000, sci, None) + + def test_dynamic_mode(self): + # Test dynamic mode with carbon data + sci = {'I': 334} # Should be ignored in dynamic mode + carbon_data = [ + {"location": "DE", "time": "2024-09-22T10:00:00Z", "carbon_intensity": 185.0} + ] + result = get_carbon_intensity_for_timestamp(1727003400000000, sci, carbon_data) + assert result == 185.0 + + +class TestDynamicCarbonIntensityPhaseStats: + + @patch('lib.phase_stats.CarbonIntensityClient') + def test_dynamic_carbon_intensity_integration(self, mock_client_class): + # Test full integration with dynamic carbon intensity + run_id = Tests.insert_run() + Tests.import_machine_energy(run_id) + + # Add measurement start/end times to the run + DB().query( + "UPDATE runs SET start_measurement = %s, end_measurement = %s WHERE id = %s", + (Tests.TEST_MEASUREMENT_START_TIME, 
Tests.TEST_MEASUREMENT_END_TIME, run_id) + ) + + # Mock the carbon intensity client + mock_client = Mock() + mock_client.get_carbon_intensity_history.return_value = [ + {"location": "DE", "time": "2024-09-22T10:00:00Z", "carbon_intensity": 200.0}, + {"location": "DE", "time": "2024-09-22T11:00:00Z", "carbon_intensity": 180.0} + ] + mock_client_class.return_value = mock_client + + # Test configuration with dynamic carbon intensity enabled + sci = {'I': 334, 'N': 0.04106063} # Static I should be ignored + measurement_config = { + 'capabilities': { + 'measurement': { + 'use_dynamic_carbon_intensity': True, + 'carbon_intensity_location': 'DE' + } + } + } + + build_and_store_phase_stats(run_id, sci, measurement_config) + + # Verify the carbon intensity client was called + mock_client.get_carbon_intensity_history.assert_called_once() + args = mock_client.get_carbon_intensity_history.call_args[0] + assert args[0] == 'DE' # location + # args[1] and args[2] are start/end times in ISO format + + # Check that carbon stats were generated + carbon_data = DB().fetch_all( + 'SELECT metric, value FROM phase_stats WHERE metric LIKE %s AND phase = %s', + params=('%carbon%', '004_[RUNTIME]'), + fetch_mode='dict' + ) + + assert len(carbon_data) > 0 + # Should have carbon data calculated with dynamic intensity (not static 334) + + def test_static_carbon_intensity_fallback(self): + # Test fallback to static carbon intensity when dynamic is disabled + run_id = Tests.insert_run() + Tests.import_machine_energy(run_id) + + sci = {'I': 334} + measurement_config = { + 'capabilities': { + 'measurement': { + 'use_dynamic_carbon_intensity': False + } + } + } + + build_and_store_phase_stats(run_id, sci, measurement_config) + + # Check that carbon stats were generated with static intensity + carbon_data = DB().fetch_all( + 'SELECT metric, value FROM phase_stats WHERE metric LIKE %s AND phase = %s', + params=('%carbon%', '004_[RUNTIME]'), + fetch_mode='dict' + ) + + assert len(carbon_data) > 0 + + 
def test_missing_location_error(self): + # Test error when location is missing for dynamic mode + run_id = Tests.insert_run() + Tests.import_machine_energy(run_id) + + sci = {'I': 334} + measurement_config = { + 'capabilities': { + 'measurement': { + 'use_dynamic_carbon_intensity': True + # Missing carbon_intensity_location + } + } + } + + with pytest.raises(ValueError, match="carbon_intensity_location is required"): + build_and_store_phase_stats(run_id, sci, measurement_config) + + @patch('lib.phase_stats.CarbonIntensityClient') + def test_service_error_propagation(self, mock_client_class): + # Test that service errors are properly propagated + run_id = Tests.insert_run() + Tests.import_machine_energy(run_id) + + # Add measurement start/end times to the run + DB().query( + "UPDATE runs SET start_measurement = %s, end_measurement = %s WHERE id = %s", + (Tests.TEST_MEASUREMENT_START_TIME, Tests.TEST_MEASUREMENT_END_TIME, run_id) + ) + + # Mock the client to raise an exception + mock_client = Mock() + mock_client.get_carbon_intensity_history.side_effect = CarbonIntensityServiceError("Service unavailable") + mock_client_class.return_value = mock_client + + sci = {'I': 334} + measurement_config = { + 'capabilities': { + 'measurement': { + 'use_dynamic_carbon_intensity': True, + 'carbon_intensity_location': 'DE' + } + } + } + + with pytest.raises(CarbonIntensityServiceError, match="Service unavailable"): + build_and_store_phase_stats(run_id, sci, measurement_config) diff --git a/tools/phase_stats.py b/tools/phase_stats.py index 30b135adf..658f778ac 100644 --- a/tools/phase_stats.py +++ b/tools/phase_stats.py @@ -26,4 +26,4 @@ ''' data = DB().fetch_one(query, params=(args.run_id, ), fetch_mode='dict') - build_and_store_phase_stats(args.run_id, data['measurement_config']['sci']) + build_and_store_phase_stats(args.run_id, data['measurement_config']['sci'], data['measurement_config']) From b793d97b2a9e0f57c4c06709cd35097f01f1d059 Mon Sep 17 00:00:00 2001 From: David Kopp 
Date: Tue, 23 Sep 2025 17:24:56 +0200 Subject: [PATCH 02/27] Improve dynamic grid intensity implementation --- config.yml.example | 5 - docker/structure.sql | 6 +- frontend/js/helpers/config.js.example | 10 + frontend/js/settings.js | 13 +- frontend/settings.html | 14 + lib/carbon_intensity.py | 223 +++++++++++++++- lib/job/run.py | 2 + lib/phase_stats.py | 132 +++------- lib/scenario_runner.py | 42 ++- lib/user.py | 4 +- .../2025_09_22_dynamic_carbon_intensity.sql | 18 +- runner.py | 4 + tests/frontend/test_frontend.py | 12 + tests/lib/test_carbon_intensity.py | 245 ++++++++---------- tests/lib/test_phase_stats.py | 4 + tests/test_functions.py | 9 + tools/phase_stats.py | 2 +- 17 files changed, 476 insertions(+), 269 deletions(-) diff --git a/config.yml.example b/config.yml.example index 90637ee41..1bcdda8ac 100644 --- a/config.yml.example +++ b/config.yml.example @@ -256,11 +256,6 @@ sci: # ignore: # - example_optimization_test -# In order to get the carbon intensity we use electricity maps which requires a token. -# You can get this under https://api-portal.electricitymaps.com/ -# This is a free service please note that you need to pay if you want to use this commercially! -#electricity_maps_token: '123' - # Modules API / Frontend # GMT can selectively activate some API and frontend components. This is asked in the install process and should NOT # only be changed here as files in different locations are changed too. Please re-run the install process. 
diff --git a/docker/structure.sql b/docker/structure.sql index 9ebdb71a9..dd05c9d89 100644 --- a/docker/structure.sql +++ b/docker/structure.sql @@ -55,8 +55,8 @@ VALUES ( "measurement.phase_transition_time", "measurement.wait_time_dependencies", "measurement.skip_volume_inspect", - "measurement.use_dynamic_carbon_intensity", - "measurement.carbon_intensity_location" + "measurement.use_dynamic_grid_carbon_intensity", + "measurement.grid_carbon_intensity_location" ] }, "api": { @@ -117,6 +117,8 @@ VALUES ( }, "machines": [1], "measurement": { + "use_dynamic_grid_carbon_intensity": false, + "grid_carbon_intensity_location": "DE", "phase_padding": true, "quotas": {}, "dev_no_sleeps": false, diff --git a/frontend/js/helpers/config.js.example b/frontend/js/helpers/config.js.example index b62a99280..df9976a95 100644 --- a/frontend/js/helpers/config.js.example +++ b/frontend/js/helpers/config.js.example @@ -621,5 +621,15 @@ METRIC_MAPPINGS = { "clean_name": "Total System Disk Writes", "source": "cgroup", "explanation": "Total data written to disk for the system via cgroup" + }, + "grid_carbon_intensity_static": { + "clean_name": "Grid Carbon Intensity", + "source": "Config (Static)", + "explanation": "Static grid carbon intensity used to calculate the carbon emissions" + }, + "grid_carbon_intensity_dynamic": { + "clean_name": "Grid Carbon Intensity", + "source": "External Provider (Dynamic)", + "explanation": "Dynamic grid carbon intensity during the run retrieved from external carbon intensity provider" } } // PLEASE DO NOT REMOVE THIS COMMENT -- END METRIC_MAPPINGS diff --git a/frontend/js/settings.js b/frontend/js/settings.js index 0bc8c69ed..81a605c3c 100644 --- a/frontend/js/settings.js +++ b/frontend/js/settings.js @@ -23,10 +23,15 @@ const getSettings = async () => { try { const data = await makeAPICall('/v1/user/settings'); - if (data?.data?._capabilities?.measurement?.dev_no_optimizations === true) 
document.querySelector('#measurement-dev-no-optimizations').checked = true; - if (data?.data?._capabilities?.measurement?.dev_no_sleeps === true) document.querySelector('#measurement-dev-no-sleeps').checked = true; - if (data?.data?._capabilities?.measurement?.phase_padding === true) document.querySelector('#measurement-phase-padding').checked = true; - if (data?.data?._capabilities?.measurement?.skip_volume_inspect === true) document.querySelector('#measurement-skip-volume-inspect').checked = true; + // Checkboxes + document.querySelector('#measurement-dev-no-optimizations').checked = data?.data?._capabilities?.measurement?.dev_no_optimizations === true; + document.querySelector('#measurement-dev-no-sleeps').checked = data?.data?._capabilities?.measurement?.dev_no_sleeps === true; + document.querySelector('#measurement-phase-padding').checked = data?.data?._capabilities?.measurement?.phase_padding === true; + document.querySelector('#measurement-skip-volume-inspect').checked = data?.data?._capabilities?.measurement?.skip_volume_inspect === true; + document.querySelector('#measurement-use-dynamic-grid-carbon-intensity').checked = data?.data?._capabilities?.measurement?.use_dynamic_grid_carbon_intensity === true; + + // Text + document.querySelector('#measurement-grid-carbon-intensity-location').value = data?.data?._capabilities?.measurement?.grid_carbon_intensity_location; document.querySelector('#measurement-flow-process-duration').value = data?.data?._capabilities?.measurement?.flow_process_duration; document.querySelector('#measurement-total-duration').value = data?.data?._capabilities?.measurement?.total_duration; $('#measurement-disabled-metric-providers').dropdown('set exactly', data?.data?._capabilities?.measurement?.disabled_metric_providers); diff --git a/frontend/settings.html b/frontend/settings.html index 60fd3e9f4..4a680bc86 100644 --- a/frontend/settings.html +++ b/frontend/settings.html @@ -95,6 +95,20 @@

Determines if the live dynamic grid carbon intensity is used for the calculation of carbon emissions or a static value + + + + + + Grid location
+ Only relevant if dynamic grid carbon intensity is used. Location is relevant to get the live grid carbon intensity. Value needs to be a valid grid zone code. See
ElectricityMaps for valid zone codes. + + + + System check threshold
Can be 1=INFO, 2=WARN or 3=ERROR. When set on 3 runs will fail only on erros, when 2 then also on warnings and 1 also on pure info statements. diff --git a/lib/carbon_intensity.py b/lib/carbon_intensity.py index ec8bec3d5..e0a83b4e0 100644 --- a/lib/carbon_intensity.py +++ b/lib/carbon_intensity.py @@ -2,10 +2,12 @@ # -*- coding: utf-8 -*- import requests -from datetime import datetime +from datetime import datetime, timezone from typing import List, Dict, Any +from io import StringIO from lib import error_helpers from lib.global_config import GlobalConfig +from lib.db import DB class CarbonIntensityServiceError(Exception): @@ -72,26 +74,37 @@ def get_carbon_intensity_history(self, location: str, start_time: str, end_time: return data except requests.exceptions.RequestException as e: - error_helpers.log_error(f"Carbon intensity service request failed: {e}") raise CarbonIntensityServiceError(f"Failed to fetch carbon intensity data: {e}") from e except (ValueError, KeyError) as e: - error_helpers.log_error(f"Invalid carbon intensity service response: {e}") raise CarbonIntensityDataError(f"Invalid response from carbon intensity service: {e}") from e -def microseconds_to_iso8601(timestamp_us: int) -> str: + +def get_carbon_intensity_data_for_run(run_id): """ - Convert microsecond timestamp to ISO 8601 format. + Get carbon intensity data for a run, automatically detecting dynamic vs static. 
Args: - timestamp_us: Timestamp in microseconds since epoch + run_id: UUID of the run Returns: - ISO 8601 formatted timestamp string (e.g., "2025-09-22T10:50:00Z") + List of carbon intensity data points or None if no data found """ - timestamp_seconds = timestamp_us / 1_000_000 - dt = datetime.utcfromtimestamp(timestamp_seconds) - return dt.strftime('%Y-%m-%dT%H:%M:%SZ') + # Auto-detect what carbon intensity data is available for this run + # Check for both static and dynamic carbon intensity + query = """ + SELECT metric, detail_name + FROM measurement_metrics + WHERE run_id = %s AND metric IN ('grid_carbon_intensity_static', 'grid_carbon_intensity_dynamic') + LIMIT 1 + """ + result = DB().fetch_one(query, (run_id,)) + + if result: + metric, detail_name = result + return _get_stored_carbon_intensity_data(run_id, metric, detail_name) + + return None def interpolate_carbon_intensity(timestamp_us: int, carbon_data: List[Dict[str, Any]]) -> float: @@ -111,7 +124,7 @@ def interpolate_carbon_intensity(timestamp_us: int, carbon_data: List[Dict[str, if not carbon_data: raise ValueError("No carbon intensity data available for interpolation") - target_time = datetime.utcfromtimestamp(timestamp_us / 1_000_000).replace(tzinfo=None) + target_time = datetime.fromtimestamp(timestamp_us / 1_000_000, timezone.utc).replace(tzinfo=None) # Convert carbon data times to datetime objects for comparison data_points = [] @@ -145,3 +158,191 @@ def interpolate_carbon_intensity(timestamp_us: int, carbon_data: List[Dict[str, return value1 + (value2 - value1) * ratio raise ValueError(f"Could not interpolate carbon intensity for timestamp {target_time}") + + +def _microseconds_to_iso8601(timestamp_us: int) -> str: + """ + Convert microsecond timestamp to ISO 8601 format. 
+ + Args: + timestamp_us: Timestamp in microseconds since epoch + + Returns: + ISO 8601 formatted timestamp string (e.g., "2025-09-22T10:50:00Z") + """ + timestamp_seconds = timestamp_us / 1_000_000 + dt = datetime.fromtimestamp(timestamp_seconds, timezone.utc) + return dt.strftime('%Y-%m-%dT%H:%M:%SZ') + + +def store_static_carbon_intensity(run_id, static_value): + """ + Store static carbon intensity value as a constant time series. + + Args: + run_id: UUID of the run + static_value: Static carbon intensity value from config (gCO2e/kWh) + """ + # Get run start and end times + run_query = """ + SELECT start_measurement, end_measurement + FROM runs + WHERE id = %s + """ + run_data = DB().fetch_one(run_query, (run_id,)) + if not run_data or not run_data[0] or not run_data[1]: + error_helpers.log_error(f"Run {run_id} does not have valid start_measurement and end_measurement times", run_id=run_id) + return + + start_time_us, end_time_us = run_data + + # Create measurement_metric entry for static carbon intensity + metric_name = 'grid_carbon_intensity_static' + detail_name = '[CONFIG]' + unit = 'gCO2e/kWh' + sampling_rate_configured = 0 # Static value has no sampling rate + + measurement_metric_id = DB().fetch_one(''' + INSERT INTO measurement_metrics (run_id, metric, detail_name, unit, sampling_rate_configured) + VALUES (%s, %s, %s, %s, %s) + RETURNING id + ''', params=(run_id, metric_name, detail_name, unit, sampling_rate_configured))[0] + + # Convert static value to integer (multiply by 1000 for precision) + carbon_intensity_value = int(float(static_value) * 1000) + + # Store as constant time series: same value at start and end times + values_data = [ + f"{measurement_metric_id},{carbon_intensity_value},{start_time_us}", + f"{measurement_metric_id},{carbon_intensity_value},{end_time_us}" + ] + + csv_data = '\n'.join(values_data) + f = StringIO(csv_data) + DB().copy_from( + file=f, + table='measurement_values', + columns=['measurement_metric_id', 'value', 'time'], + 
sep=',' + ) + f.close() + + print(f"Stored static carbon intensity value {static_value} gCO2e/kWh as constant time series") + + +def store_dynamic_carbon_intensity(run_id, grid_carbon_intensity_location): + """ + Store dynamic carbon intensity data from API as time series. + + Args: + run_id: UUID of the run + grid_carbon_intensity_location: Location code (e.g., "DE", "ES-IB-MA") + """ + # Get run start and end times + run_query = """ + SELECT start_measurement, end_measurement + FROM runs + WHERE id = %s + """ + run_data = DB().fetch_one(run_query, (run_id,)) + if not run_data or not run_data[0] or not run_data[1]: + error_helpers.log_error(f"Run {run_id} does not have valid start_measurement and end_measurement times", run_id=run_id) + return + + start_time_us, end_time_us = run_data + start_time_iso = _microseconds_to_iso8601(start_time_us) + end_time_iso = _microseconds_to_iso8601(end_time_us) + + # Fetch carbon intensity data + carbon_client = CarbonIntensityClient() + carbon_intensity_data = carbon_client.get_carbon_intensity_history( + grid_carbon_intensity_location, start_time_iso, end_time_iso + ) + + if not carbon_intensity_data: + error_helpers.log_error("No carbon intensity data received from service", run_id=run_id) + return + + # Create measurement_metric entry for dynamic carbon intensity + metric_name = 'grid_carbon_intensity_dynamic' + detail_name = grid_carbon_intensity_location + unit = 'gCO2e/kWh' + # Estimate sampling rate as 5 minutes (300000ms) based on typical grid data frequency + sampling_rate_configured = 300000 + + measurement_metric_id = DB().fetch_one(''' + INSERT INTO measurement_metrics (run_id, metric, detail_name, unit, sampling_rate_configured) + VALUES (%s, %s, %s, %s, %s) + RETURNING id + ''', params=(run_id, metric_name, detail_name, unit, sampling_rate_configured))[0] + + # Prepare measurement values for bulk insert + values_data = [] + for data_point in carbon_intensity_data: + # Convert ISO timestamp to microseconds + 
iso_time = data_point['time'] + dt = datetime.fromisoformat(iso_time.replace('Z', '+00:00')) + timestamp_us = int(dt.timestamp() * 1_000_000) + + # Convert carbon intensity to integer (multiply by 1000 for precision) + carbon_intensity_value = int(float(data_point['carbon_intensity']) * 1000) + + values_data.append(f"{measurement_metric_id},{carbon_intensity_value},{timestamp_us}") + + if values_data: + # Bulk insert measurement values + csv_data = '\n'.join(values_data) + f = StringIO(csv_data) + DB().copy_from( + file=f, + table='measurement_values', + columns=['measurement_metric_id', 'value', 'time'], + sep=',' + ) + f.close() + + print(f"Stored {len(values_data)} dynamic carbon intensity data points for location {grid_carbon_intensity_location}") + + +def _get_stored_carbon_intensity_data(run_id, metric_name, detail_name): + """ + Retrieve stored carbon intensity data from measurement_metrics for a run. + + Args: + run_id: UUID of the run + metric_name: Either 'grid_carbon_intensity_static' or 'grid_carbon_intensity_dynamic' + detail_name: '[CONFIG]' for static, location code for dynamic (e.g., "DE", "ES-IB-MA") + + Returns: + List of carbon intensity data points or None if no data found + """ + query = """ + SELECT mv.time, mv.value + FROM measurement_values mv + JOIN measurement_metrics mm ON mv.measurement_metric_id = mm.id + WHERE mm.run_id = %s + AND mm.metric = %s + AND mm.detail_name = %s + ORDER BY mv.time ASC + """ + results = DB().fetch_all(query, (run_id, metric_name, detail_name)) + + if not results: + return None + + # Convert stored data back to the format expected by interpolate_carbon_intensity + carbon_data = [] + for timestamp_us, value_int in results: + # Convert back from integer storage (divide by 1000 to restore decimal precision) + carbon_intensity = float(value_int) / 1000.0 + # Convert timestamp to ISO format for consistency + dt = datetime.fromtimestamp(timestamp_us / 1_000_000, timezone.utc) + iso_time = 
dt.strftime('%Y-%m-%dT%H:%M:%SZ') + + carbon_data.append({ + 'time': iso_time, + 'carbon_intensity': carbon_intensity, + 'location': detail_name + }) + + return carbon_data diff --git a/lib/job/run.py b/lib/job/run.py index bc9b11183..1399bf08d 100644 --- a/lib/job/run.py +++ b/lib/job/run.py @@ -64,6 +64,8 @@ def _process(self, docker_prune=False, full_docker_prune=False): dev_no_optimizations=user._capabilities['measurement']['dev_no_optimizations'], disabled_metric_providers=user._capabilities['measurement']['disabled_metric_providers'], allowed_run_args=user._capabilities['measurement']['orchestrators']['docker']['allowed_run_args'], # They are specific to the orchestrator. However currently we only have one. As soon as we support more orchestrators we will sub-class Runner with dedicated child classes (DockerRunner, PodmanRunner etc.) + use_dynamic_grid_carbon_intensity=user._capabilities['measurement']['use_dynamic_grid_carbon_intensity'], + grid_carbon_intensity_location=user._capabilities['measurement']['grid_carbon_intensity_location'], ) diff --git a/lib/phase_stats.py b/lib/phase_stats.py index a46515692..76c564ec8 100644 --- a/lib/phase_stats.py +++ b/lib/phase_stats.py @@ -11,11 +11,10 @@ from lib.db import DB from lib import error_helpers from lib.carbon_intensity import ( - CarbonIntensityClient, CarbonIntensityServiceError, CarbonIntensityDataError, - microseconds_to_iso8601, - interpolate_carbon_intensity + interpolate_carbon_intensity, + get_carbon_intensity_data_for_run ) def reconstruct_runtime_phase(run_id, runtime_phase_idx): @@ -73,75 +72,25 @@ def generate_csv_line(run_id, metric, detail_name, phase_name, value, value_type # else '' resolves to NULL return f"{run_id},{metric},{detail_name},{phase_name},{round(value)},{value_type},{round(max_value) if max_value is not None else ''},{round(min_value) if min_value is not None else ''},{round(sampling_rate_avg) if sampling_rate_avg is not None else ''},{round(sampling_rate_max) if 
sampling_rate_max is not None else ''},{round(sampling_rate_95p) if sampling_rate_95p is not None else ''},{unit},NOW()\n" -def get_carbon_intensity_for_timestamp(timestamp_us, sci, carbon_intensity_data): - """ - Get carbon intensity value for a specific timestamp. - Uses dynamic data if available, otherwise falls back to static value. - - Args: - timestamp_us: Timestamp in microseconds - sci: SCI configuration dict with static 'I' value - carbon_intensity_data: Dynamic carbon intensity data (None for static mode) - - Returns: - Carbon intensity value in gCO2e/kWh - Raises: - ValueError: If no carbon intensity data is available - """ - if carbon_intensity_data: - # Dynamic mode: interpolate from time series data - return interpolate_carbon_intensity(timestamp_us, carbon_intensity_data) - else: - # Static mode: use configured value - if sci.get('I') is None: - raise ValueError("No carbon intensity value available (static 'I' value missing)") - return Decimal(sci['I']) -def build_and_store_phase_stats(run_id, sci=None, measurement_config=None): +def build_and_store_phase_stats(run_id, sci=None): if not sci: sci = {} - if not measurement_config: - measurement_config = {} software_carbon_intensity_global = {} - # Check for dynamic carbon intensity configuration - capabilities = measurement_config.get('capabilities', {}) - measurement_capabilities = capabilities.get('measurement', {}) - use_dynamic_carbon_intensity = measurement_capabilities.get('use_dynamic_carbon_intensity', False) - carbon_intensity_location = measurement_capabilities.get('carbon_intensity_location') - - # For dynamic carbon intensity, fetch time series data - carbon_intensity_data = None - if use_dynamic_carbon_intensity: - if not carbon_intensity_location: - raise ValueError("carbon_intensity_location is required when use_dynamic_carbon_intensity is True") - - # Get run start and end times - run_query = """ - SELECT start_measurement, end_measurement - FROM runs - WHERE id = %s - """ - run_data = 
DB().fetch_one(run_query, (run_id,)) - if not run_data or not run_data[0] or not run_data[1]: - raise ValueError(f"Run {run_id} does not have valid start_measurement and end_measurement times") - - start_time_us, end_time_us = run_data - start_time_iso = microseconds_to_iso8601(start_time_us) - end_time_iso = microseconds_to_iso8601(end_time_us) - - carbon_client = CarbonIntensityClient() - carbon_intensity_data = carbon_client.get_carbon_intensity_history( - carbon_intensity_location, start_time_iso, end_time_iso - ) + # Load carbon intensity time series data once to use as lookup table during energy metric processing. + # This data is used for interpolation at specific timestamps rather than being aggregated like regular metrics. + # Auto-detects whether dynamic (API) or static (config) carbon intensity data is available. + carbon_intensity_data = get_carbon_intensity_data_for_run(run_id) query = """ SELECT id, metric, unit, detail_name, sampling_rate_configured FROM measurement_metrics WHERE run_id = %s + AND metric NOT IN ('grid_carbon_intensity_static', 'grid_carbon_intensity_dynamic') -- Exclude carbon intensity from phase stats processing ORDER BY metric ASC -- we need this ordering for later, when we read again """ metrics = DB().fetch_all(query, (run_id, )) @@ -181,6 +130,16 @@ def build_and_store_phase_stats(run_id, sci=None, measurement_config=None): cpu_utilization_machine = None network_io_carbon_in_ug = None + # Calculate carbon intensity once per phase for reuse in energy and network calculations + phase_carbon_intensity = None + if carbon_intensity_data: + try: + phase_midpoint_us = (phase['start'] + phase['end']) // 2 + phase_carbon_intensity = Decimal(interpolate_carbon_intensity(phase_midpoint_us, carbon_intensity_data)) + except (CarbonIntensityServiceError, CarbonIntensityDataError, ValueError) as e: + error_helpers.log_error(f"Failed to calculate carbon intensity for phase {phase['name']}: {e}", run_id=run_id) + phase_carbon_intensity = None + 
select_query = """ WITH lag_table as ( SELECT time, value, (time - LAG(time) OVER (ORDER BY time ASC)) AS diff @@ -305,29 +264,15 @@ def build_and_store_phase_stats(run_id, sci=None, measurement_config=None): power_min = (min_value * 10**3) / (duration / value_count) csv_buffer.write(generate_csv_line(run_id, f"{metric.replace('_energy_', '_power_')}", detail_name, f"{idx:03}_{phase['name']}", power_avg, 'MEAN', power_max, power_min, sampling_rate_avg, sampling_rate_max, sampling_rate_95p, 'mW')) - # Calculate carbon values (static or dynamic) - try: - if use_dynamic_carbon_intensity and carbon_intensity_data: - # Dynamic carbon intensity: calculate based on measurement timing - # For simplicity, use phase midpoint timestamp for now - # TODO: Implement proper time-weighted carbon calculation per measurement - phase_midpoint_us = (phase['start'] + phase['end']) // 2 - carbon_intensity_value = Decimal(get_carbon_intensity_for_timestamp(phase_midpoint_us, sci, carbon_intensity_data)) - elif sci.get('I', None) is not None: - # Static carbon intensity - carbon_intensity_value = Decimal(sci['I']) - else: - carbon_intensity_value = None - - if carbon_intensity_value is not None: - value_carbon_ug = (value_sum / 3_600_000) * carbon_intensity_value - csv_buffer.write(generate_csv_line(run_id, f"{metric.replace('_energy_', '_carbon_')}", detail_name, f"{idx:03}_{phase['name']}", value_carbon_ug, 'TOTAL', None, None, sampling_rate_avg, sampling_rate_max, sampling_rate_95p, 'ug')) - - if '[' not in phase['name'] and metric.endswith('_machine'): # only for runtime sub phases to not double count ... needs refactor ... 
see comment at beginning of file - software_carbon_intensity_global['machine_carbon_ug'] = software_carbon_intensity_global.get('machine_carbon_ug', 0) + value_carbon_ug - except (CarbonIntensityServiceError, CarbonIntensityDataError, ValueError) as e: - error_helpers.log_error(f"Failed to calculate carbon intensity for energy metric: {e}", run_id=run_id) - # Continue processing without carbon calculation + # Calculate carbon values using pre-calculated phase specific carbon intensity + if phase_carbon_intensity is not None: + value_carbon_ug = (value_sum / 3_600_000) * phase_carbon_intensity + csv_buffer.write(generate_csv_line(run_id, f"{metric.replace('_energy_', '_carbon_')}", detail_name, f"{idx:03}_{phase['name']}", value_carbon_ug, 'TOTAL', None, None, sampling_rate_avg, sampling_rate_max, sampling_rate_95p, 'ug')) + + if '[' not in phase['name'] and metric.endswith('_machine'): # only for runtime sub phases to not double count ... needs refactor ... see comment at beginning of file + software_carbon_intensity_global['machine_carbon_ug'] = software_carbon_intensity_global.get('machine_carbon_ug', 0) + value_carbon_ug + else: + error_helpers.log_error(f'Cannot calculate carbon intensity for energy metric {metric}". No carbon intensity data found in measurement_metrics. 
Configure either dynamic carbon intensity or static SCI value I.', run_id=run_id) if metric.endswith('_machine'): @@ -346,9 +291,8 @@ def build_and_store_phase_stats(run_id, sci=None, measurement_config=None): if network_bytes_total: # Check if we can calculate network energy and carbon has_network_factor = sci.get('N', None) is not None - has_carbon_intensity = (use_dynamic_carbon_intensity and carbon_intensity_data) or sci.get('I', None) is not None - if has_network_factor and has_carbon_intensity: + if has_network_factor: # build the network energy by using a formula: https://www.green-coding.io/co2-formulas/ # pylint: disable=invalid-name network_io_in_kWh = Decimal(sum(network_bytes_total)) / 1_000_000_000 * Decimal(sci['N']) @@ -359,23 +303,15 @@ def build_and_store_phase_stats(run_id, sci=None, measurement_config=None): network_io_power_in_mW = (network_io_in_kWh * Decimal('3600000') / Decimal(duration_in_s) * Decimal('1000')) csv_buffer.write(generate_csv_line(run_id, 'network_power_formula_global', '[FORMULA]', f"{idx:03}_{phase['name']}", network_io_power_in_mW, 'TOTAL', None, None, None, None, None, 'mW')) - # co2 calculations (static or dynamic) - try: - if use_dynamic_carbon_intensity and carbon_intensity_data: - # Dynamic carbon intensity for network - phase_midpoint_us = (phase['start'] + phase['end']) // 2 - carbon_intensity_value = Decimal(get_carbon_intensity_for_timestamp(phase_midpoint_us, sci, carbon_intensity_data)) - else: - # Static carbon intensity - carbon_intensity_value = Decimal(sci['I']) - - network_io_carbon_in_ug = network_io_in_kWh * carbon_intensity_value * 1_000_000 + # co2 calculations using pre-calculated phase specific carbon intensity + if phase_carbon_intensity is not None: + network_io_carbon_in_ug = network_io_in_kWh * phase_carbon_intensity * 1_000_000 csv_buffer.write(generate_csv_line(run_id, 'network_carbon_formula_global', '[FORMULA]', f"{idx:03}_{phase['name']}", network_io_carbon_in_ug, 'TOTAL', None, None, None, 
None, None, 'ug')) - except Exception as e: # pylint: disable=broad-except - error_helpers.log_error(f"Failed to calculate network carbon intensity: {e}", run_id=run_id) + else: + error_helpers.log_error('Cannot calculate network carbon emissions. No carbon intensity data found in measurement_metrics. Configure either dynamic carbon intensity or static SCI value I.', run_id=run_id) network_io_carbon_in_ug = 0 else: - error_helpers.log_error('Cannot calculate the total network energy consumption. SCI values I and N are missing in the config.', run_id=run_id) + error_helpers.log_error('Cannot calculate the total network energy consumption. SCI value N is missing in the config.', run_id=run_id) network_io_carbon_in_ug = 0 else: network_io_carbon_in_ug = 0 diff --git a/lib/scenario_runner.py b/lib/scenario_runner.py index 39aca26c8..6747325a7 100644 --- a/lib/scenario_runner.py +++ b/lib/scenario_runner.py @@ -75,7 +75,7 @@ def __init__(self, skip_volume_inspect=False, commit_hash_folder=None, usage_scenario_variables=None, phase_padding=True, measurement_system_check_threshold=3, measurement_pre_test_sleep=5, measurement_idle_duration=60, measurement_baseline_duration=60, measurement_post_test_sleep=5, measurement_phase_transition_time=1, - measurement_wait_time_dependencies=60): + measurement_wait_time_dependencies=60, use_dynamic_grid_carbon_intensity=False, grid_carbon_intensity_location=None): config = GlobalConfig().config @@ -143,6 +143,8 @@ def __init__(self, self._measurement_post_test_sleep = measurement_post_test_sleep self._measurement_phase_transition_time = measurement_phase_transition_time self._measurement_wait_time_dependencies = measurement_wait_time_dependencies + self._use_dynamic_grid_carbon_intensity = use_dynamic_grid_carbon_intensity + self._grid_carbon_intensity_location = grid_carbon_intensity_location self._last_measurement_duration = 0 self._phase_padding = phase_padding self._phase_padding_ms = max( @@ -161,6 +163,7 @@ def __init__(self, 
('_save_notes_runner', {}), ('_save_run_logs', {}), ('_save_warnings', {}), + ('_save_grid_carbon_intensity_metrics', {}), ('_process_phase_stats', {}), ) @@ -635,6 +638,8 @@ def _initialize_run(self): measurement_config['disabled_metric_providers'] = self._disabled_metric_providers measurement_config['sci'] = self._sci measurement_config['phase_padding'] = self._phase_padding_ms + measurement_config['use_dynamic_grid_carbon_intensity'] = self._use_dynamic_grid_carbon_intensity + measurement_config['grid_carbon_intensity_location'] = self._grid_carbon_intensity_location # We issue a fetch_one() instead of a query() here, cause we want to get the RUN_ID self._run_id = DB().fetch_one(""" @@ -2067,6 +2072,34 @@ def _patch_phases(self): if self.__phases.get('[RUNTIME]', None) is not None and self.__phases['[RUNTIME]'].get('end', None) is None: self.__phases['[RUNTIME]']['end'] = int(time.time_ns() / 1_000) + def _save_grid_carbon_intensity_metrics(self): + if not self._run_id or self._dev_no_save: + return + + print(TerminalColors.HEADER, '\nStore grid carbon intensity metrics', TerminalColors.ENDC) + + # pylint: disable=import-outside-toplevel + from lib.carbon_intensity import store_static_carbon_intensity, store_dynamic_carbon_intensity + + try: + if self._use_dynamic_grid_carbon_intensity: + # Store dynamic carbon intensity from API + if self._grid_carbon_intensity_location is None: + error_helpers.log_error("Dynamic grid carbon intensity is enabled, but location is missing! 
Carbon footprint calculations will be skipped.", run_id=self._run_id) + return + + store_dynamic_carbon_intensity(self._run_id, self._grid_carbon_intensity_location) + elif self._sci['I']: + # Store static carbon intensity from config as constant time series + store_static_carbon_intensity(self._run_id, self._sci['I']) + else: + # No carbon intensity configured - this will prevent carbon calculations + # This is only acceptable if no energy metrics are being collected + error_helpers.log_error("No grid carbon intensity configured. Carbon footprint calculations will be skipped.", run_id=self._run_id) + + except Exception as e: # pylint: disable=broad-except + error_helpers.log_error(f"Unexpected error storing grid carbon intensity metrics: {e}", run_id=self._run_id) + def _process_phase_stats(self): if not self._run_id or self._dev_no_phase_stats or self._dev_no_save: return @@ -2080,12 +2113,7 @@ def _process_phase_stats(self): # loop over them issuing separate queries to the DB from tools.phase_stats import build_and_store_phase_stats # pylint: disable=import-outside-toplevel - # Get measurement_config from database to support dynamic carbon intensity - measurement_config_query = "SELECT measurement_config FROM runs WHERE id = %s" - measurement_config_result = DB().fetch_one(measurement_config_query, (self._run_id,)) - measurement_config = measurement_config_result[0] if measurement_config_result else {} - - build_and_store_phase_stats(self._run_id, self._sci, measurement_config) + build_and_store_phase_stats(self._run_id, self._sci) def _post_process(self, index): try: diff --git a/lib/user.py b/lib/user.py index 4e98205b9..e4040b221 100644 --- a/lib/user.py +++ b/lib/user.py @@ -64,7 +64,7 @@ def change_setting(self, name, value): raise ValueError(f"You cannot change this setting: {name}") match name: - case 'measurement.dev_no_optimizations' | 'measurement.dev_no_sleeps' | 'measurement.phase_padding' | 'measurement.skip_volume_inspect' | 
'measurement.use_dynamic_carbon_intensity': + case 'measurement.dev_no_optimizations' | 'measurement.dev_no_sleeps' | 'measurement.phase_padding' | 'measurement.skip_volume_inspect' | 'measurement.use_dynamic_grid_carbon_intensity': if not isinstance(value, bool): raise ValueError(f'The setting {name} must be boolean') case 'measurement.flow_process_duration' | 'measurement.total_duration': @@ -85,7 +85,7 @@ def change_setting(self, name, value): if not (isinstance(value, int) or value.isdigit()) or int(value) <= 0 or int(value) > 86400: raise ValueError(f'The setting {name} must be between 1 and 86400') value = int(value) - case 'measurement.carbon_intensity_location': + case 'measurement.grid_carbon_intensity_location': if not isinstance(value, str) or not value.strip(): raise ValueError(f'The setting {name} must be a non-empty string (electricity grid zone code)') value = value.strip() diff --git a/migrations/2025_09_22_dynamic_carbon_intensity.sql b/migrations/2025_09_22_dynamic_carbon_intensity.sql index 772306cb6..b62fe7c5f 100644 --- a/migrations/2025_09_22_dynamic_carbon_intensity.sql +++ b/migrations/2025_09_22_dynamic_carbon_intensity.sql @@ -1,7 +1,7 @@ --- Migration: Add dynamic carbon intensity capabilities to existing users +-- Migration: Add dynamic carbon intensity capabilities and separate metric names -- Date: 2025-09-22 --- Description: Adds measurement.use_dynamic_carbon_intensity and measurement.carbon_intensity_location --- to user updateable_settings and sets default values +-- Description: Adds measurement.use_dynamic_grid_carbon_intensity and measurement.grid_carbon_intensity_location +-- to user updateable_settings and separates static/dynamic carbon intensity metrics -- Add new settings to updateable_settings for all users (excluding system user 0) UPDATE users @@ -10,25 +10,25 @@ SET capabilities = jsonb_set( '{user,updateable_settings}', ( COALESCE(capabilities->'user'->'updateable_settings', '[]'::jsonb) || - 
'["measurement.use_dynamic_carbon_intensity", "measurement.carbon_intensity_location"]'::jsonb + '["measurement.use_dynamic_grid_carbon_intensity", "measurement.grid_carbon_intensity_location"]'::jsonb ), true ) WHERE id != 0; --- Set default value for use_dynamic_carbon_intensity (disabled by default) +-- Set default value for use_dynamic_grid_carbon_intensity (disabled by default) UPDATE users SET capabilities = jsonb_set( capabilities, - '{measurement,use_dynamic_carbon_intensity}', + '{measurement,use_dynamic_grid_carbon_intensity}', 'false', true ) WHERE id != 0; --- Set default value for carbon_intensity_location (empty string, will be validated when dynamic is enabled) +-- Set default value for grid_carbon_intensity_location (default to DE) UPDATE users SET capabilities = jsonb_set( capabilities, - '{measurement,carbon_intensity_location}', - '""', + '{measurement,grid_carbon_intensity_location}', + '"DE"', true ) WHERE id != 0; diff --git a/runner.py b/runner.py index 58ef6aaa9..9da27783a 100755 --- a/runner.py +++ b/runner.py @@ -60,6 +60,8 @@ parser.add_argument('--print-logs', action='store_true', help='Prints the container and process logs to stdout') parser.add_argument('--iterations', type=int, default=1, help='Specify how many times each scenario should be run. Default is 1. With multiple files, all files are processed sequentially, then the entire sequence is repeated N times. Example: with files A.yml, B.yml and --iterations 2, the execution order is A, B, A, B.') + parser.add_argument('--use-dynamic-grid-carbon-intensity', action='store_true', help='Use dynamic grid carbon intensity value instead of static value from config') + parser.add_argument('--grid-carbon-intensity-location', type=str, help='Location of the electricity grid (e.g. 
DE), used for the dynamic carbon intensity calculation') # Measurement settings parser.add_argument('--measurement-system-check-threshold', type=int, default=3, help='System check threshold when to issue warning and when to fail. When set on 3 runs will fail only on erros, when 2 then also on warnings and 1 also on pure info statements. Can be 1=INFO, 2=WARN or 3=ERROR') @@ -158,6 +160,8 @@ docker_prune=args.docker_prune, dev_no_phase_stats=args.dev_no_phase_stats, user_id=args.user_id, skip_volume_inspect=args.skip_volume_inspect, commit_hash_folder=args.commit_hash_folder, usage_scenario_variables=variables_dict, phase_padding=not args.no_phase_padding, + use_dynamic_grid_carbon_intensity=args.use_dynamic_grid_carbon_intensity, + grid_carbon_intensity_location=args.grid_carbon_intensity_location, measurement_system_check_threshold=args.measurement_system_check_threshold, measurement_pre_test_sleep=args.measurement_pre_test_sleep, measurement_idle_duration=args.measurement_idle_duration, diff --git a/tests/frontend/test_frontend.py b/tests/frontend/test_frontend.py index cf0271357..0819dcd16 100644 --- a/tests/frontend/test_frontend.py +++ b/tests/frontend/test_frontend.py @@ -659,6 +659,12 @@ def test_settings_measurement(self): assert user._capabilities['measurement']['skip_volume_inspect'] is False + value = page.locator('#measurement-use-dynamic-grid-carbon-intensity').is_checked() + assert value is user._capabilities['measurement']['use_dynamic_grid_carbon_intensity'] + + value = page.locator('#measurement-grid-carbon-intensity-location').input_value() + assert value.strip() == user._capabilities['measurement']['grid_carbon_intensity_location'] + value = page.locator('#measurement-disabled-metric-providers').input_value() providers = [] if value.strip() == '' else [value.strip()] assert providers == user._capabilities['measurement']['disabled_metric_providers'] @@ -703,6 +709,8 @@ def test_settings_measurement(self): assert value is 
user._capabilities['measurement']['skip_volume_inspect'] + page.locator('#measurement-use-dynamic-grid-carbon-intensity').click() + page.locator('#measurement-grid-carbon-intensity-location').fill('DE') page.locator('#measurement-system-check-threshold').fill('2') page.evaluate('$("#measurement-disabled-metric-providers").dropdown("set exactly", "NetworkConnectionsProxyContainerProvider");') page.locator('#measurement-flow-process-duration').fill('456') @@ -718,6 +726,8 @@ def test_settings_measurement(self): page.locator('#measurement-dev-no-optimizations').click() page.locator('#measurement-skip-volume-inspect').click() + page.locator('#save-measurement-use-dynamic-grid-carbon-intensity').click() + page.locator('#save-measurement-grid-carbon-intensity-location').click() page.locator('#save-measurement-system-check-threshold').click() page.locator('#save-measurement-disabled-metric-providers').click() page.locator('#save-measurement-flow-process-duration').click() @@ -737,6 +747,8 @@ def test_settings_measurement(self): time.sleep(1) user = User(1) + assert user._capabilities['measurement']['use_dynamic_grid_carbon_intensity'] is True + assert user._capabilities['measurement']['grid_carbon_intensity_location'] == 'DE' assert user._capabilities['measurement']['disabled_metric_providers'] == ['NetworkConnectionsProxyContainerProvider'] assert user._capabilities['measurement']['flow_process_duration'] == 456 assert user._capabilities['measurement']['total_duration'] == 123 diff --git a/tests/lib/test_carbon_intensity.py b/tests/lib/test_carbon_intensity.py index fac20ccb7..b8a839af8 100644 --- a/tests/lib/test_carbon_intensity.py +++ b/tests/lib/test_carbon_intensity.py @@ -4,7 +4,6 @@ import requests from unittest.mock import Mock, patch from datetime import datetime -from decimal import Decimal GMT_ROOT_DIR = os.path.dirname(os.path.abspath(__file__))+'/../../' @@ -14,11 +13,12 @@ CarbonIntensityClient, CarbonIntensityServiceError, CarbonIntensityDataError, - 
microseconds_to_iso8601, - interpolate_carbon_intensity + _microseconds_to_iso8601, + interpolate_carbon_intensity, + get_carbon_intensity_data_for_run, + store_static_carbon_intensity, + store_dynamic_carbon_intensity ) -from lib.phase_stats import build_and_store_phase_stats, get_carbon_intensity_for_timestamp - class TestCarbonIntensityClient: @@ -48,10 +48,10 @@ def test_config_based_initialization_defaults(self, mock_global_config): client = CarbonIntensityClient() assert client.base_url == "http://localhost:8000" - def test_microseconds_to_iso8601(self): + def test__microseconds_to_iso8601(self): # Test timestamp conversion timestamp_us = 1727003400000000 # Some timestamp - result = microseconds_to_iso8601(timestamp_us) + result = _microseconds_to_iso8601(timestamp_us) # Just verify format is correct ISO 8601 assert len(result) == 20 assert result.endswith('Z') @@ -156,150 +156,135 @@ def test_carbon_intensity_client_invalid_response(self, mock_get): client.get_carbon_intensity_history("DE", "2024-09-22T10:50:00Z", "2024-09-22T10:55:00Z") -class TestGetCarbonIntensityForTimestamp: +class TestGetCarbonIntensityDataForRun: - def test_static_mode_with_value(self): - # Test static mode with I value - sci = {'I': 334} - result = get_carbon_intensity_for_timestamp(1727003400000000, sci, None) - assert result == Decimal('334') + def test_no_carbon_intensity_data(self): + # Test with run that has no carbon intensity data + run_id = Tests.insert_run() + result = get_carbon_intensity_data_for_run(run_id) + assert result is None - def test_static_mode_missing_value(self): - # Test static mode without I value - sci = {} - with pytest.raises(ValueError, match="No carbon intensity value available"): - get_carbon_intensity_for_timestamp(1727003400000000, sci, None) + def test_with_dynamic_carbon_intensity_data(self): + # Test with run that has dynamic carbon intensity data stored + run_id = Tests.insert_run() - def test_dynamic_mode(self): - # Test dynamic mode with carbon 
data - sci = {'I': 334} # Should be ignored in dynamic mode - carbon_data = [ - {"location": "DE", "time": "2024-09-22T10:00:00Z", "carbon_intensity": 185.0} - ] - result = get_carbon_intensity_for_timestamp(1727003400000000, sci, carbon_data) - assert result == 185.0 + # Insert mock carbon intensity metadata into measurement_metrics + # This simulates data that would be stored during a run with dynamic carbon intensity + metric_id = DB().fetch_one( + "INSERT INTO measurement_metrics (run_id, metric, detail_name, unit, sampling_rate_configured) VALUES (%s, %s, %s, %s, %s) RETURNING id", + (run_id, 'grid_carbon_intensity_dynamic', 'DE', 'gCO2e/kWh', 1000) + )[0] + + # Insert actual carbon intensity values + DB().query( + "INSERT INTO measurement_values (measurement_metric_id, value, time) VALUES (%s, %s, %s)", + (metric_id, 185, 1727003400000000) + ) + + result = get_carbon_intensity_data_for_run(run_id) + + # Should return the stored carbon intensity data + assert result is not None + assert len(result) > 0 -class TestDynamicCarbonIntensityPhaseStats: +class TestStoreCarbonIntensityAsMetrics: - @patch('lib.phase_stats.CarbonIntensityClient') - def test_dynamic_carbon_intensity_integration(self, mock_client_class): - # Test full integration with dynamic carbon intensity + @pytest.fixture + def run_with_measurement_times(self): + """Fixture that creates a test run with measurement start/end times set.""" run_id = Tests.insert_run() - Tests.import_machine_energy(run_id) - # Add measurement start/end times to the run + # Set measurement times (required for carbon intensity functions) DB().query( "UPDATE runs SET start_measurement = %s, end_measurement = %s WHERE id = %s", (Tests.TEST_MEASUREMENT_START_TIME, Tests.TEST_MEASUREMENT_END_TIME, run_id) ) - # Mock the carbon intensity client - mock_client = Mock() - mock_client.get_carbon_intensity_history.return_value = [ - {"location": "DE", "time": "2024-09-22T10:00:00Z", "carbon_intensity": 200.0}, - {"location": "DE", 
"time": "2024-09-22T11:00:00Z", "carbon_intensity": 180.0} - ] - mock_client_class.return_value = mock_client - - # Test configuration with dynamic carbon intensity enabled - sci = {'I': 334, 'N': 0.04106063} # Static I should be ignored - measurement_config = { - 'capabilities': { - 'measurement': { - 'use_dynamic_carbon_intensity': True, - 'carbon_intensity_location': 'DE' - } - } - } + return run_id - build_and_store_phase_stats(run_id, sci, measurement_config) + def test_store_carbon_intensity_static_value(self, run_with_measurement_times): + # Test that static carbon intensity is stored when dynamic is not enabled + run_id = run_with_measurement_times + static_carbon_intensity = 250.5 - # Verify the carbon intensity client was called - mock_client.get_carbon_intensity_history.assert_called_once() - args = mock_client.get_carbon_intensity_history.call_args[0] - assert args[0] == 'DE' # location - # args[1] and args[2] are start/end times in ISO format + # Call the function with static value + store_static_carbon_intensity(run_id, static_carbon_intensity) - # Check that carbon stats were generated - carbon_data = DB().fetch_all( - 'SELECT metric, value FROM phase_stats WHERE metric LIKE %s AND phase = %s', - params=('%carbon%', '004_[RUNTIME]'), - fetch_mode='dict' + # Verify that measurement_metrics entry was created for static carbon intensity + metric_result = DB().fetch_one( + "SELECT metric, detail_name, unit FROM measurement_metrics WHERE run_id = %s", + (run_id,) ) - assert len(carbon_data) > 0 - # Should have carbon data calculated with dynamic intensity (not static 334) - - def test_static_carbon_intensity_fallback(self): - # Test fallback to static carbon intensity when dynamic is disabled - run_id = Tests.insert_run() - Tests.import_machine_energy(run_id) - - sci = {'I': 334} - measurement_config = { - 'capabilities': { - 'measurement': { - 'use_dynamic_carbon_intensity': False - } - } - } - - build_and_store_phase_stats(run_id, sci, 
measurement_config) - - # Check that carbon stats were generated with static intensity - carbon_data = DB().fetch_all( - 'SELECT metric, value FROM phase_stats WHERE metric LIKE %s AND phase = %s', - params=('%carbon%', '004_[RUNTIME]'), - fetch_mode='dict' + assert metric_result is not None + assert metric_result[0] == 'grid_carbon_intensity_static' + assert metric_result[1] == '[CONFIG]' + assert metric_result[2] == 'gCO2e/kWh' + + # Verify that static value was stored (should have 2 data points: start and end) + values_result = DB().fetch_all( + """SELECT mv.value + FROM measurement_values mv + JOIN measurement_metrics mm ON mv.measurement_metric_id = mm.id + WHERE mm.run_id = %s AND mm.metric = 'grid_carbon_intensity_static'""", + (run_id,) ) - assert len(carbon_data) > 0 - - def test_missing_location_error(self): - # Test error when location is missing for dynamic mode - run_id = Tests.insert_run() - Tests.import_machine_energy(run_id) - - sci = {'I': 334} - measurement_config = { - 'capabilities': { - 'measurement': { - 'use_dynamic_carbon_intensity': True - # Missing carbon_intensity_location - } - } - } - - with pytest.raises(ValueError, match="carbon_intensity_location is required"): - build_and_store_phase_stats(run_id, sci, measurement_config) - - @patch('lib.phase_stats.CarbonIntensityClient') - def test_service_error_propagation(self, mock_client_class): - # Test that service errors are properly propagated - run_id = Tests.insert_run() - Tests.import_machine_energy(run_id) + assert len(values_result) == 2 + # Both values should be the same static value (multiplied by 1000) + assert values_result[0][0] == 250500 # 250.5 * 1000 + assert values_result[1][0] == 250500 # 250.5 * 1000 + + def test_store_carbon_intensity_dynamic_grid_enabled(self, run_with_measurement_times): + # Test that dynamic grid carbon intensity is stored when enabled in measurement config + run_id = run_with_measurement_times + + + # Mock the carbon intensity API call + with 
patch('lib.carbon_intensity.CarbonIntensityClient') as mock_client_class: + mock_client = Mock() + mock_client_class.return_value = mock_client + mock_client.get_carbon_intensity_history.return_value = [ + {"location": "DE", "time": "2024-09-22T10:00:00Z", "carbon_intensity": 185.0}, + {"location": "DE", "time": "2024-09-22T10:30:00Z", "carbon_intensity": 190.0}, + {"location": "DE", "time": "2024-09-22T11:00:00Z", "carbon_intensity": 183.0} + ] + + # Call the function under test + store_dynamic_carbon_intensity(run_id, 'DE') + + # Verify that measurement_metrics entry was created for dynamic carbon intensity + metric_result = DB().fetch_one( + "SELECT metric, detail_name, unit FROM measurement_metrics WHERE run_id = %s", + (run_id,) + ) - # Add measurement start/end times to the run - DB().query( - "UPDATE runs SET start_measurement = %s, end_measurement = %s WHERE id = %s", - (Tests.TEST_MEASUREMENT_START_TIME, Tests.TEST_MEASUREMENT_END_TIME, run_id) + assert metric_result is not None + assert metric_result[0] == 'grid_carbon_intensity_dynamic' + assert metric_result[1] == 'DE' + assert metric_result[2] == 'gCO2e/kWh' + + # Verify that measurement values were stored + values_result = DB().fetch_all( + """SELECT mv.value, mv.time + FROM measurement_values mv + JOIN measurement_metrics mm ON mv.measurement_metric_id = mm.id + WHERE mm.run_id = %s AND mm.metric = 'grid_carbon_intensity_dynamic' + ORDER BY mv.time""", + (run_id,) ) - # Mock the client to raise an exception - mock_client = Mock() - mock_client.get_carbon_intensity_history.side_effect = CarbonIntensityServiceError("Service unavailable") - mock_client_class.return_value = mock_client - - sci = {'I': 334} - measurement_config = { - 'capabilities': { - 'measurement': { - 'use_dynamic_carbon_intensity': True, - 'carbon_intensity_location': 'DE' - } - } - } + assert len(values_result) == 3 + # Values should be stored as integers (multiplied by 1000) + assert values_result[0][0] == 185000 # 185.0 * 1000 + 
assert values_result[1][0] == 190000 # 190.0 * 1000 + assert values_result[2][0] == 183000 # 183.0 * 1000 + + def test_store_carbon_intensity_dynamic_missing_location(self, run_with_measurement_times): + # Test error handling when dynamic method is called with None location + run_id = run_with_measurement_times - with pytest.raises(CarbonIntensityServiceError, match="Service unavailable"): - build_and_store_phase_stats(run_id, sci, measurement_config) + # Call the function with None location - should raise an exception or fail gracefully + with pytest.raises(Exception): # The method should fail when location is None + store_dynamic_carbon_intensity(run_id, None) diff --git a/tests/lib/test_phase_stats.py b/tests/lib/test_phase_stats.py index 7cbcc435b..38ceef3ea 100644 --- a/tests/lib/test_phase_stats.py +++ b/tests/lib/test_phase_stats.py @@ -130,6 +130,8 @@ def test_phase_embodied_and_operational_carbon(): Tests.import_machine_energy(run_id) sci = {"I":436,"R":0,"EL":4,"RS":1,"TE":181000,"R_d":"page request"} + Tests.import_carbon_intensity_value(run_id, sci['I']) + build_and_store_phase_stats(run_id, sci=sci) data = DB().fetch_all('SELECT metric, detail_name, unit, value, type, sampling_rate_avg, sampling_rate_max, sampling_rate_95p, phase FROM phase_stats WHERE phase = %s ', params=('004_[RUNTIME]', ), fetch_mode='dict') @@ -288,6 +290,7 @@ def test_phase_stats_network_data(): 'N': 0.001, # Network energy intensity (kWh/GB) 'I': 500, # Carbon intensity (gCO2e/kWh) } + Tests.import_carbon_intensity_value(run_id, test_sci_config['I']) build_and_store_phase_stats(run_id, sci=test_sci_config) @@ -347,6 +350,7 @@ def test_sci_calculation(): 'R': 10, # Functional unit count (10 runs) 'R_d': 'test runs' # Functional unit description } + Tests.import_carbon_intensity_value(run_id, test_sci_config['I']) build_and_store_phase_stats(run_id, sci=test_sci_config) diff --git a/tests/test_functions.py b/tests/test_functions.py index 3ec0a33ff..e53c5e72e 100644 --- 
a/tests/test_functions.py +++ b/tests/test_functions.py @@ -219,6 +219,15 @@ def import_demo_data_ee(): reset_db() raise RuntimeError('Import of Demo data into DB failed', ps.stderr) + +def import_carbon_intensity_value(run_id, static_carbon_intensity_value): + DB().query( + "UPDATE runs SET start_measurement = %s, end_measurement = %s WHERE id = %s", + (TEST_MEASUREMENT_START_TIME, TEST_MEASUREMENT_END_TIME, run_id) + ) + from lib.carbon_intensity import store_static_carbon_intensity # pylint: disable=import-outside-toplevel + store_static_carbon_intensity(run_id, static_carbon_intensity_value) + def assertion_info(expected, actual): return f"Expected: {expected}, Actual: {actual}" diff --git a/tools/phase_stats.py b/tools/phase_stats.py index 658f778ac..30b135adf 100644 --- a/tools/phase_stats.py +++ b/tools/phase_stats.py @@ -26,4 +26,4 @@ ''' data = DB().fetch_one(query, params=(args.run_id, ), fetch_mode='dict') - build_and_store_phase_stats(args.run_id, data['measurement_config']['sci'], data['measurement_config']) + build_and_store_phase_stats(args.run_id, data['measurement_config']['sci']) From a1d5de25b14a060ed4ac012d28cfe1120cabe546 Mon Sep 17 00:00:00 2001 From: David Kopp Date: Tue, 23 Sep 2025 18:58:52 +0200 Subject: [PATCH 03/27] Improve storing carbon intensity data --- lib/carbon_intensity.py | 54 ++++++++++++++++++++---------- tests/lib/test_carbon_intensity.py | 30 +++++++++++++++++ 2 files changed, 66 insertions(+), 18 deletions(-) diff --git a/lib/carbon_intensity.py b/lib/carbon_intensity.py index e0a83b4e0..8419ed763 100644 --- a/lib/carbon_intensity.py +++ b/lib/carbon_intensity.py @@ -160,6 +160,33 @@ def interpolate_carbon_intensity(timestamp_us: int, carbon_data: List[Dict[str, raise ValueError(f"Could not interpolate carbon intensity for timestamp {target_time}") +def _calculate_sampling_rate_from_data(carbon_intensity_data: List[Dict[str, Any]]) -> int: + """ + Calculate sampling rate in milliseconds based on time intervals in carbon 
intensity data. + + Args: + carbon_intensity_data: List of carbon intensity data points with 'time' field + + Returns: + Sampling rate in milliseconds, or 300000 (5 minutes) as fallback + + Example: + For data: [{"time": "2025-09-23T10:00:00Z"}, {"time": "2025-09-23T11:00:00Z"}] + Returns: 3600000 (1 hour in milliseconds) + """ + if not carbon_intensity_data or len(carbon_intensity_data) < 2: + return 300000 + + try: + time1 = datetime.fromisoformat(carbon_intensity_data[0]['time'].replace('Z', '+00:00')) + time2 = datetime.fromisoformat(carbon_intensity_data[1]['time'].replace('Z', '+00:00')) + + interval_seconds = abs((time2 - time1).total_seconds()) + return int(interval_seconds * 1000) + except (KeyError, ValueError, IndexError): + return 300000 + + def _microseconds_to_iso8601(timestamp_us: int) -> str: """ Convert microsecond timestamp to ISO 8601 format. @@ -212,20 +239,11 @@ def store_static_carbon_intensity(run_id, static_value): carbon_intensity_value = int(float(static_value) * 1000) # Store as constant time series: same value at start and end times - values_data = [ - f"{measurement_metric_id},{carbon_intensity_value},{start_time_us}", - f"{measurement_metric_id},{carbon_intensity_value},{end_time_us}" - ] - - csv_data = '\n'.join(values_data) - f = StringIO(csv_data) - DB().copy_from( - file=f, - table='measurement_values', - columns=['measurement_metric_id', 'value', 'time'], - sep=',' + DB().query( + "INSERT INTO measurement_values (measurement_metric_id, value, time) VALUES (%s, %s, %s), (%s, %s, %s)", + (measurement_metric_id, carbon_intensity_value, start_time_us, + measurement_metric_id, carbon_intensity_value, end_time_us) ) - f.close() print(f"Stored static carbon intensity value {static_value} gCO2e/kWh as constant time series") @@ -267,8 +285,8 @@ def store_dynamic_carbon_intensity(run_id, grid_carbon_intensity_location): metric_name = 'grid_carbon_intensity_dynamic' detail_name = grid_carbon_intensity_location unit = 'gCO2e/kWh' - # Estimate 
sampling rate as 5 minutes (300000ms) based on typical grid data frequency - sampling_rate_configured = 300000 + # Calculate sampling rate based on actual data intervals + sampling_rate_configured = _calculate_sampling_rate_from_data(carbon_intensity_data) measurement_metric_id = DB().fetch_one(''' INSERT INTO measurement_metrics (run_id, metric, detail_name, unit, sampling_rate_configured) @@ -287,11 +305,11 @@ def store_dynamic_carbon_intensity(run_id, grid_carbon_intensity_location): # Convert carbon intensity to integer (multiply by 1000 for precision) carbon_intensity_value = int(float(data_point['carbon_intensity']) * 1000) - values_data.append(f"{measurement_metric_id},{carbon_intensity_value},{timestamp_us}") + values_data.append((measurement_metric_id, carbon_intensity_value, timestamp_us)) if values_data: - # Bulk insert measurement values - csv_data = '\n'.join(values_data) + # Bulk insert measurement values using copy_from + csv_data = '\n'.join([f"{row[0]},{row[1]},{row[2]}" for row in values_data]) f = StringIO(csv_data) DB().copy_from( file=f, diff --git a/tests/lib/test_carbon_intensity.py b/tests/lib/test_carbon_intensity.py index b8a839af8..f4285a7c7 100644 --- a/tests/lib/test_carbon_intensity.py +++ b/tests/lib/test_carbon_intensity.py @@ -14,6 +14,7 @@ CarbonIntensityServiceError, CarbonIntensityDataError, _microseconds_to_iso8601, + _calculate_sampling_rate_from_data, interpolate_carbon_intensity, get_carbon_intensity_data_for_run, store_static_carbon_intensity, @@ -60,6 +61,35 @@ def test__microseconds_to_iso8601(self): parsed = datetime.fromisoformat(result.replace('Z', '+00:00')) assert parsed is not None + def test__calculate_sampling_rate_from_data(self): + # Test with 1 hour interval (as in the example) + carbon_data = [ + {"location": "DE", "time": "2025-09-23T10:00:00Z", "carbon_intensity": 253.0}, + {"location": "DE", "time": "2025-09-23T11:00:00Z", "carbon_intensity": 252.0} + ] + result = 
_calculate_sampling_rate_from_data(carbon_data) + assert result == 3600000 # 1 hour = 3600 seconds = 3600000 ms + + # Test with 30 minute interval + carbon_data_30min = [ + {"location": "DE", "time": "2025-09-23T10:00:00Z", "carbon_intensity": 253.0}, + {"location": "DE", "time": "2025-09-23T10:30:00Z", "carbon_intensity": 252.0} + ] + result = _calculate_sampling_rate_from_data(carbon_data_30min) + assert result == 1800000 # 30 minutes = 1800 seconds = 1800000 ms + + # Test with empty data (should return fallback) + result = _calculate_sampling_rate_from_data([]) + assert result == 300000 # 5 minutes fallback + + # Test with single data point (should return fallback) + result = _calculate_sampling_rate_from_data([{"location": "DE", "time": "2025-09-23T10:00:00Z", "carbon_intensity": 253.0}]) + assert result == 300000 # 5 minutes fallback + + # Test with invalid data (should return fallback) + result = _calculate_sampling_rate_from_data([{"invalid": "data"}, {"also": "invalid"}]) + assert result == 300000 # 5 minutes fallback + def test_interpolate_carbon_intensity_single_point(self): # Test with single data point carbon_data = [ From 372ec94e145fcb6bc0a0a03b57f0f59dac9a4a5b Mon Sep 17 00:00:00 2001 From: David Kopp Date: Tue, 23 Sep 2025 19:43:13 +0200 Subject: [PATCH 04/27] Enhance carbon intensity with timeseries support for phases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add get_carbon_intensity_timeseries_for_phase() to generate carbon intensity data at regular intervals throughout phases - Add store_phase_carbon_intensity_metric() to store phase-specific carbon intensity as measurement metrics using existing tables - Update phase_stats.py to generate and store carbon intensity timeseries per phase - Replace single midpoint interpolation with average of timeseries for more accurate phase carbon calculations - Rename interpolate_carbon_intensity to get_carbon_intensity_at_timestamp for clarity - Store 
timeseries data using pattern: grid_carbon_intensity_phase / {location}_{index:03}_{phase_name} - Improve logging messages and documentation - Add comprehensive tests for new timeseries functionality - Foundation for future energy×carbon temporal integration calculations 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- lib/carbon_intensity.py | 154 ++++++++++++++++++++++++++++- lib/phase_stats.py | 31 +++++- tests/lib/test_carbon_intensity.py | 112 ++++++++++++++++++--- 3 files changed, 275 insertions(+), 22 deletions(-) diff --git a/lib/carbon_intensity.py b/lib/carbon_intensity.py index 8419ed763..9c3a64071 100644 --- a/lib/carbon_intensity.py +++ b/lib/carbon_intensity.py @@ -107,19 +107,24 @@ def get_carbon_intensity_data_for_run(run_id): return None -def interpolate_carbon_intensity(timestamp_us: int, carbon_data: List[Dict[str, Any]]) -> float: +def get_carbon_intensity_at_timestamp(timestamp_us: int, carbon_data: List[Dict[str, Any]]) -> float: """ - Interpolate carbon intensity value for a specific timestamp. + Get carbon intensity value for a specific timestamp using interpolation/extrapolation. 
+ + This function finds the carbon intensity at a given timestamp by: + - Interpolating between two data points if timestamp falls between them + - Returning the first value if timestamp is before all data points + - Returning the last value if timestamp is after all data points Args: timestamp_us: Target timestamp in microseconds carbon_data: List of carbon intensity data points from service Returns: - Interpolated carbon intensity value in gCO2e/kWh + Carbon intensity value in gCO2e/kWh Raises: - ValueError: If carbon_data is empty or timestamp is outside range + ValueError: If carbon_data is empty """ if not carbon_data: raise ValueError("No carbon intensity data available for interpolation") @@ -160,6 +165,78 @@ def interpolate_carbon_intensity(timestamp_us: int, carbon_data: List[Dict[str, raise ValueError(f"Could not interpolate carbon intensity for timestamp {target_time}") +def get_carbon_intensity_timeseries_for_phase( + phase_start_us: int, + phase_end_us: int, + carbon_data: List[Dict[str, Any]], + target_sampling_rate_ms: int = None +) -> List[Dict[str, Any]]: + """ + Generate carbon intensity timeseries for a specific phase timeframe. + + This function generates carbon intensity values at regular intervals throughout a phase, + which are stored as measurement metrics and used for calculating representative carbon + intensity values for energy calculations. + + Args: + phase_start_us: Phase start timestamp in microseconds + phase_end_us: Phase end timestamp in microseconds + carbon_data: List of carbon intensity data points from service + target_sampling_rate_ms: Target sampling rate in milliseconds for timeseries generation. + If None, uses sampling rate from carbon data or defaults to 5 minutes. + + Returns: + List of carbon intensity timeseries points: + [{"timestamp_us": 1727003400000000, "carbon_intensity": 185.0}, ...] 
+ + Raises: + ValueError: If carbon_data is empty or phase timeframe is invalid + """ + if not carbon_data: + raise ValueError("No carbon intensity data available for timeseries generation") + + if phase_start_us >= phase_end_us: + raise ValueError("Invalid phase timeframe: start must be before end") + + # Determine sampling rate for timeseries generation + if target_sampling_rate_ms is None: + target_sampling_rate_ms = _calculate_sampling_rate_from_data(carbon_data) + + target_sampling_rate_us = target_sampling_rate_ms * 1000 + + # Generate timestamps at regular intervals throughout the phase + timeseries = [] + current_timestamp_us = phase_start_us + + while current_timestamp_us <= phase_end_us: + try: + carbon_intensity = get_carbon_intensity_at_timestamp(current_timestamp_us, carbon_data) + timeseries.append({ + "timestamp_us": current_timestamp_us, + "carbon_intensity": carbon_intensity + }) + except ValueError: + # Skip this timestamp if carbon intensity lookup fails + # This handles edge cases like malformed data or timestamp conversion issues + # Note: Normal out-of-range timestamps are handled gracefully by get_carbon_intensity_at_timestamp + pass + + current_timestamp_us += target_sampling_rate_us + + # Always include the phase end timestamp if it wasn't already included + if timeseries and timeseries[-1]["timestamp_us"] != phase_end_us: + try: + carbon_intensity = get_carbon_intensity_at_timestamp(phase_end_us, carbon_data) + timeseries.append({ + "timestamp_us": phase_end_us, + "carbon_intensity": carbon_intensity + }) + except ValueError: + pass + + return timeseries + + def _calculate_sampling_rate_from_data(carbon_intensity_data: List[Dict[str, Any]]) -> int: """ Calculate sampling rate in milliseconds based on time intervals in carbon intensity data. 
@@ -348,7 +425,7 @@ def _get_stored_carbon_intensity_data(run_id, metric_name, detail_name): if not results: return None - # Convert stored data back to the format expected by interpolate_carbon_intensity + # Convert stored data back to the format expected by get_carbon_intensity_at_timestamp carbon_data = [] for timestamp_us, value_int in results: # Convert back from integer storage (divide by 1000 to restore decimal precision) @@ -364,3 +441,70 @@ def _get_stored_carbon_intensity_data(run_id, metric_name, detail_name): }) return carbon_data + + +def store_phase_carbon_intensity_metric( + run_id, + phase_index: int, + phase_name: str, + location: str, + carbon_timeseries: List[Dict[str, Any]] +): + """ + Store phase-specific carbon intensity timeseries as measurement_metric. + + This creates a new measurement metric specifically for carbon intensity data + within a phase, enabling future energy×carbon timeseries calculations and + frontend visualization of carbon intensity variations within phases. + + Args: + run_id: UUID of the run + phase_index: Index of the phase (e.g., 1, 2, 3...) 
+ phase_name: Name of the phase (e.g., "[SETUP]", "[RUNTIME]") + location: Location code (e.g., "DE", "ES-IB-MA") + carbon_timeseries: List of carbon intensity data points with timestamp_us and carbon_intensity + + """ + if not carbon_timeseries: + return + + # Create phase-specific metric name and detail name + metric_name = 'grid_carbon_intensity_phase' + detail_name = f"{location}_{phase_index:03}_{phase_name}" + unit = 'gCO2e/kWh' + + # Calculate sampling rate from the timeseries data + if len(carbon_timeseries) >= 2: + interval_us = carbon_timeseries[1]['timestamp_us'] - carbon_timeseries[0]['timestamp_us'] + sampling_rate_configured = int(interval_us / 1000) # Convert to milliseconds + else: + sampling_rate_configured = 300000 # 5 minutes default + + # Create measurement_metric entry for phase-specific carbon intensity + measurement_metric_id = DB().fetch_one(''' + INSERT INTO measurement_metrics (run_id, metric, detail_name, unit, sampling_rate_configured) + VALUES (%s, %s, %s, %s, %s) + RETURNING id + ''', params=(run_id, metric_name, detail_name, unit, sampling_rate_configured))[0] + + # Prepare measurement values for bulk insert + values_data = [] + for data_point in carbon_timeseries: + timestamp_us = data_point['timestamp_us'] + # Convert carbon intensity to integer (multiply by 1000 for precision) + carbon_intensity_value = int(float(data_point['carbon_intensity']) * 1000) + values_data.append((measurement_metric_id, carbon_intensity_value, timestamp_us)) + + if values_data: + # Bulk insert measurement values using copy_from + csv_data = '\n'.join([f"{row[0]},{row[1]},{row[2]}" for row in values_data]) + f = StringIO(csv_data) + DB().copy_from( + file=f, + table='measurement_values', + columns=['measurement_metric_id', 'value', 'time'], + sep=',' + ) + f.close() + + print(f"Stored {len(values_data)} carbon intensity data points for phase {phase_name} (location: {location})") diff --git a/lib/phase_stats.py b/lib/phase_stats.py index 
76c564ec8..df84569a5 100644 --- a/lib/phase_stats.py +++ b/lib/phase_stats.py @@ -13,8 +13,9 @@ from lib.carbon_intensity import ( CarbonIntensityServiceError, CarbonIntensityDataError, - interpolate_carbon_intensity, - get_carbon_intensity_data_for_run + get_carbon_intensity_data_for_run, + get_carbon_intensity_timeseries_for_phase, + store_phase_carbon_intensity_metric ) def reconstruct_runtime_phase(run_id, runtime_phase_idx): @@ -130,12 +131,32 @@ def build_and_store_phase_stats(run_id, sci=None): cpu_utilization_machine = None network_io_carbon_in_ug = None - # Calculate carbon intensity once per phase for reuse in energy and network calculations + # Generate carbon intensity timeseries for this phase and extract representative value phase_carbon_intensity = None if carbon_intensity_data: try: - phase_midpoint_us = (phase['start'] + phase['end']) // 2 - phase_carbon_intensity = Decimal(interpolate_carbon_intensity(phase_midpoint_us, carbon_intensity_data)) + location = carbon_intensity_data[0].get('location', 'unknown') + carbon_timeseries = get_carbon_intensity_timeseries_for_phase( + phase['start'], phase['end'], carbon_intensity_data + ) + + if carbon_timeseries: + # Store the timeseries as measurement metric for future energy×carbon calculations + try: + store_phase_carbon_intensity_metric( + run_id, idx, phase['name'], location, carbon_timeseries + ) + except Exception as e: # pylint: disable=broad-except + error_helpers.log_error(f"Failed to store carbon intensity timeseries for phase {phase['name']}: {e}", run_id=run_id) + + # INTERIM: Calculate representative carbon intensity for current energy calculations + # TODO: Replace this simple average with time-weighted energy×carbon integration + # Future enhancement: Synchronize energy and carbon timeseries for precise temporal calculation + # instead of: energy_total * carbon_average + # use: ∫(energy(t) * carbon_intensity(t))dt over phase duration + total_carbon = sum(point['carbon_intensity'] for point 
in carbon_timeseries) + phase_carbon_intensity = Decimal(total_carbon / len(carbon_timeseries)) + except (CarbonIntensityServiceError, CarbonIntensityDataError, ValueError) as e: error_helpers.log_error(f"Failed to calculate carbon intensity for phase {phase['name']}: {e}", run_id=run_id) phase_carbon_intensity = None diff --git a/tests/lib/test_carbon_intensity.py b/tests/lib/test_carbon_intensity.py index f4285a7c7..6325bf112 100644 --- a/tests/lib/test_carbon_intensity.py +++ b/tests/lib/test_carbon_intensity.py @@ -4,6 +4,7 @@ import requests from unittest.mock import Mock, patch from datetime import datetime +from datetime import timezone GMT_ROOT_DIR = os.path.dirname(os.path.abspath(__file__))+'/../../' @@ -15,10 +16,12 @@ CarbonIntensityDataError, _microseconds_to_iso8601, _calculate_sampling_rate_from_data, - interpolate_carbon_intensity, + get_carbon_intensity_at_timestamp, get_carbon_intensity_data_for_run, store_static_carbon_intensity, - store_dynamic_carbon_intensity + store_dynamic_carbon_intensity, + get_carbon_intensity_timeseries_for_phase, + store_phase_carbon_intensity_metric ) class TestCarbonIntensityClient: @@ -90,16 +93,16 @@ def test__calculate_sampling_rate_from_data(self): result = _calculate_sampling_rate_from_data([{"invalid": "data"}, {"also": "invalid"}]) assert result == 300000 # 5 minutes fallback - def test_interpolate_carbon_intensity_single_point(self): + def test_get_carbon_intensity_at_timestamp_single_point(self): # Test with single data point carbon_data = [ {"location": "DE", "time": "2024-09-22T10:00:00Z", "carbon_intensity": 185.0} ] timestamp_us = 1727003400000000 # 2024-09-22T10:50:00Z - result = interpolate_carbon_intensity(timestamp_us, carbon_data) + result = get_carbon_intensity_at_timestamp(timestamp_us, carbon_data) assert result == 185.0 - def test_interpolate_carbon_intensity_between_points(self): + def test_get_carbon_intensity_at_timestamp_between_points(self): # Test interpolation between two points 
carbon_data = [ {"location": "DE", "time": "2024-09-22T10:00:00Z", "carbon_intensity": 180.0}, @@ -109,31 +112,31 @@ def test_interpolate_carbon_intensity_between_points(self): mid_time = datetime(2024, 9, 22, 10, 30, 0) # UTC time timestamp_us = int(calendar.timegm(mid_time.timetuple()) * 1_000_000) - result = interpolate_carbon_intensity(timestamp_us, carbon_data) + result = get_carbon_intensity_at_timestamp(timestamp_us, carbon_data) assert result == 190.0 # Linear interpolation: 180 + (200-180) * 0.5 - def test_interpolate_carbon_intensity_before_range(self): + def test_get_carbon_intensity_at_timestamp_before_range(self): # Test with timestamp before data range carbon_data = [ {"location": "DE", "time": "2024-09-22T11:00:00Z", "carbon_intensity": 185.0} ] timestamp_us = 1727001600000000 # 2024-09-22T10:20:00Z (before 11:00) - result = interpolate_carbon_intensity(timestamp_us, carbon_data) + result = get_carbon_intensity_at_timestamp(timestamp_us, carbon_data) assert result == 185.0 # Should return first value - def test_interpolate_carbon_intensity_after_range(self): + def test_get_carbon_intensity_at_timestamp_after_range(self): # Test with timestamp after data range carbon_data = [ {"location": "DE", "time": "2024-09-22T10:00:00Z", "carbon_intensity": 185.0} ] timestamp_us = 1727007000000000 # 2024-09-22T11:50:00Z (after 10:00) - result = interpolate_carbon_intensity(timestamp_us, carbon_data) + result = get_carbon_intensity_at_timestamp(timestamp_us, carbon_data) assert result == 185.0 # Should return last value - def test_interpolate_carbon_intensity_empty_data(self): + def test_get_carbon_intensity_at_timestamp_empty_data(self): # Test with empty data with pytest.raises(ValueError, match="No carbon intensity data available"): - interpolate_carbon_intensity(1727003400000000, []) + get_carbon_intensity_at_timestamp(1727003400000000, []) @patch('lib.carbon_intensity.requests.get') def test_carbon_intensity_client_success(self, mock_get): @@ -318,3 +321,88 
@@ def test_store_carbon_intensity_dynamic_missing_location(self, run_with_measurem # Call the function with None location - should raise an exception or fail gracefully with pytest.raises(Exception): # The method should fail when location is None store_dynamic_carbon_intensity(run_id, None) + + +class TestCarbonIntensityTimeseries: + + def test_get_carbon_intensity_timeseries_for_phase(self): + """Test generating carbon intensity timeseries for a phase""" + # Sample carbon intensity data + carbon_data = [ + {"time": "2025-09-22T10:00:00Z", "carbon_intensity": 185.0, "location": "DE"}, + {"time": "2025-09-22T11:00:00Z", "carbon_intensity": 190.0, "location": "DE"}, + {"time": "2025-09-22T12:00:00Z", "carbon_intensity": 183.0, "location": "DE"} + ] + + # Phase timeframe: 10:30 to 11:30 (90 minutes) in UTC + phase_start_us = int(datetime(2025, 9, 22, 10, 30, 0, tzinfo=timezone.utc).timestamp() * 1_000_000) + phase_end_us = int(datetime(2025, 9, 22, 11, 30, 0, tzinfo=timezone.utc).timestamp() * 1_000_000) + + # Generate timeseries with 30-minute intervals + result = get_carbon_intensity_timeseries_for_phase( + phase_start_us, phase_end_us, carbon_data, target_sampling_rate_ms=30*60*1000 + ) + + # Should generate points at 10:30, 11:00, 11:30 + assert len(result) == 3 + assert result[0]["timestamp_us"] == phase_start_us + assert result[-1]["timestamp_us"] == phase_end_us + + # Values should be interpolated appropriately + assert 185.0 <= result[0]["carbon_intensity"] <= 190.0 # Interpolated between 185 and 190 + assert result[1]["carbon_intensity"] == 190.0 # Exact match at 11:00 + assert 183.0 <= result[2]["carbon_intensity"] <= 190.0 # Interpolated between 190 and 183 + + def test_get_carbon_intensity_timeseries_empty_data(self): + """Test error handling with empty carbon data""" + phase_start_us = int(datetime(2025, 9, 22, 10, 30, 0, tzinfo=timezone.utc).timestamp() * 1_000_000) + phase_end_us = int(datetime(2025, 9, 22, 11, 30, 0, tzinfo=timezone.utc).timestamp() * 
1_000_000) + + with pytest.raises(ValueError, match="No carbon intensity data available"): + get_carbon_intensity_timeseries_for_phase(phase_start_us, phase_end_us, []) + + def test_get_carbon_intensity_timeseries_invalid_timeframe(self): + """Test error handling with invalid phase timeframe""" + carbon_data = [{"time": "2025-09-22T10:00:00Z", "carbon_intensity": 185.0, "location": "DE"}] + + phase_start_us = int(datetime(2025, 9, 22, 11, 30, 0, tzinfo=timezone.utc).timestamp() * 1_000_000) + phase_end_us = int(datetime(2025, 9, 22, 10, 30, 0, tzinfo=timezone.utc).timestamp() * 1_000_000) # End before start + + with pytest.raises(ValueError, match="Invalid phase timeframe"): + get_carbon_intensity_timeseries_for_phase(phase_start_us, phase_end_us, carbon_data) + + def test_store_phase_carbon_intensity_metric(self): + """Test storing phase-specific carbon intensity metric in database""" + run_id = Tests.insert_run() + + # Create test carbon timeseries data + carbon_timeseries = [ + {'timestamp_us': int(datetime(2025, 9, 22, 10, 30, 0, tzinfo=timezone.utc).timestamp() * 1_000_000), 'carbon_intensity': 185.0}, + {'timestamp_us': int(datetime(2025, 9, 22, 11, 0, 0, tzinfo=timezone.utc).timestamp() * 1_000_000), 'carbon_intensity': 190.0}, + {'timestamp_us': int(datetime(2025, 9, 22, 11, 30, 0, tzinfo=timezone.utc).timestamp() * 1_000_000), 'carbon_intensity': 183.0} + ] + + # Store the phase carbon intensity metric + store_phase_carbon_intensity_metric(run_id, 1, '[SETUP]', 'DE', carbon_timeseries) + + # Verify the measurement_metric was created + metrics = DB().fetch_all( + 'SELECT metric, detail_name, unit FROM measurement_metrics WHERE run_id = %s', + params=(run_id,) + ) + assert len(metrics) == 1 + assert metrics[0] == ('grid_carbon_intensity_phase', 'DE_001_[SETUP]', 'gCO2e/kWh') + + # Verify the measurement_values were stored + values = DB().fetch_all( + '''SELECT mv.value, mv.time FROM measurement_values mv + JOIN measurement_metrics mm ON 
mv.measurement_metric_id = mm.id + WHERE mm.run_id = %s ORDER BY mv.time''', + params=(run_id,) + ) + assert len(values) == 3 + + # Verify the values are stored correctly (as integers * 1000) + expected_values = [185000, 190000, 183000] # multiplied by 1000 for precision + actual_values = [v[0] for v in values] + assert actual_values == expected_values From 939edfc1efce6f3df2d7f53da37d0fa6db5e4553 Mon Sep 17 00:00:00 2001 From: David Kopp Date: Wed, 24 Sep 2025 07:00:06 +0200 Subject: [PATCH 05/27] Remove unnecessary storing of carbon phase stats --- lib/carbon_intensity.py | 67 ------------------------------ lib/phase_stats.py | 20 +++------ tests/lib/test_carbon_intensity.py | 37 ----------------- 3 files changed, 5 insertions(+), 119 deletions(-) diff --git a/lib/carbon_intensity.py b/lib/carbon_intensity.py index 9c3a64071..5b5143771 100644 --- a/lib/carbon_intensity.py +++ b/lib/carbon_intensity.py @@ -441,70 +441,3 @@ def _get_stored_carbon_intensity_data(run_id, metric_name, detail_name): }) return carbon_data - - -def store_phase_carbon_intensity_metric( - run_id, - phase_index: int, - phase_name: str, - location: str, - carbon_timeseries: List[Dict[str, Any]] -): - """ - Store phase-specific carbon intensity timeseries as measurement_metric. - - This creates a new measurement metric specifically for carbon intensity data - within a phase, enabling future energy×carbon timeseries calculations and - frontend visualization of carbon intensity variations within phases. - - Args: - run_id: UUID of the run - phase_index: Index of the phase (e.g., 1, 2, 3...) 
- phase_name: Name of the phase (e.g., "[SETUP]", "[RUNTIME]") - location: Location code (e.g., "DE", "ES-IB-MA") - carbon_timeseries: List of carbon intensity data points with timestamp_us and carbon_intensity - - """ - if not carbon_timeseries: - return - - # Create phase-specific metric name and detail name - metric_name = 'grid_carbon_intensity_phase' - detail_name = f"{location}_{phase_index:03}_{phase_name}" - unit = 'gCO2e/kWh' - - # Calculate sampling rate from the timeseries data - if len(carbon_timeseries) >= 2: - interval_us = carbon_timeseries[1]['timestamp_us'] - carbon_timeseries[0]['timestamp_us'] - sampling_rate_configured = int(interval_us / 1000) # Convert to milliseconds - else: - sampling_rate_configured = 300000 # 5 minutes default - - # Create measurement_metric entry for phase-specific carbon intensity - measurement_metric_id = DB().fetch_one(''' - INSERT INTO measurement_metrics (run_id, metric, detail_name, unit, sampling_rate_configured) - VALUES (%s, %s, %s, %s, %s) - RETURNING id - ''', params=(run_id, metric_name, detail_name, unit, sampling_rate_configured))[0] - - # Prepare measurement values for bulk insert - values_data = [] - for data_point in carbon_timeseries: - timestamp_us = data_point['timestamp_us'] - # Convert carbon intensity to integer (multiply by 1000 for precision) - carbon_intensity_value = int(float(data_point['carbon_intensity']) * 1000) - values_data.append((measurement_metric_id, carbon_intensity_value, timestamp_us)) - - if values_data: - # Bulk insert measurement values using copy_from - csv_data = '\n'.join([f"{row[0]},{row[1]},{row[2]}" for row in values_data]) - f = StringIO(csv_data) - DB().copy_from( - file=f, - table='measurement_values', - columns=['measurement_metric_id', 'value', 'time'], - sep=',' - ) - f.close() - - print(f"Stored {len(values_data)} carbon intensity data points for phase {phase_name} (location: {location})") diff --git a/lib/phase_stats.py b/lib/phase_stats.py index 
df84569a5..44f9c0a64 100644 --- a/lib/phase_stats.py +++ b/lib/phase_stats.py @@ -15,7 +15,6 @@ CarbonIntensityDataError, get_carbon_intensity_data_for_run, get_carbon_intensity_timeseries_for_phase, - store_phase_carbon_intensity_metric ) def reconstruct_runtime_phase(run_id, runtime_phase_idx): @@ -135,25 +134,16 @@ def build_and_store_phase_stats(run_id, sci=None): phase_carbon_intensity = None if carbon_intensity_data: try: - location = carbon_intensity_data[0].get('location', 'unknown') carbon_timeseries = get_carbon_intensity_timeseries_for_phase( phase['start'], phase['end'], carbon_intensity_data ) + # INTERIM: Calculate representative carbon intensity for current energy calculations + # TODO: Replace this simple average with time-weighted energy×carbon integration # pylint: disable=fixme + # Future enhancement: Synchronize energy and carbon timeseries for precise temporal calculation + # instead of: energy_total * carbon_average + # use: ∫(energy(t) * carbon_intensity(t))dt over phase duration if carbon_timeseries: - # Store the timeseries as measurement metric for future energy×carbon calculations - try: - store_phase_carbon_intensity_metric( - run_id, idx, phase['name'], location, carbon_timeseries - ) - except Exception as e: # pylint: disable=broad-except - error_helpers.log_error(f"Failed to store carbon intensity timeseries for phase {phase['name']}: {e}", run_id=run_id) - - # INTERIM: Calculate representative carbon intensity for current energy calculations - # TODO: Replace this simple average with time-weighted energy×carbon integration - # Future enhancement: Synchronize energy and carbon timeseries for precise temporal calculation - # instead of: energy_total * carbon_average - # use: ∫(energy(t) * carbon_intensity(t))dt over phase duration total_carbon = sum(point['carbon_intensity'] for point in carbon_timeseries) phase_carbon_intensity = Decimal(total_carbon / len(carbon_timeseries)) diff --git a/tests/lib/test_carbon_intensity.py 
b/tests/lib/test_carbon_intensity.py index 6325bf112..504125f89 100644 --- a/tests/lib/test_carbon_intensity.py +++ b/tests/lib/test_carbon_intensity.py @@ -21,7 +21,6 @@ store_static_carbon_intensity, store_dynamic_carbon_intensity, get_carbon_intensity_timeseries_for_phase, - store_phase_carbon_intensity_metric ) class TestCarbonIntensityClient: @@ -370,39 +369,3 @@ def test_get_carbon_intensity_timeseries_invalid_timeframe(self): with pytest.raises(ValueError, match="Invalid phase timeframe"): get_carbon_intensity_timeseries_for_phase(phase_start_us, phase_end_us, carbon_data) - - def test_store_phase_carbon_intensity_metric(self): - """Test storing phase-specific carbon intensity metric in database""" - run_id = Tests.insert_run() - - # Create test carbon timeseries data - carbon_timeseries = [ - {'timestamp_us': int(datetime(2025, 9, 22, 10, 30, 0, tzinfo=timezone.utc).timestamp() * 1_000_000), 'carbon_intensity': 185.0}, - {'timestamp_us': int(datetime(2025, 9, 22, 11, 0, 0, tzinfo=timezone.utc).timestamp() * 1_000_000), 'carbon_intensity': 190.0}, - {'timestamp_us': int(datetime(2025, 9, 22, 11, 30, 0, tzinfo=timezone.utc).timestamp() * 1_000_000), 'carbon_intensity': 183.0} - ] - - # Store the phase carbon intensity metric - store_phase_carbon_intensity_metric(run_id, 1, '[SETUP]', 'DE', carbon_timeseries) - - # Verify the measurement_metric was created - metrics = DB().fetch_all( - 'SELECT metric, detail_name, unit FROM measurement_metrics WHERE run_id = %s', - params=(run_id,) - ) - assert len(metrics) == 1 - assert metrics[0] == ('grid_carbon_intensity_phase', 'DE_001_[SETUP]', 'gCO2e/kWh') - - # Verify the measurement_values were stored - values = DB().fetch_all( - '''SELECT mv.value, mv.time FROM measurement_values mv - JOIN measurement_metrics mm ON mv.measurement_metric_id = mm.id - WHERE mm.run_id = %s ORDER BY mv.time''', - params=(run_id,) - ) - assert len(values) == 3 - - # Verify the values are stored correctly (as integers * 1000) - 
expected_values = [185000, 190000, 183000] # multiplied by 1000 for precision - actual_values = [v[0] for v in values] - assert actual_values == expected_values From b77520e2c7bd09b096cfe42f18fd16c210eaef3d Mon Sep 17 00:00:00 2001 From: David Kopp Date: Wed, 24 Sep 2025 07:12:27 +0200 Subject: [PATCH 06/27] Cleanup --- lib/carbon_intensity.py | 387 +++++++++++++---------------- lib/phase_stats.py | 9 +- lib/scenario_runner.py | 6 +- tests/lib/test_carbon_intensity.py | 66 ++--- 4 files changed, 221 insertions(+), 247 deletions(-) diff --git a/lib/carbon_intensity.py b/lib/carbon_intensity.py index 5b5143771..671598fa8 100644 --- a/lib/carbon_intensity.py +++ b/lib/carbon_intensity.py @@ -79,7 +79,6 @@ def get_carbon_intensity_history(self, location: str, start_time: str, end_time: raise CarbonIntensityDataError(f"Invalid response from carbon intensity service: {e}") from e - def get_carbon_intensity_data_for_run(run_id): """ Get carbon intensity data for a run, automatically detecting dynamic vs static. 
@@ -88,7 +87,9 @@ def get_carbon_intensity_data_for_run(run_id): run_id: UUID of the run Returns: - List of carbon intensity data points or None if no data found + Tuple of (carbon_data, sampling_rate_ms) where: + - carbon_data: List of carbon intensity data points or None if no data found + - sampling_rate_ms: Sampling rate in milliseconds """ # Auto-detect what carbon intensity data is available for this run # Check for both static and dynamic carbon intensity @@ -98,185 +99,41 @@ def get_carbon_intensity_data_for_run(run_id): WHERE run_id = %s AND metric IN ('grid_carbon_intensity_static', 'grid_carbon_intensity_dynamic') LIMIT 1 """ - result = DB().fetch_one(query, (run_id,)) - - if result: - metric, detail_name = result - return _get_stored_carbon_intensity_data(run_id, metric, detail_name) - - return None - - -def get_carbon_intensity_at_timestamp(timestamp_us: int, carbon_data: List[Dict[str, Any]]) -> float: - """ - Get carbon intensity value for a specific timestamp using interpolation/extrapolation. 
- - This function finds the carbon intensity at a given timestamp by: - - Interpolating between two data points if timestamp falls between them - - Returning the first value if timestamp is before all data points - - Returning the last value if timestamp is after all data points + grid_carbon_intensity_metrics = DB().fetch_one(query, (run_id,)) - Args: - timestamp_us: Target timestamp in microseconds - carbon_data: List of carbon intensity data points from service + if not grid_carbon_intensity_metrics: + return None, None - Returns: - Carbon intensity value in gCO2e/kWh + metric_name, detail_name = grid_carbon_intensity_metrics - Raises: - ValueError: If carbon_data is empty - """ - if not carbon_data: - raise ValueError("No carbon intensity data available for interpolation") - - target_time = datetime.fromtimestamp(timestamp_us / 1_000_000, timezone.utc).replace(tzinfo=None) - - # Convert carbon data times to datetime objects for comparison - data_points = [] - for item in carbon_data: - item_time = datetime.fromisoformat(item['time'].replace('Z', '+00:00')).replace(tzinfo=None) - data_points.append((item_time, float(item['carbon_intensity']))) - - # Sort by time - data_points.sort(key=lambda x: x[0]) - - # Check if target is before first or after last data point - if target_time <= data_points[0][0]: - return data_points[0][1] - if target_time >= data_points[-1][0]: - return data_points[-1][1] - - # Find surrounding data points for interpolation - for i in range(len(data_points) - 1): - time1, value1 = data_points[i] - time2, value2 = data_points[i + 1] - - if time1 <= target_time <= time2: - # Linear interpolation - time_diff = (time2 - time1).total_seconds() - if time_diff == 0: - return value1 - - target_diff = (target_time - time1).total_seconds() - ratio = target_diff / time_diff - - return value1 + (value2 - value1) * ratio - - raise ValueError(f"Could not interpolate carbon intensity for timestamp {target_time}") - - -def 
get_carbon_intensity_timeseries_for_phase( - phase_start_us: int, - phase_end_us: int, - carbon_data: List[Dict[str, Any]], - target_sampling_rate_ms: int = None -) -> List[Dict[str, Any]]: - """ - Generate carbon intensity timeseries for a specific phase timeframe. - - This function generates carbon intensity values at regular intervals throughout a phase, - which are stored as measurement metrics and used for calculating representative carbon - intensity values for energy calculations. - - Args: - phase_start_us: Phase start timestamp in microseconds - phase_end_us: Phase end timestamp in microseconds - carbon_data: List of carbon intensity data points from service - target_sampling_rate_ms: Target sampling rate in milliseconds for timeseries generation. - If None, uses sampling rate from carbon data or defaults to 5 minutes. - - Returns: - List of carbon intensity timeseries points: - [{"timestamp_us": 1727003400000000, "carbon_intensity": 185.0}, ...] - - Raises: - ValueError: If carbon_data is empty or phase timeframe is invalid - """ - if not carbon_data: - raise ValueError("No carbon intensity data available for timeseries generation") - - if phase_start_us >= phase_end_us: - raise ValueError("Invalid phase timeframe: start must be before end") - - # Determine sampling rate for timeseries generation - if target_sampling_rate_ms is None: - target_sampling_rate_ms = _calculate_sampling_rate_from_data(carbon_data) - - target_sampling_rate_us = target_sampling_rate_ms * 1000 - - # Generate timestamps at regular intervals throughout the phase - timeseries = [] - current_timestamp_us = phase_start_us - - while current_timestamp_us <= phase_end_us: - try: - carbon_intensity = get_carbon_intensity_at_timestamp(current_timestamp_us, carbon_data) - timeseries.append({ - "timestamp_us": current_timestamp_us, - "carbon_intensity": carbon_intensity - }) - except ValueError: - # Skip this timestamp if carbon intensity lookup fails - # This handles edge cases like 
malformed data or timestamp conversion issues - # Note: Normal out-of-range timestamps are handled gracefully by get_carbon_intensity_at_timestamp - pass - - current_timestamp_us += target_sampling_rate_us - - # Always include the phase end timestamp if it wasn't already included - if timeseries and timeseries[-1]["timestamp_us"] != phase_end_us: - try: - carbon_intensity = get_carbon_intensity_at_timestamp(phase_end_us, carbon_data) - timeseries.append({ - "timestamp_us": phase_end_us, - "carbon_intensity": carbon_intensity - }) - except ValueError: - pass - - return timeseries - - -def _calculate_sampling_rate_from_data(carbon_intensity_data: List[Dict[str, Any]]) -> int: - """ - Calculate sampling rate in milliseconds based on time intervals in carbon intensity data. - - Args: - carbon_intensity_data: List of carbon intensity data points with 'time' field - - Returns: - Sampling rate in milliseconds, or 300000 (5 minutes) as fallback - - Example: - For data: [{"time": "2025-09-23T10:00:00Z"}, {"time": "2025-09-23T11:00:00Z"}] - Returns: 3600000 (1 hour in milliseconds) + query = """ + SELECT mv.time, mv.value, mm.sampling_rate_configured + FROM measurement_values mv + JOIN measurement_metrics mm ON mv.measurement_metric_id = mm.id + WHERE mm.run_id = %s + AND mm.metric = %s + AND mm.detail_name = %s + ORDER BY mv.time ASC """ - if not carbon_intensity_data or len(carbon_intensity_data) < 2: - return 300000 + carbon_intensity_values = DB().fetch_all(query, (run_id, metric_name, detail_name)) - try: - time1 = datetime.fromisoformat(carbon_intensity_data[0]['time'].replace('Z', '+00:00')) - time2 = datetime.fromisoformat(carbon_intensity_data[1]['time'].replace('Z', '+00:00')) + if not carbon_intensity_values: + return None, None - interval_seconds = abs((time2 - time1).total_seconds()) - return int(interval_seconds * 1000) - except (KeyError, ValueError, IndexError): - return 300000 + # Extract sampling rate from first row (all rows have the same 
sampling_rate_configured) + sampling_rate_ms = carbon_intensity_values[0][2] if carbon_intensity_values else 300000 + # Convert from database format to carbon data format (keep timestamps as microseconds) + carbon_data = [ + { + 'timestamp_us': timestamp_us, + 'carbon_intensity': float(value_int) / 1000.0, + 'location': detail_name + } + for timestamp_us, value_int, _ in carbon_intensity_values # Unpack the third element (sampling_rate_configured) + ] -def _microseconds_to_iso8601(timestamp_us: int) -> str: - """ - Convert microsecond timestamp to ISO 8601 format. - - Args: - timestamp_us: Timestamp in microseconds since epoch - - Returns: - ISO 8601 formatted timestamp string (e.g., "2025-09-22T10:50:00Z") - """ - timestamp_seconds = timestamp_us / 1_000_000 - dt = datetime.fromtimestamp(timestamp_seconds, timezone.utc) - return dt.strftime('%Y-%m-%dT%H:%M:%SZ') + return carbon_data, sampling_rate_ms def store_static_carbon_intensity(run_id, static_value): @@ -362,7 +219,7 @@ def store_dynamic_carbon_intensity(run_id, grid_carbon_intensity_location): metric_name = 'grid_carbon_intensity_dynamic' detail_name = grid_carbon_intensity_location unit = 'gCO2e/kWh' - # Calculate sampling rate based on actual data intervals + # Calculate sampling rate based on actual data intervals from API format sampling_rate_configured = _calculate_sampling_rate_from_data(carbon_intensity_data) measurement_metric_id = DB().fetch_one(''' @@ -399,45 +256,159 @@ def store_dynamic_carbon_intensity(run_id, grid_carbon_intensity_location): print(f"Stored {len(values_data)} dynamic carbon intensity data points for location {grid_carbon_intensity_location}") -def _get_stored_carbon_intensity_data(run_id, metric_name, detail_name): +def generate_carbon_intensity_timeseries_for_phase( + phase_start_us: int, + phase_end_us: int, + carbon_data: List[Dict[str, Any]], + sampling_rate_ms: int = 300000 +) -> List[Dict[str, Any]]: """ - Retrieve stored carbon intensity data from measurement_metrics for 
a run. + Generate carbon intensity timeseries for a specific phase timeframe. + + This function generates carbon intensity values at regular intervals throughout a phase, + which are stored as measurement metrics and used for calculating representative carbon + intensity values for energy calculations. Args: - run_id: UUID of the run - metric_name: Either 'grid_carbon_intensity_static' or 'grid_carbon_intensity_dynamic' - detail_name: '[CONFIG]' for static, location code for dynamic (e.g., "DE", "ES-IB-MA") + phase_start_us: Phase start timestamp in microseconds + phase_end_us: Phase end timestamp in microseconds + carbon_data: List of carbon intensity data points from service + sampling_rate_ms: Sampling rate in milliseconds for timeseries generation (default: 300000 = 5 minutes) Returns: - List of carbon intensity data points or None if no data found + List of carbon intensity timeseries points: + [{"timestamp_us": 1727003400000000, "carbon_intensity": 185.0}, ...] + + Raises: + ValueError: If carbon_data is empty or phase timeframe is invalid """ - query = """ - SELECT mv.time, mv.value - FROM measurement_values mv - JOIN measurement_metrics mm ON mv.measurement_metric_id = mm.id - WHERE mm.run_id = %s - AND mm.metric = %s - AND mm.detail_name = %s - ORDER BY mv.time ASC + if not carbon_data: + raise ValueError("No carbon intensity data available for timeseries generation") + + if phase_start_us >= phase_end_us: + raise ValueError("Invalid phase timeframe: start must be before end") + + # Convert sampling rate to microseconds + sampling_rate_us = sampling_rate_ms * 1000 + + # Generate timestamps at regular intervals throughout the phase + timeseries = [] + current_timestamp_us = phase_start_us + + while current_timestamp_us <= phase_end_us: + try: + carbon_intensity = _get_carbon_intensity_at_timestamp(current_timestamp_us, carbon_data) + timeseries.append({ + "timestamp_us": current_timestamp_us, + "carbon_intensity": carbon_intensity + }) + except ValueError: 
+ # Skip this timestamp if carbon intensity lookup fails + # This handles edge cases like malformed data or timestamp conversion issues + # Note: Normal out-of-range timestamps are handled gracefully by _get_carbon_intensity_at_timestamp + pass + + current_timestamp_us += sampling_rate_us + + # Always include the phase end timestamp if it wasn't already included + if timeseries and timeseries[-1]["timestamp_us"] != phase_end_us: + try: + carbon_intensity = _get_carbon_intensity_at_timestamp(phase_end_us, carbon_data) + timeseries.append({ + "timestamp_us": phase_end_us, + "carbon_intensity": carbon_intensity + }) + except ValueError: + pass + + return timeseries + + + +def _get_carbon_intensity_at_timestamp(timestamp_us: int, carbon_data: List[Dict[str, Any]]) -> float: """ - results = DB().fetch_all(query, (run_id, metric_name, detail_name)) - - if not results: - return None - - # Convert stored data back to the format expected by get_carbon_intensity_at_timestamp - carbon_data = [] - for timestamp_us, value_int in results: - # Convert back from integer storage (divide by 1000 to restore decimal precision) - carbon_intensity = float(value_int) / 1000.0 - # Convert timestamp to ISO format for consistency - dt = datetime.fromtimestamp(timestamp_us / 1_000_000, timezone.utc) - iso_time = dt.strftime('%Y-%m-%dT%H:%M:%SZ') - - carbon_data.append({ - 'time': iso_time, - 'carbon_intensity': carbon_intensity, - 'location': detail_name - }) + Get carbon intensity value for a specific timestamp using interpolation/extrapolation. 
- return carbon_data + This function finds the carbon intensity at a given timestamp by: + - Interpolating between two data points if timestamp falls between them + - Returning the first value if timestamp is before all data points + - Returning the last value if timestamp is after all data points + + Args: + timestamp_us: Target timestamp in microseconds + carbon_data: List of carbon intensity data points with 'timestamp_us' and 'carbon_intensity' fields + + Returns: + Carbon intensity value in gCO2e/kWh + + Raises: + ValueError: If carbon_data is empty + """ + if not carbon_data: + raise ValueError("No carbon intensity data available for interpolation") + + # Extract and sort data points by timestamp + data_points = [(item['timestamp_us'], float(item['carbon_intensity'])) for item in carbon_data] + data_points.sort(key=lambda x: x[0]) + + # Check if target is before first or after last data point + if timestamp_us <= data_points[0][0]: + return data_points[0][1] + if timestamp_us >= data_points[-1][0]: + return data_points[-1][1] + + # Find surrounding data points for interpolation + for i in range(len(data_points) - 1): + time1_us, value1 = data_points[i] + time2_us, value2 = data_points[i + 1] + + if time1_us <= timestamp_us <= time2_us: + # Linear interpolation + if time1_us == time2_us: + return value1 + + ratio = (timestamp_us - time1_us) / (time2_us - time1_us) + return value1 + (value2 - value1) * ratio + + raise ValueError(f"Could not interpolate carbon intensity for timestamp {timestamp_us}") + + +def _calculate_sampling_rate_from_data(carbon_intensity_data: List[Dict[str, Any]]) -> int: + """ + Calculate sampling rate in milliseconds based on time intervals in carbon intensity data. 
+ + Args: + carbon_intensity_data: List of carbon intensity data points with 'time' field (API format) + + Returns: + Sampling rate in milliseconds, or 300000 (5 minutes) as fallback + + Example: + For data with 1 hour intervals: Returns 3600000 (1 hour in milliseconds) + """ + if not carbon_intensity_data or len(carbon_intensity_data) < 2: + return 300000 + + try: + time1 = datetime.fromisoformat(carbon_intensity_data[0]['time'].replace('Z', '+00:00')) + time2 = datetime.fromisoformat(carbon_intensity_data[1]['time'].replace('Z', '+00:00')) + interval_seconds = abs((time2 - time1).total_seconds()) + sampling_rate_configured = int(interval_seconds * 1000) + return sampling_rate_configured + except (KeyError, ValueError, IndexError): + return 300000 + + +def _microseconds_to_iso8601(timestamp_us: int) -> str: + """ + Convert microsecond timestamp to ISO 8601 format. + + Args: + timestamp_us: Timestamp in microseconds since epoch + + Returns: + ISO 8601 formatted timestamp string (e.g., "2025-09-22T10:50:00Z") + """ + timestamp_seconds = timestamp_us / 1_000_000 + dt = datetime.fromtimestamp(timestamp_seconds, timezone.utc) + return dt.strftime('%Y-%m-%dT%H:%M:%SZ') diff --git a/lib/phase_stats.py b/lib/phase_stats.py index 44f9c0a64..55c81d89e 100644 --- a/lib/phase_stats.py +++ b/lib/phase_stats.py @@ -14,7 +14,7 @@ CarbonIntensityServiceError, CarbonIntensityDataError, get_carbon_intensity_data_for_run, - get_carbon_intensity_timeseries_for_phase, + generate_carbon_intensity_timeseries_for_phase, ) def reconstruct_runtime_phase(run_id, runtime_phase_idx): @@ -84,7 +84,7 @@ def build_and_store_phase_stats(run_id, sci=None): # Load carbon intensity time series data once to use as lookup table during energy metric processing. # This data is used for interpolation at specific timestamps rather than being aggregated like regular metrics. # Auto-detects whether dynamic (API) or static (config) carbon intensity data is available. 
- carbon_intensity_data = get_carbon_intensity_data_for_run(run_id) + carbon_intensity_data, carbon_sampling_rate_ms = get_carbon_intensity_data_for_run(run_id) query = """ SELECT id, metric, unit, detail_name, sampling_rate_configured @@ -134,8 +134,9 @@ def build_and_store_phase_stats(run_id, sci=None): phase_carbon_intensity = None if carbon_intensity_data: try: - carbon_timeseries = get_carbon_intensity_timeseries_for_phase( - phase['start'], phase['end'], carbon_intensity_data + carbon_timeseries = generate_carbon_intensity_timeseries_for_phase( + phase['start'], phase['end'], carbon_intensity_data, + sampling_rate_ms=carbon_sampling_rate_ms ) # INTERIM: Calculate representative carbon intensity for current energy calculations diff --git a/lib/scenario_runner.py b/lib/scenario_runner.py index 6747325a7..92e9c2312 100644 --- a/lib/scenario_runner.py +++ b/lib/scenario_runner.py @@ -163,7 +163,7 @@ def __init__(self, ('_save_notes_runner', {}), ('_save_run_logs', {}), ('_save_warnings', {}), - ('_save_grid_carbon_intensity_metrics', {}), + ('_process_grid_carbon_intensity', {}), ('_process_phase_stats', {}), ) @@ -2072,11 +2072,11 @@ def _patch_phases(self): if self.__phases.get('[RUNTIME]', None) is not None and self.__phases['[RUNTIME]'].get('end', None) is None: self.__phases['[RUNTIME]']['end'] = int(time.time_ns() / 1_000) - def _save_grid_carbon_intensity_metrics(self): + def _process_grid_carbon_intensity(self): if not self._run_id or self._dev_no_save: return - print(TerminalColors.HEADER, '\nStore grid carbon intensity metrics', TerminalColors.ENDC) + print(TerminalColors.HEADER, '\nProcess grid carbon intensity values', TerminalColors.ENDC) # pylint: disable=import-outside-toplevel from lib.carbon_intensity import store_static_carbon_intensity, store_dynamic_carbon_intensity diff --git a/tests/lib/test_carbon_intensity.py b/tests/lib/test_carbon_intensity.py index 504125f89..f3e33f472 100644 --- a/tests/lib/test_carbon_intensity.py +++ 
b/tests/lib/test_carbon_intensity.py @@ -16,11 +16,11 @@ CarbonIntensityDataError, _microseconds_to_iso8601, _calculate_sampling_rate_from_data, - get_carbon_intensity_at_timestamp, + _get_carbon_intensity_at_timestamp, get_carbon_intensity_data_for_run, store_static_carbon_intensity, store_dynamic_carbon_intensity, - get_carbon_intensity_timeseries_for_phase, + generate_carbon_intensity_timeseries_for_phase, ) class TestCarbonIntensityClient: @@ -64,7 +64,7 @@ def test__microseconds_to_iso8601(self): assert parsed is not None def test__calculate_sampling_rate_from_data(self): - # Test with 1 hour interval (as in the example) + # Test with 1 hour interval using API format with 'time' field carbon_data = [ {"location": "DE", "time": "2025-09-23T10:00:00Z", "carbon_intensity": 253.0}, {"location": "DE", "time": "2025-09-23T11:00:00Z", "carbon_intensity": 252.0} @@ -92,50 +92,50 @@ def test__calculate_sampling_rate_from_data(self): result = _calculate_sampling_rate_from_data([{"invalid": "data"}, {"also": "invalid"}]) assert result == 300000 # 5 minutes fallback - def test_get_carbon_intensity_at_timestamp_single_point(self): + def test__get_carbon_intensity_at_timestamp_single_point(self): # Test with single data point carbon_data = [ - {"location": "DE", "time": "2024-09-22T10:00:00Z", "carbon_intensity": 185.0} + {"location": "DE", "timestamp_us": int(datetime(2024, 9, 22, 10, 0, 0, tzinfo=timezone.utc).timestamp() * 1_000_000), "carbon_intensity": 185.0, "sampling_rate_ms": 300000} ] timestamp_us = 1727003400000000 # 2024-09-22T10:50:00Z - result = get_carbon_intensity_at_timestamp(timestamp_us, carbon_data) + result = _get_carbon_intensity_at_timestamp(timestamp_us, carbon_data) assert result == 185.0 - def test_get_carbon_intensity_at_timestamp_between_points(self): + def test__get_carbon_intensity_at_timestamp_between_points(self): # Test interpolation between two points carbon_data = [ - {"location": "DE", "time": "2024-09-22T10:00:00Z", "carbon_intensity": 
180.0}, - {"location": "DE", "time": "2024-09-22T11:00:00Z", "carbon_intensity": 200.0} + {"location": "DE", "timestamp_us": int(datetime(2024, 9, 22, 10, 0, 0, tzinfo=timezone.utc).timestamp() * 1_000_000), "carbon_intensity": 180.0}, + {"location": "DE", "timestamp_us": int(datetime(2024, 9, 22, 11, 0, 0, tzinfo=timezone.utc).timestamp() * 1_000_000), "carbon_intensity": 200.0} ] # Calculate correct timestamp for 10:30:00 UTC mid_time = datetime(2024, 9, 22, 10, 30, 0) # UTC time timestamp_us = int(calendar.timegm(mid_time.timetuple()) * 1_000_000) - result = get_carbon_intensity_at_timestamp(timestamp_us, carbon_data) + result = _get_carbon_intensity_at_timestamp(timestamp_us, carbon_data) assert result == 190.0 # Linear interpolation: 180 + (200-180) * 0.5 - def test_get_carbon_intensity_at_timestamp_before_range(self): + def test__get_carbon_intensity_at_timestamp_before_range(self): # Test with timestamp before data range carbon_data = [ - {"location": "DE", "time": "2024-09-22T11:00:00Z", "carbon_intensity": 185.0} + {"location": "DE", "timestamp_us": int(datetime(2024, 9, 22, 11, 0, 0, tzinfo=timezone.utc).timestamp() * 1_000_000), "carbon_intensity": 185.0} ] timestamp_us = 1727001600000000 # 2024-09-22T10:20:00Z (before 11:00) - result = get_carbon_intensity_at_timestamp(timestamp_us, carbon_data) + result = _get_carbon_intensity_at_timestamp(timestamp_us, carbon_data) assert result == 185.0 # Should return first value - def test_get_carbon_intensity_at_timestamp_after_range(self): + def test__get_carbon_intensity_at_timestamp_after_range(self): # Test with timestamp after data range carbon_data = [ - {"location": "DE", "time": "2024-09-22T10:00:00Z", "carbon_intensity": 185.0} + {"location": "DE", "timestamp_us": int(datetime(2024, 9, 22, 10, 0, 0, tzinfo=timezone.utc).timestamp() * 1_000_000), "carbon_intensity": 185.0} ] timestamp_us = 1727007000000000 # 2024-09-22T11:50:00Z (after 10:00) - result = get_carbon_intensity_at_timestamp(timestamp_us, 
carbon_data) + result = _get_carbon_intensity_at_timestamp(timestamp_us, carbon_data) assert result == 185.0 # Should return last value - def test_get_carbon_intensity_at_timestamp_empty_data(self): + def test__get_carbon_intensity_at_timestamp_empty_data(self): # Test with empty data with pytest.raises(ValueError, match="No carbon intensity data available"): - get_carbon_intensity_at_timestamp(1727003400000000, []) + _get_carbon_intensity_at_timestamp(1727003400000000, []) @patch('lib.carbon_intensity.requests.get') def test_carbon_intensity_client_success(self, mock_get): @@ -193,8 +193,9 @@ class TestGetCarbonIntensityDataForRun: def test_no_carbon_intensity_data(self): # Test with run that has no carbon intensity data run_id = Tests.insert_run() - result = get_carbon_intensity_data_for_run(run_id) - assert result is None + carbon_data, sampling_rate_ms = get_carbon_intensity_data_for_run(run_id) + assert carbon_data is None + assert sampling_rate_ms is None def test_with_dynamic_carbon_intensity_data(self): # Test with run that has dynamic carbon intensity data stored @@ -213,11 +214,12 @@ def test_with_dynamic_carbon_intensity_data(self): (metric_id, 185, 1727003400000000) ) - result = get_carbon_intensity_data_for_run(run_id) + carbon_data, sampling_rate_ms = get_carbon_intensity_data_for_run(run_id) # Should return the stored carbon intensity data - assert result is not None - assert len(result) > 0 + assert carbon_data is not None + assert len(carbon_data) > 0 + assert sampling_rate_ms == 1000 # Should match the sampling rate we inserted class TestStoreCarbonIntensityAsMetrics: @@ -324,13 +326,13 @@ def test_store_carbon_intensity_dynamic_missing_location(self, run_with_measurem class TestCarbonIntensityTimeseries: - def test_get_carbon_intensity_timeseries_for_phase(self): + def test_generate_carbon_intensity_timeseries_for_phase(self): """Test generating carbon intensity timeseries for a phase""" - # Sample carbon intensity data + # Sample carbon 
intensity data with microsecond timestamps carbon_data = [ - {"time": "2025-09-22T10:00:00Z", "carbon_intensity": 185.0, "location": "DE"}, - {"time": "2025-09-22T11:00:00Z", "carbon_intensity": 190.0, "location": "DE"}, - {"time": "2025-09-22T12:00:00Z", "carbon_intensity": 183.0, "location": "DE"} + {"timestamp_us": int(datetime(2025, 9, 22, 10, 0, 0, tzinfo=timezone.utc).timestamp() * 1_000_000), "carbon_intensity": 185.0, "location": "DE"}, + {"timestamp_us": int(datetime(2025, 9, 22, 11, 0, 0, tzinfo=timezone.utc).timestamp() * 1_000_000), "carbon_intensity": 190.0, "location": "DE"}, + {"timestamp_us": int(datetime(2025, 9, 22, 12, 0, 0, tzinfo=timezone.utc).timestamp() * 1_000_000), "carbon_intensity": 183.0, "location": "DE"} ] # Phase timeframe: 10:30 to 11:30 (90 minutes) in UTC @@ -338,8 +340,8 @@ def test_get_carbon_intensity_timeseries_for_phase(self): phase_end_us = int(datetime(2025, 9, 22, 11, 30, 0, tzinfo=timezone.utc).timestamp() * 1_000_000) # Generate timeseries with 30-minute intervals - result = get_carbon_intensity_timeseries_for_phase( - phase_start_us, phase_end_us, carbon_data, target_sampling_rate_ms=30*60*1000 + result = generate_carbon_intensity_timeseries_for_phase( + phase_start_us, phase_end_us, carbon_data, sampling_rate_ms=30*60*1000 ) # Should generate points at 10:30, 11:00, 11:30 @@ -358,7 +360,7 @@ def test_get_carbon_intensity_timeseries_empty_data(self): phase_end_us = int(datetime(2025, 9, 22, 11, 30, 0, tzinfo=timezone.utc).timestamp() * 1_000_000) with pytest.raises(ValueError, match="No carbon intensity data available"): - get_carbon_intensity_timeseries_for_phase(phase_start_us, phase_end_us, []) + generate_carbon_intensity_timeseries_for_phase(phase_start_us, phase_end_us, []) def test_get_carbon_intensity_timeseries_invalid_timeframe(self): """Test error handling with invalid phase timeframe""" @@ -368,4 +370,4 @@ def test_get_carbon_intensity_timeseries_invalid_timeframe(self): phase_end_us = int(datetime(2025, 9, 
22, 10, 30, 0, tzinfo=timezone.utc).timestamp() * 1_000_000) # End before start with pytest.raises(ValueError, match="Invalid phase timeframe"): - get_carbon_intensity_timeseries_for_phase(phase_start_us, phase_end_us, carbon_data) + generate_carbon_intensity_timeseries_for_phase(phase_start_us, phase_end_us, carbon_data) From 22ae9c1e02ae11062546ac7890c4f9a2f7a037df Mon Sep 17 00:00:00 2001 From: David Kopp Date: Wed, 24 Sep 2025 14:34:31 +0200 Subject: [PATCH 07/27] Ensure dynamic carbon intensity storage always has data points at the measurement start and end times --- lib/carbon_intensity.py | 50 ++++++++++++++-- tests/lib/test_carbon_intensity.py | 94 ++++++++++++++++++++++++++++-- 2 files changed, 133 insertions(+), 11 deletions(-) diff --git a/lib/carbon_intensity.py b/lib/carbon_intensity.py index 671598fa8..982f3a875 100644 --- a/lib/carbon_intensity.py +++ b/lib/carbon_intensity.py @@ -228,18 +228,58 @@ def store_dynamic_carbon_intensity(run_id, grid_carbon_intensity_location): RETURNING id ''', params=(run_id, metric_name, detail_name, unit, sampling_rate_configured))[0] - # Prepare measurement values for bulk insert - values_data = [] + # Convert API data to format expected by _get_carbon_intensity_at_timestamp + carbon_data_for_lookup = [] for data_point in carbon_intensity_data: # Convert ISO timestamp to microseconds iso_time = data_point['time'] dt = datetime.fromisoformat(iso_time.replace('Z', '+00:00')) timestamp_us = int(dt.timestamp() * 1_000_000) - # Convert carbon intensity to integer (multiply by 1000 for precision) - carbon_intensity_value = int(float(data_point['carbon_intensity']) * 1000) + carbon_data_for_lookup.append({ + 'timestamp_us': timestamp_us, + 'carbon_intensity': float(data_point['carbon_intensity']) + }) - values_data.append((measurement_metric_id, carbon_intensity_value, timestamp_us)) + # Sort by timestamp for interpolation + carbon_data_for_lookup.sort(key=lambda x: x['timestamp_us']) + + # Prepare measurement values for 
bulk insert + values_data = [] + + # Always ensure we have data points at measurement start and end times + try: + # Get carbon intensity at measurement start time + start_carbon_intensity = _get_carbon_intensity_at_timestamp(start_time_us, carbon_data_for_lookup) + start_carbon_intensity_value = int(start_carbon_intensity * 1000) + values_data.append((measurement_metric_id, start_carbon_intensity_value, start_time_us)) + + # Get carbon intensity at measurement end time + end_carbon_intensity = _get_carbon_intensity_at_timestamp(end_time_us, carbon_data_for_lookup) + end_carbon_intensity_value = int(end_carbon_intensity * 1000) + + # Add intermediate data points that fall within measurement timeframe + intermediate_points = [] + for data_point in carbon_data_for_lookup: + timestamp_us = data_point['timestamp_us'] + # Only include points strictly within the timeframe (not at boundaries) + if start_time_us < timestamp_us < end_time_us: + carbon_intensity_value = int(float(data_point['carbon_intensity']) * 1000) + intermediate_points.append((measurement_metric_id, carbon_intensity_value, timestamp_us)) + + # Sort intermediate points by time and add them + intermediate_points.sort(key=lambda x: x[2]) # Sort by timestamp + values_data.extend(intermediate_points) + + # Add end time point (ensure it's different from start time) + if start_time_us != end_time_us: + values_data.append((measurement_metric_id, end_carbon_intensity_value, end_time_us)) + + print(f"Stored dynamic carbon intensity: start={start_carbon_intensity} gCO2e/kWh, end={end_carbon_intensity} gCO2e/kWh, {len(intermediate_points)} intermediate points") + + except ValueError as e: + error_helpers.log_error(f"Failed to interpolate carbon intensity data: {e}", run_id=run_id) + return if values_data: # Bulk insert measurement values using copy_from diff --git a/tests/lib/test_carbon_intensity.py b/tests/lib/test_carbon_intensity.py index f3e33f472..3d95d708f 100644 --- a/tests/lib/test_carbon_intensity.py +++ 
b/tests/lib/test_carbon_intensity.py @@ -276,13 +276,15 @@ def test_store_carbon_intensity_dynamic_grid_enabled(self, run_with_measurement_ # Mock the carbon intensity API call + # Use timestamps that align with the measurement timeframe (2024-12-24T13:33:10Z to 2024-12-24T13:41:00Z) with patch('lib.carbon_intensity.CarbonIntensityClient') as mock_client_class: mock_client = Mock() mock_client_class.return_value = mock_client mock_client.get_carbon_intensity_history.return_value = [ - {"location": "DE", "time": "2024-09-22T10:00:00Z", "carbon_intensity": 185.0}, - {"location": "DE", "time": "2024-09-22T10:30:00Z", "carbon_intensity": 190.0}, - {"location": "DE", "time": "2024-09-22T11:00:00Z", "carbon_intensity": 183.0} + {"location": "DE", "time": "2024-12-24T13:32:00Z", "carbon_intensity": 185.0}, # Before start (for extrapolation) + {"location": "DE", "time": "2024-12-24T13:35:00Z", "carbon_intensity": 190.0}, # Within timeframe + {"location": "DE", "time": "2024-12-24T13:38:00Z", "carbon_intensity": 188.0}, # Within timeframe + {"location": "DE", "time": "2024-12-24T13:42:00Z", "carbon_intensity": 183.0} # After end (for extrapolation) ] # Call the function under test @@ -309,11 +311,91 @@ def test_store_carbon_intensity_dynamic_grid_enabled(self, run_with_measurement_ (run_id,) ) - assert len(values_result) == 3 + # Should have 4 data points: start boundary + 2 intermediate points + end boundary + assert len(values_result) == 4 # Values should be stored as integers (multiplied by 1000) - assert values_result[0][0] == 185000 # 185.0 * 1000 + # First point: interpolated start boundary (between 185.0 and 190.0) + # Second point: 190.0 (intermediate point at 13:35:00) assert values_result[1][0] == 190000 # 190.0 * 1000 - assert values_result[2][0] == 183000 # 183.0 * 1000 + # Third point: 188.0 (intermediate point at 13:38:00) + assert values_result[2][0] == 188000 # 188.0 * 1000 + # Fourth point: interpolated end boundary (between 188.0 and 183.0) + + def 
test_store_carbon_intensity_dynamic_single_data_point(self, run_with_measurement_times): + run_id = run_with_measurement_times + + # Mock the carbon intensity API call with only one data point within timeframe + with patch('lib.carbon_intensity.CarbonIntensityClient') as mock_client_class: + mock_client = Mock() + mock_client_class.return_value = mock_client + mock_client.get_carbon_intensity_history.return_value = [ + {"location": "DE", "time": "2024-12-24T13:37:00Z", "carbon_intensity": 185.0} # Within measurement timeframe + ] + + # Call the function under test + store_dynamic_carbon_intensity(run_id, 'DE') + + # Verify that measurement_metrics entry was created for dynamic carbon intensity + metric_result = DB().fetch_one( + "SELECT metric, detail_name, unit FROM measurement_metrics WHERE run_id = %s", + (run_id,) + ) + + assert metric_result is not None + assert metric_result[0] == 'grid_carbon_intensity_dynamic' + assert metric_result[1] == 'DE' + assert metric_result[2] == 'gCO2e/kWh' + + # Verify that measurement values were stored + values_result = DB().fetch_all( + """SELECT mv.value, mv.time + FROM measurement_values mv + JOIN measurement_metrics mm ON mv.measurement_metric_id = mm.id + WHERE mm.run_id = %s AND mm.metric = 'grid_carbon_intensity_dynamic' + ORDER BY mv.time""", + (run_id,) + ) + + assert len(values_result) >= 2, "Dynamic carbon intensity requires at least 2 data points" + + def test_store_carbon_intensity_dynamic_data_outside_timeframe(self, run_with_measurement_times): + # Test that dynamic carbon intensity properly handles data outside measurement timeframe using extrapolation + run_id = run_with_measurement_times + + # Mock API data that is completely outside the measurement timeframe + with patch('lib.carbon_intensity.CarbonIntensityClient') as mock_client_class: + mock_client = Mock() + mock_client_class.return_value = mock_client + mock_client.get_carbon_intensity_history.return_value = [ + {"location": "DE", "time": 
"2024-12-24T12:00:00Z", "carbon_intensity": 200.0}, # Well before start + {"location": "DE", "time": "2024-12-24T12:30:00Z", "carbon_intensity": 210.0} # Still before start + ] + + # Call the function under test + store_dynamic_carbon_intensity(run_id, 'DE') + + # Verify that measurement_metrics entry was created + metric_result = DB().fetch_one( + "SELECT metric, detail_name, unit FROM measurement_metrics WHERE run_id = %s", + (run_id,) + ) + + assert metric_result is not None + assert metric_result[0] == 'grid_carbon_intensity_dynamic' + + # Verify that measurement values were stored using extrapolation + values_result = DB().fetch_all( + """SELECT mv.value, mv.time + FROM measurement_values mv + JOIN measurement_metrics mm ON mv.measurement_metric_id = mm.id + WHERE mm.run_id = %s AND mm.metric = 'grid_carbon_intensity_dynamic' + ORDER BY mv.time""", + (run_id,) + ) + + # Should have exactly 2 data points (start and end boundaries) since no intermediate points in timeframe + assert len(values_result) == 2 + # Both values should be extrapolated from the trend in the API data (210 is higher than 200) def test_store_carbon_intensity_dynamic_missing_location(self, run_with_measurement_times): # Test error handling when dynamic method is called with None location From e89e1d59970a7cadc94e6e229edafc09b3d6ec1d Mon Sep 17 00:00:00 2001 From: David Kopp Date: Wed, 24 Sep 2025 14:51:19 +0200 Subject: [PATCH 08/27] Simplify get timestamp function - use nearest data point instead of interpolation --- lib/carbon_intensity.py | 100 +++++++++++------------------ tests/lib/test_carbon_intensity.py | 4 +- 2 files changed, 40 insertions(+), 64 deletions(-) diff --git a/lib/carbon_intensity.py b/lib/carbon_intensity.py index 982f3a875..407212e6d 100644 --- a/lib/carbon_intensity.py +++ b/lib/carbon_intensity.py @@ -241,45 +241,39 @@ def store_dynamic_carbon_intensity(run_id, grid_carbon_intensity_location): 'carbon_intensity': float(data_point['carbon_intensity']) }) - # Sort by 
timestamp for interpolation + # Sort by timestamp for consistent processing carbon_data_for_lookup.sort(key=lambda x: x['timestamp_us']) # Prepare measurement values for bulk insert values_data = [] # Always ensure we have data points at measurement start and end times - try: - # Get carbon intensity at measurement start time - start_carbon_intensity = _get_carbon_intensity_at_timestamp(start_time_us, carbon_data_for_lookup) - start_carbon_intensity_value = int(start_carbon_intensity * 1000) - values_data.append((measurement_metric_id, start_carbon_intensity_value, start_time_us)) - - # Get carbon intensity at measurement end time - end_carbon_intensity = _get_carbon_intensity_at_timestamp(end_time_us, carbon_data_for_lookup) - end_carbon_intensity_value = int(end_carbon_intensity * 1000) - - # Add intermediate data points that fall within measurement timeframe - intermediate_points = [] - for data_point in carbon_data_for_lookup: - timestamp_us = data_point['timestamp_us'] - # Only include points strictly within the timeframe (not at boundaries) - if start_time_us < timestamp_us < end_time_us: - carbon_intensity_value = int(float(data_point['carbon_intensity']) * 1000) - intermediate_points.append((measurement_metric_id, carbon_intensity_value, timestamp_us)) - - # Sort intermediate points by time and add them - intermediate_points.sort(key=lambda x: x[2]) # Sort by timestamp - values_data.extend(intermediate_points) - - # Add end time point (ensure it's different from start time) - if start_time_us != end_time_us: - values_data.append((measurement_metric_id, end_carbon_intensity_value, end_time_us)) - - print(f"Stored dynamic carbon intensity: start={start_carbon_intensity} gCO2e/kWh, end={end_carbon_intensity} gCO2e/kWh, {len(intermediate_points)} intermediate points") - - except ValueError as e: - error_helpers.log_error(f"Failed to interpolate carbon intensity data: {e}", run_id=run_id) - return + # Get carbon intensity at measurement start time + 
start_carbon_intensity = _get_carbon_intensity_at_timestamp(start_time_us, carbon_data_for_lookup) + start_carbon_intensity_value = int(start_carbon_intensity * 1000) + values_data.append((measurement_metric_id, start_carbon_intensity_value, start_time_us)) + + # Get carbon intensity at measurement end time + end_carbon_intensity = _get_carbon_intensity_at_timestamp(end_time_us, carbon_data_for_lookup) + end_carbon_intensity_value = int(end_carbon_intensity * 1000) + + # Add intermediate data points that fall within measurement timeframe + intermediate_points = [] + for data_point in carbon_data_for_lookup: + timestamp_us = data_point['timestamp_us'] + # Only include points strictly within the timeframe (not at boundaries) + if start_time_us < timestamp_us < end_time_us: + carbon_intensity_value = int(float(data_point['carbon_intensity']) * 1000) + intermediate_points.append((measurement_metric_id, carbon_intensity_value, timestamp_us)) + + # Sort intermediate points by time and add them + intermediate_points.sort(key=lambda x: x[2]) # Sort by timestamp + values_data.extend(intermediate_points) + + # Add end time point (ensure it's different from start time) + if start_time_us != end_time_us: + values_data.append((measurement_metric_id, end_carbon_intensity_value, end_time_us)) + if values_data: # Bulk insert measurement values using copy_from @@ -293,7 +287,7 @@ def store_dynamic_carbon_intensity(run_id, grid_carbon_intensity_location): ) f.close() - print(f"Stored {len(values_data)} dynamic carbon intensity data points for location {grid_carbon_intensity_location}") + print(f"Stored dynamic carbon intensity for location {grid_carbon_intensity_location}: start={start_carbon_intensity} gCO2e/kWh, end={end_carbon_intensity} gCO2e/kWh, {len(intermediate_points)} intermediate points") def generate_carbon_intensity_timeseries_for_phase( @@ -367,12 +361,11 @@ def generate_carbon_intensity_timeseries_for_phase( def _get_carbon_intensity_at_timestamp(timestamp_us: int, 
carbon_data: List[Dict[str, Any]]) -> float: """ - Get carbon intensity value for a specific timestamp using interpolation/extrapolation. + Get carbon intensity value for a specific timestamp using nearest data point. This function finds the carbon intensity at a given timestamp by: - - Interpolating between two data points if timestamp falls between them - - Returning the first value if timestamp is before all data points - - Returning the last value if timestamp is after all data points + - Finding the data point with timestamp closest to the target timestamp + - Returning the carbon intensity of that nearest data point Args: timestamp_us: Target timestamp in microseconds @@ -385,32 +378,15 @@ def _get_carbon_intensity_at_timestamp(timestamp_us: int, carbon_data: List[Dict ValueError: If carbon_data is empty """ if not carbon_data: - raise ValueError("No carbon intensity data available for interpolation") - - # Extract and sort data points by timestamp - data_points = [(item['timestamp_us'], float(item['carbon_intensity'])) for item in carbon_data] - data_points.sort(key=lambda x: x[0]) - - # Check if target is before first or after last data point - if timestamp_us <= data_points[0][0]: - return data_points[0][1] - if timestamp_us >= data_points[-1][0]: - return data_points[-1][1] + raise ValueError("No carbon intensity data available") - # Find surrounding data points for interpolation - for i in range(len(data_points) - 1): - time1_us, value1 = data_points[i] - time2_us, value2 = data_points[i + 1] - - if time1_us <= timestamp_us <= time2_us: - # Linear interpolation - if time1_us == time2_us: - return value1 - - ratio = (timestamp_us - time1_us) / (time2_us - time1_us) - return value1 + (value2 - value1) * ratio + # Find the data point with timestamp closest to target timestamp + closest_point = min( + carbon_data, + key=lambda point: abs(point['timestamp_us'] - timestamp_us) + ) - raise ValueError(f"Could not interpolate carbon intensity for timestamp 
{timestamp_us}") + return float(closest_point['carbon_intensity']) def _calculate_sampling_rate_from_data(carbon_intensity_data: List[Dict[str, Any]]) -> int: diff --git a/tests/lib/test_carbon_intensity.py b/tests/lib/test_carbon_intensity.py index 3d95d708f..1467fb0fb 100644 --- a/tests/lib/test_carbon_intensity.py +++ b/tests/lib/test_carbon_intensity.py @@ -102,7 +102,7 @@ def test__get_carbon_intensity_at_timestamp_single_point(self): assert result == 185.0 def test__get_carbon_intensity_at_timestamp_between_points(self): - # Test interpolation between two points + # Test nearest point selection between two points carbon_data = [ {"location": "DE", "timestamp_us": int(datetime(2024, 9, 22, 10, 0, 0, tzinfo=timezone.utc).timestamp() * 1_000_000), "carbon_intensity": 180.0}, {"location": "DE", "timestamp_us": int(datetime(2024, 9, 22, 11, 0, 0, tzinfo=timezone.utc).timestamp() * 1_000_000), "carbon_intensity": 200.0} @@ -112,7 +112,7 @@ def test__get_carbon_intensity_at_timestamp_between_points(self): timestamp_us = int(calendar.timegm(mid_time.timetuple()) * 1_000_000) result = _get_carbon_intensity_at_timestamp(timestamp_us, carbon_data) - assert result == 190.0 # Linear interpolation: 180 + (200-180) * 0.5 + assert result == 180.0 # Nearest point: 10:30 is closer to 10:00 than 11:00 def test__get_carbon_intensity_at_timestamp_before_range(self): # Test with timestamp before data range From c6bf1deac0280de92d14e9155a9004dd1a46d6d4 Mon Sep 17 00:00:00 2001 From: David Kopp Date: Thu, 25 Sep 2025 08:30:16 +0200 Subject: [PATCH 09/27] Improve error handling - fail hard if an error occurs --- lib/carbon_intensity.py | 80 ++++++++++-------------------- lib/phase_stats.py | 7 +-- lib/scenario_runner.py | 34 ++++++------- tests/lib/test_carbon_intensity.py | 8 ++- 4 files changed, 47 insertions(+), 82 deletions(-) diff --git a/lib/carbon_intensity.py b/lib/carbon_intensity.py index 407212e6d..19ba3434f 100644 --- a/lib/carbon_intensity.py +++ 
b/lib/carbon_intensity.py @@ -5,17 +5,10 @@ from datetime import datetime, timezone from typing import List, Dict, Any from io import StringIO -from lib import error_helpers from lib.global_config import GlobalConfig from lib.db import DB -class CarbonIntensityServiceError(Exception): - """Raised when carbon intensity service request fails.""" - -class CarbonIntensityDataError(Exception): - """Raised when carbon intensity service returns invalid data.""" - class CarbonIntensityClient: def __init__(self, base_url: str = None): """ @@ -58,25 +51,19 @@ def get_carbon_intensity_history(self, location: str, start_time: str, end_time: 'interpolate': 'true' } - try: - response = requests.get(url, params=params, timeout=30) - response.raise_for_status() - - data = response.json() + response = requests.get(url, params=params, timeout=30) + response.raise_for_status() - if not isinstance(data, list): - raise ValueError(f"Expected list response from carbon intensity service, got {type(data)}") + data = response.json() - for item in data: - if not all(key in item for key in ['location', 'time', 'carbon_intensity']): - raise ValueError(f"Invalid carbon intensity data format: missing required fields in {item}") + if not isinstance(data, list): + raise ValueError(f"Expected list response from carbon intensity service, got {type(data)}") - return data + for item in data: + if not all(key in item for key in ['location', 'time', 'carbon_intensity']): + raise ValueError(f"Invalid carbon intensity data format: missing required fields in {item}") - except requests.exceptions.RequestException as e: - raise CarbonIntensityServiceError(f"Failed to fetch carbon intensity data: {e}") from e - except (ValueError, KeyError) as e: - raise CarbonIntensityDataError(f"Invalid response from carbon intensity service: {e}") from e + return data def get_carbon_intensity_data_for_run(run_id): @@ -152,8 +139,7 @@ def store_static_carbon_intensity(run_id, static_value): """ run_data = 
DB().fetch_one(run_query, (run_id,)) if not run_data or not run_data[0] or not run_data[1]: - error_helpers.log_error(f"Run {run_id} does not have valid start_measurement and end_measurement times", run_id=run_id) - return + raise ValueError(f"Run {run_id} does not have valid start_measurement and end_measurement times") start_time_us, end_time_us = run_data @@ -198,8 +184,7 @@ def store_dynamic_carbon_intensity(run_id, grid_carbon_intensity_location): """ run_data = DB().fetch_one(run_query, (run_id,)) if not run_data or not run_data[0] or not run_data[1]: - error_helpers.log_error(f"Run {run_id} does not have valid start_measurement and end_measurement times", run_id=run_id) - return + raise ValueError(f"Run {run_id} does not have valid start_measurement and end_measurement times") start_time_us, end_time_us = run_data start_time_iso = _microseconds_to_iso8601(start_time_us) @@ -212,8 +197,10 @@ def store_dynamic_carbon_intensity(run_id, grid_carbon_intensity_location): ) if not carbon_intensity_data: - error_helpers.log_error("No carbon intensity data received from service", run_id=run_id) - return + raise ValueError( + f"No carbon intensity data received from service for location '{grid_carbon_intensity_location}' " + f"between {start_time_iso} and {end_time_iso}. The service returned an empty dataset." 
+ ) # Create measurement_metric entry for dynamic carbon intensity metric_name = 'grid_carbon_intensity_dynamic' @@ -330,30 +317,20 @@ def generate_carbon_intensity_timeseries_for_phase( current_timestamp_us = phase_start_us while current_timestamp_us <= phase_end_us: - try: - carbon_intensity = _get_carbon_intensity_at_timestamp(current_timestamp_us, carbon_data) - timeseries.append({ - "timestamp_us": current_timestamp_us, - "carbon_intensity": carbon_intensity - }) - except ValueError: - # Skip this timestamp if carbon intensity lookup fails - # This handles edge cases like malformed data or timestamp conversion issues - # Note: Normal out-of-range timestamps are handled gracefully by _get_carbon_intensity_at_timestamp - pass - + carbon_intensity = _get_carbon_intensity_at_timestamp(current_timestamp_us, carbon_data) + timeseries.append({ + "timestamp_us": current_timestamp_us, + "carbon_intensity": carbon_intensity + }) current_timestamp_us += sampling_rate_us # Always include the phase end timestamp if it wasn't already included if timeseries and timeseries[-1]["timestamp_us"] != phase_end_us: - try: - carbon_intensity = _get_carbon_intensity_at_timestamp(phase_end_us, carbon_data) - timeseries.append({ - "timestamp_us": phase_end_us, - "carbon_intensity": carbon_intensity - }) - except ValueError: - pass + carbon_intensity = _get_carbon_intensity_at_timestamp(phase_end_us, carbon_data) + timeseries.append({ + "timestamp_us": phase_end_us, + "carbon_intensity": carbon_intensity + }) return timeseries @@ -370,16 +347,11 @@ def _get_carbon_intensity_at_timestamp(timestamp_us: int, carbon_data: List[Dict Args: timestamp_us: Target timestamp in microseconds carbon_data: List of carbon intensity data points with 'timestamp_us' and 'carbon_intensity' fields + (guaranteed to be non-empty by calling functions) Returns: Carbon intensity value in gCO2e/kWh - - Raises: - ValueError: If carbon_data is empty """ - if not carbon_data: - raise ValueError("No carbon intensity 
data available") - # Find the data point with timestamp closest to target timestamp closest_point = min( carbon_data, diff --git a/lib/phase_stats.py b/lib/phase_stats.py index 55c81d89e..642953987 100644 --- a/lib/phase_stats.py +++ b/lib/phase_stats.py @@ -11,8 +11,6 @@ from lib.db import DB from lib import error_helpers from lib.carbon_intensity import ( - CarbonIntensityServiceError, - CarbonIntensityDataError, get_carbon_intensity_data_for_run, generate_carbon_intensity_timeseries_for_phase, ) @@ -148,9 +146,8 @@ def build_and_store_phase_stats(run_id, sci=None): total_carbon = sum(point['carbon_intensity'] for point in carbon_timeseries) phase_carbon_intensity = Decimal(total_carbon / len(carbon_timeseries)) - except (CarbonIntensityServiceError, CarbonIntensityDataError, ValueError) as e: - error_helpers.log_error(f"Failed to calculate carbon intensity for phase {phase['name']}: {e}", run_id=run_id) - phase_carbon_intensity = None + except Exception as e: # pylint: disable=broad-except + raise RuntimeError(f"Failed to calculate carbon intensity for phase {phase['name']}: {e}") from e select_query = """ WITH lag_table as ( diff --git a/lib/scenario_runner.py b/lib/scenario_runner.py index 92e9c2312..dfe728518 100644 --- a/lib/scenario_runner.py +++ b/lib/scenario_runner.py @@ -2081,24 +2081,22 @@ def _process_grid_carbon_intensity(self): # pylint: disable=import-outside-toplevel from lib.carbon_intensity import store_static_carbon_intensity, store_dynamic_carbon_intensity - try: - if self._use_dynamic_grid_carbon_intensity: - # Store dynamic carbon intensity from API - if self._grid_carbon_intensity_location is None: - error_helpers.log_error("Dynamic grid carbon intensity is enabled, but location is missing! 
Carbon footprint calculations will be skipped.", run_id=self._run_id) - return - - store_dynamic_carbon_intensity(self._run_id, self._grid_carbon_intensity_location) - elif self._sci['I']: - # Store static carbon intensity from config as constant time series - store_static_carbon_intensity(self._run_id, self._sci['I']) - else: - # No carbon intensity configured - this will prevent carbon calculations - # This is only acceptable if no energy metrics are being collected - error_helpers.log_error("No grid carbon intensity configured. Carbon footprint calculations will be skipped.", run_id=self._run_id) - - except Exception as e: # pylint: disable=broad-except - error_helpers.log_error(f"Unexpected error storing grid carbon intensity metrics: {e}", run_id=self._run_id) + if self._use_dynamic_grid_carbon_intensity: + # Store dynamic carbon intensity from API + if self._grid_carbon_intensity_location is None: + raise ValueError("Dynamic grid carbon intensity is enabled, but location configuration is missing! Ensure it is set in your config.yml.") + + store_dynamic_carbon_intensity(self._run_id, self._grid_carbon_intensity_location) + elif self._sci['I']: + # Store static carbon intensity from config as constant time series + store_static_carbon_intensity(self._run_id, self._sci['I']) + else: + raise ValueError( + "No grid carbon intensity configured. Cannot proceed with carbon footprint calculations. " + "Please configure either: (1) Static carbon intensity by setting 'sci.I' in your config, " + "or (2) Dynamic carbon intensity by enabling 'use_dynamic_grid_carbon_intensity' and setting " + "'grid_carbon_intensity_location'."
+ ) def _process_phase_stats(self): if not self._run_id or self._dev_no_phase_stats or self._dev_no_save: diff --git a/tests/lib/test_carbon_intensity.py b/tests/lib/test_carbon_intensity.py index 1467fb0fb..4e2d49690 100644 --- a/tests/lib/test_carbon_intensity.py +++ b/tests/lib/test_carbon_intensity.py @@ -12,8 +12,6 @@ from lib.db import DB from lib.carbon_intensity import ( CarbonIntensityClient, - CarbonIntensityServiceError, - CarbonIntensityDataError, _microseconds_to_iso8601, _calculate_sampling_rate_from_data, _get_carbon_intensity_at_timestamp, @@ -134,7 +132,7 @@ def test__get_carbon_intensity_at_timestamp_after_range(self): def test__get_carbon_intensity_at_timestamp_empty_data(self): # Test with empty data - with pytest.raises(ValueError, match="No carbon intensity data available"): + with pytest.raises(ValueError, match="empty"): # message wording differs across Python versions _get_carbon_intensity_at_timestamp(1727003400000000, []) @patch('lib.carbon_intensity.requests.get') @@ -172,7 +170,7 @@ def test_carbon_intensity_client_network_error(self, mock_get): mock_get.side_effect = requests.exceptions.RequestException("Network error") client = CarbonIntensityClient("http://localhost:8000") - with pytest.raises(CarbonIntensityServiceError, match="Failed to fetch carbon intensity data"): + with pytest.raises(requests.exceptions.RequestException): client.get_carbon_intensity_history("DE", "2024-09-22T10:50:00Z", "2024-09-22T10:55:00Z") @patch('lib.carbon_intensity.requests.get') @@ -184,7 +182,7 @@ def test_carbon_intensity_client_invalid_response(self, mock_get): mock_get.return_value = mock_response client = CarbonIntensityClient("http://localhost:8000") - with pytest.raises(CarbonIntensityDataError, match="Invalid response from carbon intensity service"): + with pytest.raises(ValueError, match="Expected list response from carbon intensity service"): client.get_carbon_intensity_history("DE", "2024-09-22T10:50:00Z", "2024-09-22T10:55:00Z") From 
8c4dd9981473e4cccca75f99cfe69eeb3fe3b964 Mon Sep 17 00:00:00 2001 From: David Kopp Date: Thu, 25 Sep 2025 09:46:47 +0200 Subject: [PATCH 10/27] Use config.yml instead of user settings --- config.yml.example | 17 +++++++++- docker/structure.sql | 4 --- frontend/js/helpers/config.js.example | 10 ------ frontend/js/settings.js | 5 --- frontend/settings.html | 14 -------- lib/carbon_intensity.py | 12 +++---- lib/job/run.py | 2 -- lib/scenario_runner.py | 15 ++++---- lib/user.py | 6 +--- .../2025_09_22_dynamic_carbon_intensity.sql | 34 ------------------- runner.py | 4 --- tests/frontend/test_frontend.py | 12 ------- 12 files changed, 30 insertions(+), 105 deletions(-) delete mode 100644 migrations/2025_09_22_dynamic_carbon_intensity.sql diff --git a/config.yml.example b/config.yml.example index 1bcdda8ac..fb20fa683 100644 --- a/config.yml.example +++ b/config.yml.example @@ -242,7 +242,8 @@ sci: # The default is the value for a developer machine (Pro Laptop - https://dataviz.boavizta.org/terminalimpact) TE: 181000 # I is the Carbon Intensity at the location of this machine - # This is a static value in gCO2e/kWh. For dynamic carbon intensity, see frontend user settings (Documentation: https://docs.green-coding.io/docs/measuring/carbon/grid-carbon-intensity/). + # This is a static value in gCO2e/kWh. If you want to use the current dynamic grid carbon intensity, + # uncomment the option 'dynamic_grid_carbon_intensity' below. 
# For fixed world-wide values get the number from https://ember-climate.org/insights/research/global-electricity-review-2025/ # The number worldwide for 2024 is 473 # The number 334 that comes as default is for Germany from 2024 and comes from https://app.electricitymaps.com/zone/DE/all/yearly @@ -252,6 +253,20 @@ sci: # See https://www.green-coding.io/co2-formulas/ for details N: 0.04106063 +# If you want to use the current dynamic grid carbon intensity for the carbon metrics instead of the fixed number above (SCI.I), +# uncomment the following lines and set your location. The location needs to be a valid grid zone code. +# See https://portal.electricitymaps.com/developer-hub/api/getting-started#geographical-coverage for valid zone codes. +# Documentation of the feature: https://docs.green-coding.io/docs/measuring/carbon/grid-carbon-intensity/ +# Note, that for this functionality our 'Elephant' service needs to be running and configured correctly. +#dynamic_grid_carbon_intensity: +# location: 'DE' + +# The following configuration is an enterprise feature: +# In order to get the carbon intensity we use electricity maps which requires a token. +# You can get this under https://api-portal.electricitymaps.com/ +# This is a free service please note that you need to pay if you want to use this commercially! 
+#electricity_maps_token: '123' + #optimization: # ignore: # - example_optimization_test diff --git a/docker/structure.sql b/docker/structure.sql index dd05c9d89..81ed2fe02 100644 --- a/docker/structure.sql +++ b/docker/structure.sql @@ -55,8 +55,6 @@ VALUES ( "measurement.phase_transition_time", "measurement.wait_time_dependencies", "measurement.skip_volume_inspect", - "measurement.use_dynamic_grid_carbon_intensity", - "measurement.grid_carbon_intensity_location" ] }, "api": { @@ -117,8 +115,6 @@ VALUES ( }, "machines": [1], "measurement": { - "use_dynamic_grid_carbon_intensity": false, - "grid_carbon_intensity_location": "DE", "phase_padding": true, "quotas": {}, "dev_no_sleeps": false, diff --git a/frontend/js/helpers/config.js.example b/frontend/js/helpers/config.js.example index df9976a95..b62a99280 100644 --- a/frontend/js/helpers/config.js.example +++ b/frontend/js/helpers/config.js.example @@ -621,15 +621,5 @@ METRIC_MAPPINGS = { "clean_name": "Total System Disk Writes", "source": "cgroup", "explanation": "Total data written to disk for the system via cgroup" - }, - "grid_carbon_intensity_static": { - "clean_name": "Grid Carbon Intensity", - "source": "Config (Static)", - "explanation": "Static grid carbon intensity used to calculate the carbon emissions" - }, - "grid_carbon_intensity_dynamic": { - "clean_name": "Grid Carbon Intensity", - "source": "External Provider (Dynamic)", - "explanation": "Dynamic grid carbon intensity during the run retrieved from external carbon intensity provider" } } // PLEASE DO NOT REMOVE THIS COMMENT -- END METRIC_MAPPINGS diff --git a/frontend/js/settings.js b/frontend/js/settings.js index 81a605c3c..613495736 100644 --- a/frontend/js/settings.js +++ b/frontend/js/settings.js @@ -23,15 +23,10 @@ const getSettings = async () => { try { const data = await makeAPICall('/v1/user/settings'); - // Checkboxes document.querySelector('#measurement-dev-no-optimizations').checked = 
data?.data?._capabilities?.measurement?.dev_no_optimizations === true; document.querySelector('#measurement-dev-no-sleeps').checked = data?.data?._capabilities?.measurement?.dev_no_sleeps === true; document.querySelector('#measurement-phase-padding').checked = data?.data?._capabilities?.measurement?.phase_padding === true; document.querySelector('#measurement-skip-volume-inspect').checked = data?.data?._capabilities?.measurement?.skip_volume_inspect === true; - document.querySelector('#measurement-use-dynamic-grid-carbon-intensity').checked = data?.data?._capabilities?.measurement?.use_dynamic_grid_carbon_intensity === true; - - // Text - document.querySelector('#measurement-grid-carbon-intensity-location').value = data?.data?._capabilities?.measurement?.grid_carbon_intensity_location; document.querySelector('#measurement-flow-process-duration').value = data?.data?._capabilities?.measurement?.flow_process_duration; document.querySelector('#measurement-total-duration').value = data?.data?._capabilities?.measurement?.total_duration; $('#measurement-disabled-metric-providers').dropdown('set exactly', data?.data?._capabilities?.measurement?.disabled_metric_providers); diff --git a/frontend/settings.html b/frontend/settings.html index 4a680bc86..60fd3e9f4 100644 --- a/frontend/settings.html +++ b/frontend/settings.html @@ -95,20 +95,6 @@

Determines if the live dynamic grid carbon intensity is for the calculation of carbon emissions or a static value - - - - - - Grid location
- Only relevant if dynamic grid carbon intensity is used. Location is relevant to get the live grid carbon intensity. Value needs to be a valid grid zone code. See
ElectricityMaps for valid zone codes. - - - - System check threshold
Can be 1=INFO, 2=WARN or 3=ERROR. When set on 3 runs will fail only on erros, when 2 then also on warnings and 1 also on pure info statements. diff --git a/lib/carbon_intensity.py b/lib/carbon_intensity.py index 19ba3434f..47a3eb61f 100644 --- a/lib/carbon_intensity.py +++ b/lib/carbon_intensity.py @@ -168,13 +168,13 @@ def store_static_carbon_intensity(run_id, static_value): print(f"Stored static carbon intensity value {static_value} gCO2e/kWh as constant time series") -def store_dynamic_carbon_intensity(run_id, grid_carbon_intensity_location): +def store_dynamic_carbon_intensity(run_id, location): """ Store dynamic carbon intensity data from API as time series. Args: run_id: UUID of the run - grid_carbon_intensity_location: Location code (e.g., "DE", "ES-IB-MA") + location: Location code (e.g., "DE", "ES-IB-MA") """ # Get run start and end times run_query = """ @@ -193,18 +193,18 @@ def store_dynamic_carbon_intensity(run_id, grid_carbon_intensity_location): # Fetch carbon intensity data carbon_client = CarbonIntensityClient() carbon_intensity_data = carbon_client.get_carbon_intensity_history( - grid_carbon_intensity_location, start_time_iso, end_time_iso + location, start_time_iso, end_time_iso ) if not carbon_intensity_data: raise ValueError( - f"No carbon intensity data received from service for location '{grid_carbon_intensity_location}' " + f"No carbon intensity data received from service for location '{location}' " f"between {start_time_iso} and {end_time_iso}. The service returned an empty dataset." 
) # Create measurement_metric entry for dynamic carbon intensity metric_name = 'grid_carbon_intensity_dynamic' - detail_name = grid_carbon_intensity_location + detail_name = location unit = 'gCO2e/kWh' # Calculate sampling rate based on actual data intervals from API format sampling_rate_configured = _calculate_sampling_rate_from_data(carbon_intensity_data) @@ -274,7 +274,7 @@ def store_dynamic_carbon_intensity(run_id, grid_carbon_intensity_location): ) f.close() - print(f"Stored dynamic carbon intensity for location {grid_carbon_intensity_location}: start={start_carbon_intensity} gCO2e/kWh, end={end_carbon_intensity} gCO2e/kWh, {len(intermediate_points)} intermediate points") + print(f"Stored dynamic carbon intensity for location {location}: start={start_carbon_intensity} gCO2e/kWh, end={end_carbon_intensity} gCO2e/kWh, {len(intermediate_points)} intermediate points") def generate_carbon_intensity_timeseries_for_phase( diff --git a/lib/job/run.py b/lib/job/run.py index 1399bf08d..bc9b11183 100644 --- a/lib/job/run.py +++ b/lib/job/run.py @@ -64,8 +64,6 @@ def _process(self, docker_prune=False, full_docker_prune=False): dev_no_optimizations=user._capabilities['measurement']['dev_no_optimizations'], disabled_metric_providers=user._capabilities['measurement']['disabled_metric_providers'], allowed_run_args=user._capabilities['measurement']['orchestrators']['docker']['allowed_run_args'], # They are specific to the orchestrator. However currently we only have one. As soon as we support more orchestrators we will sub-class Runner with dedicated child classes (DockerRunner, PodmanRunner etc.) 
- use_dynamic_grid_carbon_intensity=user._capabilities['measurement']['use_dynamic_grid_carbon_intensity'], - grid_carbon_intensity_location=user._capabilities['measurement']['grid_carbon_intensity_location'], ) diff --git a/lib/scenario_runner.py b/lib/scenario_runner.py index dfe728518..ad1336a04 100644 --- a/lib/scenario_runner.py +++ b/lib/scenario_runner.py @@ -75,7 +75,7 @@ def __init__(self, skip_volume_inspect=False, commit_hash_folder=None, usage_scenario_variables=None, phase_padding=True, measurement_system_check_threshold=3, measurement_pre_test_sleep=5, measurement_idle_duration=60, measurement_baseline_duration=60, measurement_post_test_sleep=5, measurement_phase_transition_time=1, - measurement_wait_time_dependencies=60, use_dynamic_grid_carbon_intensity=False, grid_carbon_intensity_location=None): + measurement_wait_time_dependencies=60): config = GlobalConfig().config @@ -143,8 +143,6 @@ def __init__(self, self._measurement_post_test_sleep = measurement_post_test_sleep self._measurement_phase_transition_time = measurement_phase_transition_time self._measurement_wait_time_dependencies = measurement_wait_time_dependencies - self._use_dynamic_grid_carbon_intensity = use_dynamic_grid_carbon_intensity - self._grid_carbon_intensity_location = grid_carbon_intensity_location self._last_measurement_duration = 0 self._phase_padding = phase_padding self._phase_padding_ms = max( @@ -638,8 +636,6 @@ def _initialize_run(self): measurement_config['disabled_metric_providers'] = self._disabled_metric_providers measurement_config['sci'] = self._sci measurement_config['phase_padding'] = self._phase_padding_ms - measurement_config['use_dynamic_grid_carbon_intensity'] = self._use_dynamic_grid_carbon_intensity - measurement_config['grid_carbon_intensity_location'] = self._grid_carbon_intensity_location # We issue a fetch_one() instead of a query() here, cause we want to get the RUN_ID self._run_id = DB().fetch_one(""" @@ -2081,12 +2077,15 @@ def 
_process_grid_carbon_intensity(self): # pylint: disable=import-outside-toplevel from lib.carbon_intensity import store_static_carbon_intensity, store_dynamic_carbon_intensity - if self._use_dynamic_grid_carbon_intensity: + config = GlobalConfig().config + dynamic_grid_carbon_intensity = config.get('dynamic_grid_carbon_intensity', None) + if dynamic_grid_carbon_intensity: # Store dynamic carbon intensity from API - if self._grid_carbon_intensity_location is None: + location = dynamic_grid_carbon_intensity.get('location', None) + if location is None: raise ValueError("Dynamic grid carbon intensity is enabled, but location configuration is missing! Ensure it is set in your config.yml.") - store_dynamic_carbon_intensity(self._run_id, self._grid_carbon_intensity_location) + store_dynamic_carbon_intensity(self._run_id, location) elif self._sci['I']: # Store static carbon intensity from config as constant time series store_static_carbon_intensity(self._run_id, self._sci['I']) diff --git a/lib/user.py b/lib/user.py index e4040b221..b9c1f0865 100644 --- a/lib/user.py +++ b/lib/user.py @@ -64,7 +64,7 @@ def change_setting(self, name, value): raise ValueError(f"You cannot change this setting: {name}") match name: - case 'measurement.dev_no_optimizations' | 'measurement.dev_no_sleeps' | 'measurement.phase_padding' | 'measurement.skip_volume_inspect' | 'measurement.use_dynamic_grid_carbon_intensity': + case 'measurement.dev_no_optimizations' | 'measurement.dev_no_sleeps' | 'measurement.phase_padding' | 'measurement.skip_volume_inspect': if not isinstance(value, bool): raise ValueError(f'The setting {name} must be boolean') case 'measurement.flow_process_duration' | 'measurement.total_duration': @@ -85,10 +85,6 @@ def change_setting(self, name, value): if not (isinstance(value, int) or value.isdigit()) or int(value) <= 0 or int(value) > 86400: raise ValueError(f'The setting {name} must be between 1 and 86400') value = int(value) - case 
'measurement.grid_carbon_intensity_location': - if not isinstance(value, str) or not value.strip(): - raise ValueError(f'The setting {name} must be a non-empty string (electricity grid zone code)') - value = value.strip() case _: raise ValueError(f'The setting {name} is unknown') diff --git a/migrations/2025_09_22_dynamic_carbon_intensity.sql b/migrations/2025_09_22_dynamic_carbon_intensity.sql deleted file mode 100644 index b62fe7c5f..000000000 --- a/migrations/2025_09_22_dynamic_carbon_intensity.sql +++ /dev/null @@ -1,34 +0,0 @@ --- Migration: Add dynamic carbon intensity capabilities and separate metric names --- Date: 2025-09-22 --- Description: Adds measurement.use_dynamic_grid_carbon_intensity and measurement.grid_carbon_intensity_location --- to user updateable_settings and separates static/dynamic carbon intensity metrics - --- Add new settings to updateable_settings for all users (excluding system user 0) -UPDATE users -SET capabilities = jsonb_set( - capabilities, - '{user,updateable_settings}', - ( - COALESCE(capabilities->'user'->'updateable_settings', '[]'::jsonb) || - '["measurement.use_dynamic_grid_carbon_intensity", "measurement.grid_carbon_intensity_location"]'::jsonb - ), - true -) WHERE id != 0; - --- Set default value for use_dynamic_grid_carbon_intensity (disabled by default) -UPDATE users -SET capabilities = jsonb_set( - capabilities, - '{measurement,use_dynamic_grid_carbon_intensity}', - 'false', - true -) WHERE id != 0; - --- Set default value for grid_carbon_intensity_location (default to DE) -UPDATE users -SET capabilities = jsonb_set( - capabilities, - '{measurement,grid_carbon_intensity_location}', - '"DE"', - true -) WHERE id != 0; diff --git a/runner.py b/runner.py index 9da27783a..58ef6aaa9 100755 --- a/runner.py +++ b/runner.py @@ -60,8 +60,6 @@ parser.add_argument('--print-logs', action='store_true', help='Prints the container and process logs to stdout') parser.add_argument('--iterations', type=int, default=1, help='Specify how 
many times each scenario should be run. Default is 1. With multiple files, all files are processed sequentially, then the entire sequence is repeated N times. Example: with files A.yml, B.yml and --iterations 2, the execution order is A, B, A, B.') - parser.add_argument('--use-dynamic-grid-carbon-intensity', action='store_true', help='Use dynamic grid carbon intensity value instead of static value from config') - parser.add_argument('--grid-carbon-intensity-location', type=str, help='Location of the electricity grid (e.g. DE), used for the dynamic carbon intensity calculation') # Measurement settings parser.add_argument('--measurement-system-check-threshold', type=int, default=3, help='System check threshold when to issue warning and when to fail. When set on 3 runs will fail only on erros, when 2 then also on warnings and 1 also on pure info statements. Can be 1=INFO, 2=WARN or 3=ERROR') @@ -160,8 +158,6 @@ docker_prune=args.docker_prune, dev_no_phase_stats=args.dev_no_phase_stats, user_id=args.user_id, skip_volume_inspect=args.skip_volume_inspect, commit_hash_folder=args.commit_hash_folder, usage_scenario_variables=variables_dict, phase_padding=not args.no_phase_padding, - use_dynamic_grid_carbon_intensity=args.use_dynamic_grid_carbon_intensity, - grid_carbon_intensity_location=args.grid_carbon_intensity_location, measurement_system_check_threshold=args.measurement_system_check_threshold, measurement_pre_test_sleep=args.measurement_pre_test_sleep, measurement_idle_duration=args.measurement_idle_duration, diff --git a/tests/frontend/test_frontend.py b/tests/frontend/test_frontend.py index 0819dcd16..cf0271357 100644 --- a/tests/frontend/test_frontend.py +++ b/tests/frontend/test_frontend.py @@ -659,12 +659,6 @@ def test_settings_measurement(self): assert user._capabilities['measurement']['skip_volume_inspect'] is False - value = page.locator('#measurement-use-dynamic-grid-carbon-intensity').is_checked() - assert value is 
user._capabilities['measurement']['use_dynamic_grid_carbon_intensity'] - - value = page.locator('#measurement-grid-carbon-intensity-location').input_value() - assert value.strip() == user._capabilities['measurement']['grid_carbon_intensity_location'] - value = page.locator('#measurement-disabled-metric-providers').input_value() providers = [] if value.strip() == '' else [value.strip()] assert providers == user._capabilities['measurement']['disabled_metric_providers'] @@ -709,8 +703,6 @@ def test_settings_measurement(self): assert value is user._capabilities['measurement']['skip_volume_inspect'] - page.locator('#measurement-use-dynamic-grid-carbon-intensity').click() - page.locator('#measurement-grid-carbon-intensity-location').fill('DE') page.locator('#measurement-system-check-threshold').fill('2') page.evaluate('$("#measurement-disabled-metric-providers").dropdown("set exactly", "NetworkConnectionsProxyContainerProvider");') page.locator('#measurement-flow-process-duration').fill('456') @@ -726,8 +718,6 @@ def test_settings_measurement(self): page.locator('#measurement-dev-no-optimizations').click() page.locator('#measurement-skip-volume-inspect').click() - page.locator('#save-measurement-use-dynamic-grid-carbon-intensity').click() - page.locator('#save-measurement-grid-carbon-intensity-location').click() page.locator('#save-measurement-system-check-threshold').click() page.locator('#save-measurement-disabled-metric-providers').click() page.locator('#save-measurement-flow-process-duration').click() @@ -747,8 +737,6 @@ def test_settings_measurement(self): time.sleep(1) user = User(1) - assert user._capabilities['measurement']['use_dynamic_grid_carbon_intensity'] is True - assert user._capabilities['measurement']['grid_carbon_intensity_location'] == 'DE' assert user._capabilities['measurement']['disabled_metric_providers'] == ['NetworkConnectionsProxyContainerProvider'] assert user._capabilities['measurement']['flow_process_duration'] == 456 assert 
user._capabilities['measurement']['total_duration'] == 123 From 65b863a4b3ccafc0874bb03f3c002c37c420aa5c Mon Sep 17 00:00:00 2001 From: David Kopp Date: Thu, 25 Sep 2025 10:17:19 +0200 Subject: [PATCH 11/27] Cleanup --- config.yml.example | 16 +++++++--------- docker/structure.sql | 2 +- frontend/js/settings.js | 8 ++++---- lib/phase_stats.py | 3 --- lib/scenario_runner.py | 1 - lib/user.py | 1 + 6 files changed, 13 insertions(+), 18 deletions(-) diff --git a/config.yml.example b/config.yml.example index fb20fa683..5d9a47ce8 100644 --- a/config.yml.example +++ b/config.yml.example @@ -10,11 +10,6 @@ redis: host: green-coding-redis-container port: 6379 -elephant: - host: localhost - port: 8000 - protocol: http - smtp: server: SMTP_SERVER sender: SMTP_SENDER @@ -254,12 +249,15 @@ sci: N: 0.04106063 # If you want to use the current dynamic grid carbon intensity for the carbon metrics instead of the fixed number above (SCI.I), -# uncomment the following lines and set your location. The location needs to be a valid grid zone code. -# See https://portal.electricitymaps.com/developer-hub/api/getting-started#geographical-coverage for valid zone codes. -# Documentation of the feature: https://docs.green-coding.io/docs/measuring/carbon/grid-carbon-intensity/ -# Note, that for this functionality our 'Elephant' service needs to be running and configured correctly. +# uncomment the following lines and set your location and ensure the Elephant service is setup correctly. +# The location needs to be a valid grid zone code. +# For more information see our documentation: https://docs.green-coding.io/docs/measuring/carbon/grid-carbon-intensity/ #dynamic_grid_carbon_intensity: # location: 'DE' +# elephant: +# host: localhost +# port: 8000 +# protocol: http # The following configuration is an enterprise feature: # In order to get the carbon intensity we use electricity maps which requires a token. 
diff --git a/docker/structure.sql b/docker/structure.sql index 81ed2fe02..31c14112c 100644 --- a/docker/structure.sql +++ b/docker/structure.sql @@ -54,7 +54,7 @@ VALUES ( "measurement.post_test_sleep", "measurement.phase_transition_time", "measurement.wait_time_dependencies", - "measurement.skip_volume_inspect", + "measurement.skip_volume_inspect" ] }, "api": { diff --git a/frontend/js/settings.js b/frontend/js/settings.js index 613495736..0bc8c69ed 100644 --- a/frontend/js/settings.js +++ b/frontend/js/settings.js @@ -23,10 +23,10 @@ const getSettings = async () => { try { const data = await makeAPICall('/v1/user/settings'); - document.querySelector('#measurement-dev-no-optimizations').checked = data?.data?._capabilities?.measurement?.dev_no_optimizations === true; - document.querySelector('#measurement-dev-no-sleeps').checked = data?.data?._capabilities?.measurement?.dev_no_sleeps === true; - document.querySelector('#measurement-phase-padding').checked = data?.data?._capabilities?.measurement?.phase_padding === true; - document.querySelector('#measurement-skip-volume-inspect').checked = data?.data?._capabilities?.measurement?.skip_volume_inspect === true; + if (data?.data?._capabilities?.measurement?.dev_no_optimizations === true) document.querySelector('#measurement-dev-no-optimizations').checked = true; + if (data?.data?._capabilities?.measurement?.dev_no_sleeps === true) document.querySelector('#measurement-dev-no-sleeps').checked = true; + if (data?.data?._capabilities?.measurement?.phase_padding === true) document.querySelector('#measurement-phase-padding').checked = true; + if (data?.data?._capabilities?.measurement?.skip_volume_inspect === true) document.querySelector('#measurement-skip-volume-inspect').checked = true; document.querySelector('#measurement-flow-process-duration').value = data?.data?._capabilities?.measurement?.flow_process_duration; document.querySelector('#measurement-total-duration').value = 
data?.data?._capabilities?.measurement?.total_duration; $('#measurement-disabled-metric-providers').dropdown('set exactly', data?.data?._capabilities?.measurement?.disabled_metric_providers); diff --git a/lib/phase_stats.py b/lib/phase_stats.py index 642953987..a844eda56 100644 --- a/lib/phase_stats.py +++ b/lib/phase_stats.py @@ -70,9 +70,6 @@ def generate_csv_line(run_id, metric, detail_name, phase_name, value, value_type # else '' resolves to NULL return f"{run_id},{metric},{detail_name},{phase_name},{round(value)},{value_type},{round(max_value) if max_value is not None else ''},{round(min_value) if min_value is not None else ''},{round(sampling_rate_avg) if sampling_rate_avg is not None else ''},{round(sampling_rate_max) if sampling_rate_max is not None else ''},{round(sampling_rate_95p) if sampling_rate_95p is not None else ''},{unit},NOW()\n" - - - def build_and_store_phase_stats(run_id, sci=None): if not sci: sci = {} diff --git a/lib/scenario_runner.py b/lib/scenario_runner.py index 3fdd1e6f9..bc3d41c39 100644 --- a/lib/scenario_runner.py +++ b/lib/scenario_runner.py @@ -2110,7 +2110,6 @@ def _process_phase_stats(self): # get all the metrics from the measurements table grouped by metric # loop over them issuing separate queries to the DB from tools.phase_stats import build_and_store_phase_stats # pylint: disable=import-outside-toplevel - build_and_store_phase_stats(self._run_id, self._sci) def _post_process(self, index): diff --git a/lib/user.py b/lib/user.py index b9c1f0865..28fc3ec27 100644 --- a/lib/user.py +++ b/lib/user.py @@ -1,5 +1,6 @@ import json import hashlib +import uuid from lib.secure_variable import SecureVariable from lib.db import DB From 167ca6bf68cd5d0a4df3a37448c044d2d0c8e431 Mon Sep 17 00:00:00 2001 From: David Kopp Date: Thu, 25 Sep 2025 13:13:21 +0200 Subject: [PATCH 12/27] Add grid carbon intensity as metric --- frontend/js/helpers/config.js.example | 10 +++ lib/carbon_intensity.py | 12 +-- lib/phase_stats.py | 4 +- 
.../dynamic_grid_carbon_intensity_single.log | 1 + tests/lib/test_phase_stats.py | 25 +++++- tests/test_functions.py | 88 ++++++++++++++++++- 6 files changed, 129 insertions(+), 11 deletions(-) create mode 100644 tests/data/metrics/dynamic_grid_carbon_intensity_single.log diff --git a/frontend/js/helpers/config.js.example b/frontend/js/helpers/config.js.example index b62a99280..df9976a95 100644 --- a/frontend/js/helpers/config.js.example +++ b/frontend/js/helpers/config.js.example @@ -621,5 +621,15 @@ METRIC_MAPPINGS = { "clean_name": "Total System Disk Writes", "source": "cgroup", "explanation": "Total data written to disk for the system via cgroup" + }, + "grid_carbon_intensity_static": { + "clean_name": "Grid Carbon Intensity", + "source": "Config (Static)", + "explanation": "Static grid carbon intensity used to calculate the carbon emissions" + }, + "grid_carbon_intensity_dynamic": { + "clean_name": "Grid Carbon Intensity", + "source": "External Provider (Dynamic)", + "explanation": "Dynamic grid carbon intensity during the run retrieved from external carbon intensity provider" } } // PLEASE DO NOT REMOVE THIS COMMENT -- END METRIC_MAPPINGS diff --git a/lib/carbon_intensity.py b/lib/carbon_intensity.py index 47a3eb61f..ceb1d287d 100644 --- a/lib/carbon_intensity.py +++ b/lib/carbon_intensity.py @@ -114,7 +114,7 @@ def get_carbon_intensity_data_for_run(run_id): carbon_data = [ { 'timestamp_us': timestamp_us, - 'carbon_intensity': float(value_int) / 1000.0, + 'carbon_intensity': float(value_int), 'location': detail_name } for timestamp_us, value_int, _ in carbon_intensity_values # Unpack the third element (sampling_rate_configured) @@ -155,8 +155,8 @@ def store_static_carbon_intensity(run_id, static_value): RETURNING id ''', params=(run_id, metric_name, detail_name, unit, sampling_rate_configured))[0] - # Convert static value to integer (multiply by 1000 for precision) - carbon_intensity_value = int(float(static_value) * 1000) + # Convert static value to integer 
+ carbon_intensity_value = int(float(static_value)) # Store as constant time series: same value at start and end times DB().query( @@ -237,12 +237,12 @@ def store_dynamic_carbon_intensity(run_id, location): # Always ensure we have data points at measurement start and end times # Get carbon intensity at measurement start time start_carbon_intensity = _get_carbon_intensity_at_timestamp(start_time_us, carbon_data_for_lookup) - start_carbon_intensity_value = int(start_carbon_intensity * 1000) + start_carbon_intensity_value = int(start_carbon_intensity) values_data.append((measurement_metric_id, start_carbon_intensity_value, start_time_us)) # Get carbon intensity at measurement end time end_carbon_intensity = _get_carbon_intensity_at_timestamp(end_time_us, carbon_data_for_lookup) - end_carbon_intensity_value = int(end_carbon_intensity * 1000) + end_carbon_intensity_value = int(end_carbon_intensity) # Add intermediate data points that fall within measurement timeframe intermediate_points = [] @@ -250,7 +250,7 @@ def store_dynamic_carbon_intensity(run_id, location): timestamp_us = data_point['timestamp_us'] # Only include points strictly within the timeframe (not at boundaries) if start_time_us < timestamp_us < end_time_us: - carbon_intensity_value = int(float(data_point['carbon_intensity']) * 1000) + carbon_intensity_value = int(float(data_point['carbon_intensity'])) intermediate_points.append((measurement_metric_id, carbon_intensity_value, timestamp_us)) # Sort intermediate points by time and add them diff --git a/lib/phase_stats.py b/lib/phase_stats.py index a844eda56..b50ab0169 100644 --- a/lib/phase_stats.py +++ b/lib/phase_stats.py @@ -85,7 +85,6 @@ def build_and_store_phase_stats(run_id, sci=None): SELECT id, metric, unit, detail_name, sampling_rate_configured FROM measurement_metrics WHERE run_id = %s - AND metric NOT IN ('grid_carbon_intensity_static', 'grid_carbon_intensity_dynamic') -- Exclude carbon intensity from phase stats processing ORDER BY metric ASC -- 
we need this ordering for later, when we read again """ metrics = DB().fetch_all(query, (run_id, )) @@ -288,6 +287,9 @@ def build_and_store_phase_stats(run_id, sci=None): machine_energy_current_phase = value_sum machine_power_current_phase = power_avg + elif "grid_carbon_intensity" in metric: + csv_buffer.write(generate_csv_line(run_id, metric, detail_name, f"{idx:03}_{phase['name']}", avg_value, 'MEAN', max_value, min_value, sampling_rate_avg, sampling_rate_max, sampling_rate_95p, unit)) + else: # Default if metric not in ('cpu_time_powermetrics_vm', ): error_helpers.log_error('Unmapped phase_stat found, using default', metric=metric, detail_name=detail_name, run_id=run_id) diff --git a/tests/data/metrics/dynamic_grid_carbon_intensity_single.log b/tests/data/metrics/dynamic_grid_carbon_intensity_single.log new file mode 100644 index 000000000..b0905352d --- /dev/null +++ b/tests/data/metrics/dynamic_grid_carbon_intensity_single.log @@ -0,0 +1 @@ +1735047190000005 300 DE diff --git a/tests/lib/test_phase_stats.py b/tests/lib/test_phase_stats.py index 38ceef3ea..614d59bb7 100644 --- a/tests/lib/test_phase_stats.py +++ b/tests/lib/test_phase_stats.py @@ -130,7 +130,7 @@ def test_phase_embodied_and_operational_carbon(): Tests.import_machine_energy(run_id) sci = {"I":436,"R":0,"EL":4,"RS":1,"TE":181000,"R_d":"page request"} - Tests.import_carbon_intensity_value(run_id, sci['I']) + Tests.import_static_carbon_intensity_value(run_id, sci['I']) build_and_store_phase_stats(run_id, sci=sci) @@ -290,7 +290,7 @@ def test_phase_stats_network_data(): 'N': 0.001, # Network energy intensity (kWh/GB) 'I': 500, # Carbon intensity (gCO2e/kWh) } - Tests.import_carbon_intensity_value(run_id, test_sci_config['I']) + Tests.import_static_carbon_intensity_value(run_id, test_sci_config['I']) build_and_store_phase_stats(run_id, sci=test_sci_config) @@ -335,6 +335,25 @@ def test_phase_stats_network_data(): assert network_carbon_entry['type'] == 'TOTAL' assert 
math.isclose(network_carbon_entry['value'], expected_network_carbon_ug, rel_tol=1e-5), f"Expected network carbon: {expected_network_carbon_ug}, got: {network_carbon_entry['value']}" + +def test_phase_stats_dynamic_grid_carbon_intensity(): + run_id = Tests.insert_run() + Tests.import_dynamic_carbon_intensity_value(run_id) + + build_and_store_phase_stats(run_id) + + data = DB().fetch_all('SELECT metric, detail_name, unit, value, type, sampling_rate_avg, sampling_rate_max, sampling_rate_95p FROM phase_stats WHERE phase = %s ', params=('004_[RUNTIME]', ), fetch_mode='dict') + + assert len(data) == 2 + assert data[1]['metric'] == 'grid_carbon_intensity_dynamic' + assert data[1]['detail_name'] == 'DE' + assert data[1]['unit'] == 'gCO2e/kWh' + assert data[1]['value'] == 270 + assert data[1]['type'] == 'MEAN' + assert data[1]['sampling_rate_avg'] == 60000000, 'AVG sampling rate not in expected range' + assert data[1]['sampling_rate_max'] == 60000000, 'MAX sampling rate not in expected range' + assert data[1]['sampling_rate_95p'] == 60000000, '95p sampling rate not in expected range' + def test_sci_calculation(): run_id = Tests.insert_run() Tests.import_machine_energy(run_id) # Machine energy component @@ -350,7 +369,7 @@ def test_sci_calculation(): 'R': 10, # Functional unit count (10 runs) 'R_d': 'test runs' # Functional unit description } - Tests.import_carbon_intensity_value(run_id, test_sci_config['I']) + Tests.import_static_carbon_intensity_value(run_id, test_sci_config['I']) build_and_store_phase_stats(run_id, sci=test_sci_config) diff --git a/tests/test_functions.py b/tests/test_functions.py index e53c5e72e..08c32f784 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -3,6 +3,7 @@ import hashlib import json +from io import StringIO from lib.db import DB from lib.global_config import GlobalConfig from lib.log_types import LogType @@ -220,7 +221,8 @@ def import_demo_data_ee(): raise RuntimeError('Import of Demo data into DB failed', ps.stderr) -def 
import_carbon_intensity_value(run_id, static_carbon_intensity_value): +def import_static_carbon_intensity_value(run_id, static_carbon_intensity_value): + # TODO: Refactor DB().query( "UPDATE runs SET start_measurement = %s, end_measurement = %s WHERE id = %s", (TEST_MEASUREMENT_START_TIME, TEST_MEASUREMENT_END_TIME, run_id) @@ -228,6 +230,90 @@ def import_carbon_intensity_value(run_id, static_carbon_intensity_value): from lib.carbon_intensity import store_static_carbon_intensity # pylint: disable=import-outside-toplevel store_static_carbon_intensity(run_id, static_carbon_intensity_value) + +def import_dynamic_carbon_intensity_value(run_id): + """ + Import sample dynamic carbon intensity data for a test run. + Creates multiple measurement values aligned with phase timestamps. + """ + + # TODO: Refactor + # Update the run to have measurement times + DB().query( + "UPDATE runs SET start_measurement = %s, end_measurement = %s WHERE id = %s", + (TEST_MEASUREMENT_START_TIME, TEST_MEASUREMENT_END_TIME, run_id) + ) + + # Create measurement_metric entry for dynamic carbon intensity + metric_name = 'grid_carbon_intensity_dynamic' + detail_name = 'DE' # German location + unit = 'gCO2e/kWh' + sampling_rate_configured = 300000 # 5 minutes in milliseconds + + measurement_metric_id = DB().fetch_one(''' + INSERT INTO measurement_metrics (run_id, metric, detail_name, unit, sampling_rate_configured) + VALUES (%s, %s, %s, %s, %s) + RETURNING id + ''', params=(run_id, metric_name, detail_name, unit, sampling_rate_configured))[0] + + # Create sample carbon intensity data points aligned with phase timestamps + # Values are in integer format (multiply by 1000 for precision) + values_data = [] + + # Phase timestamps from insert_run function: + phase_timestamps = [ + TEST_MEASUREMENT_START_TIME-8, # [BASELINE] start + TEST_MEASUREMENT_START_TIME-7, # [BASELINE] end + TEST_MEASUREMENT_START_TIME-6, # [INSTALL] start + TEST_MEASUREMENT_START_TIME-5, # [INSTALL] end + 
TEST_MEASUREMENT_START_TIME-4, # [BOOT] start + TEST_MEASUREMENT_START_TIME-3, # [BOOT] end + TEST_MEASUREMENT_START_TIME-2, # [IDLE] start + TEST_MEASUREMENT_START_TIME-1, # [IDLE] end + TEST_MEASUREMENT_START_TIME, # [RUNTIME]/Only Phase start + TEST_MEASUREMENT_START_TIME + 60000000, # Mid-runtime - 1 minute + TEST_MEASUREMENT_START_TIME + 120000000, # Mid-runtime - 2 minutes + TEST_MEASUREMENT_END_TIME, # [RUNTIME]/Only Phase end + TEST_MEASUREMENT_END_TIME+1, # [REMOVE] start + TEST_MEASUREMENT_END_TIME+2, # [REMOVE] end + ] + + # Sample carbon intensity values that vary over time (realistic grid data) + carbon_intensity_values = [ + 180, # 180.0 gCO2e/kWh (baseline - low demand) + 175, # 175.0 gCO2e/kWh + 220, # 220.0 gCO2e/kWh (install - higher demand) + 230, # 230.0 gCO2e/kWh + 250, # 250.0 gCO2e/kWh (boot - peak demand) + 240, # 240.0 gCO2e/kWh + 190, # 190.0 gCO2e/kWh (idle - lower demand) + 185, # 185.0 gCO2e/kWh + 300, # 300.0 gCO2e/kWh (runtime start - high demand) + 280, # 280.0 gCO2e/kWh (mid-runtime) + 260, # 260.0 gCO2e/kWh (mid-runtime 2) + 240, # 240.0 gCO2e/kWh (runtime end) + 200, # 200.0 gCO2e/kWh (remove start) + 180, # 180.0 gCO2e/kWh (remove end - back to baseline) + ] + + # Create measurement values + for timestamp, value in zip(phase_timestamps, carbon_intensity_values): + values_data.append((measurement_metric_id, value, timestamp)) + + # Bulk insert measurement values using copy_from + if values_data: + csv_data = '\n'.join([f"{row[0]},{row[1]},{row[2]}" for row in values_data]) + f = StringIO(csv_data) + DB().copy_from( + file=f, + table='measurement_values', + columns=['measurement_metric_id', 'value', 'time'], + sep=',' + ) + f.close() + + print(f"Imported {len(values_data)} dynamic carbon intensity data points for run {run_id}") + def assertion_info(expected, actual): return f"Expected: {expected}, Actual: {actual}" From 40ee6a5eba7a1287b1f05496cfb85ed47f4293c7 Mon Sep 17 00:00:00 2001 From: David Kopp Date: Thu, 25 Sep 2025 
18:24:02 +0200 Subject: [PATCH 13/27] Refactor carbon calculations in phase stats by moving it to the end --- lib/carbon_intensity.py | 44 +++++++++++----- lib/phase_stats.py | 96 ++++++++++++++++------------------- tests/lib/test_phase_stats.py | 51 ++++++++++++++++--- tests/test_functions.py | 76 +++++++++++++-------------- 4 files changed, 155 insertions(+), 112 deletions(-) diff --git a/lib/carbon_intensity.py b/lib/carbon_intensity.py index ceb1d287d..9b772a88a 100644 --- a/lib/carbon_intensity.py +++ b/lib/carbon_intensity.py @@ -125,23 +125,25 @@ def get_carbon_intensity_data_for_run(run_id): def store_static_carbon_intensity(run_id, static_value): """ - Store static carbon intensity value as a constant time series. + Store static carbon intensity value as a constant time series at multiple timestamps: + - Start and end of measurement run to ensure graph looks good in frontend + - Middle of each phase to enable carbon metrics calculation per phase Args: run_id: UUID of the run static_value: Static carbon intensity value from config (gCO2e/kWh) """ - # Get run start and end times + # Get run phases data and overall start/end times run_query = """ - SELECT start_measurement, end_measurement + SELECT phases, start_measurement, end_measurement FROM runs WHERE id = %s """ run_data = DB().fetch_one(run_query, (run_id,)) - if not run_data or not run_data[0] or not run_data[1]: - raise ValueError(f"Run {run_id} does not have valid start_measurement and end_measurement times") + if not run_data or not run_data[0]: + raise ValueError(f"Run {run_id} does not have phases data") - start_time_us, end_time_us = run_data + phases, start_time_us, end_time_us = run_data # Create measurement_metric entry for static carbon intensity metric_name = 'grid_carbon_intensity_static' @@ -158,14 +160,30 @@ def store_static_carbon_intensity(run_id, static_value): # Convert static value to integer carbon_intensity_value = int(float(static_value)) - # Store as constant time series: same 
value at start and end times - DB().query( - "INSERT INTO measurement_values (measurement_metric_id, value, time) VALUES (%s, %s, %s), (%s, %s, %s)", - (measurement_metric_id, carbon_intensity_value, start_time_us, - measurement_metric_id, carbon_intensity_value, end_time_us) - ) + # Calculate timestamps: start/end of run + middle of each phase + timestamps = [] + + # Add overall run start and end times + if start_time_us and end_time_us: + timestamps.extend([start_time_us, end_time_us]) + + # Add middle timestamp for each phase + for phase in phases: + middle_timestamp = (phase['start'] + phase['end']) // 2 + timestamps.append(middle_timestamp) + + # Insert static value for all timestamps + values_to_insert = [] + for timestamp in timestamps: + values_to_insert.extend([measurement_metric_id, carbon_intensity_value, timestamp]) + + # Build dynamic query with correct number of placeholders + placeholders = ', '.join(['(%s, %s, %s)'] * len(timestamps)) + query = f"INSERT INTO measurement_values (measurement_metric_id, value, time) VALUES {placeholders}" + + DB().query(query, tuple(values_to_insert)) - print(f"Stored static carbon intensity value {static_value} gCO2e/kWh as constant time series") + print(f"Stored static carbon intensity value {static_value} gCO2e/kWh at {len(timestamps)} timestamps (run start/end + phase middles)") def store_dynamic_carbon_intensity(run_id, location): diff --git a/lib/phase_stats.py b/lib/phase_stats.py index b50ab0169..1acd9d637 100644 --- a/lib/phase_stats.py +++ b/lib/phase_stats.py @@ -10,10 +10,6 @@ from lib.db import DB from lib import error_helpers -from lib.carbon_intensity import ( - get_carbon_intensity_data_for_run, - generate_carbon_intensity_timeseries_for_phase, -) def reconstruct_runtime_phase(run_id, runtime_phase_idx): # First we create averages for all types. 
This includes means and totals @@ -70,16 +66,14 @@ def generate_csv_line(run_id, metric, detail_name, phase_name, value, value_type # else '' resolves to NULL return f"{run_id},{metric},{detail_name},{phase_name},{round(value)},{value_type},{round(max_value) if max_value is not None else ''},{round(min_value) if min_value is not None else ''},{round(sampling_rate_avg) if sampling_rate_avg is not None else ''},{round(sampling_rate_max) if sampling_rate_max is not None else ''},{round(sampling_rate_95p) if sampling_rate_95p is not None else ''},{unit},NOW()\n" + + def build_and_store_phase_stats(run_id, sci=None): if not sci: sci = {} software_carbon_intensity_global = {} - # Load carbon intensity time series data once to use as lookup table during energy metric processing. - # This data is used for interpolation at specific timestamps rather than being aggregated like regular metrics. - # Auto-detects whether dynamic (API) or static (config) carbon intensity data is available. - carbon_intensity_data, carbon_sampling_rate_ms = get_carbon_intensity_data_for_run(run_id) query = """ SELECT id, metric, unit, detail_name, sampling_rate_configured @@ -124,26 +118,8 @@ def build_and_store_phase_stats(run_id, sci=None): cpu_utilization_machine = None network_io_carbon_in_ug = None - # Generate carbon intensity timeseries for this phase and extract representative value - phase_carbon_intensity = None - if carbon_intensity_data: - try: - carbon_timeseries = generate_carbon_intensity_timeseries_for_phase( - phase['start'], phase['end'], carbon_intensity_data, - sampling_rate_ms=carbon_sampling_rate_ms - ) - - # INTERIM: Calculate representative carbon intensity for current energy calculations - # TODO: Replace this simple average with time-weighted energy×carbon integration # pylint: disable=fixme - # Future enhancement: Synchronize energy and carbon timeseries for precise temporal calculation - # instead of: energy_total * carbon_average - # use: ∫(energy(t) * 
carbon_intensity(t))dt over phase duration - if carbon_timeseries: - total_carbon = sum(point['carbon_intensity'] for point in carbon_timeseries) - phase_carbon_intensity = Decimal(total_carbon / len(carbon_timeseries)) - - except Exception as e: # pylint: disable=broad-except - raise RuntimeError(f"Failed to calculate carbon intensity for phase {phase['name']}: {e}") from e + phase_grid_carbon_intensity = None + phase_energy_metrics = [] select_query = """ WITH lag_table as ( @@ -269,15 +245,16 @@ def build_and_store_phase_stats(run_id, sci=None): power_min = (min_value * 10**3) / (duration / value_count) csv_buffer.write(generate_csv_line(run_id, f"{metric.replace('_energy_', '_power_')}", detail_name, f"{idx:03}_{phase['name']}", power_avg, 'MEAN', power_max, power_min, sampling_rate_avg, sampling_rate_max, sampling_rate_95p, 'mW')) - # Calculate carbon values using pre-calculated phase specific carbon intensity - if phase_carbon_intensity is not None: - value_carbon_ug = (value_sum / 3_600_000) * phase_carbon_intensity - csv_buffer.write(generate_csv_line(run_id, f"{metric.replace('_energy_', '_carbon_')}", detail_name, f"{idx:03}_{phase['name']}", value_carbon_ug, 'TOTAL', None, None, sampling_rate_avg, sampling_rate_max, sampling_rate_95p, 'ug')) - - if '[' not in phase['name'] and metric.endswith('_machine'): # only for runtime sub phases to not double count ... needs refactor ... see comment at beginning of file - software_carbon_intensity_global['machine_carbon_ug'] = software_carbon_intensity_global.get('machine_carbon_ug', 0) + value_carbon_ug - else: - error_helpers.log_error(f'Cannot calculate carbon intensity for energy metric {metric}". No carbon intensity data found in measurement_metrics. 
Configure either dynamic carbon intensity or static SCI value I.', run_id=run_id) + # Store energy metric data for carbon calculation at the end of the phase loop + phase_energy_metrics.append({ + 'metric': metric, + 'detail_name': detail_name, + 'value_sum': value_sum, + 'phase_name': f"{idx:03}_{phase['name']}", + 'sampling_rate_avg': sampling_rate_avg, + 'sampling_rate_max': sampling_rate_max, + 'sampling_rate_95p': sampling_rate_95p + }) if metric.endswith('_machine'): @@ -289,6 +266,7 @@ def build_and_store_phase_stats(run_id, sci=None): elif "grid_carbon_intensity" in metric: csv_buffer.write(generate_csv_line(run_id, metric, detail_name, f"{idx:03}_{phase['name']}", avg_value, 'MEAN', max_value, min_value, sampling_rate_avg, sampling_rate_max, sampling_rate_95p, unit)) + phase_grid_carbon_intensity = avg_value else: # Default if metric not in ('cpu_time_powermetrics_vm', ): @@ -296,33 +274,26 @@ def build_and_store_phase_stats(run_id, sci=None): csv_buffer.write(generate_csv_line(run_id, metric, detail_name, f"{idx:03}_{phase['name']}", value_sum, 'TOTAL', max_value, min_value, sampling_rate_avg, sampling_rate_max, sampling_rate_95p, unit)) # after going through detail metrics, create cumulated ones + phase_network_bytes_total = 0 + network_io_in_kWh = None if network_bytes_total: - # Check if we can calculate network energy and carbon + phase_network_bytes_total = sum(network_bytes_total) + + # Check if we can calculate network energy has_network_factor = sci.get('N', None) is not None if has_network_factor: # build the network energy by using a formula: https://www.green-coding.io/co2-formulas/ # pylint: disable=invalid-name - network_io_in_kWh = Decimal(sum(network_bytes_total)) / 1_000_000_000 * Decimal(sci['N']) + network_io_in_kWh = Decimal(phase_network_bytes_total) / 1_000_000_000 * Decimal(sci['N']) network_io_in_uJ = network_io_in_kWh * 3_600_000_000_000 csv_buffer.write(generate_csv_line(run_id, 'network_energy_formula_global', '[FORMULA]', 
f"{idx:03}_{phase['name']}", network_io_in_uJ, 'TOTAL', None, None, None, None, None, 'uJ')) - #power calculations + # power calculations network_io_power_in_mW = (network_io_in_kWh * Decimal('3600000') / Decimal(duration_in_s) * Decimal('1000')) csv_buffer.write(generate_csv_line(run_id, 'network_power_formula_global', '[FORMULA]', f"{idx:03}_{phase['name']}", network_io_power_in_mW, 'TOTAL', None, None, None, None, None, 'mW')) - - # co2 calculations using pre-calculated phase specific carbon intensity - if phase_carbon_intensity is not None: - network_io_carbon_in_ug = network_io_in_kWh * phase_carbon_intensity * 1_000_000 - csv_buffer.write(generate_csv_line(run_id, 'network_carbon_formula_global', '[FORMULA]', f"{idx:03}_{phase['name']}", network_io_carbon_in_ug, 'TOTAL', None, None, None, None, None, 'ug')) - else: - error_helpers.log_error('Cannot calculate network carbon emissions. No carbon intensity data found in measurement_metrics. Configure either dynamic carbon intensity or static SCI value I.', run_id=run_id) - network_io_carbon_in_ug = 0 else: error_helpers.log_error('Cannot calculate the total network energy consumption. 
SCI value N is missing in the config.', run_id=run_id) - network_io_carbon_in_ug = 0 - else: - network_io_carbon_in_ug = 0 if sci.get('EL', None) is not None and sci.get('TE', None) is not None and sci.get('RS', None) is not None: duration_in_years = duration_in_s / (60 * 60 * 24 * 365) @@ -350,6 +321,28 @@ def build_and_store_phase_stats(run_id, sci=None): csv_buffer.write(generate_csv_line(run_id, 'psu_energy_cgroup_container', detail_name, f"{idx:03}_{phase['name']}", surplus_energy_runtime * splitting_ratio, 'TOTAL', None, None, None, None, None, 'uJ')) csv_buffer.write(generate_csv_line(run_id, 'psu_power_cgroup_container', detail_name, f"{idx:03}_{phase['name']}", surplus_power_runtime * splitting_ratio, 'TOTAL', None, None, None, None, None, 'mW')) + # Calculate carbon emissions for this phase's energy metrics + if phase_grid_carbon_intensity is not None and phase_energy_metrics: + for energy_metric in phase_energy_metrics: + metric = energy_metric['metric'] + detail_name = energy_metric['detail_name'] + value_sum = energy_metric['value_sum'] + phase_full_name = energy_metric['phase_name'] + sampling_rate_avg = energy_metric['sampling_rate_avg'] + sampling_rate_max = energy_metric['sampling_rate_max'] + sampling_rate_95p = energy_metric['sampling_rate_95p'] + + value_carbon_ug = (value_sum / 3_600_000) * phase_grid_carbon_intensity + csv_buffer.write(generate_csv_line(run_id, f"{metric.replace('_energy_', '_carbon_')}", detail_name, phase_full_name, value_carbon_ug, 'TOTAL', None, None, sampling_rate_avg, sampling_rate_max, sampling_rate_95p, 'ug')) + + if '[' not in phase['name'] and metric.endswith('_machine'): # only for runtime sub phases to not double count + software_carbon_intensity_global['machine_carbon_ug'] = software_carbon_intensity_global.get('machine_carbon_ug', 0) + value_carbon_ug + + # Calculate network carbon emissions for this phase + if phase_grid_carbon_intensity is not None and network_io_in_kWh is not None: + network_io_carbon_in_ug = 
network_io_in_kWh * phase_grid_carbon_intensity * 1_000_000 + csv_buffer.write(generate_csv_line(run_id, 'network_carbon_formula_global', '[FORMULA]', f"{idx:03}_{phase['name']}", network_io_carbon_in_ug, 'TOTAL', None, None, None, None, None, 'ug')) + # TODO: refactor to be a metric provider. Than it can also be per phase # pylint: disable=fixme if software_carbon_intensity_global.get('machine_carbon_ug', None) is not None \ and software_carbon_intensity_global.get('embodied_carbon_share_ug', None) is not None \ @@ -359,6 +352,7 @@ def build_and_store_phase_stats(run_id, sci=None): csv_buffer.write(generate_csv_line(run_id, 'software_carbon_intensity_global', '[SYSTEM]', f"{runtime_phase_idx:03}_[RUNTIME]", (software_carbon_intensity_global['machine_carbon_ug'] + software_carbon_intensity_global['embodied_carbon_share_ug']) / Decimal(sci['R']), 'TOTAL', None, None, None, None, None, f"ugCO2e/{sci['R_d']}")) # TODO End # pylint: disable=fixme + csv_buffer.seek(0) # Reset buffer position to the beginning DB().copy_from( csv_buffer, diff --git a/tests/lib/test_phase_stats.py b/tests/lib/test_phase_stats.py index 614d59bb7..53fb5d7d0 100644 --- a/tests/lib/test_phase_stats.py +++ b/tests/lib/test_phase_stats.py @@ -125,18 +125,18 @@ def test_phase_stats_multi(): assert data[5]['sampling_rate_max'] == 100688, 'MAX sampling rate not in expected range' assert data[5]['sampling_rate_95p'] == 99696, '95p sampling rate not in expected range' -def test_phase_embodied_and_operational_carbon(): +def test_phase_embodied_and_operational_carbon_using_static_intensity(): run_id = Tests.insert_run() Tests.import_machine_energy(run_id) sci = {"I":436,"R":0,"EL":4,"RS":1,"TE":181000,"R_d":"page request"} - Tests.import_static_carbon_intensity_value(run_id, sci['I']) + Tests.import_carbon_intensity_metrics(run_id, sci['I']) build_and_store_phase_stats(run_id, sci=sci) data = DB().fetch_all('SELECT metric, detail_name, unit, value, type, sampling_rate_avg, sampling_rate_max, 
sampling_rate_95p, phase FROM phase_stats WHERE phase = %s ', params=('004_[RUNTIME]', ), fetch_mode='dict') - assert len(data) == 5 + assert len(data) == 6 psu_energy_ac_mcp_machine = data[3] assert psu_energy_ac_mcp_machine['metric'] == 'psu_energy_ac_mcp_machine' @@ -164,6 +164,45 @@ def test_phase_embodied_and_operational_carbon(): assert embodied_carbon_share_machine['sampling_rate_max'] is None, 'MAX sampling rate not in expected range' assert embodied_carbon_share_machine['sampling_rate_95p'] is None, '95p sampling rate not in expected range' +def test_phase_embodied_and_operational_carbon_using_dynamic_intensity(): + run_id = Tests.insert_run() + Tests.import_machine_energy(run_id) + + sci = {"R":0,"EL":4,"RS":1,"TE":181000,"R_d":"page request"} # 'I' was removed, because it is not relevant here using dynamic values + grid_carbon_intensity = Tests.import_carbon_intensity_metrics(run_id) + + build_and_store_phase_stats(run_id, sci=sci) + + data = DB().fetch_all('SELECT metric, detail_name, unit, value, type, sampling_rate_avg, sampling_rate_max, sampling_rate_95p, phase FROM phase_stats WHERE phase = %s ', params=('004_[RUNTIME]', ), fetch_mode='dict') + + assert len(data) == 6 + psu_energy_ac_mcp_machine = data[3] + assert psu_energy_ac_mcp_machine['metric'] == 'psu_energy_ac_mcp_machine' + + psu_carbon_ac_mcp_machine = data[2] + + assert psu_carbon_ac_mcp_machine['metric'] == 'psu_carbon_ac_mcp_machine' + assert psu_carbon_ac_mcp_machine['detail_name'] == '[MACHINE]' + assert psu_carbon_ac_mcp_machine['unit'] == 'ug' + + operational_carbon_expected = int(psu_energy_ac_mcp_machine['value'] * MICROJOULES_TO_KWH * grid_carbon_intensity * 1_000_000) + assert psu_carbon_ac_mcp_machine['value'] == operational_carbon_expected + assert psu_carbon_ac_mcp_machine['type'] == 'TOTAL' + + phase_time_in_years = Tests.TEST_MEASUREMENT_DURATION_S / (60 * 60 * 24 * 365) + embodied_carbon_expected = int((phase_time_in_years / sci['EL']) * sci['TE'] * sci['RS'] * 1_000_000) 
+ + embodied_carbon_share_machine = data[0] + assert embodied_carbon_share_machine['metric'] == 'embodied_carbon_share_machine' + assert embodied_carbon_share_machine['detail_name'] == '[SYSTEM]' + assert embodied_carbon_share_machine['unit'] == 'ug' + assert embodied_carbon_share_machine['value'] == embodied_carbon_expected + assert embodied_carbon_share_machine['type'] == 'TOTAL' + + assert embodied_carbon_share_machine['sampling_rate_avg'] is None, 'AVG sampling rate not in expected range' + assert embodied_carbon_share_machine['sampling_rate_max'] is None, 'MAX sampling rate not in expected range' + assert embodied_carbon_share_machine['sampling_rate_95p'] is None, '95p sampling rate not in expected range' + def test_phase_stats_energy_one_measurement(): run_id = Tests.insert_run() Tests.import_single_cpu_energy_measurement(run_id) @@ -290,7 +329,7 @@ def test_phase_stats_network_data(): 'N': 0.001, # Network energy intensity (kWh/GB) 'I': 500, # Carbon intensity (gCO2e/kWh) } - Tests.import_static_carbon_intensity_value(run_id, test_sci_config['I']) + Tests.import_carbon_intensity_metrics(run_id, test_sci_config['I']) build_and_store_phase_stats(run_id, sci=test_sci_config) @@ -338,7 +377,7 @@ def test_phase_stats_network_data(): def test_phase_stats_dynamic_grid_carbon_intensity(): run_id = Tests.insert_run() - Tests.import_dynamic_carbon_intensity_value(run_id) + Tests.import_carbon_intensity_metrics(run_id) build_and_store_phase_stats(run_id) @@ -369,7 +408,7 @@ def test_sci_calculation(): 'R': 10, # Functional unit count (10 runs) 'R_d': 'test runs' # Functional unit description } - Tests.import_static_carbon_intensity_value(run_id, test_sci_config['I']) + Tests.import_carbon_intensity_metrics(run_id, test_sci_config['I']) build_and_store_phase_stats(run_id, sci=test_sci_config) diff --git a/tests/test_functions.py b/tests/test_functions.py index 08c32f784..dce662f30 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -221,32 +221,20 @@ 
def import_demo_data_ee(): raise RuntimeError('Import of Demo data into DB failed', ps.stderr) -def import_static_carbon_intensity_value(run_id, static_carbon_intensity_value): - # TODO: Refactor - DB().query( - "UPDATE runs SET start_measurement = %s, end_measurement = %s WHERE id = %s", - (TEST_MEASUREMENT_START_TIME, TEST_MEASUREMENT_END_TIME, run_id) - ) - from lib.carbon_intensity import store_static_carbon_intensity # pylint: disable=import-outside-toplevel - store_static_carbon_intensity(run_id, static_carbon_intensity_value) - - -def import_dynamic_carbon_intensity_value(run_id): +def import_carbon_intensity_metrics(run_id, static_value=None): """ - Import sample dynamic carbon intensity data for a test run. + Import sample carbon intensity data for a test run. Creates multiple measurement values aligned with phase timestamps. - """ + If no static_value is given, dynamic values are inserted. - # TODO: Refactor - # Update the run to have measurement times - DB().query( - "UPDATE runs SET start_measurement = %s, end_measurement = %s WHERE id = %s", - (TEST_MEASUREMENT_START_TIME, TEST_MEASUREMENT_END_TIME, run_id) - ) - - # Create measurement_metric entry for dynamic carbon intensity + Args: + run_id: The ID of the test run + static_value: Optional static carbon intensity value (gCO2e/kWh). + If provided, this value will be used for all timestamps + instead of the dynamic values. 
+ """ metric_name = 'grid_carbon_intensity_dynamic' - detail_name = 'DE' # German location + detail_name = 'DE' unit = 'gCO2e/kWh' sampling_rate_configured = 300000 # 5 minutes in milliseconds @@ -256,8 +244,6 @@ def import_dynamic_carbon_intensity_value(run_id): RETURNING id ''', params=(run_id, metric_name, detail_name, unit, sampling_rate_configured))[0] - # Create sample carbon intensity data points aligned with phase timestamps - # Values are in integer format (multiply by 1000 for precision) values_data = [] # Phase timestamps from insert_run function: @@ -278,25 +264,29 @@ def import_dynamic_carbon_intensity_value(run_id): TEST_MEASUREMENT_END_TIME+2, # [REMOVE] end ] - # Sample carbon intensity values that vary over time (realistic grid data) - carbon_intensity_values = [ - 180, # 180.0 gCO2e/kWh (baseline - low demand) - 175, # 175.0 gCO2e/kWh - 220, # 220.0 gCO2e/kWh (install - higher demand) - 230, # 230.0 gCO2e/kWh - 250, # 250.0 gCO2e/kWh (boot - peak demand) - 240, # 240.0 gCO2e/kWh - 190, # 190.0 gCO2e/kWh (idle - lower demand) - 185, # 185.0 gCO2e/kWh - 300, # 300.0 gCO2e/kWh (runtime start - high demand) - 280, # 280.0 gCO2e/kWh (mid-runtime) - 260, # 260.0 gCO2e/kWh (mid-runtime 2) - 240, # 240.0 gCO2e/kWh (runtime end) - 200, # 200.0 gCO2e/kWh (remove start) - 180, # 180.0 gCO2e/kWh (remove end - back to baseline) - ] + # Use static value if provided, otherwise use dynamic values + if static_value is not None: + carbon_intensity_values = [static_value] * len(phase_timestamps) + mean_runtime_value = static_value + else: + carbon_intensity_values = [ + 180, # 180.0 gCO2e/kWh (baseline - low demand) + 175, # 175.0 gCO2e/kWh + 220, # 220.0 gCO2e/kWh (install - higher demand) + 230, # 230.0 gCO2e/kWh + 250, # 250.0 gCO2e/kWh (boot - peak demand) + 240, # 240.0 gCO2e/kWh + 190, # 190.0 gCO2e/kWh (idle - lower demand) + 185, # 185.0 gCO2e/kWh + 300, # 300.0 gCO2e/kWh (runtime start - high demand) + 280, # 280.0 gCO2e/kWh (mid-runtime) + 260, # 260.0 
gCO2e/kWh (mid-runtime 2) + 240, # 240.0 gCO2e/kWh (runtime end) + 200, # 200.0 gCO2e/kWh (remove start) + 180, # 180.0 gCO2e/kWh (remove end - back to baseline) + ] + mean_runtime_value = (carbon_intensity_values[9] + carbon_intensity_values[10]) / 2 - # Create measurement values for timestamp, value in zip(phase_timestamps, carbon_intensity_values): values_data.append((measurement_metric_id, value, timestamp)) @@ -314,6 +304,8 @@ def import_dynamic_carbon_intensity_value(run_id): print(f"Imported {len(values_data)} dynamic carbon intensity data points for run {run_id}") + return mean_runtime_value + def assertion_info(expected, actual): return f"Expected: {expected}, Actual: {actual}" From 8c834da5f73f057c8c3f46473c64b42d46843c89 Mon Sep 17 00:00:00 2001 From: David Kopp Date: Thu, 25 Sep 2025 18:40:02 +0200 Subject: [PATCH 14/27] Fix tests --- lib/carbon_intensity.py | 10 +++++++--- tests/lib/test_carbon_intensity.py | 25 ++++++++++++++++--------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/lib/carbon_intensity.py b/lib/carbon_intensity.py index 9b772a88a..83f54fae1 100644 --- a/lib/carbon_intensity.py +++ b/lib/carbon_intensity.py @@ -161,16 +161,20 @@ def store_static_carbon_intensity(run_id, static_value): carbon_intensity_value = int(float(static_value)) # Calculate timestamps: start/end of run + middle of each phase - timestamps = [] + timestamps = set() # Add overall run start and end times if start_time_us and end_time_us: - timestamps.extend([start_time_us, end_time_us]) + timestamps.add(start_time_us) + timestamps.add(end_time_us) # Add middle timestamp for each phase for phase in phases: middle_timestamp = (phase['start'] + phase['end']) // 2 - timestamps.append(middle_timestamp) + timestamps.add(middle_timestamp) + + # Convert back to list for iteration + timestamps = list(timestamps) # Insert static value for all timestamps values_to_insert = [] diff --git a/tests/lib/test_carbon_intensity.py b/tests/lib/test_carbon_intensity.py 
index 4e2d49690..51129b2d3 100644 --- a/tests/lib/test_carbon_intensity.py +++ b/tests/lib/test_carbon_intensity.py @@ -254,7 +254,7 @@ def test_store_carbon_intensity_static_value(self, run_with_measurement_times): assert metric_result[1] == '[CONFIG]' assert metric_result[2] == 'gCO2e/kWh' - # Verify that static value was stored (should have 2 data points: start and end) + # Verify that static value was stored (should have up to 7 data points: start/end of run + middle of 5 phases, deduplicated) values_result = DB().fetch_all( """SELECT mv.value FROM measurement_values mv @@ -263,16 +263,23 @@ def test_store_carbon_intensity_static_value(self, run_with_measurement_times): (run_id,) ) - assert len(values_result) == 2 - # Both values should be the same static value (multiplied by 1000) - assert values_result[0][0] == 250500 # 250.5 * 1000 - assert values_result[1][0] == 250500 # 250.5 * 1000 + run_query = """ + SELECT phases, start_measurement, end_measurement + FROM runs + WHERE id = %s + """ + run_data = DB().fetch_one(run_query, (run_id,)) + print(run_data) + + assert len(values_result) == 8 # 5 phases + 1 flow + start of run + end of run + # All values should be the same static value + for result in values_result: + assert result[0] == 250 # 250.5 is converted to integer def test_store_carbon_intensity_dynamic_grid_enabled(self, run_with_measurement_times): # Test that dynamic grid carbon intensity is stored when enabled in measurement config run_id = run_with_measurement_times - # Mock the carbon intensity API call # Use timestamps that align with the measurement timeframe (2024-12-24T13:33:10Z to 2024-12-24T13:41:00Z) with patch('lib.carbon_intensity.CarbonIntensityClient') as mock_client_class: @@ -311,12 +318,12 @@ def test_store_carbon_intensity_dynamic_grid_enabled(self, run_with_measurement_ # Should have 4 data points: start boundary + 2 intermediate points + end boundary assert len(values_result) == 4 - # Values should be stored as integers (multiplied 
by 1000) + # Values should be stored as integers # First point: interpolated start boundary (between 185.0 and 190.0) # Second point: 190.0 (intermediate point at 13:35:00) - assert values_result[1][0] == 190000 # 190.0 * 1000 + assert values_result[1][0] == 190 # Third point: 188.0 (intermediate point at 13:38:00) - assert values_result[2][0] == 188000 # 188.0 * 1000 + assert values_result[2][0] == 188 # Fourth point: interpolated end boundary (between 188.0 and 183.0) def test_store_carbon_intensity_dynamic_single_data_point(self, run_with_measurement_times): From 1ae96cb5299a0fc97fe068351b9480c64d0f3ab1 Mon Sep 17 00:00:00 2001 From: David Kopp Date: Thu, 25 Sep 2025 18:40:43 +0200 Subject: [PATCH 15/27] Remove carbon intensity methods not needed anymore --- lib/carbon_intensity.py | 116 ----------------------------- tests/lib/test_carbon_intensity.py | 88 +--------------------- 2 files changed, 1 insertion(+), 203 deletions(-) diff --git a/lib/carbon_intensity.py b/lib/carbon_intensity.py index 83f54fae1..a25dce334 100644 --- a/lib/carbon_intensity.py +++ b/lib/carbon_intensity.py @@ -66,63 +66,6 @@ def get_carbon_intensity_history(self, location: str, start_time: str, end_time: return data -def get_carbon_intensity_data_for_run(run_id): - """ - Get carbon intensity data for a run, automatically detecting dynamic vs static. 
- - Args: - run_id: UUID of the run - - Returns: - Tuple of (carbon_data, sampling_rate_ms) where: - - carbon_data: List of carbon intensity data points or None if no data found - - sampling_rate_ms: Sampling rate in milliseconds - """ - # Auto-detect what carbon intensity data is available for this run - # Check for both static and dynamic carbon intensity - query = """ - SELECT metric, detail_name - FROM measurement_metrics - WHERE run_id = %s AND metric IN ('grid_carbon_intensity_static', 'grid_carbon_intensity_dynamic') - LIMIT 1 - """ - grid_carbon_intensity_metrics = DB().fetch_one(query, (run_id,)) - - if not grid_carbon_intensity_metrics: - return None, None - - metric_name, detail_name = grid_carbon_intensity_metrics - - query = """ - SELECT mv.time, mv.value, mm.sampling_rate_configured - FROM measurement_values mv - JOIN measurement_metrics mm ON mv.measurement_metric_id = mm.id - WHERE mm.run_id = %s - AND mm.metric = %s - AND mm.detail_name = %s - ORDER BY mv.time ASC - """ - carbon_intensity_values = DB().fetch_all(query, (run_id, metric_name, detail_name)) - - if not carbon_intensity_values: - return None, None - - # Extract sampling rate from first row (all rows have the same sampling_rate_configured) - sampling_rate_ms = carbon_intensity_values[0][2] if carbon_intensity_values else 300000 - - # Convert from database format to carbon data format (keep timestamps as microseconds) - carbon_data = [ - { - 'timestamp_us': timestamp_us, - 'carbon_intensity': float(value_int), - 'location': detail_name - } - for timestamp_us, value_int, _ in carbon_intensity_values # Unpack the third element (sampling_rate_configured) - ] - - return carbon_data, sampling_rate_ms - - def store_static_carbon_intensity(run_id, static_value): """ Store static carbon intensity value as a constant time series at multiple timestamps: @@ -299,65 +242,6 @@ def store_dynamic_carbon_intensity(run_id, location): print(f"Stored dynamic carbon intensity for location {location}: 
start={start_carbon_intensity} gCO2e/kWh, end={end_carbon_intensity} gCO2e/kWh, {len(intermediate_points)} intermediate points") -def generate_carbon_intensity_timeseries_for_phase( - phase_start_us: int, - phase_end_us: int, - carbon_data: List[Dict[str, Any]], - sampling_rate_ms: int = 300000 -) -> List[Dict[str, Any]]: - """ - Generate carbon intensity timeseries for a specific phase timeframe. - - This function generates carbon intensity values at regular intervals throughout a phase, - which are stored as measurement metrics and used for calculating representative carbon - intensity values for energy calculations. - - Args: - phase_start_us: Phase start timestamp in microseconds - phase_end_us: Phase end timestamp in microseconds - carbon_data: List of carbon intensity data points from service - sampling_rate_ms: Sampling rate in milliseconds for timeseries generation (default: 300000 = 5 minutes) - - Returns: - List of carbon intensity timeseries points: - [{"timestamp_us": 1727003400000000, "carbon_intensity": 185.0}, ...] 
- - Raises: - ValueError: If carbon_data is empty or phase timeframe is invalid - """ - if not carbon_data: - raise ValueError("No carbon intensity data available for timeseries generation") - - if phase_start_us >= phase_end_us: - raise ValueError("Invalid phase timeframe: start must be before end") - - # Convert sampling rate to microseconds - sampling_rate_us = sampling_rate_ms * 1000 - - # Generate timestamps at regular intervals throughout the phase - timeseries = [] - current_timestamp_us = phase_start_us - - while current_timestamp_us <= phase_end_us: - carbon_intensity = _get_carbon_intensity_at_timestamp(current_timestamp_us, carbon_data) - timeseries.append({ - "timestamp_us": current_timestamp_us, - "carbon_intensity": carbon_intensity - }) - current_timestamp_us += sampling_rate_us - - # Always include the phase end timestamp if it wasn't already included - if timeseries and timeseries[-1]["timestamp_us"] != phase_end_us: - carbon_intensity = _get_carbon_intensity_at_timestamp(phase_end_us, carbon_data) - timeseries.append({ - "timestamp_us": phase_end_us, - "carbon_intensity": carbon_intensity - }) - - return timeseries - - - def _get_carbon_intensity_at_timestamp(timestamp_us: int, carbon_data: List[Dict[str, Any]]) -> float: """ Get carbon intensity value for a specific timestamp using nearest data point. 
diff --git a/tests/lib/test_carbon_intensity.py b/tests/lib/test_carbon_intensity.py index 51129b2d3..6764aeb35 100644 --- a/tests/lib/test_carbon_intensity.py +++ b/tests/lib/test_carbon_intensity.py @@ -15,10 +15,8 @@ _microseconds_to_iso8601, _calculate_sampling_rate_from_data, _get_carbon_intensity_at_timestamp, - get_carbon_intensity_data_for_run, store_static_carbon_intensity, - store_dynamic_carbon_intensity, - generate_carbon_intensity_timeseries_for_phase, + store_dynamic_carbon_intensity ) class TestCarbonIntensityClient: @@ -185,41 +183,6 @@ def test_carbon_intensity_client_invalid_response(self, mock_get): with pytest.raises(ValueError, match="Expected list response from carbon intensity service"): client.get_carbon_intensity_history("DE", "2024-09-22T10:50:00Z", "2024-09-22T10:55:00Z") - -class TestGetCarbonIntensityDataForRun: - - def test_no_carbon_intensity_data(self): - # Test with run that has no carbon intensity data - run_id = Tests.insert_run() - carbon_data, sampling_rate_ms = get_carbon_intensity_data_for_run(run_id) - assert carbon_data is None - assert sampling_rate_ms is None - - def test_with_dynamic_carbon_intensity_data(self): - # Test with run that has dynamic carbon intensity data stored - run_id = Tests.insert_run() - - # Insert mock carbon intensity metadata into measurement_metrics - # This simulates data that would be stored during a run with dynamic carbon intensity - metric_id = DB().fetch_one( - "INSERT INTO measurement_metrics (run_id, metric, detail_name, unit, sampling_rate_configured) VALUES (%s, %s, %s, %s, %s) RETURNING id", - (run_id, 'grid_carbon_intensity_dynamic', 'DE', 'gCO2e/kWh', 1000) - )[0] - - # Insert actual carbon intensity values - DB().query( - "INSERT INTO measurement_values (measurement_metric_id, value, time) VALUES (%s, %s, %s)", - (metric_id, 185, 1727003400000000) - ) - - carbon_data, sampling_rate_ms = get_carbon_intensity_data_for_run(run_id) - - # Should return the stored carbon intensity data - 
assert carbon_data is not None - assert len(carbon_data) > 0 - assert sampling_rate_ms == 1000 # Should match the sampling rate we inserted - - class TestStoreCarbonIntensityAsMetrics: @pytest.fixture @@ -409,52 +372,3 @@ def test_store_carbon_intensity_dynamic_missing_location(self, run_with_measurem # Call the function with None location - should raise an exception or fail gracefully with pytest.raises(Exception): # The method should fail when location is None store_dynamic_carbon_intensity(run_id, None) - - -class TestCarbonIntensityTimeseries: - - def test_generate_carbon_intensity_timeseries_for_phase(self): - """Test generating carbon intensity timeseries for a phase""" - # Sample carbon intensity data with microsecond timestamps - carbon_data = [ - {"timestamp_us": int(datetime(2025, 9, 22, 10, 0, 0, tzinfo=timezone.utc).timestamp() * 1_000_000), "carbon_intensity": 185.0, "location": "DE"}, - {"timestamp_us": int(datetime(2025, 9, 22, 11, 0, 0, tzinfo=timezone.utc).timestamp() * 1_000_000), "carbon_intensity": 190.0, "location": "DE"}, - {"timestamp_us": int(datetime(2025, 9, 22, 12, 0, 0, tzinfo=timezone.utc).timestamp() * 1_000_000), "carbon_intensity": 183.0, "location": "DE"} - ] - - # Phase timeframe: 10:30 to 11:30 (90 minutes) in UTC - phase_start_us = int(datetime(2025, 9, 22, 10, 30, 0, tzinfo=timezone.utc).timestamp() * 1_000_000) - phase_end_us = int(datetime(2025, 9, 22, 11, 30, 0, tzinfo=timezone.utc).timestamp() * 1_000_000) - - # Generate timeseries with 30-minute intervals - result = generate_carbon_intensity_timeseries_for_phase( - phase_start_us, phase_end_us, carbon_data, sampling_rate_ms=30*60*1000 - ) - - # Should generate points at 10:30, 11:00, 11:30 - assert len(result) == 3 - assert result[0]["timestamp_us"] == phase_start_us - assert result[-1]["timestamp_us"] == phase_end_us - - # Values should be interpolated appropriately - assert 185.0 <= result[0]["carbon_intensity"] <= 190.0 # Interpolated between 185 and 190 - assert 
result[1]["carbon_intensity"] == 190.0 # Exact match at 11:00 - assert 183.0 <= result[2]["carbon_intensity"] <= 190.0 # Interpolated between 190 and 183 - - def test_get_carbon_intensity_timeseries_empty_data(self): - """Test error handling with empty carbon data""" - phase_start_us = int(datetime(2025, 9, 22, 10, 30, 0, tzinfo=timezone.utc).timestamp() * 1_000_000) - phase_end_us = int(datetime(2025, 9, 22, 11, 30, 0, tzinfo=timezone.utc).timestamp() * 1_000_000) - - with pytest.raises(ValueError, match="No carbon intensity data available"): - generate_carbon_intensity_timeseries_for_phase(phase_start_us, phase_end_us, []) - - def test_get_carbon_intensity_timeseries_invalid_timeframe(self): - """Test error handling with invalid phase timeframe""" - carbon_data = [{"time": "2025-09-22T10:00:00Z", "carbon_intensity": 185.0, "location": "DE"}] - - phase_start_us = int(datetime(2025, 9, 22, 11, 30, 0, tzinfo=timezone.utc).timestamp() * 1_000_000) - phase_end_us = int(datetime(2025, 9, 22, 10, 30, 0, tzinfo=timezone.utc).timestamp() * 1_000_000) # End before start - - with pytest.raises(ValueError, match="Invalid phase timeframe"): - generate_carbon_intensity_timeseries_for_phase(phase_start_us, phase_end_us, carbon_data) From a95c66296071f55ea305ecab108dcd1298d4c69c Mon Sep 17 00:00:00 2001 From: David Kopp Date: Thu, 25 Sep 2025 18:58:16 +0200 Subject: [PATCH 16/27] Fix storage of dynamic carbon intensity data at all relevant timestamps --- lib/carbon_intensity.py | 58 +++++++++++++++--------------- tests/lib/test_carbon_intensity.py | 29 ++++++++------- 2 files changed, 45 insertions(+), 42 deletions(-) diff --git a/lib/carbon_intensity.py b/lib/carbon_intensity.py index a25dce334..4b5ad7d38 100644 --- a/lib/carbon_intensity.py +++ b/lib/carbon_intensity.py @@ -135,23 +135,24 @@ def store_static_carbon_intensity(run_id, static_value): def store_dynamic_carbon_intensity(run_id, location): """ - Store dynamic carbon intensity data from API as time series. 
+ Store dynamic carbon intensity data from API as time series, ensuring coverage per phase. + Uses nearest data point logic for timestamps where API data may be sparse. Args: run_id: UUID of the run location: Location code (e.g., "DE", "ES-IB-MA") """ - # Get run start and end times + # Get run phases data and overall start/end times run_query = """ - SELECT start_measurement, end_measurement + SELECT phases, start_measurement, end_measurement FROM runs WHERE id = %s """ run_data = DB().fetch_one(run_query, (run_id,)) - if not run_data or not run_data[0] or not run_data[1]: - raise ValueError(f"Run {run_id} does not have valid start_measurement and end_measurement times") + if not run_data or not run_data[0] or not run_data[1] or not run_data[2]: + raise ValueError(f"Run {run_id} does not have valid phases and measurement times") - start_time_us, end_time_us = run_data + phases, start_time_us, end_time_us = run_data start_time_iso = _microseconds_to_iso8601(start_time_us) end_time_iso = _microseconds_to_iso8601(end_time_us) @@ -160,13 +161,16 @@ def store_dynamic_carbon_intensity(run_id, location): carbon_intensity_data = carbon_client.get_carbon_intensity_history( location, start_time_iso, end_time_iso ) - if not carbon_intensity_data: raise ValueError( f"No carbon intensity data received from service for location '{location}' " f"between {start_time_iso} and {end_time_iso}. The service returned an empty dataset." 
) + values = [float(dp['carbon_intensity']) for dp in carbon_intensity_data] + print(f"Retrieved {len(carbon_intensity_data)} API data points for {location}: " + f"range {min(values):.1f}-{max(values):.1f} gCO2e/kWh") + # Create measurement_metric entry for dynamic carbon intensity metric_name = 'grid_carbon_intensity_dynamic' detail_name = location @@ -197,34 +201,29 @@ def store_dynamic_carbon_intensity(run_id, location): carbon_data_for_lookup.sort(key=lambda x: x['timestamp_us']) # Prepare measurement values for bulk insert - values_data = [] + timestamps = set() # Always ensure we have data points at measurement start and end times - # Get carbon intensity at measurement start time - start_carbon_intensity = _get_carbon_intensity_at_timestamp(start_time_us, carbon_data_for_lookup) - start_carbon_intensity_value = int(start_carbon_intensity) - values_data.append((measurement_metric_id, start_carbon_intensity_value, start_time_us)) + timestamps.add(start_time_us) + timestamps.add(end_time_us) - # Get carbon intensity at measurement end time - end_carbon_intensity = _get_carbon_intensity_at_timestamp(end_time_us, carbon_data_for_lookup) - end_carbon_intensity_value = int(end_carbon_intensity) + # Add middle timestamp for each phase to ensure coverage + for phase in phases: + middle_timestamp = (phase['start'] + phase['end']) // 2 + timestamps.add(middle_timestamp) - # Add intermediate data points that fall within measurement timeframe - intermediate_points = [] + # Add any intermediate API data points that fall within measurement timeframe for data_point in carbon_data_for_lookup: timestamp_us = data_point['timestamp_us'] - # Only include points strictly within the timeframe (not at boundaries) - if start_time_us < timestamp_us < end_time_us: - carbon_intensity_value = int(float(data_point['carbon_intensity'])) - intermediate_points.append((measurement_metric_id, carbon_intensity_value, timestamp_us)) + if start_time_us <= timestamp_us <= end_time_us: + 
timestamps.add(timestamp_us) - # Sort intermediate points by time and add them - intermediate_points.sort(key=lambda x: x[2]) # Sort by timestamp - values_data.extend(intermediate_points) - - # Add end time point (ensure it's different from start time) - if start_time_us != end_time_us: - values_data.append((measurement_metric_id, end_carbon_intensity_value, end_time_us)) + # Convert timestamps to values using nearest data point logic + values_data = [] + for timestamp in timestamps: + carbon_intensity = _get_carbon_intensity_at_timestamp(timestamp, carbon_data_for_lookup) + carbon_intensity_value = int(carbon_intensity) + values_data.append((measurement_metric_id, carbon_intensity_value, timestamp)) if values_data: @@ -239,7 +238,8 @@ def store_dynamic_carbon_intensity(run_id, location): ) f.close() - print(f"Stored dynamic carbon intensity for location {location}: start={start_carbon_intensity} gCO2e/kWh, end={end_carbon_intensity} gCO2e/kWh, {len(intermediate_points)} intermediate points") + unique_values = len(set(row[1] for row in values_data)) + print(f"Stored dynamic carbon intensity for location {location}: {len(values_data)} timestamps, {unique_values} unique values") def _get_carbon_intensity_at_timestamp(timestamp_us: int, carbon_data: List[Dict[str, Any]]) -> float: diff --git a/tests/lib/test_carbon_intensity.py b/tests/lib/test_carbon_intensity.py index 6764aeb35..9bab84a60 100644 --- a/tests/lib/test_carbon_intensity.py +++ b/tests/lib/test_carbon_intensity.py @@ -279,15 +279,12 @@ def test_store_carbon_intensity_dynamic_grid_enabled(self, run_with_measurement_ (run_id,) ) - # Should have 4 data points: start boundary + 2 intermediate points + end boundary - assert len(values_result) == 4 - # Values should be stored as integers - # First point: interpolated start boundary (between 185.0 and 190.0) - # Second point: 190.0 (intermediate point at 13:35:00) - assert values_result[1][0] == 190 - # Third point: 188.0 (intermediate point at 13:38:00) - 
assert values_result[2][0] == 188 - # Fourth point: interpolated end boundary (between 188.0 and 183.0) + # Should have at least 7 data points: start/end of run + middle of 5 phases + API data points + # Actual count may vary due to deduplication of timestamps + assert len(values_result) >= 7 + # All values should be integers (nearest data point logic applied) + for value, _ in values_result: + assert isinstance(value, int) def test_store_carbon_intensity_dynamic_single_data_point(self, run_with_measurement_times): run_id = run_with_measurement_times @@ -324,7 +321,12 @@ def test_store_carbon_intensity_dynamic_single_data_point(self, run_with_measure (run_id,) ) - assert len(values_result) >= 2, "Dynamic carbon intensity requires at least 2 data points" + # Should have at least 7 data points: start/end of run + middle of 5 phases + API data point + # All using nearest data point (single API point in this case) + assert len(values_result) >= 7 + # All values should be the same (185) since only one API data point + for value, _ in values_result: + assert value == 185 def test_store_carbon_intensity_dynamic_data_outside_timeframe(self, run_with_measurement_times): # Test that dynamic carbon intensity properly handles data outside measurement timeframe using extrapolation @@ -361,9 +363,10 @@ def test_store_carbon_intensity_dynamic_data_outside_timeframe(self, run_with_me (run_id,) ) - # Should have exactly 2 data points (start and end boundaries) since no intermediate points in timeframe - assert len(values_result) == 2 - # Both values should be extrapolated from the trend in the API data (210 is higher than 200) + # Should have at least 7 data points: start/end of run + middle of 5 phases + # All using nearest data point logic with API data outside timeframe + assert len(values_result) >= 7 + # Values should be extrapolated using nearest data point logic def test_store_carbon_intensity_dynamic_missing_location(self, run_with_measurement_times): # Test error handling 
when dynamic method is called with None location From 297645509a82d579bf2a94283a464a932cef5ffd Mon Sep 17 00:00:00 2001 From: David Kopp Date: Thu, 25 Sep 2025 19:38:32 +0200 Subject: [PATCH 17/27] Remove code duplications --- lib/carbon_intensity.py | 185 +++++++++++++++++++++++++--------------- 1 file changed, 118 insertions(+), 67 deletions(-) diff --git a/lib/carbon_intensity.py b/lib/carbon_intensity.py index 4b5ad7d38..187ca5259 100644 --- a/lib/carbon_intensity.py +++ b/lib/carbon_intensity.py @@ -66,17 +66,19 @@ def get_carbon_intensity_history(self, location: str, start_time: str, end_time: return data -def store_static_carbon_intensity(run_id, static_value): +def _get_run_data_and_phases(run_id): """ - Store static carbon intensity value as a constant time series at multiple timestamps: - - Start and end of measurement run to ensure graph looks good in frontend - - Middle of each phase to enable carbon metrics calculation per phase + Fetch run data including phases and measurement times. 
Args: run_id: UUID of the run - static_value: Static carbon intensity value from config (gCO2e/kWh) + + Returns: + tuple: (phases, start_time_us, end_time_us) + + Raises: + ValueError: If run data is invalid or missing """ - # Get run phases data and overall start/end times run_query = """ SELECT phases, start_measurement, end_measurement FROM runs @@ -87,23 +89,42 @@ def store_static_carbon_intensity(run_id, static_value): raise ValueError(f"Run {run_id} does not have phases data") phases, start_time_us, end_time_us = run_data + return phases, start_time_us, end_time_us - # Create measurement_metric entry for static carbon intensity - metric_name = 'grid_carbon_intensity_static' - detail_name = '[CONFIG]' - unit = 'gCO2e/kWh' - sampling_rate_configured = 0 # Static value has no sampling rate - measurement_metric_id = DB().fetch_one(''' +def _create_measurement_metric(run_id, metric_name, detail_name, unit, sampling_rate_configured): + """ + Create a measurement metric entry in the database. + + Args: + run_id: UUID of the run + metric_name: Name of the metric + detail_name: Detail/source name for the metric + unit: Unit of measurement + sampling_rate_configured: Configured sampling rate + + Returns: + int: measurement_metric_id + """ + return DB().fetch_one(''' INSERT INTO measurement_metrics (run_id, metric, detail_name, unit, sampling_rate_configured) VALUES (%s, %s, %s, %s, %s) RETURNING id ''', params=(run_id, metric_name, detail_name, unit, sampling_rate_configured))[0] - # Convert static value to integer - carbon_intensity_value = int(float(static_value)) - # Calculate timestamps: start/end of run + middle of each phase +def _get_base_timestamps(phases, start_time_us, end_time_us): + """ + Get base timestamps: run start/end + phase middles. 
+ + Args: + phases: List of phase dictionaries + start_time_us: Run start time in microseconds + end_time_us: Run end time in microseconds + + Returns: + set: Set of timestamps + """ timestamps = set() # Add overall run start and end times @@ -116,19 +137,78 @@ def store_static_carbon_intensity(run_id, static_value): middle_timestamp = (phase['start'] + phase['end']) // 2 timestamps.add(middle_timestamp) - # Convert back to list for iteration - timestamps = list(timestamps) + return timestamps - # Insert static value for all timestamps - values_to_insert = [] - for timestamp in timestamps: - values_to_insert.extend([measurement_metric_id, carbon_intensity_value, timestamp]) - # Build dynamic query with correct number of placeholders - placeholders = ', '.join(['(%s, %s, %s)'] * len(timestamps)) - query = f"INSERT INTO measurement_values (measurement_metric_id, value, time) VALUES {placeholders}" +def _bulk_insert_measurement_values(measurement_metric_id, value_timestamp_pairs): + """ + Efficiently insert measurement values using the most appropriate method. 
+ + Args: + measurement_metric_id: ID of the measurement metric + value_timestamp_pairs: List of (value, timestamp) tuples + """ + if not value_timestamp_pairs: + return + + # For small datasets, use regular INSERT with multiple VALUES + if len(value_timestamp_pairs) <= 10: + values_to_insert = [] + for value, timestamp in value_timestamp_pairs: + values_to_insert.extend([measurement_metric_id, int(value), timestamp]) + + placeholders = ', '.join(['(%s, %s, %s)'] * len(value_timestamp_pairs)) + query = f"INSERT INTO measurement_values (measurement_metric_id, value, time) VALUES {placeholders}" + DB().query(query, tuple(values_to_insert)) + else: + # For larger datasets, use COPY FROM for better performance + values_data = [(measurement_metric_id, int(value), timestamp) + for value, timestamp in value_timestamp_pairs] + csv_data = '\n'.join([f"{row[0]},{row[1]},{row[2]}" for row in values_data]) + f = StringIO(csv_data) + DB().copy_from( + file=f, + table='measurement_values', + columns=['measurement_metric_id', 'value', 'time'], + sep=',' + ) + f.close() + + +def store_static_carbon_intensity(run_id, static_value): + """ + Store static carbon intensity value as a constant time series at multiple timestamps: + - Start and end of measurement run to ensure graph looks good in frontend + - Middle of each phase to enable carbon metrics calculation per phase + + Args: + run_id: UUID of the run + static_value: Static carbon intensity value from config (gCO2e/kWh) + """ + # Get run phases data and overall start/end times + phases, start_time_us, end_time_us = _get_run_data_and_phases(run_id) + + # Create measurement_metric entry for static carbon intensity + metric_name = 'grid_carbon_intensity_static' + detail_name = '[CONFIG]' + unit = 'gCO2e/kWh' + sampling_rate_configured = 0 # Static value has no sampling rate + + measurement_metric_id = _create_measurement_metric( + run_id, metric_name, detail_name, unit, sampling_rate_configured + ) + + # Convert static value to 
integer + carbon_intensity_value = int(float(static_value)) + + # Calculate timestamps: start/end of run + middle of each phase + timestamps = _get_base_timestamps(phases, start_time_us, end_time_us) - DB().query(query, tuple(values_to_insert)) + # Prepare value-timestamp pairs for bulk insert + value_timestamp_pairs = [(carbon_intensity_value, timestamp) for timestamp in timestamps] + + # Insert static value for all timestamps + _bulk_insert_measurement_values(measurement_metric_id, value_timestamp_pairs) print(f"Stored static carbon intensity value {static_value} gCO2e/kWh at {len(timestamps)} timestamps (run start/end + phase middles)") @@ -143,16 +223,9 @@ def store_dynamic_carbon_intensity(run_id, location): location: Location code (e.g., "DE", "ES-IB-MA") """ # Get run phases data and overall start/end times - run_query = """ - SELECT phases, start_measurement, end_measurement - FROM runs - WHERE id = %s - """ - run_data = DB().fetch_one(run_query, (run_id,)) - if not run_data or not run_data[0] or not run_data[1] or not run_data[2]: - raise ValueError(f"Run {run_id} does not have valid phases and measurement times") - - phases, start_time_us, end_time_us = run_data + phases, start_time_us, end_time_us = _get_run_data_and_phases(run_id) + if not start_time_us or not end_time_us: + raise ValueError(f"Run {run_id} does not have valid measurement times") start_time_iso = _microseconds_to_iso8601(start_time_us) end_time_iso = _microseconds_to_iso8601(end_time_us) @@ -178,11 +251,9 @@ def store_dynamic_carbon_intensity(run_id, location): # Calculate sampling rate based on actual data intervals from API format sampling_rate_configured = _calculate_sampling_rate_from_data(carbon_intensity_data) - measurement_metric_id = DB().fetch_one(''' - INSERT INTO measurement_metrics (run_id, metric, detail_name, unit, sampling_rate_configured) - VALUES (%s, %s, %s, %s, %s) - RETURNING id - ''', params=(run_id, metric_name, detail_name, unit, sampling_rate_configured))[0] + 
measurement_metric_id = _create_measurement_metric( + run_id, metric_name, detail_name, unit, sampling_rate_configured + ) # Convert API data to format expected by _get_carbon_intensity_at_timestamp carbon_data_for_lookup = [] @@ -201,16 +272,7 @@ def store_dynamic_carbon_intensity(run_id, location): carbon_data_for_lookup.sort(key=lambda x: x['timestamp_us']) # Prepare measurement values for bulk insert - timestamps = set() - - # Always ensure we have data points at measurement start and end times - timestamps.add(start_time_us) - timestamps.add(end_time_us) - - # Add middle timestamp for each phase to ensure coverage - for phase in phases: - middle_timestamp = (phase['start'] + phase['end']) // 2 - timestamps.add(middle_timestamp) + timestamps = _get_base_timestamps(phases, start_time_us, end_time_us) # Add any intermediate API data points that fall within measurement timeframe for data_point in carbon_data_for_lookup: @@ -219,27 +281,16 @@ def store_dynamic_carbon_intensity(run_id, location): timestamps.add(timestamp_us) # Convert timestamps to values using nearest data point logic - values_data = [] + value_timestamp_pairs = [] for timestamp in timestamps: carbon_intensity = _get_carbon_intensity_at_timestamp(timestamp, carbon_data_for_lookup) - carbon_intensity_value = int(carbon_intensity) - values_data.append((measurement_metric_id, carbon_intensity_value, timestamp)) - + value_timestamp_pairs.append((carbon_intensity, timestamp)) - if values_data: - # Bulk insert measurement values using copy_from - csv_data = '\n'.join([f"{row[0]},{row[1]},{row[2]}" for row in values_data]) - f = StringIO(csv_data) - DB().copy_from( - file=f, - table='measurement_values', - columns=['measurement_metric_id', 'value', 'time'], - sep=',' - ) - f.close() + # Bulk insert measurement values + _bulk_insert_measurement_values(measurement_metric_id, value_timestamp_pairs) - unique_values = len(set(row[1] for row in values_data)) - print(f"Stored dynamic carbon intensity for 
location {location}: {len(values_data)} timestamps, {unique_values} unique values") + unique_values = len(set(int(value) for value, _ in value_timestamp_pairs)) + print(f"Stored dynamic carbon intensity for location {location}: {len(value_timestamp_pairs)} timestamps, {unique_values} unique values") def _get_carbon_intensity_at_timestamp(timestamp_us: int, carbon_data: List[Dict[str, Any]]) -> float: From a90e89715b54b9b56d6c5f64e335468c84f60994 Mon Sep 17 00:00:00 2001 From: David Kopp Date: Thu, 25 Sep 2025 20:19:41 +0200 Subject: [PATCH 18/27] Minor fixes and improvements --- lib/carbon_intensity.py | 62 +++++++++++++++--------------- tests/lib/test_carbon_intensity.py | 55 ++++++++++---------------- tests/test_functions.py | 8 ++-- 3 files changed, 57 insertions(+), 68 deletions(-) diff --git a/lib/carbon_intensity.py b/lib/carbon_intensity.py index 187ca5259..f39934eb4 100644 --- a/lib/carbon_intensity.py +++ b/lib/carbon_intensity.py @@ -19,7 +19,8 @@ def __init__(self, base_url: str = None): """ if base_url is None: config = GlobalConfig().config - elephant_config = config.get('elephant', {}) + dynamic_config = config.get('dynamic_grid_carbon_intensity', {}) + elephant_config = dynamic_config.get('elephant', {}) protocol = elephant_config.get('protocol', 'http') host = elephant_config.get('host', 'localhost') port = elephant_config.get('port', 8000) @@ -48,7 +49,7 @@ def get_carbon_intensity_history(self, location: str, start_time: str, end_time: 'location': location, 'startTime': start_time, 'endTime': end_time, - 'interpolate': 'true' + 'interpolate': 'true' # we also want to get data points that are adjacent to the requested time range, to be ensure we always get at least one data point } response = requests.get(url, params=params, timeout=30) @@ -92,7 +93,7 @@ def _get_run_data_and_phases(run_id): return phases, start_time_us, end_time_us -def _create_measurement_metric(run_id, metric_name, detail_name, unit, sampling_rate_configured): +def 
_create_measurement_metric(run_id, metric_name, detail_name, unit, sampling_rate): """ Create a measurement metric entry in the database. @@ -101,7 +102,7 @@ def _create_measurement_metric(run_id, metric_name, detail_name, unit, sampling_ metric_name: Name of the metric detail_name: Detail/source name for the metric unit: Unit of measurement - sampling_rate_configured: Configured sampling rate + sampling_rate: Configured sampling rate Returns: int: measurement_metric_id @@ -110,12 +111,14 @@ def _create_measurement_metric(run_id, metric_name, detail_name, unit, sampling_ INSERT INTO measurement_metrics (run_id, metric, detail_name, unit, sampling_rate_configured) VALUES (%s, %s, %s, %s, %s) RETURNING id - ''', params=(run_id, metric_name, detail_name, unit, sampling_rate_configured))[0] + ''', params=(run_id, metric_name, detail_name, unit, sampling_rate))[0] def _get_base_timestamps(phases, start_time_us, end_time_us): """ - Get base timestamps: run start/end + phase middles. + Defines for which timestamps a carbon intensity value is needed: + - run start/end + - phase middles Args: phases: List of phase dictionaries @@ -185,17 +188,15 @@ def store_static_carbon_intensity(run_id, static_value): run_id: UUID of the run static_value: Static carbon intensity value from config (gCO2e/kWh) """ - # Get run phases data and overall start/end times phases, start_time_us, end_time_us = _get_run_data_and_phases(run_id) - # Create measurement_metric entry for static carbon intensity metric_name = 'grid_carbon_intensity_static' detail_name = '[CONFIG]' unit = 'gCO2e/kWh' - sampling_rate_configured = 0 # Static value has no sampling rate + sampling_rate = 0 # Static value has no sampling rate measurement_metric_id = _create_measurement_metric( - run_id, metric_name, detail_name, unit, sampling_rate_configured + run_id, metric_name, detail_name, unit, sampling_rate ) # Convert static value to integer @@ -220,16 +221,13 @@ def store_dynamic_carbon_intensity(run_id, location): 
Args: run_id: UUID of the run - location: Location code (e.g., "DE", "ES-IB-MA") + location: Grid zone code (e.g., "DE", "CH", "ES-IB-MA") """ - # Get run phases data and overall start/end times phases, start_time_us, end_time_us = _get_run_data_and_phases(run_id) - if not start_time_us or not end_time_us: - raise ValueError(f"Run {run_id} does not have valid measurement times") start_time_iso = _microseconds_to_iso8601(start_time_us) end_time_iso = _microseconds_to_iso8601(end_time_us) - # Fetch carbon intensity data + # Fetch dynamic carbon intensity data for the relevant time frame carbon_client = CarbonIntensityClient() carbon_intensity_data = carbon_client.get_carbon_intensity_history( location, start_time_iso, end_time_iso @@ -244,18 +242,16 @@ def store_dynamic_carbon_intensity(run_id, location): print(f"Retrieved {len(carbon_intensity_data)} API data points for {location}: " f"range {min(values):.1f}-{max(values):.1f} gCO2e/kWh") - # Create measurement_metric entry for dynamic carbon intensity metric_name = 'grid_carbon_intensity_dynamic' detail_name = location unit = 'gCO2e/kWh' - # Calculate sampling rate based on actual data intervals from API format - sampling_rate_configured = _calculate_sampling_rate_from_data(carbon_intensity_data) + sampling_rate = _calculate_sampling_rate_from_data(carbon_intensity_data) measurement_metric_id = _create_measurement_metric( - run_id, metric_name, detail_name, unit, sampling_rate_configured + run_id, metric_name, detail_name, unit, sampling_rate ) - # Convert API data to format expected by _get_carbon_intensity_at_timestamp + # Convert API data to format we need within GMT carbon_data_for_lookup = [] for data_point in carbon_intensity_data: # Convert ISO timestamp to microseconds @@ -271,7 +267,8 @@ def store_dynamic_carbon_intensity(run_id, location): # Sort by timestamp for consistent processing carbon_data_for_lookup.sort(key=lambda x: x['timestamp_us']) - # Prepare measurement values for bulk insert + # Calculate 
base timestamps, for which we definitely need a value: + # start/end of run + middle of each phase timestamps = _get_base_timestamps(phases, start_time_us, end_time_us) # Add any intermediate API data points that fall within measurement timeframe @@ -280,11 +277,16 @@ def store_dynamic_carbon_intensity(run_id, location): if start_time_us <= timestamp_us <= end_time_us: timestamps.add(timestamp_us) - # Convert timestamps to values using nearest data point logic value_timestamp_pairs = [] - for timestamp in timestamps: - carbon_intensity = _get_carbon_intensity_at_timestamp(timestamp, carbon_data_for_lookup) - value_timestamp_pairs.append((carbon_intensity, timestamp)) + if len(carbon_data_for_lookup) == 1: + # If only one data point, use it for all timestamps + carbon_intensity = carbon_data_for_lookup[0]['carbon_intensity'] + value_timestamp_pairs = [(carbon_intensity, timestamp) for timestamp in timestamps] + else: + # Convert timestamps to values using nearest data point logic + for timestamp in timestamps: + carbon_intensity = _get_carbon_intensity_at_timestamp(timestamp, carbon_data_for_lookup) + value_timestamp_pairs.append((carbon_intensity, timestamp)) # Bulk insert measurement values _bulk_insert_measurement_values(measurement_metric_id, value_timestamp_pairs) @@ -326,22 +328,22 @@ def _calculate_sampling_rate_from_data(carbon_intensity_data: List[Dict[str, Any carbon_intensity_data: List of carbon intensity data points with 'time' field (API format) Returns: - Sampling rate in milliseconds, or 300000 (5 minutes) as fallback + Sampling rate in milliseconds, or 0 as fallback Example: For data with 1 hour intervals: Returns 3600000 (1 hour in milliseconds) """ if not carbon_intensity_data or len(carbon_intensity_data) < 2: - return 300000 + return 0 try: time1 = datetime.fromisoformat(carbon_intensity_data[0]['time'].replace('Z', '+00:00')) time2 = datetime.fromisoformat(carbon_intensity_data[1]['time'].replace('Z', '+00:00')) interval_seconds = abs((time2 - 
time1).total_seconds()) - sampling_rate_configured = int(interval_seconds * 1000) - return sampling_rate_configured + sampling_rate = int(interval_seconds * 1000) + return sampling_rate except (KeyError, ValueError, IndexError): - return 300000 + return 0 def _microseconds_to_iso8601(timestamp_us: int) -> str: diff --git a/tests/lib/test_carbon_intensity.py b/tests/lib/test_carbon_intensity.py index 9bab84a60..c6a8c4312 100644 --- a/tests/lib/test_carbon_intensity.py +++ b/tests/lib/test_carbon_intensity.py @@ -26,10 +26,12 @@ def test_config_based_initialization(self, mock_global_config): # Test that client reads URL from config when not provided mock_config = Mock() mock_config.config = { - 'elephant': { - 'protocol': 'https', - 'host': 'example.com', - 'port': 9000 + 'dynamic_grid_carbon_intensity': { + 'elephant': { + 'protocol': 'https', + 'host': 'example.com', + 'port': 9000 + } } } mock_global_config.return_value = mock_config @@ -78,15 +80,15 @@ def test__calculate_sampling_rate_from_data(self): # Test with empty data (should return fallback) result = _calculate_sampling_rate_from_data([]) - assert result == 300000 # 5 minutes fallback + assert result == 0 # Test with single data point (should return fallback) result = _calculate_sampling_rate_from_data([{"location": "DE", "time": "2025-09-23T10:00:00Z", "carbon_intensity": 253.0}]) - assert result == 300000 # 5 minutes fallback + assert result == 0 # Test with invalid data (should return fallback) result = _calculate_sampling_rate_from_data([{"invalid": "data"}, {"also": "invalid"}]) - assert result == 300000 # 5 minutes fallback + assert result == 0 def test__get_carbon_intensity_at_timestamp_single_point(self): # Test with single data point @@ -185,25 +187,11 @@ def test_carbon_intensity_client_invalid_response(self, mock_get): class TestStoreCarbonIntensityAsMetrics: - @pytest.fixture - def run_with_measurement_times(self): - """Fixture that creates a test run with measurement start/end times set.""" + 
def test_store_carbon_intensity_static_value(self): + # Test that static carbon intensity is stored correctly at the relevant time points run_id = Tests.insert_run() - - # Set measurement times (required for carbon intensity functions) - DB().query( - "UPDATE runs SET start_measurement = %s, end_measurement = %s WHERE id = %s", - (Tests.TEST_MEASUREMENT_START_TIME, Tests.TEST_MEASUREMENT_END_TIME, run_id) - ) - - return run_id - - def test_store_carbon_intensity_static_value(self, run_with_measurement_times): - # Test that static carbon intensity is stored when dynamic is not enabled - run_id = run_with_measurement_times static_carbon_intensity = 250.5 - # Call the function with static value store_static_carbon_intensity(run_id, static_carbon_intensity) # Verify that measurement_metrics entry was created for static carbon intensity @@ -235,13 +223,12 @@ def test_store_carbon_intensity_static_value(self, run_with_measurement_times): print(run_data) assert len(values_result) == 8 # 5 phases + 1 flow + start of run + end of run - # All values should be the same static value for result in values_result: assert result[0] == 250 # 250.5 is converted to integer - def test_store_carbon_intensity_dynamic_grid_enabled(self, run_with_measurement_times): + def test_store_carbon_intensity_dynamic_grid_enabled(self): # Test that dynamic grid carbon intensity is stored when enabled in measurement config - run_id = run_with_measurement_times + run_id = Tests.insert_run() # Mock the carbon intensity API call # Use timestamps that align with the measurement timeframe (2024-12-24T13:33:10Z to 2024-12-24T13:41:00Z) @@ -286,8 +273,8 @@ def test_store_carbon_intensity_dynamic_grid_enabled(self, run_with_measurement_ for value, _ in values_result: assert isinstance(value, int) - def test_store_carbon_intensity_dynamic_single_data_point(self, run_with_measurement_times): - run_id = run_with_measurement_times + def test_store_carbon_intensity_dynamic_single_data_point(self): + run_id = 
Tests.insert_run() # Mock the carbon intensity API call with only one data point within timeframe with patch('lib.carbon_intensity.CarbonIntensityClient') as mock_client_class: @@ -328,9 +315,9 @@ def test_store_carbon_intensity_dynamic_single_data_point(self, run_with_measure for value, _ in values_result: assert value == 185 - def test_store_carbon_intensity_dynamic_data_outside_timeframe(self, run_with_measurement_times): + def test_store_carbon_intensity_dynamic_data_outside_timeframe(self): # Test that dynamic carbon intensity properly handles data outside measurement timeframe using extrapolation - run_id = run_with_measurement_times + run_id = Tests.insert_run() # Mock API data that is completely outside the measurement timeframe with patch('lib.carbon_intensity.CarbonIntensityClient') as mock_client_class: @@ -366,12 +353,10 @@ def test_store_carbon_intensity_dynamic_data_outside_timeframe(self, run_with_me # Should have at least 7 data points: start/end of run + middle of 5 phases # All using nearest data point logic with API data outside timeframe assert len(values_result) >= 7 - # Values should be extrapolated using nearest data point logic - def test_store_carbon_intensity_dynamic_missing_location(self, run_with_measurement_times): + def test_store_carbon_intensity_dynamic_missing_location(self): # Test error handling when dynamic method is called with None location - run_id = run_with_measurement_times + run_id = Tests.insert_run() - # Call the function with None location - should raise an exception or fail gracefully - with pytest.raises(Exception): # The method should fail when location is None + with pytest.raises(Exception): store_dynamic_carbon_intensity(run_id, None) diff --git a/tests/test_functions.py b/tests/test_functions.py index dce662f30..bd8faa131 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -43,12 +43,14 @@ def insert_run(*, uri='test-uri', branch='test-branch', filename='test-filename' {"start": 
TEST_MEASUREMENT_START_TIME, "name": "Only Phase", "end": TEST_MEASUREMENT_END_TIME}, {"start": TEST_MEASUREMENT_END_TIME+1, "name": "[REMOVE]", "end": TEST_MEASUREMENT_END_TIME+2}, ] + start_measurement=TEST_MEASUREMENT_START_TIME-9 + end_measurement=TEST_MEASUREMENT_START_TIME+2 return DB().fetch_one(''' - INSERT INTO runs (uri, branch, filename, phases, user_id, machine_id) + INSERT INTO runs (uri, branch, filename, phases, user_id, machine_id, start_measurement, end_measurement) VALUES - (%s, %s, %s, %s, %s, %s) RETURNING id; - ''', params=(uri, branch, filename, json.dumps(phases), user_id, machine_id))[0] + (%s, %s, %s, %s, %s, %s, %s, %s) RETURNING id; + ''', params=(uri, branch, filename, json.dumps(phases), user_id, machine_id, start_measurement, end_measurement))[0] def import_single_cpu_energy_measurement(run_id): From 92fc2d9f63f3f74fd818fb364a47624c2bd361cf Mon Sep 17 00:00:00 2001 From: David Kopp Date: Thu, 25 Sep 2025 20:37:42 +0200 Subject: [PATCH 19/27] Override sampling rate with custom one --- lib/phase_stats.py | 3 ++- tests/lib/test_phase_stats.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/phase_stats.py b/lib/phase_stats.py index 1acd9d637..afa302531 100644 --- a/lib/phase_stats.py +++ b/lib/phase_stats.py @@ -265,7 +265,8 @@ def build_and_store_phase_stats(run_id, sci=None): machine_power_current_phase = power_avg elif "grid_carbon_intensity" in metric: - csv_buffer.write(generate_csv_line(run_id, metric, detail_name, f"{idx:03}_{phase['name']}", avg_value, 'MEAN', max_value, min_value, sampling_rate_avg, sampling_rate_max, sampling_rate_95p, unit)) + # For the average sampling rate use the configured one, for 95p and max we don't use a value + csv_buffer.write(generate_csv_line(run_id, metric, detail_name, f"{idx:03}_{phase['name']}", avg_value, 'MEAN', max_value, min_value, sampling_rate_configured, None, None, unit)) phase_grid_carbon_intensity = avg_value else: # Default diff --git 
a/tests/lib/test_phase_stats.py b/tests/lib/test_phase_stats.py index 53fb5d7d0..93827c68f 100644 --- a/tests/lib/test_phase_stats.py +++ b/tests/lib/test_phase_stats.py @@ -389,9 +389,9 @@ def test_phase_stats_dynamic_grid_carbon_intensity(): assert data[1]['unit'] == 'gCO2e/kWh' assert data[1]['value'] == 270 assert data[1]['type'] == 'MEAN' - assert data[1]['sampling_rate_avg'] == 60000000, 'AVG sampling rate not in expected range' - assert data[1]['sampling_rate_max'] == 60000000, 'MAX sampling rate not in expected range' - assert data[1]['sampling_rate_95p'] == 60000000, '95p sampling rate not in expected range' + assert data[1]['sampling_rate_avg'] == 300000, 'Configured sampling rate should be used' + assert data[1]['sampling_rate_max'] is None, 'MAX sampling rate should not be set' + assert data[1]['sampling_rate_95p'] is None, '95p sampling rate should not be set' def test_sci_calculation(): run_id = Tests.insert_run() From 623f989192d47d09c9360980e25532058947729b Mon Sep 17 00:00:00 2001 From: David Kopp Date: Thu, 25 Sep 2025 21:04:47 +0200 Subject: [PATCH 20/27] Cleanup --- lib/phase_stats.py | 13 +++---------- lib/scenario_runner.py | 5 ++--- .../dynamic_grid_carbon_intensity_single.log | 1 - 3 files changed, 5 insertions(+), 14 deletions(-) delete mode 100644 tests/data/metrics/dynamic_grid_carbon_intensity_single.log diff --git a/lib/phase_stats.py b/lib/phase_stats.py index afa302531..4d30b3919 100644 --- a/lib/phase_stats.py +++ b/lib/phase_stats.py @@ -74,7 +74,6 @@ def build_and_store_phase_stats(run_id, sci=None): software_carbon_intensity_global = {} - query = """ SELECT id, metric, unit, detail_name, sampling_rate_configured FROM measurement_metrics @@ -275,18 +274,12 @@ def build_and_store_phase_stats(run_id, sci=None): csv_buffer.write(generate_csv_line(run_id, metric, detail_name, f"{idx:03}_{phase['name']}", value_sum, 'TOTAL', max_value, min_value, sampling_rate_avg, sampling_rate_max, sampling_rate_95p, unit)) # after going through 
detail metrics, create cumulated ones - phase_network_bytes_total = 0 network_io_in_kWh = None if network_bytes_total: - phase_network_bytes_total = sum(network_bytes_total) - - # Check if we can calculate network energy - has_network_factor = sci.get('N', None) is not None - - if has_network_factor: + if sci.get('N', None) is not None: # build the network energy by using a formula: https://www.green-coding.io/co2-formulas/ # pylint: disable=invalid-name - network_io_in_kWh = Decimal(phase_network_bytes_total) / 1_000_000_000 * Decimal(sci['N']) + network_io_in_kWh = Decimal(sum(network_bytes_total)) / 1_000_000_000 * Decimal(sci['N']) network_io_in_uJ = network_io_in_kWh * 3_600_000_000_000 csv_buffer.write(generate_csv_line(run_id, 'network_energy_formula_global', '[FORMULA]', f"{idx:03}_{phase['name']}", network_io_in_uJ, 'TOTAL', None, None, None, None, None, 'uJ')) @@ -336,7 +329,7 @@ def build_and_store_phase_stats(run_id, sci=None): value_carbon_ug = (value_sum / 3_600_000) * phase_grid_carbon_intensity csv_buffer.write(generate_csv_line(run_id, f"{metric.replace('_energy_', '_carbon_')}", detail_name, phase_full_name, value_carbon_ug, 'TOTAL', None, None, sampling_rate_avg, sampling_rate_max, sampling_rate_95p, 'ug')) - if '[' not in phase['name'] and metric.endswith('_machine'): # only for runtime sub phases to not double count + if '[' not in phase['name'] and metric.endswith('_machine'): # only for runtime sub phases to not double count ... needs refactor ... 
see comment at beginning of file software_carbon_intensity_global['machine_carbon_ug'] = software_carbon_intensity_global.get('machine_carbon_ug', 0) + value_carbon_ug # Calculate network carbon emissions for this phase diff --git a/lib/scenario_runner.py b/lib/scenario_runner.py index bc3d41c39..73ce02a2a 100644 --- a/lib/scenario_runner.py +++ b/lib/scenario_runner.py @@ -2075,9 +2075,6 @@ def _process_grid_carbon_intensity(self): print(TerminalColors.HEADER, '\nProcess grid carbon intensity values', TerminalColors.ENDC) - # pylint: disable=import-outside-toplevel - from lib.carbon_intensity import store_static_carbon_intensity, store_dynamic_carbon_intensity - config = GlobalConfig().config dynamic_grid_carbon_intensity = config.get('dynamic_grid_carbon_intensity', None) if dynamic_grid_carbon_intensity: @@ -2086,9 +2083,11 @@ def _process_grid_carbon_intensity(self): if location is None: raise ValueError("Dynamic grid carbon intensity is enabled, but location configuration is missing! 
Ensure it is set in your config.yml.") + from lib.carbon_intensity import store_dynamic_carbon_intensity # pylint: disable=import-outside-toplevel store_dynamic_carbon_intensity(self._run_id, location) elif self._sci['I']: # Store static carbon intensity from config as constant time series + from lib.carbon_intensity import store_static_carbon_intensity # pylint: disable=import-outside-toplevel store_static_carbon_intensity(self._run_id, self._sci['I']) else: raise ValueError( diff --git a/tests/data/metrics/dynamic_grid_carbon_intensity_single.log b/tests/data/metrics/dynamic_grid_carbon_intensity_single.log deleted file mode 100644 index b0905352d..000000000 --- a/tests/data/metrics/dynamic_grid_carbon_intensity_single.log +++ /dev/null @@ -1 +0,0 @@ -1735047190000005 300 DE From 94b8384ea560b4b3d9edec21cdaa4a932388f821 Mon Sep 17 00:00:00 2001 From: David Kopp Date: Fri, 26 Sep 2025 08:55:29 +0200 Subject: [PATCH 21/27] Fix smoke test test_db_rows_are_written_and_presented --- tests/smoke_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/smoke_test.py b/tests/smoke_test.py index d75579e34..d872f60cf 100644 --- a/tests/smoke_test.py +++ b/tests/smoke_test.py @@ -61,6 +61,7 @@ def test_db_rows_are_written_and_presented(): # for every metric provider, check that there were rows written in the DB with info for that provider # also check (in the same test, to save on a DB call) that the output to STD.OUT # "Imported XXX metrics from {metric_provider}" displays the same count as in the DB + # The grid carbon intensity metrics are not provided by a classic metric provider and are therefore excluded from this test run_id = utils.get_run_data(RUN_NAME)['id'] assert(run_id is not None and run_id != '') @@ -69,7 +70,7 @@ def test_db_rows_are_written_and_presented(): FROM measurement_metrics as mm JOIN measurement_values as mv ON mm.id = mv.measurement_metric_id - WHERE mm.run_id = %s + WHERE mm.run_id = %s AND mm.metric NOT LIKE 
'grid_carbon_intensity%%' GROUP BY mm.metric """ From e00b70e84b1030c3c1f818e0b3da19dfde28e85a Mon Sep 17 00:00:00 2001 From: David Kopp Date: Fri, 26 Sep 2025 08:57:31 +0200 Subject: [PATCH 22/27] Only process grid carbon intensity if phase stats are calculated --- lib/scenario_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/scenario_runner.py b/lib/scenario_runner.py index 73ce02a2a..a82a54593 100644 --- a/lib/scenario_runner.py +++ b/lib/scenario_runner.py @@ -2070,7 +2070,7 @@ def _patch_phases(self): self.__phases['[RUNTIME]']['end'] = int(time.time_ns() / 1_000) def _process_grid_carbon_intensity(self): - if not self._run_id or self._dev_no_save: + if not self._run_id or self._dev_no_phase_stats or self._dev_no_save: return print(TerminalColors.HEADER, '\nProcess grid carbon intensity values', TerminalColors.ENDC) From 0eddcf229357ac342203bf29f18492585806bf2d Mon Sep 17 00:00:00 2001 From: David Kopp Date: Sat, 27 Sep 2025 14:43:45 +0200 Subject: [PATCH 23/27] Remove unnecessary docstrings and comments --- lib/carbon_intensity.py | 135 +++------------------------------------- tests/test_functions.py | 11 ---- 2 files changed, 9 insertions(+), 137 deletions(-) diff --git a/lib/carbon_intensity.py b/lib/carbon_intensity.py index f39934eb4..49f4a5646 100644 --- a/lib/carbon_intensity.py +++ b/lib/carbon_intensity.py @@ -29,26 +29,11 @@ def __init__(self, base_url: str = None): self.base_url = base_url.rstrip('/') def get_carbon_intensity_history(self, location: str, start_time: str, end_time: str) -> List[Dict[str, Any]]: - """ - Fetch carbon intensity history from Elephant service. - - Args: - location: Location code (e.g., "DE", "ES-IB-MA") - start_time: Start time in ISO 8601 format (e.g., "2025-09-22T10:50:00Z") - end_time: End time in ISO 8601 format (e.g., "2025-09-22T10:55:00Z") - - Returns: - List of carbon intensity data points: - [{"location": "DE", "time": "2025-09-22T10:00:00Z", "carbon_intensity": 185.0}, ...] 
- - Raises: - Exception: On any service error, network issue, or invalid response - """ url = f"{self.base_url}/carbon-intensity/history" params = { - 'location': location, - 'startTime': start_time, - 'endTime': end_time, + 'location': location, # Location code (e.g., "DE", "ES-IB-MA") + 'startTime': start_time, # ISO 8601 format (e.g., "2025-09-22T10:50:00Z") + 'endTime': end_time, # ISO 8601 format (e.g., "2025-09-22T10:55:00Z") 'interpolate': 'true' # we also want to get data points that are adjacent to the requested time range, to be ensure we always get at least one data point } @@ -68,18 +53,6 @@ def get_carbon_intensity_history(self, location: str, start_time: str, end_time: def _get_run_data_and_phases(run_id): - """ - Fetch run data including phases and measurement times. - - Args: - run_id: UUID of the run - - Returns: - tuple: (phases, start_time_us, end_time_us) - - Raises: - ValueError: If run data is invalid or missing - """ run_query = """ SELECT phases, start_measurement, end_measurement FROM runs @@ -94,40 +67,14 @@ def _get_run_data_and_phases(run_id): def _create_measurement_metric(run_id, metric_name, detail_name, unit, sampling_rate): - """ - Create a measurement metric entry in the database. 
- - Args: - run_id: UUID of the run - metric_name: Name of the metric - detail_name: Detail/source name for the metric - unit: Unit of measurement - sampling_rate: Configured sampling rate - - Returns: - int: measurement_metric_id - """ return DB().fetch_one(''' INSERT INTO measurement_metrics (run_id, metric, detail_name, unit, sampling_rate_configured) VALUES (%s, %s, %s, %s, %s) RETURNING id ''', params=(run_id, metric_name, detail_name, unit, sampling_rate))[0] - +# Defines for which timestamps a carbon intensity value is needed: run start/end & phase middles def _get_base_timestamps(phases, start_time_us, end_time_us): - """ - Defines for which timestamps a carbon intensity value is needed: - - run start/end - - phase middles - - Args: - phases: List of phase dictionaries - start_time_us: Run start time in microseconds - end_time_us: Run end time in microseconds - - Returns: - set: Set of timestamps - """ timestamps = set() # Add overall run start and end times @@ -144,13 +91,6 @@ def _get_base_timestamps(phases, start_time_us, end_time_us): def _bulk_insert_measurement_values(measurement_metric_id, value_timestamp_pairs): - """ - Efficiently insert measurement values using the most appropriate method. 
- - Args: - measurement_metric_id: ID of the measurement metric - value_timestamp_pairs: List of (value, timestamp) tuples - """ if not value_timestamp_pairs: return @@ -163,8 +103,8 @@ def _bulk_insert_measurement_values(measurement_metric_id, value_timestamp_pairs placeholders = ', '.join(['(%s, %s, %s)'] * len(value_timestamp_pairs)) query = f"INSERT INTO measurement_values (measurement_metric_id, value, time) VALUES {placeholders}" DB().query(query, tuple(values_to_insert)) + # For larger datasets, use COPY FROM for better performance else: - # For larger datasets, use COPY FROM for better performance values_data = [(measurement_metric_id, int(value), timestamp) for value, timestamp in value_timestamp_pairs] csv_data = '\n'.join([f"{row[0]},{row[1]},{row[2]}" for row in values_data]) @@ -179,15 +119,6 @@ def _bulk_insert_measurement_values(measurement_metric_id, value_timestamp_pairs def store_static_carbon_intensity(run_id, static_value): - """ - Store static carbon intensity value as a constant time series at multiple timestamps: - - Start and end of measurement run to ensure graph looks good in frontend - - Middle of each phase to enable carbon metrics calculation per phase - - Args: - run_id: UUID of the run - static_value: Static carbon intensity value from config (gCO2e/kWh) - """ phases, start_time_us, end_time_us = _get_run_data_and_phases(run_id) metric_name = 'grid_carbon_intensity_static' @@ -199,35 +130,24 @@ def store_static_carbon_intensity(run_id, static_value): run_id, metric_name, detail_name, unit, sampling_rate ) - # Convert static value to integer carbon_intensity_value = int(float(static_value)) - # Calculate timestamps: start/end of run + middle of each phase + # Calculate base timestamps, for which we definitely need a value: + # start/end of run + middle of each phase timestamps = _get_base_timestamps(phases, start_time_us, end_time_us) - # Prepare value-timestamp pairs for bulk insert value_timestamp_pairs = [(carbon_intensity_value, 
timestamp) for timestamp in timestamps] - # Insert static value for all timestamps _bulk_insert_measurement_values(measurement_metric_id, value_timestamp_pairs) print(f"Stored static carbon intensity value {static_value} gCO2e/kWh at {len(timestamps)} timestamps (run start/end + phase middles)") def store_dynamic_carbon_intensity(run_id, location): - """ - Store dynamic carbon intensity data from API as time series, ensuring coverage per phase. - Uses nearest data point logic for timestamps where API data may be sparse. - - Args: - run_id: UUID of the run - location: Grid zone code (e.g., "DE", "CH", "ES-IB-MA") - """ phases, start_time_us, end_time_us = _get_run_data_and_phases(run_id) start_time_iso = _microseconds_to_iso8601(start_time_us) end_time_iso = _microseconds_to_iso8601(end_time_us) - # Fetch dynamic carbon intensity data for the relevant time frame carbon_client = CarbonIntensityClient() carbon_intensity_data = carbon_client.get_carbon_intensity_history( location, start_time_iso, end_time_iso @@ -264,7 +184,6 @@ def store_dynamic_carbon_intensity(run_id, location): 'carbon_intensity': float(data_point['carbon_intensity']) }) - # Sort by timestamp for consistent processing carbon_data_for_lookup.sort(key=lambda x: x['timestamp_us']) # Calculate base timestamps, for which we definitely need a value: @@ -288,30 +207,15 @@ def store_dynamic_carbon_intensity(run_id, location): carbon_intensity = _get_carbon_intensity_at_timestamp(timestamp, carbon_data_for_lookup) value_timestamp_pairs.append((carbon_intensity, timestamp)) - # Bulk insert measurement values _bulk_insert_measurement_values(measurement_metric_id, value_timestamp_pairs) unique_values = len(set(int(value) for value, _ in value_timestamp_pairs)) print(f"Stored dynamic carbon intensity for location {location}: {len(value_timestamp_pairs)} timestamps, {unique_values} unique values") +# Find the data point with timestamp closest to target timestamp. +# Interpolation is not used on purpose here. 
def _get_carbon_intensity_at_timestamp(timestamp_us: int, carbon_data: List[Dict[str, Any]]) -> float: - """ - Get carbon intensity value for a specific timestamp using nearest data point. - - This function finds the carbon intensity at a given timestamp by: - - Finding the data point with timestamp closest to the target timestamp - - Returning the carbon intensity of that nearest data point - - Args: - timestamp_us: Target timestamp in microseconds - carbon_data: List of carbon intensity data points with 'timestamp_us' and 'carbon_intensity' fields - (guaranteed to be non-empty by calling functions) - - Returns: - Carbon intensity value in gCO2e/kWh - """ - # Find the data point with timestamp closest to target timestamp closest_point = min( carbon_data, key=lambda point: abs(point['timestamp_us'] - timestamp_us) @@ -321,18 +225,6 @@ def _get_carbon_intensity_at_timestamp(timestamp_us: int, carbon_data: List[Dict def _calculate_sampling_rate_from_data(carbon_intensity_data: List[Dict[str, Any]]) -> int: - """ - Calculate sampling rate in milliseconds based on time intervals in carbon intensity data. - - Args: - carbon_intensity_data: List of carbon intensity data points with 'time' field (API format) - - Returns: - Sampling rate in milliseconds, or 0 as fallback - - Example: - For data with 1 hour intervals: Returns 3600000 (1 hour in milliseconds) - """ if not carbon_intensity_data or len(carbon_intensity_data) < 2: return 0 @@ -347,15 +239,6 @@ def _calculate_sampling_rate_from_data(carbon_intensity_data: List[Dict[str, Any def _microseconds_to_iso8601(timestamp_us: int) -> str: - """ - Convert microsecond timestamp to ISO 8601 format. 
- - Args: - timestamp_us: Timestamp in microseconds since epoch - - Returns: - ISO 8601 formatted timestamp string (e.g., "2025-09-22T10:50:00Z") - """ timestamp_seconds = timestamp_us / 1_000_000 dt = datetime.fromtimestamp(timestamp_seconds, timezone.utc) return dt.strftime('%Y-%m-%dT%H:%M:%SZ') diff --git a/tests/test_functions.py b/tests/test_functions.py index bd8faa131..f932dc3d7 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -224,17 +224,6 @@ def import_demo_data_ee(): def import_carbon_intensity_metrics(run_id, static_value=None): - """ - Import sample carbon intensity data for a test run. - Creates multiple measurement values aligned with phase timestamps. - If no static_value is given, dynamic values are inserted. - - Args: - run_id: The ID of the test run - static_value: Optional static carbon intensity value (gCO2e/kWh). - If provided, this value will be used for all timestamps - instead of the dynamic values. - """ metric_name = 'grid_carbon_intensity_dynamic' detail_name = 'DE' unit = 'gCO2e/kWh' From 17be841f70bcf81f4326de9e32ead56c45ba4062 Mon Sep 17 00:00:00 2001 From: David Kopp Date: Sat, 27 Sep 2025 15:06:09 +0200 Subject: [PATCH 24/27] Refactor import test function for carbon intensity metrics --- tests/lib/test_phase_stats.py | 14 +++--- tests/test_functions.py | 89 +++++++++++++++++++---------------- 2 files changed, 55 insertions(+), 48 deletions(-) diff --git a/tests/lib/test_phase_stats.py b/tests/lib/test_phase_stats.py index 93827c68f..8720ae86f 100644 --- a/tests/lib/test_phase_stats.py +++ b/tests/lib/test_phase_stats.py @@ -130,7 +130,7 @@ def test_phase_embodied_and_operational_carbon_using_static_intensity(): Tests.import_machine_energy(run_id) sci = {"I":436,"R":0,"EL":4,"RS":1,"TE":181000,"R_d":"page request"} - Tests.import_carbon_intensity_metrics(run_id, sci['I']) + Tests.import_static_carbon_intensity_metrics(run_id, sci['I']) build_and_store_phase_stats(run_id, sci=sci) @@ -169,7 +169,7 @@ def 
test_phase_embodied_and_operational_carbon_using_dynamic_intensity(): Tests.import_machine_energy(run_id) sci = {"R":0,"EL":4,"RS":1,"TE":181000,"R_d":"page request"} # 'I' was removed, because it is not relevant here using dynamic values - grid_carbon_intensity = Tests.import_carbon_intensity_metrics(run_id) + grid_carbon_intensity = Tests.import_dynamic_carbon_intensity_metrics(run_id) build_and_store_phase_stats(run_id, sci=sci) @@ -329,7 +329,7 @@ def test_phase_stats_network_data(): 'N': 0.001, # Network energy intensity (kWh/GB) 'I': 500, # Carbon intensity (gCO2e/kWh) } - Tests.import_carbon_intensity_metrics(run_id, test_sci_config['I']) + Tests.import_static_carbon_intensity_metrics(run_id, test_sci_config['I']) build_and_store_phase_stats(run_id, sci=test_sci_config) @@ -377,7 +377,7 @@ def test_phase_stats_network_data(): def test_phase_stats_dynamic_grid_carbon_intensity(): run_id = Tests.insert_run() - Tests.import_carbon_intensity_metrics(run_id) + Tests.import_dynamic_carbon_intensity_metrics(run_id) build_and_store_phase_stats(run_id) @@ -389,7 +389,7 @@ def test_phase_stats_dynamic_grid_carbon_intensity(): assert data[1]['unit'] == 'gCO2e/kWh' assert data[1]['value'] == 270 assert data[1]['type'] == 'MEAN' - assert data[1]['sampling_rate_avg'] == 300000, 'Configured sampling rate should be used' + assert data[1]['sampling_rate_avg'] == 60000000, 'Configured sampling rate should be used' assert data[1]['sampling_rate_max'] is None, 'MAX sampling rate should not be set' assert data[1]['sampling_rate_95p'] is None, '95p sampling rate should not be set' @@ -408,7 +408,7 @@ def test_sci_calculation(): 'R': 10, # Functional unit count (10 runs) 'R_d': 'test runs' # Functional unit description } - Tests.import_carbon_intensity_metrics(run_id, test_sci_config['I']) + Tests.import_static_carbon_intensity_metrics(run_id, test_sci_config['I']) build_and_store_phase_stats(run_id, sci=test_sci_config) @@ -483,5 +483,5 @@ def test_sci_multi_steps_run(): data = 
DB().fetch_all("SELECT value, unit FROM phase_stats WHERE phase = %s AND run_id = %s AND metric = 'software_carbon_intensity_global' ", params=('004_[RUNTIME]', run_id), fetch_mode='dict') assert len(data) == 1 - assert 8 < data[0]['value'] < 20 + assert 5 < data[0]['value'] < 20 assert data[0]['unit'] == 'ugCO2e/Cool run' diff --git a/tests/test_functions.py b/tests/test_functions.py index f932dc3d7..8c8023a8a 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -24,6 +24,23 @@ TEST_MEASUREMENT_DURATION_S = TEST_MEASUREMENT_DURATION / 1_000_000 TEST_MEASUREMENT_DURATION_H = TEST_MEASUREMENT_DURATION_S/60/60 +PHASE_TIMESTAMPS = [ + TEST_MEASUREMENT_START_TIME-8, # [BASELINE] start + TEST_MEASUREMENT_START_TIME-7, # [BASELINE] end + TEST_MEASUREMENT_START_TIME-6, # [INSTALL] start + TEST_MEASUREMENT_START_TIME-5, # [INSTALL] end + TEST_MEASUREMENT_START_TIME-4, # [BOOT] start + TEST_MEASUREMENT_START_TIME-3, # [BOOT] end + TEST_MEASUREMENT_START_TIME-2, # [IDLE] start + TEST_MEASUREMENT_START_TIME-1, # [IDLE] end + TEST_MEASUREMENT_START_TIME, # [RUNTIME]/Only Phase start + TEST_MEASUREMENT_START_TIME + 60000000, # Mid-runtime - 1 minute + TEST_MEASUREMENT_START_TIME + 120000000, # Mid-runtime - 2 minutes + TEST_MEASUREMENT_END_TIME, # [RUNTIME]/Only Phase end + TEST_MEASUREMENT_END_TIME+1, # [REMOVE] start + TEST_MEASUREMENT_END_TIME+2, # [REMOVE] end +] + def shorten_sleep_times(duration_in_s): DB().query("UPDATE users SET capabilities = jsonb_set(capabilities,'{measurement,pre_test_sleep}',%s,false)", params=(str(duration_in_s), )) DB().query("UPDATE users SET capabilities = jsonb_set(capabilities,'{measurement,baseline_duration}',%s,false)", params=(str(duration_in_s), )) @@ -222,44 +239,16 @@ def import_demo_data_ee(): reset_db() raise RuntimeError('Import of Demo data into DB failed', ps.stderr) - -def import_carbon_intensity_metrics(run_id, static_value=None): - metric_name = 'grid_carbon_intensity_dynamic' - detail_name = 'DE' - unit = 
'gCO2e/kWh' - sampling_rate_configured = 300000 # 5 minutes in milliseconds - - measurement_metric_id = DB().fetch_one(''' - INSERT INTO measurement_metrics (run_id, metric, detail_name, unit, sampling_rate_configured) - VALUES (%s, %s, %s, %s, %s) - RETURNING id - ''', params=(run_id, metric_name, detail_name, unit, sampling_rate_configured))[0] - - values_data = [] - - # Phase timestamps from insert_run function: - phase_timestamps = [ - TEST_MEASUREMENT_START_TIME-8, # [BASELINE] start - TEST_MEASUREMENT_START_TIME-7, # [BASELINE] end - TEST_MEASUREMENT_START_TIME-6, # [INSTALL] start - TEST_MEASUREMENT_START_TIME-5, # [INSTALL] end - TEST_MEASUREMENT_START_TIME-4, # [BOOT] start - TEST_MEASUREMENT_START_TIME-3, # [BOOT] end - TEST_MEASUREMENT_START_TIME-2, # [IDLE] start - TEST_MEASUREMENT_START_TIME-1, # [IDLE] end - TEST_MEASUREMENT_START_TIME, # [RUNTIME]/Only Phase start - TEST_MEASUREMENT_START_TIME + 60000000, # Mid-runtime - 1 minute - TEST_MEASUREMENT_START_TIME + 120000000, # Mid-runtime - 2 minutes - TEST_MEASUREMENT_END_TIME, # [RUNTIME]/Only Phase end - TEST_MEASUREMENT_END_TIME+1, # [REMOVE] start - TEST_MEASUREMENT_END_TIME+2, # [REMOVE] end - ] - - # Use static value if provided, otherwise use dynamic values +def _import_carbon_intensity_metrics(run_id, static_value=None): if static_value is not None: - carbon_intensity_values = [static_value] * len(phase_timestamps) - mean_runtime_value = static_value + metric_name = 'grid_carbon_intensity_static' + sampling_rate_configured = 0 + carbon_intensity_values = [static_value] * len(PHASE_TIMESTAMPS) + metric_type = 'static' + avg_carbon_intensity_during_runtime = static_value else: + metric_name = 'grid_carbon_intensity_dynamic' + sampling_rate_configured = 60000000 # 1 minute in microseconds carbon_intensity_values = [ 180, # 180.0 gCO2e/kWh (baseline - low demand) 175, # 175.0 gCO2e/kWh
(remove start) 180, # 180.0 gCO2e/kWh (remove end - back to baseline) ] - mean_runtime_value = (carbon_intensity_values[9] + carbon_intensity_values[10]) / 2 + metric_type = 'dynamic' + avg_carbon_intensity_during_runtime = (carbon_intensity_values[9] + carbon_intensity_values[10]) / 2 + + detail_name = 'DE' + unit = 'gCO2e/kWh' - for timestamp, value in zip(phase_timestamps, carbon_intensity_values): + measurement_metric_id = DB().fetch_one(''' + INSERT INTO measurement_metrics (run_id, metric, detail_name, unit, sampling_rate_configured) + VALUES (%s, %s, %s, %s, %s) + RETURNING id + ''', params=(run_id, metric_name, detail_name, unit, sampling_rate_configured))[0] + + values_data = [] + for timestamp, value in zip(PHASE_TIMESTAMPS, carbon_intensity_values): values_data.append((measurement_metric_id, value, timestamp)) - # Bulk insert measurement values using copy_from if values_data: csv_data = '\n'.join([f"{row[0]},{row[1]},{row[2]}" for row in values_data]) f = StringIO(csv_data) @@ -293,9 +292,17 @@ def import_carbon_intensity_metrics(run_id, static_value=None): ) f.close() - print(f"Imported {len(values_data)} dynamic carbon intensity data points for run {run_id}") + print(f"Imported {len(values_data)} {metric_type} carbon intensity data points for run {run_id}") + + return avg_carbon_intensity_during_runtime + +def import_static_carbon_intensity_metrics(run_id, static_value): + if static_value is None: + raise ValueError('Parameter "static_value" is missing!') + return _import_carbon_intensity_metrics(run_id, static_value) - return mean_runtime_value +def import_dynamic_carbon_intensity_metrics(run_id): + return _import_carbon_intensity_metrics(run_id) def assertion_info(expected, actual): return f"Expected: {expected}, Actual: {actual}" From 6f3e1b225190be3024a36a3618bc7160a30ffeab Mon Sep 17 00:00:00 2001 From: David Kopp Date: Sat, 27 Sep 2025 15:16:37 +0200 Subject: [PATCH 25/27] Round carbon intensity value --- lib/carbon_intensity.py | 8 +++----- 
tests/lib/test_carbon_intensity.py | 4 ++-- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/lib/carbon_intensity.py b/lib/carbon_intensity.py index 49f4a5646..0c52b1547 100644 --- a/lib/carbon_intensity.py +++ b/lib/carbon_intensity.py @@ -98,14 +98,14 @@ def _bulk_insert_measurement_values(measurement_metric_id, value_timestamp_pairs if len(value_timestamp_pairs) <= 10: values_to_insert = [] for value, timestamp in value_timestamp_pairs: - values_to_insert.extend([measurement_metric_id, int(value), timestamp]) + values_to_insert.extend([measurement_metric_id, round(value), timestamp]) placeholders = ', '.join(['(%s, %s, %s)'] * len(value_timestamp_pairs)) query = f"INSERT INTO measurement_values (measurement_metric_id, value, time) VALUES {placeholders}" DB().query(query, tuple(values_to_insert)) # For larger datasets, use COPY FROM for better performance else: - values_data = [(measurement_metric_id, int(value), timestamp) + values_data = [(measurement_metric_id, round(value), timestamp) for value, timestamp in value_timestamp_pairs] csv_data = '\n'.join([f"{row[0]},{row[1]},{row[2]}" for row in values_data]) f = StringIO(csv_data) @@ -130,13 +130,11 @@ def store_static_carbon_intensity(run_id, static_value): run_id, metric_name, detail_name, unit, sampling_rate ) - carbon_intensity_value = int(float(static_value)) - # Calculate base timestamps, for which we definitely need a value: # start/end of run + middle of each phase timestamps = _get_base_timestamps(phases, start_time_us, end_time_us) - value_timestamp_pairs = [(carbon_intensity_value, timestamp) for timestamp in timestamps] + value_timestamp_pairs = [(static_value, timestamp) for timestamp in timestamps] _bulk_insert_measurement_values(measurement_metric_id, value_timestamp_pairs) diff --git a/tests/lib/test_carbon_intensity.py b/tests/lib/test_carbon_intensity.py index c6a8c4312..d3407afff 100644 --- a/tests/lib/test_carbon_intensity.py +++ b/tests/lib/test_carbon_intensity.py @@ -190,7 
+190,7 @@ class TestStoreCarbonIntensityAsMetrics: def test_store_carbon_intensity_static_value(self): # Test that static carbon intensity is stored correctly at the relevant time points run_id = Tests.insert_run() - static_carbon_intensity = 250.5 + static_carbon_intensity = 250.6 store_static_carbon_intensity(run_id, static_carbon_intensity) @@ -224,7 +224,7 @@ def test_store_carbon_intensity_static_value(self): assert len(values_result) == 8 # 5 phases + 1 flow + start of run + end of run for result in values_result: - assert result[0] == 250 # 250.5 is converted to integer + assert result[0] == 251 # 250.6 is rounded up def test_store_carbon_intensity_dynamic_grid_enabled(self): # Test that dynamic grid carbon intensity is stored when enabled in measurement config From d9c8df6a3603a0b3044ac7fe63b2c9090876597e Mon Sep 17 00:00:00 2001 From: David Kopp Date: Sat, 27 Sep 2025 15:20:30 +0200 Subject: [PATCH 26/27] Rename metric values to have proper scope definition --- frontend/js/helpers/config.js.example | 4 ++-- lib/carbon_intensity.py | 4 ++-- tests/lib/test_carbon_intensity.py | 16 ++++++++-------- tests/lib/test_phase_stats.py | 2 +- tests/test_functions.py | 4 ++-- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/frontend/js/helpers/config.js.example b/frontend/js/helpers/config.js.example index df9976a95..cf74a0939 100644 --- a/frontend/js/helpers/config.js.example +++ b/frontend/js/helpers/config.js.example @@ -622,12 +622,12 @@ METRIC_MAPPINGS = { "source": "cgroup", "explanation": "Total data written to disk for the system via cgroup" }, - "grid_carbon_intensity_static": { + "grid_carbon_intensity_config_location": { "clean_name": "Grid Carbon Intensity", "source": "Config (Static)", "explanation": "Static grid carbon intensity used to calculate the carbon emissions" }, - "grid_carbon_intensity_dynamic": { + "grid_carbon_intensity_api_location": { "clean_name": "Grid Carbon Intensity", "source": "External Provider (Dynamic)", 
"explanation": "Dynamic grid carbon intensity during the run retrieved from external carbon intensity provider" diff --git a/lib/carbon_intensity.py b/lib/carbon_intensity.py index 0c52b1547..9c721093b 100644 --- a/lib/carbon_intensity.py +++ b/lib/carbon_intensity.py @@ -121,7 +121,7 @@ def _bulk_insert_measurement_values(measurement_metric_id, value_timestamp_pairs def store_static_carbon_intensity(run_id, static_value): phases, start_time_us, end_time_us = _get_run_data_and_phases(run_id) - metric_name = 'grid_carbon_intensity_static' + metric_name = 'grid_carbon_intensity_config_location' detail_name = '[CONFIG]' unit = 'gCO2e/kWh' sampling_rate = 0 # Static value has no sampling rate @@ -160,7 +160,7 @@ def store_dynamic_carbon_intensity(run_id, location): print(f"Retrieved {len(carbon_intensity_data)} API data points for {location}: " f"range {min(values):.1f}-{max(values):.1f} gCO2e/kWh") - metric_name = 'grid_carbon_intensity_dynamic' + metric_name = 'grid_carbon_intensity_api_location' detail_name = location unit = 'gCO2e/kWh' sampling_rate = _calculate_sampling_rate_from_data(carbon_intensity_data) diff --git a/tests/lib/test_carbon_intensity.py b/tests/lib/test_carbon_intensity.py index d3407afff..e54868a1d 100644 --- a/tests/lib/test_carbon_intensity.py +++ b/tests/lib/test_carbon_intensity.py @@ -201,7 +201,7 @@ def test_store_carbon_intensity_static_value(self): ) assert metric_result is not None - assert metric_result[0] == 'grid_carbon_intensity_static' + assert metric_result[0] == 'grid_carbon_intensity_config_location' assert metric_result[1] == '[CONFIG]' assert metric_result[2] == 'gCO2e/kWh' @@ -210,7 +210,7 @@ def test_store_carbon_intensity_static_value(self): """SELECT mv.value FROM measurement_values mv JOIN measurement_metrics mm ON mv.measurement_metric_id = mm.id - WHERE mm.run_id = %s AND mm.metric = 'grid_carbon_intensity_static'""", + WHERE mm.run_id = %s AND mm.metric = 'grid_carbon_intensity_config_location'""", (run_id,) ) @@ 
-252,7 +252,7 @@ def test_store_carbon_intensity_dynamic_grid_enabled(self): ) assert metric_result is not None - assert metric_result[0] == 'grid_carbon_intensity_dynamic' + assert metric_result[0] == 'grid_carbon_intensity_api_location' assert metric_result[1] == 'DE' assert metric_result[2] == 'gCO2e/kWh' @@ -261,7 +261,7 @@ def test_store_carbon_intensity_dynamic_grid_enabled(self): """SELECT mv.value, mv.time FROM measurement_values mv JOIN measurement_metrics mm ON mv.measurement_metric_id = mm.id - WHERE mm.run_id = %s AND mm.metric = 'grid_carbon_intensity_dynamic' + WHERE mm.run_id = %s AND mm.metric = 'grid_carbon_intensity_api_location' ORDER BY mv.time""", (run_id,) ) @@ -294,7 +294,7 @@ def test_store_carbon_intensity_dynamic_single_data_point(self): ) assert metric_result is not None - assert metric_result[0] == 'grid_carbon_intensity_dynamic' + assert metric_result[0] == 'grid_carbon_intensity_api_location' assert metric_result[1] == 'DE' assert metric_result[2] == 'gCO2e/kWh' @@ -303,7 +303,7 @@ def test_store_carbon_intensity_dynamic_single_data_point(self): """SELECT mv.value, mv.time FROM measurement_values mv JOIN measurement_metrics mm ON mv.measurement_metric_id = mm.id - WHERE mm.run_id = %s AND mm.metric = 'grid_carbon_intensity_dynamic' + WHERE mm.run_id = %s AND mm.metric = 'grid_carbon_intensity_api_location' ORDER BY mv.time""", (run_id,) ) @@ -338,14 +338,14 @@ def test_store_carbon_intensity_dynamic_data_outside_timeframe(self): ) assert metric_result is not None - assert metric_result[0] == 'grid_carbon_intensity_dynamic' + assert metric_result[0] == 'grid_carbon_intensity_api_location' # Verify that measurement values were stored using extrapolation values_result = DB().fetch_all( """SELECT mv.value, mv.time FROM measurement_values mv JOIN measurement_metrics mm ON mv.measurement_metric_id = mm.id - WHERE mm.run_id = %s AND mm.metric = 'grid_carbon_intensity_dynamic' + WHERE mm.run_id = %s AND mm.metric = 
'grid_carbon_intensity_api_location' ORDER BY mv.time""", (run_id,) ) diff --git a/tests/lib/test_phase_stats.py b/tests/lib/test_phase_stats.py index 8720ae86f..41a001e3f 100644 --- a/tests/lib/test_phase_stats.py +++ b/tests/lib/test_phase_stats.py @@ -384,7 +384,7 @@ def test_phase_stats_dynamic_grid_carbon_intensity(): data = DB().fetch_all('SELECT metric, detail_name, unit, value, type, sampling_rate_avg, sampling_rate_max, sampling_rate_95p FROM phase_stats WHERE phase = %s ', params=('004_[RUNTIME]', ), fetch_mode='dict') assert len(data) == 2 - assert data[1]['metric'] == 'grid_carbon_intensity_dynamic' + assert data[1]['metric'] == 'grid_carbon_intensity_api_location' assert data[1]['detail_name'] == 'DE' assert data[1]['unit'] == 'gCO2e/kWh' assert data[1]['value'] == 270 diff --git a/tests/test_functions.py b/tests/test_functions.py index 8c8023a8a..d8f906f49 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -241,13 +241,13 @@ def import_demo_data_ee(): def _import_carbon_intensity_metrics(run_id, static_value=None): if static_value is not None: - metric_name = 'grid_carbon_intensity_static' + metric_name = 'grid_carbon_intensity_config_location' sampling_rate_configured = 0 carbon_intensity_values = [static_value] * len(PHASE_TIMESTAMPS) metric_type = 'static' avg_carbon_intensity_during_runtime = static_value else: - metric_name = 'grid_carbon_intensity_dynamic' + metric_name = 'grid_carbon_intensity_api_location' sampling_rate_configured = 60000000 # 1 minute in milliseconds carbon_intensity_values = [ 180, # 180.0 gCO2e/kWh (baseline - low demand) From bf57e0e2cc71b462aa53a7540a6288afca4a9165 Mon Sep 17 00:00:00 2001 From: David Kopp Date: Sat, 27 Sep 2025 15:46:06 +0200 Subject: [PATCH 27/27] Improve error output --- lib/carbon_intensity.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/lib/carbon_intensity.py b/lib/carbon_intensity.py index 9c721093b..fbb38a8b7 100644 --- 
a/lib/carbon_intensity.py +++ b/lib/carbon_intensity.py @@ -38,7 +38,24 @@ def get_carbon_intensity_history(self, location: str, start_time: str, end_time: } response = requests.get(url, params=params, timeout=30) - response.raise_for_status() + + if not response.ok: + error_detail = "No additional error details available" + try: + error_json = response.json() + if isinstance(error_json, dict) and 'detail' in error_json: + error_detail = error_json['detail'] + elif isinstance(error_json, dict): + error_detail = str(error_json) + else: + error_detail = str(error_json) + except (ValueError, KeyError): + error_detail = response.text if response.text else "No response body" + + raise requests.HTTPError( + f"Carbon intensity API request failed with status {response.status_code}. " + f"Error details: {error_detail}" + ) data = response.json()