From 24c21bdd0aa0d762b4b1fdd44e5570c7d8387688 Mon Sep 17 00:00:00 2001
From: Uemit Yoldas
Date: Sat, 15 Feb 2025 21:41:19 +0100
Subject: [PATCH 01/13] feature: integrate amtviz for visualization of tuning
 jobs

---
 src/sagemaker/amtviz/__init__.py          |  17 +
 src/sagemaker/amtviz/job_metrics.py       | 185 ++++
 src/sagemaker/amtviz/visualization.py     | 800 +++++++++++++++++++++++
 src/sagemaker/tuner.py                    |  57 ++
 tests/unit/test_tuner_visualize.py        | 303 +++++++++
 tests/unit/tuner_visualize_test_utils.py  | 110 ++++
 tox.ini                                   |   1 +
 7 files changed, 1473 insertions(+)
 create mode 100644 src/sagemaker/amtviz/__init__.py
 create mode 100644 src/sagemaker/amtviz/job_metrics.py
 create mode 100644 src/sagemaker/amtviz/visualization.py
 create mode 100644 tests/unit/test_tuner_visualize.py
 create mode 100644 tests/unit/tuner_visualize_test_utils.py

diff --git a/src/sagemaker/amtviz/__init__.py b/src/sagemaker/amtviz/__init__.py
new file mode 100644
index 0000000000..9e6dd1a64b
--- /dev/null
+++ b/src/sagemaker/amtviz/__init__.py
@@ -0,0 +1,17 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: MIT-0

+# Permission is hereby granted, free of charge, to any person obtaining a copy of this
+# software and associated documentation files (the "Software"), to deal in the Software
+# without restriction, including without limitation the rights to use, copy, modify,
+# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so.

+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

+from sagemaker.amtviz.visualization import visualize_tuning_job
+__all__ = ['visualize_tuning_job']
\ No newline at end of file
diff --git a/src/sagemaker/amtviz/job_metrics.py b/src/sagemaker/amtviz/job_metrics.py
new file mode 100644
index 0000000000..6005f886f8
--- /dev/null
+++ b/src/sagemaker/amtviz/job_metrics.py
@@ -0,0 +1,185 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: MIT-0

+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so.

+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
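+
+"""Download and disk-cache CloudWatch metrics for SageMaker training jobs."""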
+
+from datetime import datetime, timedelta
+from typing import Callable, List, Optional, Tuple, Dict, Any
+import hashlib
+import os
+from pathlib import Path
+
+import pandas as pd
+import numpy as np
+import boto3
+import logging
+
+logger = logging.getLogger(__name__)
+
+cw = boto3.client("cloudwatch")
+sm = boto3.client("sagemaker")
+
+
+def disk_cache(outer: Callable) -> Callable:
+    """A decorator that implements disk-based caching for CloudWatch metrics data.
+
+    This decorator caches the output of the wrapped function to disk in JSON Lines format.
+    It creates a cache key from the MD5 hash of the function arguments and stores the data
+    in the user's home directory under .amtviz/cw_metrics_cache/.
+
+    Args:
+        outer (Callable): The function to be wrapped. Must return a pandas DataFrame
+            containing CloudWatch metrics data.
+
+    Returns:
+        Callable: A wrapper function that implements the caching logic.
+    """
+
+    def inner(*args: Any, **kwargs: Any) -> pd.DataFrame:
+        key_input = str(args) + str(kwargs)
+        # nosec b303 - Not used for cryptography, but to create a lookup key
+        key = hashlib.md5(key_input.encode("utf-8")).hexdigest()
+        cache_dir = Path.home().joinpath(".amtviz/cw_metrics_cache")
+        fn = f"{cache_dir}/req_{key}.jsonl.gz"
+        if Path(fn).exists():
+            try:
+                df = pd.read_json(fn, lines=True)
+                logger.debug("H")  # cache hit
+                df["ts"] = pd.to_datetime(df["ts"])
+                df["ts"] = df["ts"].dt.tz_localize(None)
+                df["rel_ts"] = pd.to_datetime(df["rel_ts"])  # pyright: ignore [reportIndexIssue, reportOptionalSubscript]
+                df["rel_ts"] = df["rel_ts"].dt.tz_localize(None)
+                return df
+            except KeyError:
+                # An empty file leads to an empty df, hence no df["ts"] to access
+                pass
+            # nosec b110 - it doesn't matter why we could not load the cache entry
+            except BaseException as e:
+                logger.error("Exception: %s - %s", type(e), e)
+                pass  # continue with calling the outer function
+
+        logger.debug("M")  # cache miss
+        df = outer(*args, **kwargs)
+        assert isinstance(df, pd.DataFrame), "Only caching Pandas DataFrames."
+
+        os.makedirs(cache_dir, exist_ok=True)
+        df.to_json(fn, orient="records", date_format="iso", lines=True)
+
+        return df
+
+    return inner
+
+
+def _metric_data_query_tpl(metric_name: str, dim_name: str, dim_value: str) -> Dict[str, Any]:
+    return {
+        "Id": metric_name.lower().replace(":", "_").replace("-", "_"),
+        "MetricStat": {
+            "Stat": "Average",
+            "Metric": {
+                "Namespace": "/aws/sagemaker/TrainingJobs",
+                "MetricName": metric_name,
+                "Dimensions": [
+                    {"Name": dim_name, "Value": dim_value},
+                ],
+            },
+            "Period": 60,
+        },
+        "ReturnData": True,
+    }
+
+
+def _get_metric_data(
+    queries: List[Dict[str, Any]],
+    start_time: datetime,
+    end_time: datetime
+) -> pd.DataFrame:
+    start_time = start_time - timedelta(hours=1)
+    end_time = end_time + timedelta(hours=1)
+    response = cw.get_metric_data(MetricDataQueries=queries, StartTime=start_time, EndTime=end_time)
+
+    df = pd.DataFrame()
+    if "MetricDataResults" not in response:
+        return df
+
+    for metric_data in response["MetricDataResults"]:
+        values = metric_data["Values"]
+        ts = np.array(metric_data["Timestamps"], dtype=np.datetime64)
+        labels = [metric_data["Label"]] * len(values)
+
+        df = pd.concat([df, pd.DataFrame({"value": values, "ts": ts, "label": labels})])
+
+    # We now calculate the relative time based on the first actually observed
+    # timestamp, not the potentially earlier start time that we used to scope
+    # our CW API call. The difference can stem, for example, from startup times
+    # or from waiting for Spot capacity.
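+    # Illustrative example: for observations at 10:05:00 and 10:07:30, rel_ts
+    # becomes epoch+1s and epoch+1s+150s respectively, i.e. a timestamp that
+    # encodes the elapsed time since the first observation.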
+    if not df.empty:
+        df["rel_ts"] = datetime.fromtimestamp(1) + (df["ts"] - df["ts"].min())  # pyright: ignore
+    return df
+
+
+@disk_cache
+def _collect_metrics(
+    dimensions: List[Tuple[str, str]],
+    start_time: datetime,
+    end_time: Optional[datetime]
+) -> pd.DataFrame:
+
+    df = pd.DataFrame()
+    for dim_name, dim_value in dimensions:
+        response = cw.list_metrics(
+            Namespace="/aws/sagemaker/TrainingJobs",
+            Dimensions=[
+                {"Name": dim_name, "Value": dim_value},
+            ],
+        )
+        if not response["Metrics"]:
+            continue
+        metric_names = [metric["MetricName"] for metric in response["Metrics"]]
+        if not metric_names:
+            # No metric data yet, or not any longer, because the data were aged out
+            continue
+        metric_data_queries = [
+            _metric_data_query_tpl(metric_name, dim_name, dim_value) for metric_name in metric_names
+        ]
+        df = pd.concat([df, _get_metric_data(metric_data_queries, start_time, end_time)])
+
+    return df
+
+
+def get_cw_job_metrics(
+    job_name: str,
+    start_time: Optional[datetime] = None,
+    end_time: Optional[datetime] = None
+) -> pd.DataFrame:
+    """Retrieves CloudWatch metrics for a SageMaker training job.
+
+    Args:
+        job_name (str): Name of the SageMaker training job.
+        start_time (datetime, optional): Start time for metrics collection.
+            Defaults to now - 4 hours.
+        end_time (datetime, optional): End time for metrics collection.
+            Defaults to start_time + 4 hours.
+
+    Returns:
+        pd.DataFrame: Metrics data with columns for value, timestamp, and metric name.
+            Results are cached to disk for improved performance.
+    """
+    dimensions = [
+        ("TrainingJobName", job_name),
+        ("Host", job_name + "/algo-1"),
+    ]
+    # If not given, use reasonable defaults for start and end time
+    start_time = start_time or datetime.now() - timedelta(hours=4)
+    end_time = end_time or start_time + timedelta(hours=4)
+    return _collect_metrics(dimensions, start_time, end_time)
\ No newline at end of file
diff --git a/src/sagemaker/amtviz/visualization.py b/src/sagemaker/amtviz/visualization.py
new file mode 100644
index 0000000000..377a19304d
--- /dev/null
+++ b/src/sagemaker/amtviz/visualization.py
@@ -0,0 +1,800 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: MIT-0

+# Permission is hereby granted, free of charge, to any person obtaining a copy of this
+# software and associated documentation files (the "Software"), to deal in the Software
+# without restriction, including without limitation the rights to use, copy, modify,
+# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so.

+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
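+
+"""Create interactive Altair charts for SageMaker hyperparameter tuning jobs."""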
+ + +import sagemaker +import boto3 +from typing import Union, List, Optional, Tuple, Dict, Any +import altair as alt +import pandas as pd +import numpy as np +import os +import warnings +import logging +from sagemaker.amtviz.job_metrics import get_cw_job_metrics + +warnings.filterwarnings("ignore") +logger = logging.getLogger(__name__) + +pd.set_option("display.max_rows", 500) +pd.set_option("display.max_columns", 500) +pd.set_option("display.width", 1000) +pd.set_option("display.max_colwidth", None) # Don't truncate TrainingJobName + + +alt.data_transformers.disable_max_rows() +altair_renderer = os.getenv("ALTAIR_RENDERER", "default") +logger.info(f"Setting altair renderer to {altair_renderer}.") +alt.renderers.enable(altair_renderer) + + +sm = boto3.client("sagemaker") + + +def _columnize(charts: List[alt.Chart], cols: int = 2) -> alt.VConcatChart: + return alt.vconcat(*[alt.hconcat(*charts[i : i + cols]) for i in range(0, len(charts), cols)]) + + +def visualize_tuning_job( + tuning_jobs: Union[str, List[str], "sagemaker.tuner.HyperparameterTuner"], + return_dfs: bool = False, + job_metrics: Optional[List[str]] = None, + trials_only: bool = False, + advanced: bool = False, +) -> Union[alt.Chart, Tuple[alt.Chart, pd.DataFrame, pd.DataFrame]]: + """ + Visualize SageMaker hyperparameter tuning jobs. + + Args: + tuning_jobs: Single tuning job or list of tuning jobs (name or HyperparameterTuner object) + return_dfs: Whether to return the underlying DataFrames + job_metrics: List of additional job metrics to include + trials_only: Whether to only show trials data + advanced: Whether to show advanced visualizations + + Returns: + If return_dfs is False, returns Altair chart + If return_dfs is True, returns tuple of (chart, trials_df, full_df) + """ + + trials_df, tuned_parameters, objective_name, is_minimize = get_job_analytics_data(tuning_jobs) + + try: + from IPython import get_ipython + if get_ipython(): + # Running in a Jupyter Notebook + display(trials_df.head(10)) + else: + # Running in a non-Jupyter environment + logger.info(trials_df.head(10).to_string()) + except ImportError: + # Not running in a Jupyter Notebook + logger.info(trials_df.head(10).to_string()) + + full_df = ( + _prepare_consolidated_df(trials_df, objective_name) if not trials_only else pd.DataFrame() + ) + + trials_df.columns = trials_df.columns.map(_clean_parameter_name) + full_df.columns = full_df.columns.map(_clean_parameter_name) + tuned_parameters = [_clean_parameter_name(tp) for tp in tuned_parameters] + objective_name = _clean_parameter_name(objective_name) + + charts = create_charts( + trials_df, + tuned_parameters, + full_df, + objective_name, + minimize_objective=is_minimize, + job_metrics=job_metrics, + advanced=advanced, + ) + + if return_dfs: + return charts, trials_df, full_df + else: + return charts + + +def create_charts( + trials_df: pd.DataFrame, + tuning_parameters: List[str], + full_df: pd.DataFrame, + objective_name: str, + minimize_objective: bool, + job_metrics: Optional[List[str]] = None, + highlight_trials: bool = True, + color_trials: bool = False, + advanced: bool = False, +) -> alt.Chart: + """ + Create visualization charts for hyperparameter tuning results. 
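+
+    The result is a single Altair chart assembled from several sub-charts: an
+    overview row (progress over time and objective distribution), one detail
+    chart per tuned hyperparameter, and job/instance level metrics when
+    CloudWatch data is available in ``full_df``.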
+
+    Args:
+        trials_df: DataFrame containing trials data
+        tuning_parameters: List of hyperparameter names
+        full_df: DataFrame with consolidated data
+        objective_name: Name of the objective metric
+        minimize_objective: Whether objective should be minimized
+        job_metrics: Additional job metrics to include
+        highlight_trials: Whether to highlight selected trials
+        color_trials: Whether to color trials by job
+        advanced: Whether to show advanced visualizations
+
+    Returns:
+        Altair chart visualization
+    """
+
+    if trials_df.empty:
+        logger.info("No results available yet.")
+        return pd.DataFrame()
+
+    if job_metrics is None:
+        job_metrics = []
+
+    multiple_tuning_jobs = len(trials_df["TuningJobName"].unique()) > 1
+    multiple_job_status = len(trials_df["TrainingJobStatus"].unique()) > 1
+
+    # Rows, n>1
+    # Detail Charts
+
+    brush = alt.selection_interval(encodings=["x"], resolve="intersect", empty=True)
+
+    job_highlight_selection = alt.selection_point(
+        on="mouseover",
+        nearest=False,
+        empty=False,
+        fields=["TrainingJobName", "TrainingStartTime"],
+    )
+
+    # Create the tooltip
+    detail_tooltip = []
+    for trp in [objective_name] + tuning_parameters:
+        if trials_df[trp].dtype == np.float64:
+            trp = alt.Tooltip(trp, format=".2e")
+        detail_tooltip.append(trp)
+
+    detail_tooltip.append(alt.Tooltip("TrainingStartTime:T", format="%H:%M:%S"))
+    detail_tooltip.extend(["TrainingJobName", "TrainingJobStatus", "TrainingElapsedTimeSeconds"])
+
+    # Create stroke/stroke-width encodings for tuning jobs, and color
+    # encodings for training jobs, if desired. Color the stroke to
+    # highlight correlated data points.
+    jobs_props = {"shape": alt.Shape("TrainingJobStatus:N", legend=None)}
+
+    if multiple_tuning_jobs:
+        jobs_props["strokeWidth"] = alt.StrokeWidthValue(2.0)
+        jobs_props["stroke"] = alt.Stroke("TuningJobName:N", legend=None)
+
+    if color_trials:
+        jobs_props["color"] = alt.Color("TrainingJobName:N")
+
+    if highlight_trials:
+        jobs_props["strokeWidth"] = alt.condition(
+            job_highlight_selection,
+            alt.StrokeWidthValue(2.0),
+            alt.StrokeWidthValue(2.0),
+        )
+        jobs_props["stroke"] = alt.condition(
+            job_highlight_selection,
+            alt.StrokeValue("gold"),
+            alt.Stroke("TuningJobName:N", legend=None)
+            if multiple_tuning_jobs
+            else alt.StrokeValue("white"),
+        )
+
+    opacity = alt.condition(brush, alt.value(1.0), alt.value(0.35))
+    charts = []
+
+    # Min and max of the objective. This is used in filtered
+    # charts, so that the filtering does not make the axis
+    # jump, which would make comparisons harder.
+    objective_scale = alt.Scale(
+        domain=(
+            trials_df[objective_name].min(),
+            trials_df[objective_name].max(),
+        )
+    )
+
+    # If we have multiple tuning jobs, we also want to be able
+    # to discriminate based on the individual tuning job, so
+    # we just treat them as an additional tuning parameter
+    tuning_parameters = tuning_parameters.copy()
+    if multiple_tuning_jobs:
+        tuning_parameters.append("TuningJobName")
+
+    # If we use early stopping and at least some jobs were
+    # stopped early, we want to be able to discriminate
+    # those jobs.
+    if multiple_job_status:
+        tuning_parameters.append("TrainingJobStatus")
+
+    def render_detail_charts():
+        # To force a tuning job to sample a combination more than once, we
+        # sometimes introduce a hyperparameter that has no effect.
+        # Its values are random and without impact, so we omit it from analysis.
+        ignored_parameters = {"dummy"}
+        for tuning_parameter in tuning_parameters:
+            if tuning_parameter in ignored_parameters:
+                continue
+
+            # Map the dataframe's dtype to altair's types and
+            # adjust the scale if necessary
+            scale_type = "linear"
+            scale_log_base = 10
+
+            few_values = len(trials_df[tuning_parameter].unique()) < 8
+            parameter_type = "N"  # Nominal
+            dtype = str(trials_df.dtypes[tuning_parameter])
+            if "float" in dtype:
+                parameter_type = "Q"  # Quantitative
+                ratio = (trials_df[tuning_parameter].max() + 1e-10) / (
+                    trials_df[tuning_parameter].min() + 1e-10
+                )
+                not_likely_discrete = (
+                    len(trials_df[tuning_parameter].unique()) > trials_df[tuning_parameter].count()
+                )  # edge case when both are equal
+                if few_values and not_likely_discrete:
+                    if ratio > 50:
+                        scale_type = "log"
+                    elif ratio > 10:
+                        scale_type = "log"
+                        scale_log_base = 2
+
+            elif "int" in dtype or "object" in dtype:
+                parameter_type = "O"  # Ordinal
+
+            x_encoding = alt.X(
+                f"{tuning_parameter}:{parameter_type}",
+                scale=alt.Scale(
+                    zero=False,
+                    padding=1,
+                    type=scale_type,
+                    base=scale_log_base,
+                ),
+            )
+
+            # Sync the coloring for categorical hyperparameters
+            discrete = parameter_type in ["O", "N"] and few_values
+
+            # Detail Chart
+            charts.append(
+                alt.Chart(trials_df)
+                .add_params(brush)
+                .add_params(job_highlight_selection)
+                .mark_point(filled=True, size=50)
+                .encode(
+                    x=x_encoding,
+                    y=alt.Y(
+                        f"{objective_name}:Q",
+                        scale=alt.Scale(zero=False, padding=1),
+                        axis=alt.Axis(title=objective_name),
+                    ),
+                    opacity=opacity,
+                    tooltip=detail_tooltip,
+                    **jobs_props,
+                )
+            )
+
+            if discrete:
+                # Individually color the values only if we don't already
+                # use the colors to show the different tuning jobs
+                logger.info(f"{parameter_type}, {tuning_parameter}")
+                if not multiple_tuning_jobs:
+                    charts[-1] = charts[-1].encode(color=f"{tuning_parameter}:N")
+                charts[-1] = (
+                    (
+                        charts[-1]
+                        | alt.Chart(trials_df)
+                        .transform_filter(brush)
+                        .transform_density(
+                            objective_name,
+                            bandwidth=0.01,
+                            groupby=[tuning_parameter],
+                            # https://github.com/vega/altair/issues/3203#issuecomment-2141558911
+                            # Specifying extent is no longer necessary (>5.1.2).
+                            # Leaving the workaround in for now.
+ extent=[ + trials_df[objective_name].min(), + trials_df[objective_name].max(), + ], + ) + .mark_area(opacity=0.5) + .encode( + x=alt.X( + "value:Q", + title=objective_name, + scale=objective_scale, + ), + y="density:Q", + color=alt.Color( + f"{tuning_parameter}:N", + ), + tooltip=tuning_parameter, + ) + ).properties(title=tuning_parameter) + # .resolve_scale("independent") + # .resolve_legend(color="independent") + ) + + if advanced and parameter_type == "Q": + # Adding tick marks to the detail charts with quantitative hyperparameters + x_enc = x_encoding.copy() + charts[-1].encoding.x.title = None + charts[-1].encoding.x.axis = alt.Axis(labels=False) + + charts[-1] = charts[-1] & alt.Chart(trials_df).mark_tick(opacity=0.5).encode( + x=x_enc, + opacity=alt.condition(brush, alt.value(0.5), alt.value(0.1)), + ) + + return _columnize(charts) + + detail_charts = render_detail_charts() + + # First Row + # Progress Over Time Chart + + def render_progress_chart(): + # Sorting trials by training start time, so that we can track the \ + # progress of the best objective so far over time + trials_df_by_tst = trials_df.sort_values(["TuningJobName", "TrainingStartTime"]) + trials_df_by_tst["cum_objective"] = trials_df_by_tst.groupby(["TuningJobName"]).transform( + lambda x: x.cummin() if minimize_objective else x.cummax() + )[objective_name] + + progress_chart = ( + alt.Chart(trials_df_by_tst) + .add_params(brush) + .add_params(job_highlight_selection) + .mark_point(filled=True, size=50) + .encode( + x=alt.X("TrainingStartTime:T", scale=alt.Scale(nice=True)), + y=alt.Y( + f"{objective_name}:Q", + scale=alt.Scale(zero=False, padding=1), + axis=alt.Axis(title=objective_name), + ), + opacity=opacity, + tooltip=detail_tooltip, + **jobs_props, + ) + ) + + cum_obj_chart = ( + alt.Chart(trials_df_by_tst) + .mark_line( + interpolate="step-after", + opacity=1.0, + strokeDash=[3, 3], + strokeWidth=2.0, + ) + .encode( + x=alt.X("TrainingStartTime:T", scale=alt.Scale(nice=True)), + y=alt.Y(f"cum_objective:Q", scale=alt.Scale(zero=False, padding=1)), + stroke=alt.Stroke("TuningJobName:N", legend=None), + ) + ) + + if advanced: + return cum_obj_chart + progress_chart + else: + return progress_chart + + progress_chart = render_progress_chart() + + # First Row + # KDE Training Objective + result_hist_chart = ( + alt.Chart(trials_df) + .transform_filter(brush) + .transform_density(objective_name, bandwidth=0.01) + .mark_area() + .encode( + x=alt.X(f"value:Q", scale=objective_scale, title=objective_name), + y="density:Q", + ) + ) + # Training Jobs + training_jobs_chart = ( + alt.Chart(trials_df.sort_values(objective_name), title="Training Jobs") + .mark_bar() + .add_params(brush) + .add_params(job_highlight_selection) + .encode( + y=alt.Y(f"{objective_name}:Q"), + x=alt.X("TrainingJobName:N", sort=None), + color=alt.Color("TrainingJobName:N"), + opacity=opacity, + **jobs_props, + ) + ) + + # Job Level Stats + + training_job_name_encodings = { + "color": alt.condition( + brush, + alt.Color("TrainingJobName:N", legend=None), + alt.value("grey"), + ), + "opacity": alt.condition(brush, alt.value(1.0), alt.value(0.3)), + "strokeWidth": alt.condition(brush, alt.value(2.5), alt.value(0.8)), + } + + duration_format = "%M:%S" + metrics_tooltip = [ + "TrainingJobName:N", + "value:Q", + "label:N", + alt.Tooltip("ts:T", format="%e:%H:%M"), + alt.Tooltip("rel_ts:T", format="%e:%H:%M"), + ] + + job_level_rows = alt.HConcatChart() + + # Use CW metrics + if not full_df.empty: + # Objective Progression + + objective_progression_chart 
= None + # Suppress diagram if we only have one, final, value + if ( + full_df.loc[full_df.label == objective_name] + .groupby(["TuningJobName", "TrainingJobName"])[objective_name] + .count() + .max() + > 1 + ): + objective_progression_chart = ( + alt.Chart(full_df, title=f"Progression {objective_name}", width=400) + .transform_filter(alt.FieldEqualPredicate(field="label", equal=objective_name)) + .mark_line(point=True) + .encode( + x=alt.X("rel_ts:T", axis=alt.Axis(format=duration_format)), + y=alt.Y("value:Q", scale=alt.Scale(zero=False)), + **training_job_name_encodings, + tooltip=metrics_tooltip, + ) + .interactive() + ) + + if multiple_job_status: + objective_progression_chart = objective_progression_chart.encode( + strokeDash=alt.StrokeDash("TrainingJobStatus:N", legend=None) + ) + + # Secondary chart showing the same contents, but by absolute time. + objective_progression_absolute_chart = objective_progression_chart.encode( + x=alt.X("ts:T", scale=alt.Scale(nice=True)) + ) + + objective_progression_chart = ( + objective_progression_chart | objective_progression_absolute_chart + ) + + ### + + job_metrics_charts = [] + for metric in job_metrics: + metric_chart = ( + alt.Chart(full_df, title=metric, width=400) + .transform_filter(alt.FieldEqualPredicate(field="label", equal=metric)) + .encode( + y=alt.Y("value:Q", scale=alt.Scale(zero=False)), + **training_job_name_encodings, + tooltip=metrics_tooltip, + ) + .interactive() + ) + + if ( + full_df.loc[full_df.label == metric] + .groupby(["TuningJobName", "TrainingJobName"]) + .count() + .value.max() + == 1 + ): + # single value, render as a bar over the training jobs on the x-axis + metric_chart = metric_chart.encode( + x=alt.X("TrainingJobName:N", sort=None) + ).mark_bar(interpolate="linear", point=True) + else: + # multiple values, render the values over time on the x-axis + metric_chart = metric_chart.encode( + x=alt.X("rel_ts:T", axis=alt.Axis(format=duration_format)) + ).mark_line(interpolate="linear", point=True) + + job_metrics_charts.append(metric_chart) + + job_metrics_chart = _columnize(job_metrics_charts, 3) + + # Job instance + # 'MemoryUtilization', 'CPUUtilization' + instance_metrics_chart = ( + alt.Chart(full_df, title="CPU and Memory") + .transform_filter( + alt.FieldOneOfPredicate( + field="label", + oneOf=[ + "MemoryUtilization", + "CPUUtilization", + ], + ) + ) + .mark_line() + .encode( + x=alt.X("rel_ts:T", axis=alt.Axis(format=duration_format)), + y="value:Q", + **training_job_name_encodings, + strokeDash=alt.StrokeDash("label:N", legend=alt.Legend(orient="bottom")), + tooltip=metrics_tooltip, + ) + .interactive() + ) + + if "GPUUtilization" in full_df.label.values: + instance_metrics_chart = ( + instance_metrics_chart + | alt.Chart(full_df, title="GPU and GPU Memory") + .transform_filter( + alt.FieldOneOfPredicate( + field="label", + oneOf=[ + "GPUMemoryUtilization", + "GPUUtilization", + ], + ) + ) + .mark_line() + .encode( + x=alt.X("rel_ts:T", axis=alt.Axis(format=duration_format)), + y=alt.Y("value:Q"), + **training_job_name_encodings, + strokeDash=alt.StrokeDash("label:N", legend=alt.Legend(orient="bottom")), + tooltip=metrics_tooltip, + ) + .interactive() + ) + + job_level_rows = job_metrics_chart & instance_metrics_chart + if objective_progression_chart: + job_level_rows = objective_progression_chart & job_level_rows + job_level_rows = job_level_rows.resolve_scale(strokeDash="independent").properties( + title="Job / Instance Level Metrics" + ) + + overview_row = (progress_chart | 
result_hist_chart).properties(
+        title="Hyper Parameter Tuning Job"
+    )
+    detail_rows = detail_charts.properties(title="Hyper Parameter Details")
+    if job_level_rows:
+        job_level_rows = training_jobs_chart & job_level_rows
+
+    return overview_row & detail_rows & job_level_rows
+
+
+# Ensure proper parameter name characters for altair 5+
+def _clean_parameter_name(s):
+    return s.replace(":", "_").replace(".", "_")
+
+
+def _prepare_training_job_metrics(jobs):
+    df = pd.DataFrame()
+    for job_name, start_time, end_time in jobs:
+        job_df = get_cw_job_metrics(
+            job_name,
+            start_time=pd.Timestamp(start_time) - pd.DateOffset(hours=8),
+            end_time=pd.Timestamp(end_time) + pd.DateOffset(hours=8),
+        )
+        if job_df is None:
+            logger.info(f"No CloudWatch metrics for {job_name}. Skipping.")
+            continue
+
+        job_df["TrainingJobName"] = job_name
+        df = pd.concat([df, job_df])
+    return df
+
+
+def _prepare_consolidated_df(trials_df, objective_name):
+    if trials_df.empty:
+        return pd.DataFrame()
+
+    logger.debug("Cache Hit/Miss:")
+    jobs_df = _prepare_training_job_metrics(
+        zip(
+            trials_df.TrainingJobName.values,
+            trials_df.TrainingStartTime.values,
+            trials_df.TrainingEndTime.values,
+        )
+    )
+    logger.info("")
+
+    if jobs_df.empty:
+        return pd.DataFrame()
+
+    merged_df = pd.merge(jobs_df, trials_df, on="TrainingJobName")
+    return merged_df
+
+
+def _get_df(tuning_job_name, filter_out_stopped=False):
+    tuner = sagemaker.HyperparameterTuningJobAnalytics(tuning_job_name)
+
+    df = tuner.dataframe()
+    if df.empty:  # HPO job just started; no results yet
+        return df
+
+    df["TuningJobName"] = tuning_job_name
+
+    # Filter out jobs without FinalObjectiveValue
+    df = df[df["FinalObjectiveValue"] > -float("inf")]
+
+    # Jobs early stopped by AMT are reported with their last
+    # objective value, before they are stopped.
+    # However, this value may not be a good representation
+    # of the eventual objective value we would have seen
+    # if the job had run without stopping. Therefore it may be
+    # confusing to include those runs.
+    # For now, if included, we use a different mark to
+    # discriminate visually between a stopped and a finished job.
+
+    if filter_out_stopped:
+        df = df[df["TrainingJobStatus"] != "Stopped"]
+
+    # Preprocessing values for [32], [64] etc.
+    for tuning_range in tuner.tuning_ranges.values():
+        parameter_name = tuning_range["Name"]
+        if df.dtypes[parameter_name] == "O":
+            try:
+                # Remove decorations, like []
+                df[parameter_name] = df[parameter_name].apply(
+                    lambda v: v.replace("[", "").replace("]", "").replace('"', "")
+                )
+
+                # Is it an int? 3 would work, 3.4 would fail.
+                try:
+                    df[parameter_name] = df[parameter_name].astype(int)
+                except ValueError:
+                    # A float then?
+                    df[parameter_name] = df[parameter_name].astype(float)
+
+            except Exception as e:
+                # Trouble, as this was not a number just pretending to be a string,
+                # but an actual string with characters.
+                # Leaving the value untouched.
+                # Ex: Caught exception: could not convert string to float: 'sqrt'
+                pass
+
+    return df
+
+
+def _get_tuning_job_names_with_parents(tuning_job_names):
+    """Resolve dependent jobs, one level only"""
+
+    all_tuning_job_names = []
+    for tuning_job_name in tuning_job_names:
+        tuning_job_result = sm.describe_hyper_parameter_tuning_job(
+            HyperParameterTuningJobName=tuning_job_name
+        )
+
+        # Find parent jobs and retrieve all tuner dataframes
+        parent_jobs = []
+        if "WarmStartConfig" in tuning_job_result:
+            parent_jobs = [
+                cfg["HyperParameterTuningJobName"]
+                for cfg in tuning_job_result["WarmStartConfig"]["ParentHyperParameterTuningJobs"]
+            ]
+        if parent_jobs:
+            logger.info(f'Tuning job {tuning_job_name}\'s parents: {", ".join(parent_jobs)}')
+        all_tuning_job_names.extend([tuning_job_name, *parent_jobs])
+
+    # Return de-duplicated tuning job names
+    return list(set(all_tuning_job_names))
+
+
+def get_job_analytics_data(tuning_job_names):
+    """Collect and consolidate analytics data for one or more tuning jobs and their parents."""
+    if not isinstance(tuning_job_names, list):
+        tuning_job_names = [tuning_job_names]
+
+    # Ensure we have a list of tuning job names (strings)
+    tuning_job_names = [
+        tuning_job.describe()["HyperParameterTuningJobName"]
+        if isinstance(tuning_job, sagemaker.tuner.HyperparameterTuner)
+        else tuning_job
+        for tuning_job in tuning_job_names
+    ]
+
+    # Maintain a combined tuner dataframe from all tuning jobs
+    df = pd.DataFrame()
+
+    # Maintain objective, direction of optimization and tuned parameters
+    objective_name = None
+    is_minimize = None
+    tuned_parameters = None
+
+    all_tuning_job_names = _get_tuning_job_names_with_parents(tuning_job_names)
+
+    for tuning_job_name in all_tuning_job_names:
+        tuning_job_result = sm.describe_hyper_parameter_tuning_job(
+            HyperParameterTuningJobName=tuning_job_name
+        )
+        status = tuning_job_result["HyperParameterTuningJobStatus"]
+        logger.info(f"Tuning job {tuning_job_name:25s} status: {status}")
+
+        df = pd.concat([df, _get_df(tuning_job_name)])
+
+        # Maintain the objective and ensure that all tuning jobs use the same one
+        job_is_minimize = (
+            tuning_job_result["HyperParameterTuningJobConfig"]["HyperParameterTuningJobObjective"][
+                "Type"
+            ]
+            != "Maximize"
+        )
+        job_objective_name = tuning_job_result["HyperParameterTuningJobConfig"][
+            "HyperParameterTuningJobObjective"
+        ]["MetricName"]
+        job_tuned_parameters = [
+            v["Name"]
+            for v in sagemaker.HyperparameterTuningJobAnalytics(
+                tuning_job_name
+            ).tuning_ranges.values()
+        ]
+
+        if not objective_name:
+            objective_name = job_objective_name
+            is_minimize = job_is_minimize
+            tuned_parameters = job_tuned_parameters
+        else:
+            if (
+                objective_name != job_objective_name
+                or is_minimize != job_is_minimize
+                or set(tuned_parameters) != set(job_tuned_parameters)
+            ):
+                raise ValueError(
+                    "All tuning jobs must use the same objective and optimization direction."
+                )
+
+    if not df.empty:
+        # Clean up wrongly encoded floats, e.g. those containing quotes.
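+        # e.g. a value stored as '"8"' (quotes included) becomes the int 8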
+        for i, dtype in enumerate(df.dtypes):
+            column_name = str(df.columns[i])
+            if column_name in [
+                "TrainingJobName",
+                "TrainingJobStatus",
+                "TuningJobName",
+            ]:
+                continue
+            if dtype == "object":
+                val = df[column_name].iloc[0]
+                if isinstance(val, str) and val.startswith('"'):
+                    try:
+                        df[column_name] = df[column_name].apply(lambda x: int(x.replace('"', "")))
+                    except:  # noqa: E722 nosec b110 if we fail, we just continue with what we had
+                        pass  # Value is not an int, but a string
+
+        df = df.sort_values("FinalObjectiveValue", ascending=is_minimize)
+        df[objective_name] = df.pop("FinalObjectiveValue")
+
+        # Fix a potential issue with dates represented as objects instead of timestamps,
+        # which can otherwise lead to
+        # https://www.markhneedham.com/blog/2020/01/10/altair-typeerror-object-type-date-not-json-serializable/
+        # We have only observed this for TrainingEndTime, but will be on the lookout
+        # for TrainingStartTime as well now.
+        df["TrainingEndTime"] = pd.to_datetime(df["TrainingEndTime"])
+        df["TrainingStartTime"] = pd.to_datetime(df["TrainingStartTime"])
+
+        logger.info("")
+        logger.info(f"Number of training jobs with valid objective: {len(df)}")
+        logger.info(f"Lowest: {min(df[objective_name])} Highest: {max(df[objective_name])}")
+
+    tuned_parameters = [_clean_parameter_name(tp) for tp in tuned_parameters]
+
+    return df, tuned_parameters, objective_name, is_minimize
\ No newline at end of file
diff --git a/src/sagemaker/tuner.py b/src/sagemaker/tuner.py
index 4b0f38f36f..00ae78b1ad 100644
--- a/src/sagemaker/tuner.py
+++ b/src/sagemaker/tuner.py
@@ -2123,6 +2123,63 @@ def _add_estimator(
 
     delete_endpoint = removed_function("delete_endpoint")
 
+    @staticmethod
+    def visualize_jobs(
+        tuning_jobs: Union[
+            str,
+            'sagemaker.tuner.HyperparameterTuner',
+            List[Union[str, 'sagemaker.tuner.HyperparameterTuner']],
+        ],
+        return_dfs: bool = False,
+        job_metrics: Optional[List[str]] = None,
+        trials_only: bool = False,
+        advanced: bool = False
+    ):
+        """Create an interactive visualization based on altair charts using the sagemaker.amtviz
+        package.
+
+        Args:
+            tuning_jobs (str or sagemaker.tuner.HyperparameterTuner or
+                list[str or sagemaker.tuner.HyperparameterTuner]): One or more tuning jobs
+                to create a visualization for.
+            return_dfs (bool): Option to return the trials and full dataframes.
+            job_metrics (list[str]): Metrics to be used in charts.
+            trials_only (bool): Whether to show trials only or the full dataframe.
+            advanced (bool): Show a cumulative step line in the progress over time chart.
+
+        Returns:
+            A collection of charts (altair.VConcatChart); or charts, trials_df (pandas.DataFrame),
+            full_df (pandas.DataFrame) if ``return_dfs=True``.
+        """
+        try:
+            # Check if altair is installed
+            importlib.import_module('altair')
+
+        except ImportError:
+            print("Altair is not installed. To use the visualization feature, please install Altair:")
+            print(" pip install altair")
+            print("After installing Altair, you can use the methods visualize_jobs or visualize_job.")
+            return None
+
+        # If altair is installed, proceed with visualization
+        from sagemaker.amtviz import visualize_tuning_job
+
+        return visualize_tuning_job(
+            tuning_jobs,
+            return_dfs=return_dfs,
+            job_metrics=job_metrics,
+            trials_only=trials_only,
+            advanced=advanced,
+        )
+
+    def visualize_job(
+        self, return_dfs: bool = False,
+        job_metrics: Optional[List[str]] = None, trials_only: bool = False, advanced: bool = False
+    ):
+        """Convenience method on instance level for visualize_jobs().
+
+        See the static method visualize_jobs().
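+
+        Example (illustrative; assumes ``tuner`` has completed a tuning job):
+
+            chart = tuner.visualize_job(advanced=True)
+            chart  # renders interactively in a Jupyter notebook
+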
+ """ + return HyperparameterTuner.visualize_jobs( + self, + return_dfs=return_dfs, + job_metrics=job_metrics, + trials_only=trials_only, + advanced=advanced, + ) + class _TuningJob(_Job): """Placeholder docstring""" diff --git a/tests/unit/test_tuner_visualize.py b/tests/unit/test_tuner_visualize.py new file mode 100644 index 0000000000..ea9835a408 --- /dev/null +++ b/tests/unit/test_tuner_visualize.py @@ -0,0 +1,303 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +import pandas as pd +import pytest +from mock import Mock, patch, MagicMock +import sagemaker +from sagemaker.estimator import Estimator +from sagemaker.session_settings import SessionSettings +from sagemaker.tuner import ( + HyperparameterTuner +) +from tests.unit.tuner_test_utils import ( + OBJECTIVE_METRIC_NAME, + HYPERPARAMETER_RANGES, + METRIC_DEFINITIONS +) +from sagemaker.session_settings import SessionSettings +# Visualization specific imports +from sagemaker.amtviz.visualization import visualize_tuning_job, get_job_analytics_data +from tests.unit.tuner_visualize_test_utils import ( + TUNING_JOB_NAMES, + TUNED_PARAMETERS, + OBJECTIVE_NAME, + TRIALS_DF_DATA, + FULL_DF_DATA, + TUNING_JOB_NAME_1, + TUNING_JOB_NAME_2, + TUNING_JOB_RESULT, + TRIALS_DF_COLUMNS, + FULL_DF_COLUMNS, + TRIALS_DF_TRAINING_JOB_NAMES, + TRIALS_DF_TRAINING_JOB_STATUSES, + TUNING_JOB_NAMES, + TRIALS_DF_VALID_F1_VALUES, + FILTERED_TUNING_JOB_DF_DATA, + TUNING_RANGES +) +import altair as alt + +def create_sagemaker_session(): + boto_mock = Mock(name="boto_session") + sms = Mock( + name="sagemaker_session", + boto_session=boto_mock, + config=None, + local_mode=False, + settings=SessionSettings() + ) + sms.sagemaker_config = {} + return sms + +@pytest.fixture() +def sagemaker_session(): + return create_sagemaker_session() + + +@pytest.fixture() +def estimator(sagemaker_session): + return Estimator( + "image", + "role", + 1, + "ml.c4.xlarge", + output_path="s3://bucket/prefix", + sagemaker_session=sagemaker_session, + ) + + +@pytest.fixture() +def tuner(estimator): + return HyperparameterTuner( + estimator, OBJECTIVE_METRIC_NAME, HYPERPARAMETER_RANGES, METRIC_DEFINITIONS + ) + +@pytest.fixture() +def tuner2(estimator): + return HyperparameterTuner( + estimator, OBJECTIVE_METRIC_NAME, HYPERPARAMETER_RANGES, METRIC_DEFINITIONS + ) + + +@pytest.fixture +def mock_visualize_tuning_job(): + with patch("sagemaker.amtviz.visualize_tuning_job") as mock_visualize: + mock_visualize.return_value = "mock_chart" + yield mock_visualize + + +@pytest.fixture +def mock_get_job_analytics_data(): + with patch("sagemaker.amtviz.visualization.get_job_analytics_data") as mock: + mock.return_value = ( + pd.DataFrame(TRIALS_DF_DATA), + TUNED_PARAMETERS, + OBJECTIVE_NAME, + True + ) + yield mock + + +@pytest.fixture +def mock_prepare_consolidated_df(): + with patch("sagemaker.amtviz.visualization._prepare_consolidated_df") as mock: + mock.return_value = pd.DataFrame(FULL_DF_DATA) + yield mock + + +# Test graceful handling if the required altair library is not installed 
+def test_visualize_jobs_altair_not_installed(capsys): + # Mock importlib.import_module to raise ImportError for 'altair' + with patch("importlib.import_module") as mock_import: + mock_import.side_effect = ImportError("No module named 'altair'") + result = HyperparameterTuner.visualize_jobs(TUNING_JOB_NAMES) + assert result is None + captured = capsys.readouterr() + assert "Altair is not installed." in captured.out + assert "pip install altair" in captured.out + + +# Test basic method call if altair is installed +def test_visualize_jobs_altair_installed(mock_visualize_tuning_job): + # Mock successful import of altair + with patch("importlib.import_module") as mock_import: + result = HyperparameterTuner.visualize_jobs(TUNING_JOB_NAMES) + assert result == "mock_chart" + + +# Test for static method visualize_jobs() +def test_visualize_jobs(mock_visualize_tuning_job): + result = HyperparameterTuner.visualize_jobs(TUNING_JOB_NAMES) + assert result == "mock_chart" + mock_visualize_tuning_job.assert_called_once_with( + TUNING_JOB_NAMES, + return_dfs=False, + job_metrics=None, + trials_only=False, + advanced=False + ) + # Vary the parameters and check if they have been passed correctly + result = HyperparameterTuner.visualize_jobs( + [TUNING_JOB_NAME_1], return_dfs=True, job_metrics="job_metrics", trials_only=True, advanced=True) + mock_visualize_tuning_job.assert_called_with( + [TUNING_JOB_NAME_1], + return_dfs=True, + job_metrics="job_metrics", + trials_only=True, + advanced=True + ) + +# Test the instance method visualize_job() on a stubbed tuner object +def test_visualize_job(tuner, mock_visualize_tuning_job): + # With default parameters + result = tuner.visualize_job() + assert result == "mock_chart" + mock_visualize_tuning_job.assert_called_once_with( + tuner, + return_dfs=False, + job_metrics=None, + trials_only=False, + advanced=False + ) + # With varying parameters + result = tuner.visualize_job(return_dfs=True, job_metrics="job_metrics", trials_only=True, advanced=True) + assert result == "mock_chart" + mock_visualize_tuning_job.assert_called_with( + tuner, + return_dfs=True, + job_metrics="job_metrics", + trials_only=True, + advanced=True + ) + +# Test the static method visualize_jobs() on multiple stubbed tuner objects +def test_visualize_multiple_jobs(tuner, tuner2, mock_visualize_tuning_job): + result = HyperparameterTuner.visualize_jobs([tuner, tuner2]) + assert result == "mock_chart" + mock_visualize_tuning_job.assert_called_once_with( + [tuner, tuner2], + return_dfs=False, + job_metrics=None, + trials_only=False, + advanced=False + ) + # Vary the parameters and check if they have been passed correctly + result = HyperparameterTuner.visualize_jobs( + [[tuner, tuner2]], return_dfs=True, job_metrics="job_metrics", trials_only=True, advanced=True) + mock_visualize_tuning_job.assert_called_with( + [[tuner, tuner2]], + return_dfs=True, + job_metrics="job_metrics", + trials_only=True, + advanced=True + ) + +# Test direct method call for basic chart return type and default render settings +def test_visualize_tuning_job_analytics_data_results_in_altair_chart(mock_get_job_analytics_data): + result = visualize_tuning_job("mock_job") + assert alt.renderers.active == "default" + assert isinstance(result, alt.VConcatChart) + + +# Test the size and structure of the returned dataframes (trials_df and full_df) +def test_visualize_tuning_job_return_dfs(mock_get_job_analytics_data, mock_prepare_consolidated_df): + charts, trials_df, full_df = visualize_tuning_job("mock_job", return_dfs=True) + # 
Basic assertion for the charts
+    assert isinstance(charts, alt.VConcatChart)
+
+    # Assertions for trials_df
+    assert isinstance(trials_df, pd.DataFrame)
+    assert trials_df.shape == (2, len(TRIALS_DF_COLUMNS))
+    assert trials_df.columns.tolist() == TRIALS_DF_COLUMNS
+    assert trials_df['TrainingJobName'].tolist() == TRIALS_DF_TRAINING_JOB_NAMES
+    assert trials_df['TrainingJobStatus'].tolist() == TRIALS_DF_TRAINING_JOB_STATUSES
+    assert trials_df['TuningJobName'].tolist() == TUNING_JOB_NAMES
+    assert trials_df['valid-f1'].tolist() == TRIALS_DF_VALID_F1_VALUES
+
+    # Assertions for full_df
+    assert isinstance(full_df, pd.DataFrame)
+    assert full_df.shape == (2, 16)
+    assert full_df.columns.tolist() == FULL_DF_COLUMNS
+
+
+# Test the handling of an empty trials dataframe
+@patch("sagemaker.amtviz.visualization.get_job_analytics_data")
+def test_visualize_tuning_job_empty_trials(mock_get_job_analytics_data):
+    mock_get_job_analytics_data.return_value = (
+        pd.DataFrame(),  # empty dataframe
+        TUNED_PARAMETERS,
+        OBJECTIVE_NAME,
+        True
+    )
+    charts = visualize_tuning_job("empty_job")
+    assert charts.empty
+
+
+# Test handling of the return_dfs and trials_only parameters
+def test_visualize_tuning_job_trials_only(mock_get_job_analytics_data):
+    # If return_dfs is set to False, then only charts should be returned
+    result = visualize_tuning_job("mock_job", return_dfs=False, trials_only=True)
+    assert isinstance(result, alt.VConcatChart)
+    # trials_only controls the content of the two returned dataframes (trials_df, full_df)
+    result, df1, df2 = visualize_tuning_job("mock_job", return_dfs=True, trials_only=True)
+    assert isinstance(df1, pd.DataFrame)
+    assert df1.shape == (2, len(TRIALS_DF_COLUMNS))
+    assert isinstance(df2, pd.DataFrame)
+    assert df2.empty
+    # The combination of return_dfs and trials_only=False is covered in
+    # 'test_visualize_tuning_job_return_dfs'
+
+
+# Check if all parameters are correctly passed to the (mocked) create_charts method
+@patch("sagemaker.amtviz.visualization.create_charts")
+def test_visualize_tuning_job_with_full_df(
+    mock_create_charts, mock_get_job_analytics_data, mock_prepare_consolidated_df
+):
+    mock_create_charts.return_value = alt.Chart()
+    visualize_tuning_job("dummy_job")
+
+    # Check the create_charts call arguments
+    call_args = mock_create_charts.call_args[0]
+    call_kwargs = mock_create_charts.call_args[1]
+    assert isinstance(call_args[0], pd.DataFrame)  # trials_df
+    assert isinstance(call_args[1], list)  # tuned_parameters
+    assert isinstance(call_args[2], pd.DataFrame)  # full_df
+    assert isinstance(call_args[3], str)  # objective_name
+    assert call_kwargs.get("minimize_objective")
+
+    # Check the details of the passed arguments
+    trials_df = call_args[0]
+    assert trials_df.columns.tolist() == TRIALS_DF_COLUMNS
+    tuned_parameters = call_args[1]
+    assert tuned_parameters == TUNED_PARAMETERS
+    objective_name = call_args[3]
+    assert objective_name == OBJECTIVE_NAME
+    full_df = call_args[2]
+    assert full_df.columns.tolist() == FULL_DF_COLUMNS
+
+
+# Test the dataframe produced by get_job_analytics_data()
+@patch("sagemaker.HyperparameterTuningJobAnalytics")
+def test_get_job_analytics_data(mock_hyperparameter_tuning_job_analytics):
+    # Mock sagemaker's describe_hyper_parameter_tuning_job and some internal methods
+    sagemaker.amtviz.visualization.sm.describe_hyper_parameter_tuning_job = Mock(
+        return_value=TUNING_JOB_RESULT)
+    sagemaker.amtviz.visualization._get_tuning_job_names_with_parents = Mock(
+        return_value=[TUNING_JOB_NAME_1, TUNING_JOB_NAME_2])
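+    # _get_df is stubbed with canned data as well, so no real AWS calls are made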
+    sagemaker.amtviz.visualization._get_df = Mock(
+        return_value=pd.DataFrame(FILTERED_TUNING_JOB_DF_DATA))
+    mock_tuning_job_instance = MagicMock()
+    mock_hyperparameter_tuning_job_analytics.return_value = mock_tuning_job_instance
+    mock_tuning_job_instance.tuning_ranges.values.return_value = TUNING_RANGES
+
+    df, tuned_parameters, objective_name, is_minimize = get_job_analytics_data([TUNING_JOB_NAME_1])
+    assert df.shape == (4, 12)
+    assert df.columns.tolist() == TRIALS_DF_COLUMNS
+    assert tuned_parameters == TUNED_PARAMETERS
+    assert objective_name == OBJECTIVE_NAME
+    assert is_minimize is False
\ No newline at end of file
diff --git a/tests/unit/tuner_visualize_test_utils.py b/tests/unit/tuner_visualize_test_utils.py
new file mode 100644
index 0000000000..3f66794a00
--- /dev/null
+++ b/tests/unit/tuner_visualize_test_utils.py
@@ -0,0 +1,110 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+
+TRIALS_DF_COLUMNS = [
+    'criterion', 'max-depth', 'min-samples-leaf', 'min-weight-fraction-leaf', 'n-estimators',
+    'TrainingJobName', 'TrainingJobStatus', 'TrainingStartTime', 'TrainingEndTime',
+    'TrainingElapsedTimeSeconds', 'TuningJobName', 'valid-f1'
+]
+
+FULL_DF_COLUMNS = [
+    'value', 'ts', 'label', 'rel_ts', 'TrainingJobName', 'criterion', 'max-depth',
+    'min-samples-leaf', 'min-weight-fraction-leaf', 'n-estimators', 'TrainingJobStatus',
+    'TrainingStartTime', 'TrainingEndTime', 'TrainingElapsedTimeSeconds', 'TuningJobName',
+    'valid-f1'
+]
+
+
+TRIALS_DF_TRAINING_JOB_NAMES = [
+    'random-240712-1545-019-4ac17a84', 'random-240712-1545-021-fcd64dc1'
+]
+
+TRIALS_DF_TRAINING_JOB_STATUSES = ['Completed', 'Completed']
+
+TUNING_JOB_NAME_1 = 'random-240712-1500'
+TUNING_JOB_NAME_2 = 'bayesian-240712-1600'
+TUNING_JOB_NAMES = [TUNING_JOB_NAME_1, TUNING_JOB_NAME_2]
+TRIALS_DF_VALID_F1_VALUES = [0.950, 0.896]
+
+
+TUNED_PARAMETERS = ['n-estimators', 'max-depth', 'min-samples-leaf', 'min-weight-fraction-leaf', 'criterion']
+OBJECTIVE_NAME = 'valid-f1'
+
+TRIALS_DF_DATA = {
+    'criterion': ['gini', 'log_loss'],
+    'max-depth': [18.0, 8.0],
+    'min-samples-leaf': [3.0, 10.0],
+    'min-weight-fraction-leaf': [0.011596, 0.062067],
+    'n-estimators': [110.0, 18.0],
+    'TrainingJobName': ['random-240712-1545-019-4ac17a84', 'random-240712-1545-021-fcd64dc1'],
+    'TrainingJobStatus': ['Completed', 'Completed'],
+    'TrainingStartTime': ['2024-07-12 17:55:59+02:00', '2024-07-12 17:56:50+02:00'],
+    'TrainingEndTime': ['2024-07-12 17:56:43+02:00', '2024-07-12 17:57:29+02:00'],
+    'TrainingElapsedTimeSeconds': [44.0, 39.0],
+    'TuningJobName': TUNING_JOB_NAMES,
+    'valid-f1': [0.950, 0.896]
+}
+
+FULL_DF_DATA = {
+    'value': [0.951000, 0.950000],
+    'ts': ['2024-07-12 15:56:00', '2024-07-12 15:56:00'],
+    'label': ['valid-precision',
'valid-recall'], + 'rel_ts': ['1970-01-01 01:00:00', '1970-01-01 01:00:00'], + 'TrainingJobName': ['random-240712-1545-019-4ac17a84', 'random-240712-1545-019-4ac17a84'], + 'criterion': ['gini', 'gini'], + 'max-depth': [18.0, 18.0], + 'min-samples-leaf': [3.0, 3.0], + 'min-weight-fraction-leaf': [0.011596, 0.011596], + 'n-estimators': [110.0, 110.0], + 'TrainingJobStatus': ['Completed', 'Completed'], + 'TrainingStartTime': ['2024-07-12 17:55:59+02:00', '2024-07-12 17:55:59+02:00'], + 'TrainingEndTime': ['2024-07-12 17:56:43+02:00', '2024-07-12 17:56:43+02:00'], + 'TrainingElapsedTimeSeconds': [44.0, 45.0], + 'TuningJobName': ['random-240712-1545', 'random-240712-1545'], + 'valid-f1': [0.9500, 0.9500] +} + +FILTERED_TUNING_JOB_DF_DATA = { + 'criterion': ['log_loss', 'gini'], + 'max-depth': [10.0, 16.0], + 'min-samples-leaf': [7.0, 2.0], + 'min-weight-fraction-leaf': [0.160910, 0.069803], + 'n-estimators': [67.0, 79.0], + 'TrainingJobName': ['random-240712-1545-050-c0b5c10a', 'random-240712-1545-049-2db2ec05'], + 'TrainingJobStatus': ['Completed', 'Completed'], + 'FinalObjectiveValue': [0.8190, 0.8910], + 'TrainingStartTime': ['2024-07-12 18:09:48+02:00', '2024-07-12 18:09:45+02:00'], + 'TrainingEndTime': ['2024-07-12 18:10:28+02:00', '2024-07-12 18:10:23+02:00'], + 'TrainingElapsedTimeSeconds': [40.0, 38.0], + 'TuningJobName': [TUNING_JOB_NAME_1, TUNING_JOB_NAME_2] +} + +TUNING_RANGES = [{'Name': 'n-estimators', 'MinValue': '1', 'MaxValue': '200', 'ScalingType': 'Auto'}, + {'Name': 'max-depth', 'MinValue': '1', 'MaxValue': '20', 'ScalingType': 'Auto'}, + {'Name': 'min-samples-leaf', 'MinValue': '1', 'MaxValue': '10', 'ScalingType': 'Auto'}, + {'Name': 'min-weight-fraction-leaf', 'MinValue': '0.01', 'MaxValue': '0.5', 'ScalingType': 'Auto'}, + {'Name': 'criterion', 'Values': ['"gini"', '"entropy"', '"log_loss"']}] + + +TUNING_JOB_RESULT = { + 'HyperParameterTuningJobName': TUNING_JOB_NAME_1, + 'HyperParameterTuningJobConfig': { + 'Strategy': 'Random', + 'HyperParameterTuningJobObjective': { + 'Type': 'Maximize', + 'MetricName': 'valid-f1' + } + }, + 'HyperParameterTuningJobStatus': 'Completed', +} \ No newline at end of file diff --git a/tox.ini b/tox.ini index b16c0d2f0b..21e7248da2 100644 --- a/tox.ini +++ b/tox.ini @@ -86,6 +86,7 @@ commands = pip install 'torch==2.0.1+cpu' -f 'https://download.pytorch.org/whl/torch_stable.html' pip install 'torchvision==0.15.2+cpu' -f 'https://download.pytorch.org/whl/torch_stable.html' pip install 'dill>=0.3.8' + pip install 'altair>=5.3' # needed for amtviz pytest {posargs} deps = .[test] From f006bd08ddadbd74c0a19b6b28a055d29fd2bef9 Mon Sep 17 00:00:00 2001 From: pintaoz-aws <167920275+pintaoz-aws@users.noreply.github.com> Date: Mon, 17 Feb 2025 11:58:06 -0800 Subject: [PATCH 02/13] Move RecordSerializer and RecordDeserializer to sagemaker.serializers and sagemaker.deserialzers (#5037) * Move RecordSerializer and RecordDeserializer to sagemaker.serializers and sagemaker.deserializers * fix codestyle * fix test --------- Co-authored-by: pintaoz --- doc/v2.rst | 4 +- src/sagemaker/amazon/common.py | 72 ------------------- .../amazon/factorization_machines.py | 3 +- src/sagemaker/amazon/kmeans.py | 3 +- src/sagemaker/amazon/knn.py | 3 +- src/sagemaker/amazon/lda.py | 3 +- src/sagemaker/amazon/linear_learner.py | 3 +- src/sagemaker/amazon/ntm.py | 3 +- src/sagemaker/amazon/pca.py | 3 +- src/sagemaker/amazon/randomcutforest.py | 3 +- src/sagemaker/base_deserializers.py | 29 ++++++++ src/sagemaker/base_serializers.py | 37 ++++++++++ 
.../cli/compatibility/v2/modifiers/serde.py | 20 +++--- src/sagemaker/deserializers.py | 5 ++ src/sagemaker/serializers.py | 5 ++ .../compatibility/v2/modifiers/test_serde.py | 24 +++---- tests/unit/test_common.py | 4 +- 17 files changed, 114 insertions(+), 110 deletions(-) diff --git a/doc/v2.rst b/doc/v2.rst index 0677594b31..bca663af33 100644 --- a/doc/v2.rst +++ b/doc/v2.rst @@ -324,9 +324,9 @@ The follow serializer/deserializer classes have been renamed and/or moved: +--------------------------------------------------------+-------------------------------------------------------+ | ``sagemaker.predictor._NPYSerializer`` | ``sagemaker.serializers.NumpySerializer`` | +--------------------------------------------------------+-------------------------------------------------------+ -| ``sagemaker.amazon.common.numpy_to_record_serializer`` | ``sagemaker.amazon.common.RecordSerializer`` | +| ``sagemaker.amazon.common.numpy_to_record_serializer`` | ``sagemaker.serializers.RecordSerializer`` | +--------------------------------------------------------+-------------------------------------------------------+ -| ``sagemaker.amazon.common.record_deserializer`` | ``sagemaker.amazon.common.RecordDeserializer`` | +| ``sagemaker.amazon.common.record_deserializer`` | ``sagemaker.deserializers.RecordDeserializer`` | +--------------------------------------------------------+-------------------------------------------------------+ | ``sagemaker.predictor._JsonDeserializer`` | ``sagemaker.deserializers.JSONDeserializer`` | +--------------------------------------------------------+-------------------------------------------------------+ diff --git a/src/sagemaker/amazon/common.py b/src/sagemaker/amazon/common.py index 4632bda628..96a931084c 100644 --- a/src/sagemaker/amazon/common.py +++ b/src/sagemaker/amazon/common.py @@ -13,7 +13,6 @@ """Placeholder docstring""" from __future__ import absolute_import -import io import logging import struct import sys @@ -21,76 +20,9 @@ import numpy as np from sagemaker.amazon.record_pb2 import Record -from sagemaker.deprecations import deprecated_class -from sagemaker.deserializers import SimpleBaseDeserializer -from sagemaker.serializers import SimpleBaseSerializer from sagemaker.utils import DeferredError -class RecordSerializer(SimpleBaseSerializer): - """Serialize a NumPy array for an inference request.""" - - def __init__(self, content_type="application/x-recordio-protobuf"): - """Initialize a ``RecordSerializer`` instance. - - Args: - content_type (str): The MIME type to signal to the inference endpoint when sending - request data (default: "application/x-recordio-protobuf"). - """ - super(RecordSerializer, self).__init__(content_type=content_type) - - def serialize(self, data): - """Serialize a NumPy array into a buffer containing RecordIO records. - - Args: - data (numpy.ndarray): The data to serialize. - - Returns: - io.BytesIO: A buffer containing the data serialized as records. - """ - if len(data.shape) == 1: - data = data.reshape(1, data.shape[0]) - - if len(data.shape) != 2: - raise ValueError( - "Expected a 1D or 2D array, but got a %dD array instead." % len(data.shape) - ) - - buffer = io.BytesIO() - write_numpy_to_dense_tensor(buffer, data) - buffer.seek(0) - - return buffer - - -class RecordDeserializer(SimpleBaseDeserializer): - """Deserialize RecordIO Protobuf data from an inference endpoint.""" - - def __init__(self, accept="application/x-recordio-protobuf"): - """Initialize a ``RecordDeserializer`` instance. 
- - Args: - accept (union[str, tuple[str]]): The MIME type (or tuple of allowable MIME types) that - is expected from the inference endpoint (default: - "application/x-recordio-protobuf"). - """ - super(RecordDeserializer, self).__init__(accept=accept) - - def deserialize(self, data, content_type): - """Deserialize RecordIO Protobuf data from an inference endpoint. - - Args: - data (object): The protobuf message to deserialize. - content_type (str): The MIME type of the data. - Returns: - list: A list of records. - """ - try: - return read_records(data) - finally: - data.close() - - def _write_feature_tensor(resolved_type, record, vector): """Placeholder Docstring""" if resolved_type == "Int32": @@ -288,7 +220,3 @@ def _resolve_type(dtype): if dtype == np.dtype("float32"): return "Float32" raise ValueError("Unsupported dtype {} on array".format(dtype)) - - -numpy_to_record_serializer = deprecated_class(RecordSerializer, "numpy_to_record_serializer") -record_deserializer = deprecated_class(RecordDeserializer, "record_deserializer") diff --git a/src/sagemaker/amazon/factorization_machines.py b/src/sagemaker/amazon/factorization_machines.py index 2b24356ee9..1149cd02b2 100644 --- a/src/sagemaker/amazon/factorization_machines.py +++ b/src/sagemaker/amazon/factorization_machines.py @@ -17,11 +17,12 @@ from sagemaker import image_uris from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase -from sagemaker.amazon.common import RecordSerializer, RecordDeserializer from sagemaker.amazon.hyperparameter import Hyperparameter as hp # noqa from sagemaker.amazon.validation import gt, isin, ge +from sagemaker.deserializers import RecordDeserializer from sagemaker.predictor import Predictor from sagemaker.model import Model +from sagemaker.serializers import RecordSerializer from sagemaker.session import Session from sagemaker.utils import pop_out_unused_kwarg from sagemaker.vpc_utils import VPC_CONFIG_DEFAULT diff --git a/src/sagemaker/amazon/kmeans.py b/src/sagemaker/amazon/kmeans.py index 144cdc934a..25abb9cb27 100644 --- a/src/sagemaker/amazon/kmeans.py +++ b/src/sagemaker/amazon/kmeans.py @@ -17,11 +17,12 @@ from sagemaker import image_uris from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase -from sagemaker.amazon.common import RecordSerializer, RecordDeserializer from sagemaker.amazon.hyperparameter import Hyperparameter as hp # noqa from sagemaker.amazon.validation import gt, isin, ge, le +from sagemaker.deserializers import RecordDeserializer from sagemaker.predictor import Predictor from sagemaker.model import Model +from sagemaker.serializers import RecordSerializer from sagemaker.session import Session from sagemaker.utils import pop_out_unused_kwarg from sagemaker.vpc_utils import VPC_CONFIG_DEFAULT diff --git a/src/sagemaker/amazon/knn.py b/src/sagemaker/amazon/knn.py index f9c73381b4..89ec979e09 100644 --- a/src/sagemaker/amazon/knn.py +++ b/src/sagemaker/amazon/knn.py @@ -17,11 +17,12 @@ from sagemaker import image_uris from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase -from sagemaker.amazon.common import RecordSerializer, RecordDeserializer from sagemaker.amazon.hyperparameter import Hyperparameter as hp # noqa from sagemaker.amazon.validation import ge, isin +from sagemaker.deserializers import RecordDeserializer from sagemaker.predictor import Predictor from sagemaker.model import Model +from sagemaker.serializers import RecordSerializer from sagemaker.session import Session from sagemaker.utils import 
pop_out_unused_kwarg from sagemaker.vpc_utils import VPC_CONFIG_DEFAULT diff --git a/src/sagemaker/amazon/lda.py b/src/sagemaker/amazon/lda.py index bd64d3ae2e..c57da9643e 100644 --- a/src/sagemaker/amazon/lda.py +++ b/src/sagemaker/amazon/lda.py @@ -18,11 +18,12 @@ from sagemaker import image_uris from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase -from sagemaker.amazon.common import RecordSerializer, RecordDeserializer +from sagemaker.deserializers import RecordDeserializer from sagemaker.amazon.hyperparameter import Hyperparameter as hp # noqa from sagemaker.amazon.validation import gt from sagemaker.predictor import Predictor from sagemaker.model import Model +from sagemaker.serializers import RecordSerializer from sagemaker.session import Session from sagemaker.utils import pop_out_unused_kwarg from sagemaker.vpc_utils import VPC_CONFIG_DEFAULT diff --git a/src/sagemaker/amazon/linear_learner.py b/src/sagemaker/amazon/linear_learner.py index 695eb31dc1..4533dcdaea 100644 --- a/src/sagemaker/amazon/linear_learner.py +++ b/src/sagemaker/amazon/linear_learner.py @@ -18,11 +18,12 @@ from sagemaker import image_uris from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase -from sagemaker.amazon.common import RecordSerializer, RecordDeserializer +from sagemaker.deserializers import RecordDeserializer from sagemaker.amazon.hyperparameter import Hyperparameter as hp # noqa from sagemaker.amazon.validation import isin, gt, lt, ge, le from sagemaker.predictor import Predictor from sagemaker.model import Model +from sagemaker.serializers import RecordSerializer from sagemaker.session import Session from sagemaker.utils import pop_out_unused_kwarg from sagemaker.vpc_utils import VPC_CONFIG_DEFAULT diff --git a/src/sagemaker/amazon/ntm.py b/src/sagemaker/amazon/ntm.py index 4267ac8969..41dde1c33c 100644 --- a/src/sagemaker/amazon/ntm.py +++ b/src/sagemaker/amazon/ntm.py @@ -17,11 +17,12 @@ from sagemaker import image_uris from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase -from sagemaker.amazon.common import RecordSerializer, RecordDeserializer from sagemaker.amazon.hyperparameter import Hyperparameter as hp # noqa from sagemaker.amazon.validation import ge, le, isin +from sagemaker.deserializers import RecordDeserializer from sagemaker.predictor import Predictor from sagemaker.model import Model +from sagemaker.serializers import RecordSerializer from sagemaker.session import Session from sagemaker.utils import pop_out_unused_kwarg from sagemaker.vpc_utils import VPC_CONFIG_DEFAULT diff --git a/src/sagemaker/amazon/pca.py b/src/sagemaker/amazon/pca.py index 953fff9d0b..b724435afa 100644 --- a/src/sagemaker/amazon/pca.py +++ b/src/sagemaker/amazon/pca.py @@ -17,11 +17,12 @@ from sagemaker import image_uris from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase -from sagemaker.amazon.common import RecordSerializer, RecordDeserializer from sagemaker.amazon.hyperparameter import Hyperparameter as hp # noqa from sagemaker.amazon.validation import gt, isin +from sagemaker.deserializers import RecordDeserializer from sagemaker.predictor import Predictor from sagemaker.model import Model +from sagemaker.serializers import RecordSerializer from sagemaker.session import Session from sagemaker.utils import pop_out_unused_kwarg from sagemaker.vpc_utils import VPC_CONFIG_DEFAULT diff --git a/src/sagemaker/amazon/randomcutforest.py b/src/sagemaker/amazon/randomcutforest.py index 21d98741b0..d60d5a7741 100644 --- 
a/src/sagemaker/amazon/randomcutforest.py +++ b/src/sagemaker/amazon/randomcutforest.py @@ -17,11 +17,12 @@ from sagemaker import image_uris from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase -from sagemaker.amazon.common import RecordSerializer, RecordDeserializer from sagemaker.amazon.hyperparameter import Hyperparameter as hp # noqa from sagemaker.amazon.validation import ge, le +from sagemaker.deserializers import RecordDeserializer from sagemaker.predictor import Predictor from sagemaker.model import Model +from sagemaker.serializers import RecordSerializer from sagemaker.session import Session from sagemaker.utils import pop_out_unused_kwarg from sagemaker.vpc_utils import VPC_CONFIG_DEFAULT diff --git a/src/sagemaker/base_deserializers.py b/src/sagemaker/base_deserializers.py index a152f0144d..f811ff4e57 100644 --- a/src/sagemaker/base_deserializers.py +++ b/src/sagemaker/base_deserializers.py @@ -23,6 +23,7 @@ import numpy as np from six import with_metaclass +from sagemaker.amazon.common import read_records from sagemaker.utils import DeferredError try: @@ -388,3 +389,31 @@ def deserialize(self, stream, content_type="tensor/pt"): "Unable to deserialize your data to torch.Tensor.\ Please provide custom deserializer in InferenceSpec." ) + + +class RecordDeserializer(SimpleBaseDeserializer): + """Deserialize RecordIO Protobuf data from an inference endpoint.""" + + def __init__(self, accept="application/x-recordio-protobuf"): + """Initialize a ``RecordDeserializer`` instance. + + Args: + accept (union[str, tuple[str]]): The MIME type (or tuple of allowable MIME types) that + is expected from the inference endpoint (default: + "application/x-recordio-protobuf"). + """ + super(RecordDeserializer, self).__init__(accept=accept) + + def deserialize(self, data, content_type): + """Deserialize RecordIO Protobuf data from an inference endpoint. + + Args: + data (object): The protobuf message to deserialize. + content_type (str): The MIME type of the data. + Returns: + list: A list of records. + """ + try: + return read_records(data) + finally: + data.close() diff --git a/src/sagemaker/base_serializers.py b/src/sagemaker/base_serializers.py index 45fea23493..e5232ca160 100644 --- a/src/sagemaker/base_serializers.py +++ b/src/sagemaker/base_serializers.py @@ -22,6 +22,7 @@ from pandas import DataFrame from six import with_metaclass +from sagemaker.amazon.common import write_numpy_to_dense_tensor from sagemaker.utils import DeferredError try: @@ -466,3 +467,39 @@ def serialize(self, data): ) raise ValueError("Object of type %s is not a torch.Tensor" % type(data)) + + +class RecordSerializer(SimpleBaseSerializer): + """Serialize a NumPy array for an inference request.""" + + def __init__(self, content_type="application/x-recordio-protobuf"): + """Initialize a ``RecordSerializer`` instance. + + Args: + content_type (str): The MIME type to signal to the inference endpoint when sending + request data (default: "application/x-recordio-protobuf"). + """ + super(RecordSerializer, self).__init__(content_type=content_type) + + def serialize(self, data): + """Serialize a NumPy array into a buffer containing RecordIO records. + + Args: + data (numpy.ndarray): The data to serialize. + + Returns: + io.BytesIO: A buffer containing the data serialized as records. + """ + if len(data.shape) == 1: + data = data.reshape(1, data.shape[0]) + + if len(data.shape) != 2: + raise ValueError( + "Expected a 1D or 2D array, but got a %dD array instead." 
% len(data.shape) + ) + + buffer = io.BytesIO() + write_numpy_to_dense_tensor(buffer, data) + buffer.seek(0) + + return buffer diff --git a/src/sagemaker/cli/compatibility/v2/modifiers/serde.py b/src/sagemaker/cli/compatibility/v2/modifiers/serde.py index 0e2aabbec4..54bccba55e 100644 --- a/src/sagemaker/cli/compatibility/v2/modifiers/serde.py +++ b/src/sagemaker/cli/compatibility/v2/modifiers/serde.py @@ -51,8 +51,8 @@ "StreamDeserializer": ("sagemaker.deserializers",), "NumpyDeserializer": ("sagemaker.deserializers",), "JSONDeserializer": ("sagemaker.deserializers",), - "RecordSerializer ": ("sagemaker.amazon.common",), - "RecordDeserializer": ("sagemaker.amazon.common",), + "RecordSerializer ": ("sagemaker.serializers",), + "RecordDeserializer": ("sagemaker.deserializers",), } OLD_CLASS_NAME_TO_NEW_CLASS_NAME = { @@ -101,8 +101,8 @@ def node_should_be_modified(self, node): - ``sagemaker.predictor.StreamDeserializer`` - ``sagemaker.predictor._NumpyDeserializer`` - ``sagemaker.predictor._JsonDeserializer`` - - ``sagemaker.amazon.common.numpy_to_record_serializer`` - - ``sagemaker.amazon.common.record_deserializer`` + - ``sagemaker.serializers.numpy_to_record_serializer`` + - ``sagemaker.deserializers.record_deserializer`` Args: node (ast.Call): a node that represents a function call. For more, @@ -128,8 +128,8 @@ def modify_node(self, node): - ``sagemaker.deserializers.StreamDeserializer`` - ``sagemaker.deserializers.NumpyDeserializer`` - ``sagemaker.deserializers._JsonDeserializer`` - - ``sagemaker.amazon.common.RecordSerializer`` - - ``sagemaker.amazon.common.RecordDeserializer`` + - ``sagemaker.serializers.RecordSerializer`` + - ``sagemaker.deserializers.RecordDeserializer`` Args: node (ast.Call): a node that represents a SerDe constructor. @@ -303,8 +303,8 @@ def node_should_be_modified(self, node): """Checks if the import statement imports a SerDe from the ``sagemaker.amazon.common``. This checks for: - - ``sagemaker.amazon.common.numpy_to_record_serializer`` - - ``sagemaker.amazon.common.record_deserializer`` + - ``sagemaker.serializers.numpy_to_record_serializer`` + - ``sagemaker.deserializers.record_deserializer`` Args: node (ast.ImportFrom): a node that represents a ``from ... import ... `` statement. @@ -322,8 +322,8 @@ def modify_node(self, node): """Upgrades the ``numpy_to_record_serializer`` and ``record_deserializer`` imports. This upgrades the classes to (if applicable): - - ``sagemaker.amazon.common.RecordSerializer`` - - ``sagemaker.amazon.common.RecordDeserializer`` + - ``sagemaker.serializers.RecordSerializer`` + - ``sagemaker.deserializers.RecordDeserializer`` Args: node (ast.ImportFrom): a node that represents a ``from ... import ... `` statement. 
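Note: after this patch, ``RecordSerializer`` and ``RecordDeserializer`` keep their old behavior but move to new public import paths, with the ``sagemaker.amazon.common`` names retained only as deprecated aliases. A minimal round-trip sketch under the new layout; the sample array, variable names, and assertion are illustrative only, not part of the patch:

.. code:: python

    import numpy as np

    from sagemaker.deserializers import RecordDeserializer
    from sagemaker.serializers import RecordSerializer

    serializer = RecordSerializer()
    deserializer = RecordDeserializer()

    # Serialize a 2D float32 array into a RecordIO-protobuf buffer.
    # A 1D input would be reshaped to a single row; >2D raises ValueError.
    features = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
    buffer = serializer.serialize(features)

    # deserialize() reads the records and then closes the buffer,
    # returning one protobuf Record per row of the input array.
    records = deserializer.deserialize(buffer, "application/x-recordio-protobuf")
    assert len(records) == 2

The deprecated aliases added at the bottom of ``serializers.py`` and ``deserializers.py`` below (``numpy_to_record_serializer``, ``record_deserializer``) keep old call sites working while emitting a deprecation warning.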
diff --git a/src/sagemaker/deserializers.py b/src/sagemaker/deserializers.py index 957a9dfb0c..dad5137329 100644 --- a/src/sagemaker/deserializers.py +++ b/src/sagemaker/deserializers.py @@ -31,8 +31,10 @@ StreamDeserializer, StringDeserializer, TorchTensorDeserializer, + RecordDeserializer, ) +from sagemaker.deprecations import deprecated_class from sagemaker.jumpstart import artifacts, utils as jumpstart_utils from sagemaker.jumpstart.constants import DEFAULT_JUMPSTART_SAGEMAKER_SESSION from sagemaker.jumpstart.enums import JumpStartModelType @@ -150,3 +152,6 @@ def retrieve_default( model_type=model_type, config_name=config_name, ) + + +record_deserializer = deprecated_class(RecordDeserializer, "record_deserializer") diff --git a/src/sagemaker/serializers.py b/src/sagemaker/serializers.py index ef502dc6f3..be46be0856 100644 --- a/src/sagemaker/serializers.py +++ b/src/sagemaker/serializers.py @@ -30,8 +30,10 @@ SparseMatrixSerializer, TorchTensorSerializer, StringSerializer, + RecordSerializer, ) +from sagemaker.deprecations import deprecated_class from sagemaker.jumpstart import artifacts, utils as jumpstart_utils from sagemaker.jumpstart.constants import DEFAULT_JUMPSTART_SAGEMAKER_SESSION from sagemaker.jumpstart.enums import JumpStartModelType @@ -152,3 +154,6 @@ def retrieve_default( model_type=model_type, config_name=config_name, ) + + +numpy_to_record_serializer = deprecated_class(RecordSerializer, "numpy_to_record_serializer") diff --git a/tests/unit/sagemaker/cli/compatibility/v2/modifiers/test_serde.py b/tests/unit/sagemaker/cli/compatibility/v2/modifiers/test_serde.py index 4c93e18939..5d32030580 100644 --- a/tests/unit/sagemaker/cli/compatibility/v2/modifiers/test_serde.py +++ b/tests/unit/sagemaker/cli/compatibility/v2/modifiers/test_serde.py @@ -75,12 +75,12 @@ def test_constructor_node_should_be_modified(src, expected): ("sagemaker.predictor._NumpyDeserializer()", "deserializers.NumpyDeserializer()"), ("sagemaker.predictor._JsonDeserializer()", "deserializers.JSONDeserializer()"), ( - "sagemaker.amazon.common.numpy_to_record_serializer()", - "sagemaker.amazon.common.RecordSerializer()", + "sagemaker.serializers.numpy_to_record_serializer()", + "sagemaker.serializers.RecordSerializer()", ), ( - "sagemaker.amazon.common.record_deserializer()", - "sagemaker.amazon.common.RecordDeserializer()", + "sagemaker.deserializers.record_deserializer()", + "sagemaker.deserializers.RecordDeserializer()", ), ("_CsvSerializer()", "serializers.CSVSerializer()"), ("_JsonSerializer()", "serializers.JSONSerializer()"), @@ -265,20 +265,12 @@ def test_import_from_amazon_common_node_should_be_modified(import_statement, exp "import_statement, expected", [ ( - "from sagemaker.amazon.common import numpy_to_record_serializer", - "from sagemaker.amazon.common import RecordSerializer", + "from sagemaker.serializers import numpy_to_record_serializer", + "from sagemaker.serializers import RecordSerializer", ), ( - "from sagemaker.amazon.common import record_deserializer", - "from sagemaker.amazon.common import RecordDeserializer", - ), - ( - "from sagemaker.amazon.common import numpy_to_record_serializer, record_deserializer", - "from sagemaker.amazon.common import RecordSerializer, RecordDeserializer", - ), - ( - "from sagemaker.amazon.common import write_spmatrix_to_sparse_tensor, numpy_to_record_serializer", - "from sagemaker.amazon.common import write_spmatrix_to_sparse_tensor, RecordSerializer", + "from sagemaker.deserializers import record_deserializer", + "from sagemaker.deserializers import 
RecordDeserializer", ), ], ) diff --git a/tests/unit/test_common.py b/tests/unit/test_common.py index 8fe7383fe4..9fe49ad448 100644 --- a/tests/unit/test_common.py +++ b/tests/unit/test_common.py @@ -16,12 +16,12 @@ import tempfile import pytest import itertools +from sagemaker.deserializers import RecordDeserializer +from sagemaker.serializers import RecordSerializer from scipy.sparse import coo_matrix from sagemaker.amazon.common import ( - RecordDeserializer, write_numpy_to_dense_tensor, read_recordio, - RecordSerializer, write_spmatrix_to_sparse_tensor, ) from sagemaker.amazon.record_pb2 import Record From 0e18d2fda959916e4cebd8d56ac9d51e771bcb11 Mon Sep 17 00:00:00 2001 From: pintaoz-aws <167920275+pintaoz-aws@users.noreply.github.com> Date: Mon, 17 Feb 2025 11:58:21 -0800 Subject: [PATCH 03/13] Add framework_version to all TensorFlowModel examples (#5038) * Add framework_version to all TensorFlowModel examples * update framework_version to x.x.x --------- Co-authored-by: pintaoz --- .../tensorflow/deploying_tensorflow_serving.rst | 4 ++-- doc/frameworks/tensorflow/using_tf.rst | 13 ++++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/doc/frameworks/tensorflow/deploying_tensorflow_serving.rst b/doc/frameworks/tensorflow/deploying_tensorflow_serving.rst index 1d7344fbbb..a645cd5a62 100644 --- a/doc/frameworks/tensorflow/deploying_tensorflow_serving.rst +++ b/doc/frameworks/tensorflow/deploying_tensorflow_serving.rst @@ -64,7 +64,7 @@ If you already have existing model artifacts in S3, you can skip training and de from sagemaker.tensorflow import TensorFlowModel - model = TensorFlowModel(model_data='s3://mybucket/model.tar.gz', role='MySageMakerRole') + model = TensorFlowModel(model_data='s3://mybucket/model.tar.gz', role='MySageMakerRole', framework_version='x.x.x') predictor = model.deploy(initial_instance_count=1, instance_type='ml.c5.xlarge') @@ -74,7 +74,7 @@ Python-based TensorFlow serving on SageMaker has support for `Elastic Inference from sagemaker.tensorflow import TensorFlowModel - model = TensorFlowModel(model_data='s3://mybucket/model.tar.gz', role='MySageMakerRole') + model = TensorFlowModel(model_data='s3://mybucket/model.tar.gz', role='MySageMakerRole', framework_version='x.x.x') predictor = model.deploy(initial_instance_count=1, instance_type='ml.c5.xlarge', accelerator_type='ml.eia1.medium') diff --git a/doc/frameworks/tensorflow/using_tf.rst b/doc/frameworks/tensorflow/using_tf.rst index 979e86d8b6..5b888f95be 100644 --- a/doc/frameworks/tensorflow/using_tf.rst +++ b/doc/frameworks/tensorflow/using_tf.rst @@ -468,7 +468,7 @@ If you already have existing model artifacts in S3, you can skip training and de from sagemaker.tensorflow import TensorFlowModel - model = TensorFlowModel(model_data='s3://mybucket/model.tar.gz', role='MySageMakerRole') + model = TensorFlowModel(model_data='s3://mybucket/model.tar.gz', role='MySageMakerRole', framework_version='x.x.x') predictor = model.deploy(initial_instance_count=1, instance_type='ml.c5.xlarge') @@ -478,7 +478,7 @@ Python-based TensorFlow serving on SageMaker has support for `Elastic Inference from sagemaker.tensorflow import TensorFlowModel - model = TensorFlowModel(model_data='s3://mybucket/model.tar.gz', role='MySageMakerRole') + model = TensorFlowModel(model_data='s3://mybucket/model.tar.gz', role='MySageMakerRole', framework_version='x.x.x') predictor = model.deploy(initial_instance_count=1, instance_type='ml.c5.xlarge', accelerator_type='ml.eia1.medium') @@ -767,7 +767,8 @@ This customized Python 
code must be named ``inference.py`` and is specified thro model = TensorFlowModel(entry_point='inference.py', model_data='s3://mybucket/model.tar.gz', - role='MySageMakerRole') + role='MySageMakerRole', + framework_version='x.x.x') In the example above, ``inference.py`` is assumed to be a file inside ``model.tar.gz``. If you want to use a local file instead, you must add the ``source_dir`` argument. See the documentation on `TensorFlowModel `_. @@ -923,7 +924,8 @@ processing. There are 2 ways to do this: model = TensorFlowModel(entry_point='inference.py', dependencies=['requirements.txt'], model_data='s3://mybucket/model.tar.gz', - role='MySageMakerRole') + role='MySageMakerRole', + framework_version='x.x.x') 2. If you are working in a network-isolation situation or if you don't @@ -941,7 +943,8 @@ processing. There are 2 ways to do this: model = TensorFlowModel(entry_point='inference.py', dependencies=['/path/to/folder/named/lib'], model_data='s3://mybucket/model.tar.gz', - role='MySageMakerRole') + role='MySageMakerRole', + framework_version='x.x.x') For more information, see: https://github.com/aws/sagemaker-tensorflow-serving-container#prepost-processing From 1ca90d61f8cead3b76fcfa4b5ae20f6e9ba19fdf Mon Sep 17 00:00:00 2001 From: "parknate@" Date: Mon, 17 Feb 2025 16:23:18 -0800 Subject: [PATCH 04/13] Fix hyperparameter strategy docs (#5045) --- src/sagemaker/tuner.py | 46 ++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/src/sagemaker/tuner.py b/src/sagemaker/tuner.py index 00ae78b1ad..17468b5593 100644 --- a/src/sagemaker/tuner.py +++ b/src/sagemaker/tuner.py @@ -18,21 +18,20 @@ import inspect import json import logging - from enum import Enum -from typing import Union, Dict, Optional, List, Set +from typing import Dict, List, Optional, Set, Union import sagemaker from sagemaker.amazon.amazon_estimator import ( - RecordSet, AmazonAlgorithmEstimatorBase, FileSystemRecordSet, + RecordSet, ) from sagemaker.amazon.hyperparameter import Hyperparameter as hp # noqa from sagemaker.analytics import HyperparameterTuningJobAnalytics from sagemaker.deprecations import removed_function -from sagemaker.estimator import Framework, EstimatorBase -from sagemaker.inputs import TrainingInput, FileSystemInput +from sagemaker.estimator import EstimatorBase, Framework +from sagemaker.inputs import FileSystemInput, TrainingInput from sagemaker.job import _Job from sagemaker.jumpstart.utils import ( add_jumpstart_uri_tags, @@ -44,18 +43,17 @@ IntegerParameter, ParameterRange, ) -from sagemaker.workflow.entities import PipelineVariable -from sagemaker.workflow.pipeline_context import runnable_by_pipeline - from sagemaker.session import Session from sagemaker.utils import ( + Tags, base_from_name, base_name_from_image, + format_tags, name_from_base, to_string, - format_tags, - Tags, ) +from sagemaker.workflow.entities import PipelineVariable +from sagemaker.workflow.pipeline_context import runnable_by_pipeline AMAZON_ESTIMATOR_MODULE = "sagemaker" AMAZON_ESTIMATOR_CLS_NAMES = { @@ -133,15 +131,12 @@ def __init__( if warm_start_type not in list(WarmStartTypes): raise ValueError( - "Invalid type: {}, valid warm start types are: {}".format( - warm_start_type, list(WarmStartTypes) - ) + f"Invalid type: {warm_start_type}, " + f"valid warm start types are: {list(WarmStartTypes)}" ) if not parents: - raise ValueError( - "Invalid parents: {}, parents should not be None/empty".format(parents) - ) + raise ValueError(f"Invalid parents: {parents}, parents should 
not be None/empty") self.type = warm_start_type self.parents = set(parents) @@ -1455,9 +1450,7 @@ def _get_best_training_job(self): return tuning_job_describe_result["BestTrainingJob"] except KeyError: raise Exception( - "Best training job not available for tuning job: {}".format( - self.latest_tuning_job.name - ) + f"Best training job not available for tuning job: {self.latest_tuning_job.name}" ) def _ensure_last_tuning_job(self): @@ -1920,8 +1913,11 @@ def create( :meth:`~sagemaker.tuner.HyperparameterTuner.fit` method launches. If not specified, a default job name is generated, based on the training image name and current timestamp. - strategy (str): Strategy to be used for hyperparameter estimations - (default: 'Bayesian'). + strategy (str or PipelineVariable): Strategy to be used for hyperparameter estimations. + More information about different strategies: + https://docs.aws.amazon.com/sagemaker/latest/dg/automatic-model-tuning-how-it-works.html. + Available options are: 'Bayesian', 'Random', 'Hyperband', + 'Grid' (default: 'Bayesian') strategy_config (dict): The configuration for a training job launched by a hyperparameter tuning job. completion_criteria_config (dict): The configuration for tuning job completion criteria. @@ -2080,21 +2076,19 @@ def _validate_dict_argument(cls, name, value, allowed_keys, require_same_keys=Fa return if not isinstance(value, dict): - raise ValueError( - "Argument '{}' must be a dictionary using {} as keys".format(name, allowed_keys) - ) + raise ValueError(f"Argument '{name}' must be a dictionary using {allowed_keys} as keys") value_keys = sorted(value.keys()) if require_same_keys: if value_keys != allowed_keys: raise ValueError( - "The keys of argument '{}' must be the same as {}".format(name, allowed_keys) + f"The keys of argument '{name}' must be the same as {allowed_keys}" ) else: if not set(value_keys).issubset(set(allowed_keys)): raise ValueError( - "The keys of argument '{}' must be a subset of {}".format(name, allowed_keys) + f"The keys of argument '{name}' must be a subset of {allowed_keys}" ) def _add_estimator( From 7709396cbdcd95c3ac0bf87971405386f66e01b3 Mon Sep 17 00:00:00 2001 From: timkuo-amazon Date: Tue, 18 Feb 2025 13:55:05 -0500 Subject: [PATCH 05/13] fix: pass in inference_ami_version to model_based endpoint type (#5043) * fix: pass in inference_ami_version to model_based endpoint type * documentation: update contributing.md w/ venv instructions and pip install fixes --------- Co-authored-by: Zhaoqi --- CONTRIBUTING.md | 8 ++++++-- src/sagemaker/model.py | 4 ++++ tests/unit/sagemaker/model/test_deploy.py | 5 +++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 24226af4ee..65b7c0ee0c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -61,6 +61,10 @@ Before sending us a pull request, please ensure that: 1. Follow the instructions at [Modifying an EBS Volume Using Elastic Volumes (Console)](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/requesting-ebs-volume-modifications.html#modify-ebs-volume) to increase the EBS volume size associated with the newly created EC2 instance. 1. Wait 5-10min for the new EBS volume increase to finalize. 1. Allow EC2 to claim the additional space by stopping and then starting your EC2 host. +2. Set up a venv to manage dependencies: + 1. `python -m venv ~/.venv/myproject-env` to create the venv + 2. `source ~/.venv/myproject-env/bin/activate` to activate the venv + 3. 
`deactivate` to exit the venv ### Pull Down the Code @@ -74,8 +78,8 @@ Before sending us a pull request, please ensure that: ### Run the Unit Tests 1. Install tox using `pip install tox` -1. Install coverage using `pip install .[test]` -1. cd into the sagemaker-python-sdk folder: `cd sagemaker-python-sdk` or `cd /environment/sagemaker-python-sdk` +1. cd into the github project sagemaker-python-sdk folder: `cd sagemaker-python-sdk` or `cd /environment/sagemaker-python-sdk` +1. Install coverage using `pip install '.[test]'` 1. Run the following tox command and verify that all code checks and unit tests pass: `tox tests/unit` 1. You can also run a single test with the following command: `tox -e py310 -- -s -vv <path_to_file>::<test_function_name>` 1. You can run coverage via the runcoverage env: `tox -e runcoverage -- tests/unit` or `tox -e py310 -- tests/unit --cov=sagemaker --cov-append --cov-report xml` diff --git a/src/sagemaker/model.py b/src/sagemaker/model.py index 5494bf5e22..5cc260f3ef 100644 --- a/src/sagemaker/model.py +++ b/src/sagemaker/model.py @@ -1492,6 +1492,9 @@ def deploy( } model_reference_arn (Optional [str]): Hub Content Arn of a Model Reference type content (default: None). + inference_ami_version (Optional [str]): Specifies an option from a collection of preconfigured + Amazon Machine Images (AMIs). For a full list of options, see: + https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_ProductionVariant.html Raises: ValueError: If arguments combination check failed in these circumstances: - If no role is specified or @@ -1743,6 +1746,7 @@ def deploy( model_data_download_timeout=model_data_download_timeout, container_startup_health_check_timeout=container_startup_health_check_timeout, routing_config=routing_config, + inference_ami_version=inference_ami_version, ) if endpoint_name: self.endpoint_name = endpoint_name diff --git a/tests/unit/sagemaker/model/test_deploy.py b/tests/unit/sagemaker/model/test_deploy.py index 6bfb28f684..7b99281b96 100644 --- a/tests/unit/sagemaker/model/test_deploy.py +++ b/tests/unit/sagemaker/model/test_deploy.py @@ -130,6 +130,7 @@ def test_deploy(name_from_base, prepare_container_def, production_variant, sagem model_data_download_timeout=None, container_startup_health_check_timeout=None, routing_config=None, + inference_ami_version=None, ) sagemaker_session.create_model.assert_called_with( @@ -192,6 +193,7 @@ def test_deploy_accelerator_type( model_data_download_timeout=None, container_startup_health_check_timeout=None, routing_config=None, + inference_ami_version=None, ) sagemaker_session.endpoint_from_production_variants.assert_called_with( @@ -519,6 +521,7 @@ def test_deploy_serverless_inference(production_variant, create_sagemaker_model, model_data_download_timeout=None, container_startup_health_check_timeout=None, routing_config=None, + inference_ami_version=None, ) sagemaker_session.endpoint_from_production_variants.assert_called_with( @@ -956,6 +959,7 @@ def test_deploy_customized_volume_size_and_timeout( model_data_download_timeout=model_data_download_timeout_sec, container_startup_health_check_timeout=startup_health_check_timeout_sec, routing_config=None, + inference_ami_version=None, ) sagemaker_session.create_model.assert_called_with( @@ -1006,6 +1010,7 @@ def test_deploy_with_resources(sagemaker_session, name_from_base, production_var model_data_download_timeout=None, container_startup_health_check_timeout=None, routing_config=None, + inference_ami_version=None, ) sagemaker_session.endpoint_from_production_variants.assert_called_with(
name=name_from_base(MODEL_NAME), From 29271ac183bba389ff1dc8263a6e9d18375efd22 Mon Sep 17 00:00:00 2001 From: pintaoz-aws <167920275+pintaoz-aws@users.noreply.github.com> Date: Tue, 18 Feb 2025 12:59:11 -0800 Subject: [PATCH 06/13] Add warning about not supporting torch.nn.SyncBatchNorm (#5046) * Add warning about not supporting * update wording --------- Co-authored-by: pintaoz --- doc/frameworks/pytorch/using_pytorch.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/frameworks/pytorch/using_pytorch.rst b/doc/frameworks/pytorch/using_pytorch.rst index d415f38c27..c50376920e 100644 --- a/doc/frameworks/pytorch/using_pytorch.rst +++ b/doc/frameworks/pytorch/using_pytorch.rst @@ -375,6 +375,9 @@ To initialize distributed training in your script, call `torch.distributed.init_process_group `_ with the desired backend and the rank of the current host. +Warning: Some torch features, such as (and likely not limited to) ``torch.nn.SyncBatchNorm``, +are not supported, and their use with ``init_process_group`` will cause an exception during +distributed training. .. code:: python From 68921da8083f524a0e0b3991a85ffc5b8df9ab2d Mon Sep 17 00:00:00 2001 From: ci Date: Tue, 18 Feb 2025 23:31:28 +0000 Subject: [PATCH 07/13] prepare release v2.239.2 --- CHANGELOG.md | 10 ++++++++++ VERSION | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8cbc9799b8..f55704e324 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,15 @@ # Changelog +## v2.239.2 (2025-02-18) + +### Bug Fixes and Other Changes + + * Add warning about not supporting torch.nn.SyncBatchNorm + * pass in inference_ami_version to model_based endpoint type + * Fix hyperparameter strategy docs + * Add framework_version to all TensorFlowModel examples + * Move RecordSerializer and RecordDeserializer to sagemaker.serializers and sagemaker.deserializers + ## v2.239.1 (2025-02-14) ### Bug Fixes and Other Changes diff --git a/VERSION b/VERSION index 85465416f3..861206c067 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.239.2.dev0 +2.239.2 From fbb92b4bc593721f9e07e0ce7019e3cae2c65fe1 Mon Sep 17 00:00:00 2001 From: ci Date: Tue, 18 Feb 2025 23:31:32 +0000 Subject: [PATCH 08/13] update development version to v2.239.3.dev0 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 861206c067..69500f5e46 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.239.2 +2.239.3.dev0 From f4a02f634f3545167037b76c9cae55bef13aa581 Mon Sep 17 00:00:00 2001 From: sagemaker-bot Date: Wed, 19 Feb 2025 14:18:15 +0000 Subject: [PATCH 09/13] change: update image_uri_configs 02-19-2025 06:18:15 PST --- .../image_uri_config/tensorflow.json | 168 ++++++++++++++++++ 1 file changed, 168 insertions(+) diff --git a/src/sagemaker/image_uri_config/tensorflow.json b/src/sagemaker/image_uri_config/tensorflow.json index 52c70d4021..37fa7ee46d 100644 --- a/src/sagemaker/image_uri_config/tensorflow.json +++ b/src/sagemaker/image_uri_config/tensorflow.json @@ -641,6 +641,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -656,6 +657,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -682,6 +684,7 @@ "ap-southeast-3": "907027046896",
"ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -697,6 +700,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -723,6 +727,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -738,6 +743,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -764,6 +770,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -779,6 +786,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -805,6 +813,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -820,6 +829,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -846,6 +856,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -861,6 +872,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -887,6 +899,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -902,6 +915,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -928,6 +942,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -943,6 +958,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -969,6 +985,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", 
"cn-north-1": "727897471807", @@ -984,6 +1001,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -1010,6 +1028,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -1025,6 +1044,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -1051,6 +1071,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -1066,6 +1087,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -1092,6 +1114,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -1107,6 +1130,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -1133,6 +1157,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -1148,6 +1173,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -1174,6 +1200,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -1189,6 +1216,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -1215,6 +1243,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -1230,6 +1259,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -1256,6 +1286,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -1271,6 +1302,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": 
"217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -1297,6 +1329,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -1312,6 +1345,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -1338,6 +1372,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -1353,6 +1388,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -1379,6 +1415,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -1394,6 +1431,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -1420,6 +1458,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -1435,6 +1474,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -1461,6 +1501,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -1476,6 +1517,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -1502,6 +1544,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -1517,6 +1560,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -1543,6 +1587,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -1558,6 +1603,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", 
@@ -1584,6 +1630,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -1599,6 +1646,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -1625,6 +1673,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -1640,6 +1689,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -1666,6 +1716,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -1681,6 +1732,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -1707,6 +1759,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -1722,6 +1775,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -1748,6 +1802,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -1763,6 +1818,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -1789,6 +1845,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -1804,6 +1861,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -1830,6 +1888,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -1845,6 +1904,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -1871,6 +1931,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + 
"ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -1886,6 +1947,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -1912,6 +1974,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -1927,6 +1990,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -1953,6 +2017,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -1968,6 +2033,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -1994,6 +2060,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -2009,6 +2076,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -2035,6 +2103,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -2050,6 +2119,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -2076,6 +2146,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -2091,6 +2162,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -2117,6 +2189,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -2132,6 +2205,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -2158,6 +2232,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -2173,6 
+2248,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -2201,6 +2277,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -2216,6 +2293,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -2244,6 +2322,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -2259,6 +2338,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -2283,6 +2363,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -2298,6 +2379,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -2342,6 +2424,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -2357,6 +2440,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -2389,6 +2473,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -2404,6 +2489,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -2436,6 +2522,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -2451,6 +2538,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -2483,6 +2571,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -2498,6 +2587,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", 
"sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -2530,6 +2620,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -2545,6 +2636,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -2983,6 +3075,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -2998,6 +3091,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -3028,6 +3122,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -3043,6 +3138,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -3074,6 +3170,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -3089,6 +3186,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -3120,6 +3218,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -3135,6 +3234,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -3166,6 +3266,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -3181,6 +3282,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -3212,6 +3314,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -3227,6 +3330,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -3257,6 +3361,7 @@ "ap-southeast-3": 
"907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -3272,6 +3377,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -3302,6 +3408,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -3317,6 +3424,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -3347,6 +3455,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -3362,6 +3471,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -3392,6 +3502,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -3407,6 +3518,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -3437,6 +3549,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -3452,6 +3565,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -3482,6 +3596,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -3497,6 +3612,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -3527,6 +3643,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -3542,6 +3659,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -3572,6 +3690,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": 
"763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -3587,6 +3706,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -3617,6 +3737,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -3632,6 +3753,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -3661,6 +3783,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -3676,6 +3799,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -3705,6 +3829,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -3720,6 +3845,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -3749,6 +3875,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -3764,6 +3891,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -3793,6 +3921,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -3808,6 +3937,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -3837,6 +3967,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -3852,6 +3983,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -3881,6 +4013,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -3896,6 +4029,7 @@ "il-central-1": "780543022126", 
"me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -3925,6 +4059,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -3940,6 +4075,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -3969,6 +4105,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -3984,6 +4121,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -4013,6 +4151,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -4028,6 +4167,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -4057,6 +4197,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -4072,6 +4213,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -4101,6 +4243,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -4116,6 +4259,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -4145,6 +4289,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -4160,6 +4305,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -4189,6 +4335,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -4204,6 +4351,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": 
"763104351884", "us-east-2": "763104351884", @@ -4233,6 +4381,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -4248,6 +4397,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -4277,6 +4427,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -4292,6 +4443,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -4321,6 +4473,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -4336,6 +4489,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -4365,6 +4519,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -4380,6 +4535,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -4409,6 +4565,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -4424,6 +4581,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -4453,6 +4611,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -4468,6 +4627,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -4495,6 +4655,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -4510,6 +4671,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -4541,6 +4703,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", 
"ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -4556,6 +4719,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -4587,6 +4751,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -4602,6 +4767,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", @@ -4629,6 +4795,7 @@ "ap-southeast-3": "907027046896", "ap-southeast-4": "457447274322", "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", "ca-central-1": "763104351884", "ca-west-1": "204538143572", "cn-north-1": "727897471807", @@ -4644,6 +4811,7 @@ "il-central-1": "780543022126", "me-central-1": "914824155844", "me-south-1": "217643126080", + "mx-central-1": "637423239942", "sa-east-1": "763104351884", "us-east-1": "763104351884", "us-east-2": "763104351884", From 4aba04a125bcb1586f0b051de373e2decdd868e9 Mon Sep 17 00:00:00 2001 From: Uemit Yoldas Date: Fri, 25 Apr 2025 20:17:23 +0200 Subject: [PATCH 10/13] fix: codestyle, type hints, license, and docstrings --- src/sagemaker/amtviz/__init__.py | 28 ++--- src/sagemaker/amtviz/job_metrics.py | 55 ++++----- src/sagemaker/amtviz/visualization.py | 142 +++++++++++++++-------- src/sagemaker/tuner.py | 14 ++- tests/unit/test_tuner_visualize.py | 21 +++- tests/unit/tuner_visualize_test_utils.py | 48 ++++++-- 6 files changed, 201 insertions(+), 107 deletions(-) diff --git a/src/sagemaker/amtviz/__init__.py b/src/sagemaker/amtviz/__init__.py index 9e6dd1a64b..2b05e7b0cf 100644 --- a/src/sagemaker/amtviz/__init__.py +++ b/src/sagemaker/amtviz/__init__.py @@ -1,17 +1,17 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# SPDX-License-Identifier: MIT-0 - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this -# software and associated documentation files (the "Software"), to deal in the Software -# without restriction, including without limitation the rights to use, copy, modify, -# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+"""Placeholder docstring""" +from __future__ import absolute_import from sagemaker.amtviz.visualization import visualize_tuning_job -__all__ = ['visualize_tuning_job'] \ No newline at end of file +__all__ = ['visualize_tuning_job'] diff --git a/src/sagemaker/amtviz/job_metrics.py b/src/sagemaker/amtviz/job_metrics.py index 6005f886f8..f84457f9da 100644 --- a/src/sagemaker/amtviz/job_metrics.py +++ b/src/sagemaker/amtviz/job_metrics.py @@ -1,18 +1,17 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# SPDX-License-Identifier: MIT-0 - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -# IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +"""Helper functions to retrieve job metrics from CloudWatch.""" +from __future__ import absolute_import from datetime import datetime, timedelta from typing import Callable, List, Optional, Tuple, Dict, Any @@ -20,10 +19,10 @@ import os from pathlib import Path +import logging import pandas as pd import numpy as np import boto3 -import logging logger = logging.getLogger(__name__) @@ -58,7 +57,8 @@ def inner(*args: Any, **kwargs: Any) -> pd.DataFrame: logger.debug("H", end="") df["ts"] = pd.to_datetime(df["ts"]) df["ts"] = df["ts"].dt.tz_localize(None) - df["rel_ts"] = pd.to_datetime(df["rel_ts"]) # pyright: ignore [reportIndexIssue, reportOptionalSubscript] + # pyright: ignore [reportIndexIssue, reportOptionalSubscript] + df["rel_ts"] = pd.to_datetime(df["rel_ts"]) df["rel_ts"] = df["rel_ts"].dt.tz_localize(None) return df except KeyError: @@ -66,8 +66,7 @@ def inner(*args: Any, **kwargs: Any) -> pd.DataFrame: pass # nosec b110 - doesn't matter why we could not load it. 
except BaseException as e: - logger.error("\nException", type(e), e) - pass # continue with calling the outer function + logger.error("\nException: %s - %s", type(e), e) logger.debug("M", end="") df = outer(*args, **kwargs) @@ -82,6 +81,7 @@ def inner(*args: Any, **kwargs: Any) -> pd.DataFrame: def _metric_data_query_tpl(metric_name: str, dim_name: str, dim_value: str) -> Dict[str, Any]: + """Returns a CloudWatch metric data query template.""" return { "Id": metric_name.lower().replace(":", "_").replace("-", "_"), "MetricStat": { @@ -100,10 +100,11 @@ def _metric_data_query_tpl(metric_name: str, dim_name: str, dim_value: str) -> D def _get_metric_data( - queries: List[Dict[str, Any]], - start_time: datetime, + queries: List[Dict[str, Any]], + start_time: datetime, end_time: datetime ) -> pd.DataFrame: + """Fetches CloudWatch metrics between timestamps and returns a DataFrame with selected columns.""" start_time = start_time - timedelta(hours=1) end_time = end_time + timedelta(hours=1) response = cw.get_metric_data(MetricDataQueries=queries, StartTime=start_time, EndTime=end_time) @@ -111,7 +112,7 @@ def _get_metric_data( df = pd.DataFrame() if "MetricDataResults" not in response: return df - + for metric_data in response["MetricDataResults"]: values = metric_data["Values"] ts = np.array(metric_data["Timestamps"], dtype=np.datetime64) @@ -130,11 +131,11 @@ def _get_metric_data( @disk_cache def _collect_metrics( - dimensions: List[Tuple[str, str]], - start_time: datetime, + dimensions: List[Tuple[str, str]], + start_time: datetime, end_time: Optional[datetime] ) -> pd.DataFrame: - + """Collects SageMaker training job metrics from CloudWatch based on given dimensions and time range.""" df = pd.DataFrame() for dim_name, dim_value in dimensions: response = cw.list_metrics( @@ -158,8 +159,8 @@ def _collect_metrics( def get_cw_job_metrics( - job_name: str, - start_time: Optional[datetime] = None, + job_name: str, + start_time: Optional[datetime] = None, end_time: Optional[datetime] = None ) -> pd.DataFrame: """Retrieves CloudWatch metrics for a SageMaker training job. @@ -182,4 +183,4 @@ def get_cw_job_metrics( # If not given, use reasonable defaults for start and end time start_time = start_time or datetime.now() - timedelta(hours=4) end_time = end_time or start_time + timedelta(hours=4) - return _collect_metrics(dimensions, start_time, end_time) \ No newline at end of file + return _collect_metrics(dimensions, start_time, end_time) diff --git a/src/sagemaker/amtviz/visualization.py b/src/sagemaker/amtviz/visualization.py index 377a19304d..bb66195771 100644 --- a/src/sagemaker/amtviz/visualization.py +++ b/src/sagemaker/amtviz/visualization.py @@ -1,28 +1,56 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# SPDX-License-Identifier: MIT-0 - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this -# software and associated documentation files (the "Software"), to deal in the Software -# without restriction, including without limitation the rights to use, copy, modify, -# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -# PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - -import sagemaker -import boto3 -from typing import Union, List, Optional, Tuple, Dict, Any -import altair as alt -import pandas as pd -import numpy as np +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +""" +This module provides visualization capabilities for SageMaker hyperparameter tuning jobs. + +It contains utilities to create interactive visualizations of hyperparameter tuning results +using Altair charts. The module enables users to analyze and understand the performance +of their hyperparameter optimization experiments through various visual representations +including: +- Progress of objective metrics over time +- Distribution of results +- Relationship between hyperparameters and objective values +- Training job metrics and instance utilization +- Comparative analysis across multiple tuning jobs + +Main Features: + - Visualize single or multiple hyperparameter tuning jobs + - Display training job metrics from CloudWatch + - Support for both completed and in-progress tuning jobs + - Interactive filtering and highlighting of data points + - CPU, memory, and GPU utilization visualization + - Advanced visualization options for detailed analysis + +Primary Classes and Functions: + - visualize_tuning_job: Main function to create visualizations for tuning jobs + - create_charts: Core chart creation functionality + - get_job_analytics_data: Retrieves and processes tuning job data + +Dependencies: + - altair: For creating interactive visualizations + - pandas: For data manipulation and analysis + - boto3: For AWS service interaction + - sagemaker: For accessing SageMaker resources +""" +from __future__ import absolute_import + +from typing import Union, List, Optional, Tuple import os import warnings import logging +import altair as alt +import pandas as pd +import numpy as np +import boto3 +import sagemaker from sagemaker.amtviz.job_metrics import get_cw_job_metrics warnings.filterwarnings("ignore") @@ -36,7 +64,7 @@ alt.data_transformers.disable_max_rows() altair_renderer = os.getenv("ALTAIR_RENDERER", "default") -logger.info(f"Setting altair renderer to {altair_renderer}.") +logger.info("Setting altair renderer to %s.", altair_renderer) alt.renderers.enable(altair_renderer) @@ -44,6 +72,7 @@ def _columnize(charts: List[alt.Chart], cols: int = 2) -> alt.VConcatChart: + """Arrange charts in columns.""" return alt.vconcat(*[alt.hconcat(*charts[i : i + cols]) for i in range(0, len(charts), cols)]) @@ -72,7 +101,7 @@ def visualize_tuning_job( trials_df, tuned_parameters, objective_name, is_minimize = get_job_analytics_data(tuning_jobs) try: - from IPython import get_ipython + from IPython import get_ipython, display if get_ipython(): # Running in a Jupyter Notebook display(trials_df.head(10)) @@ -84,7 +113,7 @@ def visualize_tuning_job( logger.info(trials_df.head(10).to_string()) full_df = ( - _prepare_consolidated_df(trials_df, objective_name) if not trials_only else 
pd.DataFrame() + _prepare_consolidated_df(trials_df) if not trials_only else pd.DataFrame() ) trials_df.columns = trials_df.columns.map(_clean_parameter_name) @@ -104,8 +133,7 @@ def visualize_tuning_job( if return_dfs: return charts, trials_df, full_df - else: - return charts + return charts def create_charts( @@ -212,9 +240,7 @@ def create_charts( # If we have multiple tuning jobs, we also want to be able # to discriminate based on the individual tuning job, so # we just treat them as an additional tuning parameter - tuning_parameters = tuning_parameters.copy() - if multiple_tuning_jobs: - tuning_parameters.append("TuningJobName") + tuning_parameters = tuning_parameters.copy() + (["TuningJobName"] if multiple_tuning_jobs else []) # If we use early stopping and at least some jobs were # stopped early, we want to be able to discriminate @@ -292,7 +318,7 @@ def render_detail_charts(): if discrete: # Individually coloring the values only if we don't already # use the colors to show the different tuning jobs - logger.info(f"{parameter_type}, {tuning_parameter}") + logger.info("%s, %s", parameter_type, tuning_parameter) if not multiple_tuning_jobs: charts[-1] = charts[-1].encode(color=f"{tuning_parameter}:N") charts[-1] = ( @@ -383,15 +409,14 @@ def render_progress_chart(): ) .encode( x=alt.X("TrainingStartTime:T", scale=alt.Scale(nice=True)), - y=alt.Y(f"cum_objective:Q", scale=alt.Scale(zero=False, padding=1)), + y=alt.Y("cum_objective:Q", scale=alt.Scale(zero=False, padding=1)), stroke=alt.Stroke("TuningJobName:N", legend=None), ) ) if advanced: return cum_obj_chart + progress_chart - else: - return progress_chart + return progress_chart progress_chart = render_progress_chart() @@ -403,7 +428,7 @@ def render_progress_chart(): .transform_density(objective_name, bandwidth=0.01) .mark_area() .encode( - x=alt.X(f"value:Q", scale=objective_scale, title=objective_name), + x=alt.X("value:Q", scale=objective_scale, title=objective_name), y="density:Q", ) ) @@ -586,12 +611,20 @@ def render_progress_chart(): return overview_row & detail_rows & job_level_rows -# Ensure proper parameter name characters for altair 5+ def _clean_parameter_name(s): + """ Helper method to ensure proper parameter name characters for altair 5+ """ return s.replace(":", "_").replace(".", "_") def _prepare_training_job_metrics(jobs): + """Fetches and combines CloudWatch metrics for multiple training jobs. + + Args: + jobs (list): List of (job_name, start_time, end_time) tuples. + + Returns: + pandas.DataFrame: Combined metrics DataFrame with 'TrainingJobName' column. + """ df = pd.DataFrame() for job_name, start_time, end_time in jobs: job_df = get_cw_job_metrics( @@ -600,7 +633,7 @@ def _prepare_training_job_metrics(jobs): end_time=pd.Timestamp(end_time) + pd.DateOffset(hours=8), ) if job_df is None: - logger.info(f"No CloudWatch metrics for {job_name}. Skipping.") + logger.info("No CloudWatch metrics for %s. 
Skipping.", job_name) continue job_df["TrainingJobName"] = job_name @@ -608,7 +641,8 @@ def _prepare_training_job_metrics(jobs): return df -def _prepare_consolidated_df(trials_df, objective_name): +def _prepare_consolidated_df(trials_df): + """Merges training job metrics with trials data into a consolidated DataFrame.""" if trials_df.empty: return pd.DataFrame() @@ -630,6 +664,9 @@ def _prepare_consolidated_df(trials_df, objective_name): def _get_df(tuning_job_name, filter_out_stopped=False): + """Retrieves hyperparameter tuning job results and returns preprocessed DataFrame with + tuning metrics and parameters.""" + tuner = sagemaker.HyperparameterTuningJobAnalytics(tuning_job_name) df = tuner.dataframe() @@ -670,8 +707,9 @@ def _get_df(tuning_job_name, filter_out_stopped=False): # A float then? df[parameter_name] = df[parameter_name].astype(float) - except Exception as e: - # Trouble, as this was not a number just pretending to be a string, but an actual string with charracters. Leaving the value untouched + except Exception: + # Trouble, as this was not a number just pretending to be a string, but an actual string with + # characters. Leaving the value untouched # Ex: Caught exception could not convert string to float: 'sqrt' pass @@ -695,7 +733,7 @@ def _get_tuning_job_names_with_parents(tuning_job_names): for cfg in tuning_job_result["WarmStartConfig"]["ParentHyperParameterTuningJobs"] ] if parent_jobs: - logger.info(f'Tuning job {tuning_job_name}\'s parents: {", ".join(parent_jobs)}') + logger.info("Tuning job %s's parents: %s", tuning_job_name, ", ".join(parent_jobs)) all_tuning_job_names.extend([tuning_job_name, *parent_jobs]) # return de-duplicated tuning job names @@ -703,6 +741,17 @@ def _get_tuning_job_names_with_parents(tuning_job_names): def get_job_analytics_data(tuning_job_names): + """Retrieves and processes analytics data from hyperparameter tuning jobs. + + Args: + tuning_job_names (str or list): Single tuning job name or list of names/tuner objects. + + Returns: + tuple: (DataFrame with training results, tuned parameters list, objective name, is_minimize flag). + + Raises: + ValueError: If tuning jobs have different objectives or optimization directions. 
+ """ if not isinstance(tuning_job_names, list): tuning_job_names = [tuning_job_names] @@ -729,7 +778,7 @@ def get_job_analytics_data(tuning_job_names): HyperParameterTuningJobName=tuning_job_name ) status = tuning_job_result["HyperParameterTuningJobStatus"] - logger.info(f"Tuning job {tuning_job_name:25s} status: {status}") + logger.info("Tuning job %-25s status: %s", tuning_job_name, status) df = pd.concat([df, _get_df(tuning_job_name)]) @@ -786,15 +835,16 @@ def get_job_analytics_data(tuning_job_names): df[objective_name] = df.pop("FinalObjectiveValue") # Fix potential issue with dates represented as objects, instead of a timestamp - # This can in other cases lead to https://www.markhneedham.com/blog/2020/01/10/altair-typeerror-object-type-date-not-json-serializable/ + # This can in other cases lead to: + # https://www.markhneedham.com/blog/2020/01/10/altair-typeerror-object-type-date-not-json-serializable/ # Have only observed this for TrainingEndTime, but will be on the lookout dfor TrainingStartTime as well now df["TrainingEndTime"] = pd.to_datetime(df["TrainingEndTime"]) df["TrainingStartTime"] = pd.to_datetime(df["TrainingStartTime"]) logger.info("") - logger.info(f"Number of training jobs with valid objective: {len(df)}") - logger.info(f"Lowest: {min(df[objective_name])} Highest {max(df[objective_name])}") + logger.info("Number of training jobs with valid objective: %d", len(df)) + logger.info("Lowest: %s Highest %s", min(df[objective_name]), max(df[objective_name])) tuned_parameters = [_clean_parameter_name(tp) for tp in tuned_parameters] - return df, tuned_parameters, objective_name, is_minimize \ No newline at end of file + return df, tuned_parameters, objective_name, is_minimize diff --git a/src/sagemaker/tuner.py b/src/sagemaker/tuner.py index 17468b5593..35d468feeb 100644 --- a/src/sagemaker/tuner.py +++ b/src/sagemaker/tuner.py @@ -2119,7 +2119,10 @@ def _add_estimator( @staticmethod def visualize_jobs( - tuning_jobs: Union[str, 'sagemaker.tuner.HyperparameterTuner', List[Union[str, 'sagemaker.tuner.HyperparameterTuner']]], + tuning_jobs: Union[ + str, 'sagemaker.tuner.HyperparameterTuner', + List[Union[str, 'sagemaker.tuner.HyperparameterTuner']] + ], return_dfs: bool = False, job_metrics: Optional[List[str]] = None, trials_only: bool = False, @@ -2128,7 +2131,8 @@ def visualize_jobs( """Create an interactive visualization based on altair charts using the sagemaker.amtviz package. Args: - tuning_jobs (str or sagemaker.tuner.HyperparameterTuner or list[str, sagemaker.tuner.HyperparameterTuner]): One or more tuning jobs to create + tuning_jobs (str or sagemaker.tuner.HyperparameterTuner or list[str, sagemaker.tuner.HyperparameterTuner]): + One or more tuning jobs to create visualization for. return_dfs: (bool): Option to return trials and full dataframe. job_metrics: (list[str]): Metrics to be used in charts. @@ -2160,8 +2164,10 @@ def visualize_jobs( ) def visualize_job( - self, return_dfs: bool = False, - job_metrics: Optional[List[str]] = None, trials_only: bool = False, advanced: bool = False + self, return_dfs: bool = False, + job_metrics: Optional[List[str]] = None, + trials_only: bool = False, + advanced: bool = False ): """Convenience method on instance level for visualize_jobs(). See static method visualize_jobs(). 
diff --git a/tests/unit/test_tuner_visualize.py b/tests/unit/test_tuner_visualize.py index ea9835a408..8f17f4d2db 100644 --- a/tests/unit/test_tuner_visualize.py +++ b/tests/unit/test_tuner_visualize.py @@ -10,6 +10,9 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Tests related to amtviz.visualization""" +from __future__ import absolute_import + import pandas as pd import pytest from mock import Mock, patch, MagicMock @@ -24,7 +27,6 @@ HYPERPARAMETER_RANGES, METRIC_DEFINITIONS ) -from sagemaker.session_settings import SessionSettings # Visualization specific imports from sagemaker.amtviz.visualization import visualize_tuning_job, get_job_analytics_data from tests.unit.tuner_visualize_test_utils import ( @@ -40,13 +42,13 @@ FULL_DF_COLUMNS, TRIALS_DF_TRAINING_JOB_NAMES, TRIALS_DF_TRAINING_JOB_STATUSES, - TUNING_JOB_NAMES, TRIALS_DF_VALID_F1_VALUES, FILTERED_TUNING_JOB_DF_DATA, TUNING_RANGES ) import altair as alt + def create_sagemaker_session(): boto_mock = Mock(name="boto_session") sms = Mock( @@ -59,6 +61,7 @@ def create_sagemaker_session(): sms.sagemaker_config = {} return sms + @pytest.fixture() def sagemaker_session(): return create_sagemaker_session() @@ -82,6 +85,7 @@ def tuner(estimator): estimator, OBJECTIVE_METRIC_NAME, HYPERPARAMETER_RANGES, METRIC_DEFINITIONS ) + @pytest.fixture() def tuner2(estimator): return HyperparameterTuner( @@ -130,7 +134,7 @@ def test_visualize_jobs_altair_not_installed(capsys): # Test basic method call if altair is installed def test_visualize_jobs_altair_installed(mock_visualize_tuning_job): # Mock successful import of altair - with patch("importlib.import_module") as mock_import: + with patch("importlib.import_module"): result = HyperparameterTuner.visualize_jobs(TUNING_JOB_NAMES) assert result == "mock_chart" @@ -157,6 +161,7 @@ def test_visualize_jobs(mock_visualize_tuning_job): advanced=True ) + # Test the instance method visualize_job() on a stubbed tuner object def test_visualize_job(tuner, mock_visualize_tuning_job): # With default parameters @@ -180,6 +185,7 @@ def test_visualize_job(tuner, mock_visualize_tuning_job): advanced=True ) + # Test the static method visualize_jobs() on multiple stubbed tuner objects def test_visualize_multiple_jobs(tuner, tuner2, mock_visualize_tuning_job): result = HyperparameterTuner.visualize_jobs([tuner, tuner2]) @@ -202,6 +208,7 @@ def test_visualize_multiple_jobs(tuner, tuner2, mock_visualize_tuning_job): advanced=True ) + # Test direct method call for basic chart return type and default render settings def test_visualize_tuning_job_analytics_data_results_in_altair_chart(mock_get_job_analytics_data): result = visualize_tuning_job("mock_job") @@ -259,7 +266,11 @@ def test_visualize_tuning_job_trials_only(mock_get_job_analytics_data): # Check if all parameters are correctly passed to the (mocked) create_charts method @patch("sagemaker.amtviz.visualization.create_charts") -def test_visualize_tuning_job_with_full_df(mock_create_charts, mock_get_job_analytics_data, mock_prepare_consolidated_df): +def test_visualize_tuning_job_with_full_df( + mock_create_charts, + mock_get_job_analytics_data, + mock_prepare_consolidated_df +): mock_create_charts.return_value = alt.Chart() visualize_tuning_job("dummy_job") @@ -300,4 +311,4 @@ def test_get_job_analytics_data(mock_hyperparameter_tuning_job_analytics): assert df.columns.tolist() == TRIALS_DF_COLUMNS 
assert tuned_parameters == TUNED_PARAMETERS assert objective_name == OBJECTIVE_NAME - assert is_minimize is False \ No newline at end of file + assert is_minimize is False diff --git a/tests/unit/tuner_visualize_test_utils.py b/tests/unit/tuner_visualize_test_utils.py index 3f66794a00..17a993717c 100644 --- a/tests/unit/tuner_visualize_test_utils.py +++ b/tests/unit/tuner_visualize_test_utils.py @@ -10,10 +10,12 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +from __future__ import absolute_import TRIALS_DF_COLUMNS = [ 'criterion', 'max-depth', 'min-samples-leaf', 'min-weight-fraction-leaf', 'n-estimators', 'TrainingJobName', - 'TrainingJobStatus', 'TrainingStartTime', 'TrainingEndTime', 'TrainingElapsedTimeSeconds', 'TuningJobName', 'valid-f1' + 'TrainingJobStatus', + 'TrainingStartTime', 'TrainingEndTime', 'TrainingElapsedTimeSeconds', 'TuningJobName', 'valid-f1' ] FULL_DF_COLUMNS = [ @@ -34,9 +36,9 @@ TUNING_JOB_NAMES = [TUNING_JOB_NAME_1, TUNING_JOB_NAME_2] TRIALS_DF_VALID_F1_VALUES = [0.950, 0.896] -FULL_DF_COLUMNS = ['value', 'ts', 'label', 'rel_ts', 'TrainingJobName', 'criterion', 'max-depth', 'min-samples-leaf', 'min-weight-fraction-leaf', - 'n-estimators', 'TrainingJobStatus', 'TrainingStartTime', 'TrainingEndTime', 'TrainingElapsedTimeSeconds', 'TuningJobName', 'valid-f1'] - +FULL_DF_COLUMNS = ['value', 'ts', 'label', 'rel_ts', 'TrainingJobName', 'criterion', 'max-depth', 'min-samples-leaf', + 'min-weight-fraction-leaf', 'n-estimators', 'TrainingJobStatus', 'TrainingStartTime', + 'TrainingEndTime', 'TrainingElapsedTimeSeconds', 'TuningJobName', 'valid-f1'] TUNED_PARAMETERS = ['n-estimators', 'max-depth', 'min-samples-leaf', 'min-weight-fraction-leaf', 'criterion'] OBJECTIVE_NAME = 'valid-f1' @@ -90,12 +92,36 @@ 'TuningJobName': [TUNING_JOB_NAME_1, TUNING_JOB_NAME_2] } -TUNING_RANGES = [{'Name': 'n-estimators', 'MinValue': '1', 'MaxValue': '200', 'ScalingType': 'Auto'}, - {'Name': 'max-depth', 'MinValue': '1', 'MaxValue': '20', 'ScalingType': 'Auto'}, - {'Name': 'min-samples-leaf', 'MinValue': '1', 'MaxValue': '10', 'ScalingType': 'Auto'}, - {'Name': 'min-weight-fraction-leaf', 'MinValue': '0.01', 'MaxValue': '0.5', 'ScalingType': 'Auto'}, - {'Name': 'criterion', 'Values': ['"gini"', '"entropy"', '"log_loss"']}] - +TUNING_RANGES = [ + { + 'Name': 'n-estimators', + 'MinValue': '1', + 'MaxValue': '200', + 'ScalingType': 'Auto' + }, + { + 'Name': 'max-depth', + 'MinValue': '1', + 'MaxValue': '20', + 'ScalingType': 'Auto' + }, + { + 'Name': 'min-samples-leaf', + 'MinValue': '1', + 'MaxValue': '10', + 'ScalingType': 'Auto' + }, + { + 'Name': 'min-weight-fraction-leaf', + 'MinValue': '0.01', + 'MaxValue': '0.5', + 'ScalingType': 'Auto' + }, + { + 'Name': 'criterion', + 'Values': ['"gini"', '"entropy"', '"log_loss"'] + } +] TUNING_JOB_RESULT = { 'HyperParameterTuningJobName': TUNING_JOB_NAME_1, @@ -107,4 +133,4 @@ } }, 'HyperParameterTuningJobStatus': 'Completed', -} \ No newline at end of file +} From 4b7180d74b111882374f1dcfe695cf99a562d54c Mon Sep 17 00:00:00 2001 From: Uemit Yoldas Date: Fri, 23 May 2025 09:25:29 +0200 Subject: [PATCH 11/13] documentation: add docstring for amtviz module --- src/sagemaker/amtviz/__init__.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/sagemaker/amtviz/__init__.py b/src/sagemaker/amtviz/__init__.py index 2b05e7b0cf..4cf92c7268 100644 --- 
a/src/sagemaker/amtviz/__init__.py +++ b/src/sagemaker/amtviz/__init__.py @@ -10,7 +10,16 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. -"""Placeholder docstring""" +"""Amazon SageMaker Automatic Model Tuning Visualization module. + +This module provides visualization capabilities for SageMaker hyperparameter tuning jobs. +It enables users to create interactive visualizations to analyze and understand the +performance of hyperparameter optimization experiments. + +Example: + >>> from sagemaker.amtviz import visualize_tuning_job + >>> visualize_tuning_job('my-tuning-job') +""" from __future__ import absolute_import from sagemaker.amtviz.visualization import visualize_tuning_job From 75631ef1488287aee8ace021a4090565fac40c29 Mon Sep 17 00:00:00 2001 From: Uemit Yoldas Date: Tue, 27 May 2025 21:13:43 +0200 Subject: [PATCH 12/13] fix: fix docstyle and flake8 errors --- src/sagemaker/amtviz/job_metrics.py | 4 +-- src/sagemaker/amtviz/visualization.py | 42 +++++++++++++++------------ src/sagemaker/tuner.py | 9 +++--- 3 files changed, 30 insertions(+), 25 deletions(-) diff --git a/src/sagemaker/amtviz/job_metrics.py b/src/sagemaker/amtviz/job_metrics.py index f84457f9da..d726ee514a 100644 --- a/src/sagemaker/amtviz/job_metrics.py +++ b/src/sagemaker/amtviz/job_metrics.py @@ -104,7 +104,7 @@ def _get_metric_data( start_time: datetime, end_time: datetime ) -> pd.DataFrame: - """Fetches CloudWatch metrics between timestamps and returns a DataFrame with selected columns.""" + """Fetches CloudWatch metrics between timestamps, returns a DataFrame with selected columns.""" start_time = start_time - timedelta(hours=1) end_time = end_time + timedelta(hours=1) response = cw.get_metric_data(MetricDataQueries=queries, StartTime=start_time, EndTime=end_time) @@ -135,7 +135,7 @@ def _collect_metrics( start_time: datetime, end_time: Optional[datetime] ) -> pd.DataFrame: - """Collects SageMaker training job metrics from CloudWatch based on given dimensions and time range.""" + """Collects SageMaker training job metrics from CloudWatch for dimensions and time range.""" df = pd.DataFrame() for dim_name, dim_value in dimensions: response = cw.list_metrics( diff --git a/src/sagemaker/amtviz/visualization.py b/src/sagemaker/amtviz/visualization.py index bb66195771..7c129d29ba 100644 --- a/src/sagemaker/amtviz/visualization.py +++ b/src/sagemaker/amtviz/visualization.py @@ -8,8 +8,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. -""" -This module provides visualization capabilities for SageMaker hyperparameter tuning jobs. +"""This module provides visualization capabilities for SageMaker hyperparameter tuning jobs. It contains utilities to create interactive visualizations of hyperparameter tuning results using Altair charts. The module enables users to analyze and understand the performance @@ -83,8 +82,7 @@ def visualize_tuning_job( trials_only: bool = False, advanced: bool = False, ) -> Union[alt.Chart, Tuple[alt.Chart, pd.DataFrame, pd.DataFrame]]: - """ - Visualize SageMaker hyperparameter tuning jobs. + """Visualize SageMaker hyperparameter tuning jobs. 
Args: tuning_jobs: Single tuning job or list of tuning jobs (name or HyperparameterTuner object) @@ -147,8 +145,7 @@ def create_charts( color_trials: bool = False, advanced: bool = False, ) -> alt.Chart: - """ - Create visualization charts for hyperparameter tuning results. + """Create visualization charts for hyperparameter tuning results. Args: trials_df: DataFrame containing trials data @@ -240,7 +237,8 @@ def create_charts( # If we have multiple tuning jobs, we also want to be able # to discriminate based on the individual tuning job, so # we just treat them as an additional tuning parameter - tuning_parameters = tuning_parameters.copy() + (["TuningJobName"] if multiple_tuning_jobs else []) + tuning_job_param = ["TuningJobName"] if multiple_tuning_jobs else [] + tuning_parameters = tuning_parameters.copy() + tuning_job_param # If we use early stopping and at least some jobs were # stopped early, we want to be able to discriminate @@ -331,7 +329,7 @@ def render_detail_charts(): bandwidth=0.01, groupby=[tuning_parameter], # https://github.com/vega/altair/issues/3203#issuecomment-2141558911 - # Specifying extent no longer necessary (>5.1.2). Leaving the work around in it for now. + # Specifying extent no longer necessary (>5.1.2). extent=[ trials_df[objective_name].min(), trials_df[objective_name].max(), @@ -612,7 +610,7 @@ def render_progress_chart(): def _clean_parameter_name(s): - """ Helper method to ensure proper parameter name characters for altair 5+ """ + """Helper method to ensure proper parameter name characters for altair 5+""" return s.replace(":", "_").replace(".", "_") @@ -664,8 +662,10 @@ def _prepare_consolidated_df(trials_df): def _get_df(tuning_job_name, filter_out_stopped=False): - """Retrieves hyperparameter tuning job results and returns preprocessed DataFrame with - tuning metrics and parameters.""" + """Retrieves hyperparameter tuning job results and returns preprocessed DataFrame. + + Returns a DataFrame containing tuning metrics and parameters for the specified job. + """ tuner = sagemaker.HyperparameterTuningJobAnalytics(tuning_job_name) @@ -707,10 +707,12 @@ def _get_df(tuning_job_name, filter_out_stopped=False): # A float then? df[parameter_name] = df[parameter_name].astype(float) - except Exception: - # Trouble, as this was not a number just pretending to be a string, but an actual string with - # characters. Leaving the value untouched - # Ex: Caught exception could not convert string to float: 'sqrt' + except (ValueError, TypeError, AttributeError): + # Catch exceptions that might occur during string manipulation or type conversion + # - ValueError: Could not convert string to float/int + # - TypeError: Object doesn't support the operation + # - AttributeError: Object doesn't have replace method + # Leaving the value untouched pass return df @@ -747,7 +749,7 @@ def get_job_analytics_data(tuning_job_names): tuning_job_names (str or list): Single tuning job name or list of names/tuner objects. Returns: - tuple: (DataFrame with training results, tuned parameters list, objective name, is_minimize flag). + tuple: (DataFrame with training results, tuned params list, objective name, is_minimize). Raises: ValueError: If tuning jobs have different objectives or optimization directions. 
@@ -828,7 +830,8 @@ def get_job_analytics_data(tuning_job_names): if isinstance(val, str) and val.startswith('"'): try: df[column_name] = df[column_name].apply(lambda x: int(x.replace('"', ""))) - except: # noqa: E722 nosec b110 if we fail, we just continue with what we had + except (ValueError, TypeError, AttributeError): + # noqa: E722 nosec b110 if we fail, we just continue with what we had pass # Value is not an int, but a string df = df.sort_values("FinalObjectiveValue", ascending=is_minimize) @@ -836,8 +839,9 @@ def get_job_analytics_data(tuning_job_names): # Fix potential issue with dates represented as objects, instead of a timestamp # This can in other cases lead to: - # https://www.markhneedham.com/blog/2020/01/10/altair-typeerror-object-type-date-not-json-serializable/ - # Have only observed this for TrainingEndTime, but will be on the lookout dfor TrainingStartTime as well now + # https://www.markhneedham.com/blog/2020/01/10/altair-typeerror-object-type- + # date-not-json-serializable/ + # Seen this for TrainingEndTime, but will watch TrainingStartTime as well now. df["TrainingEndTime"] = pd.to_datetime(df["TrainingEndTime"]) df["TrainingStartTime"] = pd.to_datetime(df["TrainingStartTime"]) diff --git a/src/sagemaker/tuner.py b/src/sagemaker/tuner.py index 35d468feeb..06e1b8d615 100644 --- a/src/sagemaker/tuner.py +++ b/src/sagemaker/tuner.py @@ -2128,8 +2128,8 @@ def visualize_jobs( trials_only: bool = False, advanced: bool = False ): - """Create an interactive visualization based on altair charts using the sagemaker.amtviz - package. + """Create interactive visualization via altair charts using the sagemaker.amtviz package. + Args: tuning_jobs (str or sagemaker.tuner.HyperparameterTuner or list[str, sagemaker.tuner.HyperparameterTuner]): One or more tuning jobs to create @@ -2147,9 +2147,9 @@ def visualize_jobs( importlib.import_module('altair') except ImportError: - print("Altair is not installed. To use the visualization feature, please install Altair:") + print("Altair is not installed. Install Altair to use the visualization feature:") print(" pip install altair") - print("After installing Altair, you can use the methods visualize_jobs or visualize_job.") + print("After installing Altair, use the methods visualize_jobs or visualize_job.") return None # If altair is installed, proceed with visualization @@ -2170,6 +2170,7 @@ def visualize_job( advanced: bool = False ): """Convenience method on instance level for visualize_jobs(). + See static method visualize_jobs(). 
""" return HyperparameterTuner.visualize_jobs( From facaf27af2a82171e3c78c5d2c419edee08de245 Mon Sep 17 00:00:00 2001 From: Uemit Yoldas Date: Fri, 27 Jun 2025 21:53:41 +0200 Subject: [PATCH 13/13] fix: code reformat using black --- src/sagemaker/amtviz/__init__.py | 3 +- src/sagemaker/amtviz/job_metrics.py | 12 +- src/sagemaker/amtviz/visualization.py | 21 ++- src/sagemaker/tuner.py | 14 +- tests/unit/test_tuner_visualize.py | 89 +++++----- tests/unit/tuner_visualize_test_utils.py | 201 +++++++++++++---------- 6 files changed, 178 insertions(+), 162 deletions(-) diff --git a/src/sagemaker/amtviz/__init__.py b/src/sagemaker/amtviz/__init__.py index 4cf92c7268..8554b32c4a 100644 --- a/src/sagemaker/amtviz/__init__.py +++ b/src/sagemaker/amtviz/__init__.py @@ -23,4 +23,5 @@ from __future__ import absolute_import from sagemaker.amtviz.visualization import visualize_tuning_job -__all__ = ['visualize_tuning_job'] + +__all__ = ["visualize_tuning_job"] diff --git a/src/sagemaker/amtviz/job_metrics.py b/src/sagemaker/amtviz/job_metrics.py index d726ee514a..b99886941f 100644 --- a/src/sagemaker/amtviz/job_metrics.py +++ b/src/sagemaker/amtviz/job_metrics.py @@ -100,9 +100,7 @@ def _metric_data_query_tpl(metric_name: str, dim_name: str, dim_value: str) -> D def _get_metric_data( - queries: List[Dict[str, Any]], - start_time: datetime, - end_time: datetime + queries: List[Dict[str, Any]], start_time: datetime, end_time: datetime ) -> pd.DataFrame: """Fetches CloudWatch metrics between timestamps, returns a DataFrame with selected columns.""" start_time = start_time - timedelta(hours=1) @@ -131,9 +129,7 @@ def _get_metric_data( @disk_cache def _collect_metrics( - dimensions: List[Tuple[str, str]], - start_time: datetime, - end_time: Optional[datetime] + dimensions: List[Tuple[str, str]], start_time: datetime, end_time: Optional[datetime] ) -> pd.DataFrame: """Collects SageMaker training job metrics from CloudWatch for dimensions and time range.""" df = pd.DataFrame() @@ -159,9 +155,7 @@ def _collect_metrics( def get_cw_job_metrics( - job_name: str, - start_time: Optional[datetime] = None, - end_time: Optional[datetime] = None + job_name: str, start_time: Optional[datetime] = None, end_time: Optional[datetime] = None ) -> pd.DataFrame: """Retrieves CloudWatch metrics for a SageMaker training job. 
diff --git a/src/sagemaker/amtviz/visualization.py b/src/sagemaker/amtviz/visualization.py index 7c129d29ba..7f09117d1e 100644 --- a/src/sagemaker/amtviz/visualization.py +++ b/src/sagemaker/amtviz/visualization.py @@ -100,6 +100,7 @@ def visualize_tuning_job( try: from IPython import get_ipython, display + if get_ipython(): # Running in a Jupyter Notebook display(trials_df.head(10)) @@ -110,9 +111,7 @@ def visualize_tuning_job( # Not running in a Jupyter Notebook logger.info(trials_df.head(10).to_string()) - full_df = ( - _prepare_consolidated_df(trials_df) if not trials_only else pd.DataFrame() - ) + full_df = _prepare_consolidated_df(trials_df) if not trials_only else pd.DataFrame() trials_df.columns = trials_df.columns.map(_clean_parameter_name) full_df.columns = full_df.columns.map(_clean_parameter_name) @@ -216,9 +215,11 @@ def create_charts( jobs_props["stroke"] = alt.condition( job_highlight_selection, alt.StrokeValue("gold"), - alt.Stroke("TuningJobName:N", legend=None) - if multiple_tuning_jobs - else alt.StrokeValue("white"), + ( + alt.Stroke("TuningJobName:N", legend=None) + if multiple_tuning_jobs + else alt.StrokeValue("white") + ), ) opacity = alt.condition(brush, alt.value(1.0), alt.value(0.35)) @@ -759,9 +760,11 @@ def get_job_analytics_data(tuning_job_names): # Ensure to create a list of tuning job names (strings) tuning_job_names = [ - tuning_job.describe()["HyperParameterTuningJobName"] - if isinstance(tuning_job, sagemaker.tuner.HyperparameterTuner) - else tuning_job + ( + tuning_job.describe()["HyperParameterTuningJobName"] + if isinstance(tuning_job, sagemaker.tuner.HyperparameterTuner) + else tuning_job + ) for tuning_job in tuning_job_names ] diff --git a/src/sagemaker/tuner.py b/src/sagemaker/tuner.py index 06e1b8d615..d9b052770b 100644 --- a/src/sagemaker/tuner.py +++ b/src/sagemaker/tuner.py @@ -2120,13 +2120,14 @@ def _add_estimator( @staticmethod def visualize_jobs( tuning_jobs: Union[ - str, 'sagemaker.tuner.HyperparameterTuner', - List[Union[str, 'sagemaker.tuner.HyperparameterTuner']] + str, + "sagemaker.tuner.HyperparameterTuner", + List[Union[str, "sagemaker.tuner.HyperparameterTuner"]], ], return_dfs: bool = False, job_metrics: Optional[List[str]] = None, trials_only: bool = False, - advanced: bool = False + advanced: bool = False, ): """Create interactive visualization via altair charts using the sagemaker.amtviz package. @@ -2144,7 +2145,7 @@ def visualize_jobs( """ try: # Check if altair is installed - importlib.import_module('altair') + importlib.import_module("altair") except ImportError: print("Altair is not installed. Install Altair to use the visualization feature:") @@ -2164,10 +2165,11 @@ def visualize_jobs( ) def visualize_job( - self, return_dfs: bool = False, + self, + return_dfs: bool = False, job_metrics: Optional[List[str]] = None, trials_only: bool = False, - advanced: bool = False + advanced: bool = False, ): """Convenience method on instance level for visualize_jobs(). 
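For completeness, a short sketch of driving the visualization module directly. The ALTAIR_RENDERER environment variable is read at import time (see the os.getenv() call in visualization.py earlier in this series), so it must be set before the import; the renderer name below is illustrative and must be one registered with the installed Altair version.

    import os

    # Must be set before sagemaker.amtviz is imported.
    os.environ["ALTAIR_RENDERER"] = "mimetype"

    from sagemaker.amtviz import visualize_tuning_job

    # trials_only=True skips consolidating CloudWatch training-job metrics
    # and builds charts from the tuning-job analytics alone.
    chart = visualize_tuning_job("my-tuning-job", trials_only=True)
    chart  # displays the interactive chart in a notebook
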
diff --git a/tests/unit/test_tuner_visualize.py b/tests/unit/test_tuner_visualize.py index 8f17f4d2db..8397ae8e25 100644 --- a/tests/unit/test_tuner_visualize.py +++ b/tests/unit/test_tuner_visualize.py @@ -19,14 +19,13 @@ import sagemaker from sagemaker.estimator import Estimator from sagemaker.session_settings import SessionSettings -from sagemaker.tuner import ( - HyperparameterTuner -) +from sagemaker.tuner import HyperparameterTuner from tests.unit.tuner_test_utils import ( OBJECTIVE_METRIC_NAME, HYPERPARAMETER_RANGES, - METRIC_DEFINITIONS + METRIC_DEFINITIONS, ) + # Visualization specific imports from sagemaker.amtviz.visualization import visualize_tuning_job, get_job_analytics_data from tests.unit.tuner_visualize_test_utils import ( @@ -44,7 +43,7 @@ TRIALS_DF_TRAINING_JOB_STATUSES, TRIALS_DF_VALID_F1_VALUES, FILTERED_TUNING_JOB_DF_DATA, - TUNING_RANGES + TUNING_RANGES, ) import altair as alt @@ -56,7 +55,7 @@ def create_sagemaker_session(): boto_session=boto_mock, config=None, local_mode=False, - settings=SessionSettings() + settings=SessionSettings(), ) sms.sagemaker_config = {} return sms @@ -103,12 +102,7 @@ def mock_visualize_tuning_job(): @pytest.fixture def mock_get_job_analytics_data(): with patch("sagemaker.amtviz.visualization.get_job_analytics_data") as mock: - mock.return_value = ( - pd.DataFrame(TRIALS_DF_DATA), - TUNED_PARAMETERS, - OBJECTIVE_NAME, - True - ) + mock.return_value = (pd.DataFrame(TRIALS_DF_DATA), TUNED_PARAMETERS, OBJECTIVE_NAME, True) yield mock @@ -144,21 +138,22 @@ def test_visualize_jobs(mock_visualize_tuning_job): result = HyperparameterTuner.visualize_jobs(TUNING_JOB_NAMES) assert result == "mock_chart" mock_visualize_tuning_job.assert_called_once_with( - TUNING_JOB_NAMES, - return_dfs=False, - job_metrics=None, - trials_only=False, - advanced=False + TUNING_JOB_NAMES, return_dfs=False, job_metrics=None, trials_only=False, advanced=False ) # Vary the parameters and check if they have been passed correctly result = HyperparameterTuner.visualize_jobs( - [TUNING_JOB_NAME_1], return_dfs=True, job_metrics="job_metrics", trials_only=True, advanced=True) + [TUNING_JOB_NAME_1], + return_dfs=True, + job_metrics="job_metrics", + trials_only=True, + advanced=True, + ) mock_visualize_tuning_job.assert_called_with( [TUNING_JOB_NAME_1], return_dfs=True, job_metrics="job_metrics", trials_only=True, - advanced=True + advanced=True, ) @@ -168,21 +163,15 @@ def test_visualize_job(tuner, mock_visualize_tuning_job): result = tuner.visualize_job() assert result == "mock_chart" mock_visualize_tuning_job.assert_called_once_with( - tuner, - return_dfs=False, - job_metrics=None, - trials_only=False, - advanced=False + tuner, return_dfs=False, job_metrics=None, trials_only=False, advanced=False ) # With varying parameters - result = tuner.visualize_job(return_dfs=True, job_metrics="job_metrics", trials_only=True, advanced=True) + result = tuner.visualize_job( + return_dfs=True, job_metrics="job_metrics", trials_only=True, advanced=True + ) assert result == "mock_chart" mock_visualize_tuning_job.assert_called_with( - tuner, - return_dfs=True, - job_metrics="job_metrics", - trials_only=True, - advanced=True + tuner, return_dfs=True, job_metrics="job_metrics", trials_only=True, advanced=True ) @@ -191,21 +180,22 @@ def test_visualize_multiple_jobs(tuner, tuner2, mock_visualize_tuning_job): result = HyperparameterTuner.visualize_jobs([tuner, tuner2]) assert result == "mock_chart" mock_visualize_tuning_job.assert_called_once_with( - [tuner, tuner2], - return_dfs=False, - job_metrics=None, - trials_only=False, - advanced=False + [tuner, tuner2], return_dfs=False, job_metrics=None, trials_only=False, advanced=False ) # Vary the parameters and check if they have been passed correctly result = HyperparameterTuner.visualize_jobs( - [[tuner, tuner2]], return_dfs=True, job_metrics="job_metrics", trials_only=True, advanced=True) + [[tuner, tuner2]], + return_dfs=True, + job_metrics="job_metrics", + trials_only=True, + advanced=True, + ) mock_visualize_tuning_job.assert_called_with( [[tuner, tuner2]], return_dfs=True, job_metrics="job_metrics", trials_only=True, - advanced=True + advanced=True, ) @@ -226,10 +216,10 @@ def test_visualize_tuning_job_return_dfs(mock_get_job_analytics_data, mock_prepa assert isinstance(trials_df, pd.DataFrame) assert trials_df.shape == (2, len(TRIALS_DF_COLUMNS)) assert trials_df.columns.tolist() == TRIALS_DF_COLUMNS - assert trials_df['TrainingJobName'].tolist() == TRIALS_DF_TRAINING_JOB_NAMES - assert trials_df['TrainingJobStatus'].tolist() == TRIALS_DF_TRAINING_JOB_STATUSES - assert trials_df['TuningJobName'].tolist() == TUNING_JOB_NAMES - assert trials_df['valid-f1'].tolist() == TRIALS_DF_VALID_F1_VALUES + assert trials_df["TrainingJobName"].tolist() == TRIALS_DF_TRAINING_JOB_NAMES + assert trials_df["TrainingJobStatus"].tolist() == TRIALS_DF_TRAINING_JOB_STATUSES + assert trials_df["TuningJobName"].tolist() == TUNING_JOB_NAMES + assert trials_df["valid-f1"].tolist() == TRIALS_DF_VALID_F1_VALUES # Assertions for full_df assert isinstance(full_df, pd.DataFrame) @@ -244,7 +234,7 @@ def test_visualize_tuning_job_empty_trials(mock_get_job_analytics_data): pd.DataFrame(), # empty dataframe TUNED_PARAMETERS, OBJECTIVE_NAME, - True + True, ) charts = visualize_tuning_job("empty_job") assert charts.empty @@ -267,9 +257,7 @@ def test_visualize_tuning_job_trials_only(mock_get_job_analytics_data): # Check if all parameters are correctly passed to the (mocked) create_charts method @patch("sagemaker.amtviz.visualization.create_charts") def test_visualize_tuning_job_with_full_df( - mock_create_charts, - mock_get_job_analytics_data, - mock_prepare_consolidated_df + mock_create_charts, mock_get_job_analytics_data, mock_prepare_consolidated_df ): mock_create_charts.return_value = alt.Chart() visualize_tuning_job("dummy_job") @@ -298,10 +286,15 @@ def test_visualize_tuning_job_with_full_df( @patch("sagemaker.HyperparameterTuningJobAnalytics") def test_get_job_analytics_data(mock_hyperparameter_tuning_job_analytics): # Mock sagemaker's describe_hyper_parameter_tuning_job and some internal methods - sagemaker.amtviz.visualization.sm.describe_hyper_parameter_tuning_job = Mock(return_value=TUNING_JOB_RESULT) + sagemaker.amtviz.visualization.sm.describe_hyper_parameter_tuning_job = Mock( + return_value=TUNING_JOB_RESULT + ) sagemaker.amtviz.visualization._get_tuning_job_names_with_parents = Mock( - return_value=[TUNING_JOB_NAME_1, TUNING_JOB_NAME_2]) - sagemaker.amtviz.visualization._get_df = Mock(return_value=pd.DataFrame(FILTERED_TUNING_JOB_DF_DATA)) + return_value=[TUNING_JOB_NAME_1, TUNING_JOB_NAME_2] + ) + sagemaker.amtviz.visualization._get_df = Mock( + return_value=pd.DataFrame(FILTERED_TUNING_JOB_DF_DATA) + ) mock_tuning_job_instance = MagicMock() mock_hyperparameter_tuning_job_analytics.return_value = mock_tuning_job_instance mock_tuning_job_instance.tuning_ranges.values.return_value = TUNING_RANGES diff --git a/tests/unit/tuner_visualize_test_utils.py b/tests/unit/tuner_visualize_test_utils.py index 17a993717c..d9524ff7e6 100644 ---
a/tests/unit/tuner_visualize_test_utils.py +++ b/tests/unit/tuner_visualize_test_utils.py @@ -13,124 +13,147 @@ from __future__ import absolute_import TRIALS_DF_COLUMNS = [ - 'criterion', 'max-depth', 'min-samples-leaf', 'min-weight-fraction-leaf', 'n-estimators', 'TrainingJobName', - 'TrainingJobStatus', - 'TrainingStartTime', 'TrainingEndTime', 'TrainingElapsedTimeSeconds', 'TuningJobName', 'valid-f1' + "criterion", + "max-depth", + "min-samples-leaf", + "min-weight-fraction-leaf", + "n-estimators", + "TrainingJobName", + "TrainingJobStatus", + "TrainingStartTime", + "TrainingEndTime", + "TrainingElapsedTimeSeconds", + "TuningJobName", + "valid-f1", ] FULL_DF_COLUMNS = [ - 'value', 'ts', 'label', 'rel_ts', 'TrainingJobName', 'criterion', 'max-depth', 'min-samples-leaf', - 'min-weight-fraction-leaf', 'n-estimators', 'TrainingJobStatus', 'TrainingStartTime', 'TrainingEndTime', - 'TrainingElapsedTimeSeconds', 'TuningJobName', 'valid-f1' + "value", + "ts", + "label", + "rel_ts", + "TrainingJobName", + "criterion", + "max-depth", + "min-samples-leaf", + "min-weight-fraction-leaf", + "n-estimators", + "TrainingJobStatus", + "TrainingStartTime", + "TrainingEndTime", + "TrainingElapsedTimeSeconds", + "TuningJobName", + "valid-f1", ] TRIALS_DF_TRAINING_JOB_NAMES = [ - 'random-240712-1545-019-4ac17a84', 'random-240712-1545-021-fcd64dc1' + "random-240712-1545-019-4ac17a84", + "random-240712-1545-021-fcd64dc1", ] -TRIALS_DF_TRAINING_JOB_STATUSES = ['Completed', 'Completed'] +TRIALS_DF_TRAINING_JOB_STATUSES = ["Completed", "Completed"] -TUNING_JOB_NAME_1 = 'random-240712-1500' -TUNING_JOB_NAME_2 = 'bayesian-240712-1600' +TUNING_JOB_NAME_1 = "random-240712-1500" +TUNING_JOB_NAME_2 = "bayesian-240712-1600" TUNING_JOB_NAMES = [TUNING_JOB_NAME_1, TUNING_JOB_NAME_2] TRIALS_DF_VALID_F1_VALUES = [0.950, 0.896] -FULL_DF_COLUMNS = ['value', 'ts', 'label', 'rel_ts', 'TrainingJobName', 'criterion', 'max-depth', 'min-samples-leaf', - 'min-weight-fraction-leaf', 'n-estimators', 'TrainingJobStatus', 'TrainingStartTime', - 'TrainingEndTime', 'TrainingElapsedTimeSeconds', 'TuningJobName', 'valid-f1'] -TUNED_PARAMETERS = ['n-estimators', 'max-depth', 'min-samples-leaf', 'min-weight-fraction-leaf', 'criterion'] -OBJECTIVE_NAME = 'valid-f1' +TUNED_PARAMETERS = [ + "n-estimators", + "max-depth", + "min-samples-leaf", + "min-weight-fraction-leaf", + "criterion", +] +OBJECTIVE_NAME = "valid-f1" TRIALS_DF_DATA = { - 'criterion': ['gini', 'log_loss'], - 'max-depth': [18.0, 8.0], - 'min-samples-leaf': [3.0, 10.0], - 'min-weight-fraction-leaf': [0.011596, 0.062067], - 'n-estimators': [110.0, 18.0], - 'TrainingJobName': ['random-240712-1545-019-4ac17a84', 'random-240712-1545-021-fcd64dc1'], - 'TrainingJobStatus': ['Completed', 'Completed'], - 'TrainingStartTime': ['2024-07-12 17:55:59+02:00', '2024-07-12 17:56:50+02:00'], - 'TrainingEndTime': ['2024-07-12 17:56:43+02:00', '2024-07-12 17:57:29+02:00'], - 'TrainingElapsedTimeSeconds': [44.0, 39.0], - 'TuningJobName': TUNING_JOB_NAMES, - 'valid-f1': [0.950, 0.896] + "criterion": ["gini", "log_loss"], + "max-depth": [18.0, 8.0], + "min-samples-leaf": [3.0, 10.0], + "min-weight-fraction-leaf": [0.011596, 0.062067], + "n-estimators": [110.0, 18.0], + "TrainingJobName": ["random-240712-1545-019-4ac17a84", "random-240712-1545-021-fcd64dc1"], + "TrainingJobStatus": ["Completed", "Completed"], + "TrainingStartTime": ["2024-07-12 17:55:59+02:00", "2024-07-12 17:56:50+02:00"], + "TrainingEndTime": ["2024-07-12 17:56:43+02:00", "2024-07-12 17:57:29+02:00"], + "TrainingElapsedTimeSeconds": [44.0, 39.0], + "TuningJobName": TUNING_JOB_NAMES, + "valid-f1": [0.950, 0.896], } FULL_DF_DATA = { - 'value': [0.951000, 0.950000], - 'ts': ['2024-07-12 15:56:00', '2024-07-12 15:56:00'], - 'label': ['valid-precision', 'valid-recall'], - 'rel_ts': ['1970-01-01 01:00:00', '1970-01-01 01:00:00'], - 'TrainingJobName': ['random-240712-1545-019-4ac17a84', 'random-240712-1545-019-4ac17a84'], - 'criterion': ['gini', 'gini'], - 'max-depth': [18.0, 18.0], - 'min-samples-leaf': [3.0, 3.0], - 'min-weight-fraction-leaf': [0.011596, 0.011596], - 'n-estimators': [110.0, 110.0], - 'TrainingJobStatus': ['Completed', 'Completed'], - 'TrainingStartTime': ['2024-07-12 17:55:59+02:00', '2024-07-12 17:55:59+02:00'], - 'TrainingEndTime': ['2024-07-12 17:56:43+02:00', '2024-07-12 17:56:43+02:00'], - 'TrainingElapsedTimeSeconds': [44.0, 45.0], - 'TuningJobName': ['random-240712-1545', 'random-240712-1545'], - 'valid-f1': [0.9500, 0.9500] + "value": [0.951000, 0.950000], + "ts": ["2024-07-12 15:56:00", "2024-07-12 15:56:00"], + "label": ["valid-precision", "valid-recall"], + "rel_ts": ["1970-01-01 01:00:00", "1970-01-01 01:00:00"], + "TrainingJobName": ["random-240712-1545-019-4ac17a84", "random-240712-1545-019-4ac17a84"], + "criterion": ["gini", "gini"], + "max-depth": [18.0, 18.0], + "min-samples-leaf": [3.0, 3.0], + "min-weight-fraction-leaf": [0.011596, 0.011596], + "n-estimators": [110.0, 110.0], + "TrainingJobStatus": ["Completed", "Completed"], + "TrainingStartTime": ["2024-07-12 17:55:59+02:00", "2024-07-12 17:55:59+02:00"], + "TrainingEndTime": ["2024-07-12 17:56:43+02:00", "2024-07-12 17:56:43+02:00"], + "TrainingElapsedTimeSeconds": [44.0, 45.0], + "TuningJobName": ["random-240712-1545", "random-240712-1545"], + "valid-f1": [0.9500, 0.9500], } FILTERED_TUNING_JOB_DF_DATA = { - 'criterion': ['log_loss', 'gini'], - 'max-depth': [10.0, 16.0], - 'min-samples-leaf': [7.0, 2.0], - 'min-weight-fraction-leaf': [0.160910, 0.069803], - 'n-estimators': [67.0, 79.0], - 'TrainingJobName': ['random-240712-1545-050-c0b5c10a', 'random-240712-1545-049-2db2ec05'], - 'TrainingJobStatus': ['Completed', 'Completed'], - 'FinalObjectiveValue': [0.8190, 0.8910], - 'TrainingStartTime': ['2024-07-12 18:09:48+02:00', '2024-07-12 18:09:45+02:00'], - 'TrainingEndTime': ['2024-07-12 18:10:28+02:00', '2024-07-12 18:10:23+02:00'], - 'TrainingElapsedTimeSeconds': [40.0, 38.0], - 'TuningJobName': [TUNING_JOB_NAME_1, TUNING_JOB_NAME_2] + "criterion": ["log_loss", "gini"], + "max-depth": [10.0, 16.0], + "min-samples-leaf": [7.0, 2.0], + "min-weight-fraction-leaf": [0.160910, 0.069803], + "n-estimators": [67.0, 79.0], + "TrainingJobName": ["random-240712-1545-050-c0b5c10a", "random-240712-1545-049-2db2ec05"], + "TrainingJobStatus": ["Completed", "Completed"], + "FinalObjectiveValue": [0.8190, 0.8910], + "TrainingStartTime": ["2024-07-12 18:09:48+02:00", "2024-07-12 18:09:45+02:00"], + "TrainingEndTime": ["2024-07-12 18:10:28+02:00", "2024-07-12 18:10:23+02:00"], + "TrainingElapsedTimeSeconds": [40.0, 38.0], + "TuningJobName": [TUNING_JOB_NAME_1, TUNING_JOB_NAME_2], } TUNING_RANGES = [ + {"Name": "n-estimators", "MinValue": "1", "MaxValue": "200", "ScalingType": "Auto"}, + {"Name": "max-depth", "MinValue": "1", "MaxValue": "20", "ScalingType": "Auto"}, + {"Name": "min-samples-leaf", "MinValue": "1", "MaxValue": "10", "ScalingType": "Auto"}, { - 'Name': 'n-estimators', - 'MinValue': '1', - 'MaxValue': '200', - 'ScalingType': 'Auto' - }, - { - 'Name': 'max-depth', - 'MinValue': '1', - 'MaxValue': '20', - 'ScalingType': 'Auto' + "Name": "min-weight-fraction-leaf", + "MinValue": "0.01", + "MaxValue": "0.5", + "ScalingType": "Auto", }, - { - 'Name': 'min-samples-leaf', - 'MinValue': '1', - 'MaxValue': '10', - 'ScalingType': 'Auto' - }, - { - 'Name': 'min-weight-fraction-leaf', - 'MinValue': '0.01', - 'MaxValue': '0.5', - 'ScalingType': 'Auto' - }, - { - 'Name': 'criterion', - 'Values': ['"gini"', '"entropy"', '"log_loss"'] - } + {"Name": "criterion", "Values": ['"gini"', '"entropy"', '"log_loss"']}, ] TUNING_JOB_RESULT = { - 'HyperParameterTuningJobName': TUNING_JOB_NAME_1, - 'HyperParameterTuningJobConfig': { - 'Strategy': 'Random', - 'HyperParameterTuningJobObjective': { - 'Type': 'Maximize', - 'MetricName': 'valid-f1' - } + "HyperParameterTuningJobName": TUNING_JOB_NAME_1, + "HyperParameterTuningJobConfig": { + "Strategy": "Random", + "HyperParameterTuningJobObjective": {"Type": "Maximize", "MetricName": "valid-f1"}, }, - 'HyperParameterTuningJobStatus': 'Completed', + "HyperParameterTuningJobStatus": "Completed", }