22
33import logging
44import io
5+ import os
56
67from botocore .exceptions import ClientError
78from codeguru_profiler_agent .utils .log_exception import log_exception
89from codeguru_profiler_agent .reporter .reporter import Reporter
910from codeguru_profiler_agent .metrics .with_timer import with_timer
1011from codeguru_profiler_agent .sdk_reporter .profile_encoder import ProfileEncoder
12+ from codeguru_profiler_agent .agent_metadata .aws_lambda import HANDLER_ENV_NAME_FOR_CODEGURU_KEY , \
13+ LAMBDA_TASK_ROOT , LAMBDA_RUNTIME_DIR
1114
1215logger = logging .getLogger (__name__ )
13-
16+ AWS_EXECUTION_ENV_KEY = "AWS_EXECUTION_ENV"
1417
1518class SdkReporter (Reporter ):
1619 """
1720 Handles communication with the CodeGuru Profiler Service backend.
1821 Encodes profiles using the ProfilerEncoder and reports them using the CodeGuru profiler SDK.
1922 """
20-
23+ is_create_pg_called_during_submit_profile = False
2124 def __init__ (self , environment ):
2225 """
2326 :param environment: dependency container dictionary for the current profiler.
@@ -51,6 +54,11 @@ def setup(self):
5154 def refresh_configuration (self ):
5255 """
5356 Refresh the agent configuration by calling the profiler backend service.
57+
58+ Note:
59+ For an agent running on AWS Lambda, if the environment variables for Profiling using
60+ Lambda layers are set, it tries to create a Profiling Group whenever a ResourceNotFoundException
61+ is encountered.
5462 """
5563 try :
5664 fleet_instance_id = self .metadata .fleet_info .get_fleet_instance_id ()
@@ -67,9 +75,18 @@ def refresh_configuration(self):
6775 # whole process because the customer may fix this on their side by creating/changing the profiling group.
6876 # We handle service exceptions like this in boto3
6977 # see https://boto3.amazonaws.com/v1/documentation/api/latest/guide/error-handling.html
70- if error .response ['Error' ]['Code' ] in [ 'ResourceNotFoundException' , ' ValidationException'] :
78+ if error .response ['Error' ]['Code' ] == ' ValidationException' :
7179 self .agent_config_merger .disable_profiling ()
72- self ._log_request_failed (operation = "configure_agent" , exception = error )
80+ self ._log_request_failed (operation = "configure_agent" , exception = error )
81+ if error .response ['Error' ]['Code' ] == 'ResourceNotFoundException' :
82+ if self .should_auto_create_profiling_group ():
83+ logger .info (
84+ "Profiling group not found. Will try to create a profiling group "
85+ "with name = {} and compute platform = {} and retry calling configure agent after 5 minutes"
86+ .format (self .profiling_group_name , 'AWSLambda' ))
87+ self .create_profiling_group ()
88+ else :
89+ self .agent_config_merger .disable_profiling ()
7390 except Exception as e :
7491 self ._log_request_failed (operation = "configure_agent" , exception = e )
7592
@@ -80,6 +97,11 @@ def report(self, profile):
8097
8198 :param profile: Profile to be encoded and reported to the profiler backend service.
8299 :return: True if profile gets reported successfully; False otherwise.
100+
101+ Note:
102+ For an agent running on AWS Lambda, if the environment variables for Profiling using
103+ Lambda layers are set, it tries to create a Profiling Group whenever a ResourceNotFoundException
104+ is encountered.
83105 """
84106 try :
85107 profile_stream = self ._encode_profile (profile )
@@ -90,11 +112,61 @@ def report(self, profile):
90112 )
91113 logger .info ("Reported profile successfully" )
92114 return True
115+ except ClientError as error :
116+ if error .response ['Error' ]['Code' ] == 'ResourceNotFoundException' :
117+ if self .should_auto_create_profiling_group ():
118+ self .__class__ .is_create_pg_called_during_submit_profile = True
119+ logger .info (
120+ "Profiling group not found. Will try to create a profiling group "
121+ "with name = {} and compute platform = {}" .format (self .profiling_group_name , 'AWSLambda' ))
122+ self .create_profiling_group ()
123+ return False
93124 except Exception as e :
94125 self ._log_request_failed (operation = "post_agent_profile" , exception = e )
95126 return False
96127
@with_timer("createProfilingGroup", measurement="wall-clock-time")
def create_profiling_group(self):
    """
    Create a Profiling Group for the AWS Lambda function.

    Calls the ``create_profiling_group`` operation of the CodeGuru client with this
    reporter's ``profiling_group_name`` and the 'AWSLambda' compute platform.

    Any ``Exception`` raised by the call is caught and logged here rather than
    propagated to the caller:
      - a ``ClientError`` with code 'ConflictException' is reported at info level,
        since it means a group with that name already exists;
      - every other ``ClientError`` and any other ``Exception`` is reported through
        ``_log_request_failed``.
    """
    try:
        self.codeguru_client_builder.codeguru_client.create_profiling_group(
            profilingGroupName=self.profiling_group_name,
            computePlatform='AWSLambda'
        )
        logger.info("Created Lambda Profiling Group with name " + str(self.profiling_group_name))
    except ClientError as error:
        if error.response['Error']['Code'] == 'ConflictException':
            logger.info("Profiling Group with name {} already exists. Please use a different name."
                        .format(self.profiling_group_name))
        else:
            # Other service errors must not be dropped silently: this handler shadows
            # the generic ``except Exception`` below, so log them here explicitly.
            self._log_request_failed(operation="create_profiling_group", exception=error)
    except Exception as e:
        self._log_request_failed(operation="create_profiling_group", exception=e)
145+
def should_auto_create_profiling_group(self):
    """
    Decide whether the agent may create the Profiling Group on its own.

    Currently the only condition checked is that the compute platform is AWS Lambda.
    In future, other checks could be placed inside this method.

    :return: the result of is_compute_platform_lambda().
    """
    running_on_lambda = self.is_compute_platform_lambda()
    return running_on_lambda
152+
def is_compute_platform_lambda(self):
    """
    Check if the compute platform is AWS Lambda.

    :return: True only when the environment variables named by LAMBDA_TASK_ROOT and
        LAMBDA_RUNTIME_DIR are both set to a non-empty value; False otherwise.
    """
    required_env_names = (LAMBDA_TASK_ROOT, LAMBDA_RUNTIME_DIR)
    # An unset variable yields None and an empty one yields "", both of which are falsy.
    return all(bool(os.environ.get(env_name)) for env_name in required_env_names)
160+
@staticmethod
def _log_request_failed(operation, exception):
    """
    Log a failed call to the CodeGuru Profiler service.

    :param operation: name of the service operation that failed; inserted into the message.
    :param exception: the exception that was raised; its str() form is appended to the message.
    """
    message = "Failed to call the CodeGuru Profiler service for the {} operation: {}".format(
        operation, str(exception))
    log_exception(logger, message)
165+
@classmethod
def check_create_pg_called_during_submit_profile(cls):
    """
    Read the class-level flag is_create_pg_called_during_submit_profile.

    :return: the current value of the flag stored on the class.
    """
    flag_value = cls.is_create_pg_called_during_submit_profile
    return flag_value
169+
@classmethod
def reset_check_create_pg_called_during_submit_profile_flag(cls):
    """
    Reset the class-level flag is_create_pg_called_during_submit_profile to False.

    The flag lives on the class, so the reset is visible through every instance
    that has not shadowed the attribute.
    """
    setattr(cls, "is_create_pg_called_during_submit_profile", False)
0 commit comments