1- from codeguru_profiler_agent .utils .synchronization import synchronized
1+ import logging
2+ import os
3+
4+ logger = logging .getLogger (__name__ )
25
36
47class ErrorsMetadata :
def __init__(self):
    """
    Set up a new instance by delegating to reset().

    The counter attributes are not assigned here directly; reset() is the single place that gives them
    their starting values, so a freshly built instance and a reset one are in the same state.
    """
    self.reset()
1310
1411 def reset (self ):
12+ """
13+ We want to differentiate API call errors more granularly. We want to gather ResourceNotFoundException errors
14+ because we are going to get this exception with auto-create feature and want to monitor how many times
15+ the agent is not able to create the PG and resulting in subsequent ResourceNotFoundException.
16+ """
1517 self .errors_count = 0
1618 self .sdk_client_errors = 0
1719 self .configure_agent_errors = 0
@@ -20,12 +22,10 @@ def reset(self):
2022 self .post_agent_profile_errors = 0
2123 self .post_agent_profile_rnfe_auto_create_enabled_errors = 0
2224
23- """
24- This needs to be compliant with errors count schema.
25- https://code.amazon.com/packages/SkySailProfileIonSchema/blobs/811cc0e7e406e37a5b878acf31468be3dcd2963d/--/src/main/resources/schema/DebugInfo.isl#L21
26- """
27-
2825 def serialize_to_json (self ):
26+ """
27+ This needs to be compliant with errors count schema.
28+ """
2929 return {
3030 "errorsCount" : self .errors_count ,
3131 "sdkClientErrors" : self .sdk_client_errors ,
@@ -36,35 +36,86 @@ def serialize_to_json(self):
3636 "postAgentProfileRnfeAutoCreateEnabledErrors" : self .post_agent_profile_rnfe_auto_create_enabled_errors
3737 }
3838
39- @synchronized
def increment_sdk_error(self, error_type):
    """
    Count one SDK client error and, when the type is recognised, its API-level breakdown.

    errors_count is the umbrella of all the kinds of error we want to capture. Currently it only contains
    sdk_client_errors, so both are incremented on every call. sdk_client_errors is in turn broken down
    into API-level counters (configure agent, post agent profile, create profiling group), selected by
    error_type.

    :param error_type: The type of API level error that we want to capture (a string such as
        "configureAgentErrors"). A value that is not one of the known types only increments the two
        umbrella counters.
    """
    self.errors_count += 1
    self.sdk_client_errors += 1

    # Every counter attribute a given error type contributes to.
    # Special handling for ResourceNotFoundException errors: an "...RnfeAutoCreateEnabledErrors" type is
    # also an error of its parent API call, so it increments the parent counter as well as its own.
    counters_by_error_type = {
        "configureAgentErrors": ("configure_agent_errors",),
        "configureAgentRnfeAutoCreateEnabledErrors": (
            "configure_agent_errors",
            "configure_agent_rnfe_auto_create_enabled_errors",
        ),
        "createProfilingGroupErrors": ("create_profiling_group_errors",),
        "postAgentProfileErrors": ("post_agent_profile_errors",),
        "postAgentProfileRnfeAutoCreateEnabledErrors": (
            "post_agent_profile_errors",
            "post_agent_profile_rnfe_auto_create_enabled_errors",
        ),
    }

    for counter_name in counters_by_error_type.get(error_type, ()):
        setattr(self, counter_name, getattr(self, counter_name) + 1)
5465
def record_sdk_error(self, error_type):
    """
    Record a single SDK client error of the given type.

    This is a thin entry point: all counting is done by increment_sdk_error, which receives
    error_type unchanged. Nothing is returned.

    :param error_type: The type of API level error to record.
    """
    self.increment_sdk_error(error_type)
5768
5869
class AgentDebugInfo:
    """
    Gathers debugging details about the agent and renders them as a JSON-compatible dict.

    Every piece of information is optional: a piece whose value is None (or, for the timer, whose
    metrics are empty) is simply left out of the serialized output.
    """

    def __init__(self, errors_metadata=None, agent_start_time=None, timer=None):
        """
        :param errors_metadata: object exposing serialize_to_json(); reported under "errorsCount".
        :param agent_start_time: start time of the agent; converted with int() when serialized.
        :param timer: object exposing a "metrics" mapping whose values provide "max" and "average()".
        """
        self.timer = timer
        self.agent_start_time = agent_start_time
        self.errors_metadata = errors_metadata
        # The process id is looked up once, when the instance is created.
        self.process_id = get_process_id()

    def serialize_to_json(self):
        """
        Build the debug info dict. This needs to be compliant with agent debug info schema.

        :return: a dict holding, in this order and only when available, "agentStartTime", "processId",
            "errorsCount" and "genericMetrics".
        """
        payload = {}

        contributors = (
            self.add_agent_start_time,
            self.add_process_id,
            self.add_errors_metadata,
            self.add_generic_metrics,
        )
        for contribute in contributors:
            contribute(payload)

        return payload

    def add_agent_start_time(self, json):
        """Store the agent start time, as an int, under "agentStartTime" unless it is None."""
        if self.agent_start_time is None:
            return
        json["agentStartTime"] = int(self.agent_start_time)

    def add_errors_metadata(self, json):
        """Store the serialized errors metadata under "errorsCount" unless it is None."""
        if self.errors_metadata is None:
            return
        json["errorsCount"] = self.errors_metadata.serialize_to_json()

    def add_process_id(self, json):
        """Store the process id under "processId" unless it is None."""
        if self.process_id is None:
            return
        json["processId"] = self.process_id

    def add_generic_metrics(self, json):
        """
        Store the timer metrics under "genericMetrics".

        For each metric name, "<name>_max" holds the metric's max attribute and "<name>_average" the
        result of its average() method. Nothing is added when there is no timer or it has no metrics.
        """
        if self.timer is None or not self.timer.metrics:
            return

        collected = {}
        for name in self.timer.metrics:
            stats = self.timer.metrics[name]
            collected[name + "_max"] = stats.max
            collected[name + "_average"] = stats.average()

        if collected:
            json["genericMetrics"] = collected
113+
114+
def get_process_id():
    """
    Return the id of the current process, or None if it cannot be obtained.

    This is best effort: any exception raised by os.getpid() is logged at INFO level and swallowed, so
    a failure here never propagates to the caller.

    :return: the value of os.getpid(), or None when the call raised.
    """
    try:
        return os.getpid()
    except Exception as e:
        # Pass the exception as a logging argument so the message is only formatted if the record is
        # actually emitted; "%r" produces the same text as repr(e).
        logger.info("Failed to get the process id, %r", e)
        return None
121+
0 commit comments