Skip to content
This repository was archived by the owner on Jun 11, 2024. It is now read-only.

Commit 61d78ce

Browse files
committed
fixing har entry scope issue
1 parent 86edbf0 commit 61d78ce

File tree

4 files changed

+108
-93
lines changed

4 files changed

+108
-93
lines changed

browserup-proxy-core/src/main/resources/mitmproxy/har_dump.py

Lines changed: 44 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,8 @@
77
from datetime import timezone
88
import dateutil.parser
99

10+
import copy
11+
1012
import asyncio
1113

1214
from mitmproxy import ctx
@@ -175,7 +177,6 @@ class HarDumpAddOn:
175177
def __init__(self):
176178
self.num = 0
177179
self.har = None
178-
self.har_entry = None
179180
self.har_page_count = 0
180181
self.har_capture_types = []
181182
self.current_har_page = None
@@ -507,32 +508,36 @@ def consume_http_connect_timing(self, client_conn):
507508
return self.http_connect_timings.pop(client_conn, None)
508509
return None
509510

510-
def create_har_entry_with_default_response(self, request):
511-
full_url = self.get_full_url(request)
511+
def populate_har_entry_with_default_response(self, flow):
512+
full_url = self.get_full_url(flow.request)
512513

513514
ctx.log.debug('Creating new har entry for request: {}'.format(full_url))
514515

515-
self.har_entry = self.generate_har_entry()
516-
self.har_entry['pageref'] = self.get_current_page_ref()
517-
self.har_entry['startedDateTime'] = datetime.fromtimestamp(
518-
request.timestamp_start, timezone.utc).isoformat()
516+
har_entry = flow.server_conn.currentHarEntry
517+
518+
har_entry['pageref'] = self.get_current_page_ref()
519+
har_entry['startedDateTime'] = datetime.fromtimestamp(flow.request.timestamp_start, timezone.utc).isoformat()
519520
har_request = self.generate_har_entry_request()
520-
har_request['method'] = request.method
521+
har_request['method'] = flow.request.method
521522
har_request['url'] = full_url
522-
har_request['httpVersion'] = request.http_version
523-
har_request['queryString'] = self.name_value(request.query or {})
524-
har_request['headersSize'] = len(str(request.headers))
523+
har_request['httpVersion'] = flow.request.http_version
524+
har_request['queryString'] = self.name_value(flow.request.query or {})
525+
har_request['headersSize'] = len(str(flow.request.headers))
525526
har_response = self.generate_har_entry_response_for_failure()
526527

527-
self.har_entry['request'] = har_request
528-
self.har_entry['response'] = har_response
528+
har_entry['request'] = har_request
529+
har_entry['response'] = har_response
529530

530-
self.har['log']['entries'].append(self.har_entry)
531+
def append_har_entry(self, har_entry):
532+
har = self.get_or_create_har(DEFAULT_PAGE_REF, DEFAULT_PAGE_TITLE, True)
533+
har['log']['entries'].append(har_entry)
531534

532535
def request(self, flow):
533536
if 'WhiteListFiltered' in flow.metadata or 'BlackListFiltered' in flow.metadata:
534537
return
535538

539+
self.populate_har_entry_with_default_response(flow)
540+
536541
req_url = 'none'
537542
if flow.request is not None:
538543
req_url = flow.request.url
@@ -541,53 +546,52 @@ def request(self, flow):
541546

542547
self.get_or_create_har(DEFAULT_PAGE_REF, DEFAULT_PAGE_TITLE, True)
543548

544-
self.create_har_entry_with_default_response(flow.request)
545-
546549
if HarCaptureTypes.REQUEST_COOKIES in self.har_capture_types:
547550
self.capture_request_cookies(flow)
548551

549552
if HarCaptureTypes.REQUEST_HEADERS in self.har_capture_types:
550553
self.capture_request_headers(flow)
551554

552555
if HarCaptureTypes.RESPONSE_CONTENT in self.har_capture_types:
553-
self.capture_request_content(flow.request)
556+
self.capture_request_content(flow)
554557

555-
self.har_entry['request']['bodySize'] = \
558+
har_entry = flow.server_conn.currentHarEntry
559+
har_entry['request']['bodySize'] = \
556560
len(flow.request.raw_content) if flow.request.raw_content else 0
557561

558562
connect_timing = self.consume_http_connect_timing(flow.client_conn)
559563
if connect_timing is not None:
560-
self.har_entry['timings']['sslNanos'] = connect_timing['sslHandshakeTimeNanos']
561-
self.har_entry['timings']['connectNanos'] = connect_timing['connectTimeNanos']
562-
self.har_entry['timings']['blockedNanos'] = connect_timing['blockedTimeNanos']
563-
self.har_entry['timings']['dnsNanos'] = connect_timing['dnsTimeNanos']
564+
har_entry['timings']['sslNanos'] = connect_timing['sslHandshakeTimeNanos']
565+
har_entry['timings']['connectNanos'] = connect_timing['connectTimeNanos']
566+
har_entry['timings']['blockedNanos'] = connect_timing['blockedTimeNanos']
567+
har_entry['timings']['dnsNanos'] = connect_timing['dnsTimeNanos']
564568

565569
def capture_request_cookies(self, flow):
566-
self.har_entry['request']['cookies'] = \
570+
har_entry = flow.metadata['currentHarEntry']
571+
har_entry['request']['cookies'] = \
567572
self.format_request_cookies(flow.request.cookies.fields)
568573

569574
def capture_request_headers(self, flow):
570-
self.har_entry['request']['headers'] = \
575+
har_entry = flow.metadata['currentHarEntry']
576+
har_entry['request']['headers'] = \
571577
self.name_value(flow.request.headers)
572578

573-
def capture_request_content(self, request):
579+
def capture_request_content(self, flow):
580+
har_entry = flow.metadata['currentHarEntry']
574581
params = [
575582
{"name": a, "value": b}
576-
for a, b in request.urlencoded_form.items(multi=True)
583+
for a, b in flow.request.urlencoded_form.items(multi=True)
577584
]
578-
self.har_entry["request"]["postData"] = {
579-
"mimeType": request.headers.get("Content-Type", ""),
580-
"text": request.get_text(strict=False),
585+
har_entry["request"]["postData"] = {
586+
"mimeType": flow.request.headers.get("Content-Type", ""),
587+
"text": flow.request.get_text(strict=False),
581588
"params": params
582589
}
583590

584591
def response(self, flow):
585-
ctx.log.debug('Incoming response for request to url: {}'.format(flow.request.url))
592+
har_entry = flow.server_conn.currentHarEntry
586593

587-
if self.har_entry is not None:
588-
ctx.log.debug(
589-
'Response handling for request: {} for current har entry with url: {}'
590-
.format(flow.request.url, self.har_entry['request']['url']))
594+
ctx.log.debug('Incoming response for request to url: {}'.format(flow.request.url))
591595

592596
if 'WhiteListFiltered' in flow.metadata or 'BlackListFiltered' in flow.metadata:
593597
ctx.log.debug('Black/White list filtered, return nothing.')
@@ -610,7 +614,7 @@ def response(self, flow):
610614
SERVERS_SEEN.add(flow.server_conn)
611615

612616
timings = self.calculate_timings(connect_time, flow, ssl_time)
613-
timings['dnsNanos'] = int(self.har_entry['timings']['dnsNanos'])
617+
timings['dnsNanos'] = int(har_entry['timings']['dnsNanos'])
614618

615619
full_time = sum(v for v in timings.values() if v > -1)
616620

@@ -655,14 +659,14 @@ def response(self, flow):
655659
har_response["headersSize"] = len(str(flow.response.headers))
656660
har_response["bodySize"] = response_body_size
657661

658-
self.har_entry['response'] = har_response
659-
self.har_entry['time'] = self.nano_to_ms(full_time)
660-
self.har_entry['pageref'] = self.get_current_page_ref()
662+
har_entry['response'] = har_response
663+
har_entry['time'] = self.nano_to_ms(full_time)
664+
har_entry['pageref'] = self.get_current_page_ref()
661665

662-
self.har_entry['timings'] = timings
666+
har_entry['timings'] = timings
663667

664668
if flow.server_conn.connected():
665-
self.har_entry["serverIPAddress"] = str(
669+
har_entry["serverIPAddress"] = str(
666670
flow.server_conn.ip_address[0])
667671

668672
def calculate_timings(self, connect_time, flow, ssl_time):

browserup-proxy-core/src/main/resources/mitmproxy/http_connect_capture.py

Lines changed: 53 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
CONNECTION_FAILED_ERROR_MESSAGE = "Unable to connect to host"
1010
RESPONSE_TIMED_OUT_ERROR_MESSAGE = "Response timed out"
1111

12+
1213
class HttpConnectCaptureResource:
1314

1415
def addon_path(self):
@@ -54,21 +55,23 @@ def generate_http_connect_timing(self):
5455

5556
# TCP Callbacks
5657

57-
def tcp_resolving_server_address_finished(self, sever_conn):
58-
self.populate_dns_timings()
58+
def tcp_resolving_server_address_finished(self, server_conn):
59+
if not hasattr(server_conn, 'currentHarEntry'):
60+
return
61+
self.populate_dns_timings(server_conn)
5962
self.dns_resolution_finished_nanos = self.now_time_nanos()
6063

6164
if self.dns_resolution_started_nanos > 0:
62-
self.get_http_connect_timing()['dnsTimeNanos'] = self.dns_resolution_finished_nanos - self.dns_resolution_started_nanos
65+
self.get_http_connect_timing()[
66+
'dnsTimeNanos'] = self.dns_resolution_finished_nanos - self.dns_resolution_started_nanos
6367
else:
6468
self.get_http_connect_timing()['dnsTimeNanos'] = 0
6569

66-
def tcp_resolving_server_address_started(self, sever_conn):
70+
def tcp_resolving_server_address_started(self, server_conn):
6771
self.dns_resolution_started_nanos = int(round(self.now_time_nanos()))
6872
self.connection_started_nanos = int(round(self.now_time_nanos()))
6973
self.proxy_to_server_resolution_started()
7074

71-
7275
# SSL Callbacks
7376
def ssl_handshake_started(self, flow):
7477
self.ssl_handshake_started_nanos = int(round(self.now_time_nanos()))
@@ -77,18 +80,19 @@ def ssl_handshake_started(self, flow):
7780

7881
def http_connect(self, flow):
7982
self.http_connect_timing = self.get_http_connect_timing()
80-
self.har_dump_addon.http_connect_timings[flow.client_conn] = self.http_connect_timing
83+
self.har_dump_addon.http_connect_timings[
84+
flow.client_conn] = self.http_connect_timing
8185

8286
def http_proxy_to_server_request_started(self, flow):
8387
self.send_started_nanos = self.now_time_nanos()
8488

8589
def http_proxy_to_server_request_finished(self, flow):
8690
self.send_finished_nanos = self.now_time_nanos()
8791
if self.send_started_nanos > 0:
88-
self.get_har_entry()['timings'][
92+
self.get_har_entry(flow.server_conn)['timings'][
8993
'send'] = self.send_finished_nanos - self.send_started_nanos
9094
else:
91-
self.get_har_entry()['timings']['send'] = 0
95+
self.get_har_entry(flow.server_conn)['timings']['send'] = 0
9296

9397
def http_server_to_proxy_response_receiving(self, flow):
9498
self.response_receive_started_nanos = self.now_time_nanos()
@@ -104,101 +108,99 @@ def proxy_to_server_connection_succeeded(self, f):
104108
self.connection_succeeded_time_nanos = self.now_time_nanos()
105109

106110
if self.connection_started_nanos > 0:
107-
self.get_http_connect_timing()['connectTimeNanos'] = self.connection_succeeded_time_nanos - self.connection_started_nanos
111+
self.get_http_connect_timing()[
112+
'connectTimeNanos'] = self.connection_succeeded_time_nanos - self.connection_started_nanos
108113
else:
109114
self.get_http_connect_timing()['connectTimeNanos'] = 0
110115

111116
if self.ssl_handshake_started_nanos > 0:
112-
self.get_http_connect_timing()['sslHandshakeTimeNanos'] = self.connection_succeeded_time_nanos - self.ssl_handshake_started_nanos
117+
self.get_http_connect_timing()[
118+
'sslHandshakeTimeNanos'] = self.connection_succeeded_time_nanos - self.ssl_handshake_started_nanos
113119
else:
114120
self.get_http_connect_timing()['sslHandshakeTimeNanos'] = 0
115121

116122
def error(self, flow):
117123
req_host_port = flow.request.host
118124
if flow.request.port != 80:
119125
req_host_port = req_host_port + ':' + str(flow.request.port)
120-
original_error = HttpConnectCaptureAddOn.get_original_exception(
121-
flow.error)
126+
original_error = HttpConnectCaptureAddOn.get_original_exception(flow.error)
127+
128+
self.har_dump_addon.populate_har_entry_with_default_response(flow)
122129

123130
if 'Name or service not known' in str(original_error):
124-
self.proxy_to_server_resolution_failed(flow, req_host_port,
125-
original_error)
131+
self.proxy_to_server_resolution_failed(flow, req_host_port, original_error)
126132
elif isinstance(original_error, TcpTimeout):
127-
self.server_to_proxy_response_timed_out(flow, req_host_port,
128-
original_error)
133+
self.server_to_proxy_response_timed_out(flow, req_host_port, original_error)
129134
else:
130135
self.proxy_to_server_connection_failed(flow, original_error)
131136

132137
# Populate data
133138

134-
def populate_dns_timings(self):
135-
if self.dns_resolution_started_nanos > 0 and self.get_har_entry():
139+
def populate_dns_timings(self, server_conn):
140+
har_entry = self.get_har_entry(server_conn)
141+
if self.dns_resolution_started_nanos > 0 and har_entry:
136142
time_now = self.now_time_nanos()
137143
dns_nanos = time_now - self.dns_resolution_started_nanos
138-
self.get_har_entry()['timings']['dnsNanos'] = dns_nanos
144+
har_entry['timings']['dnsNanos'] = dns_nanos
139145

140-
def populate_timings_for_failed_connect(self):
146+
def populate_timings_for_failed_connect(self, flow):
147+
har_entry = self.get_har_entry(flow.server_conn)
141148
if self.connection_started_nanos > 0:
142149
connect_nanos = self.now_time_nanos() - self.connection_started_nanos
143-
self.get_har_entry()['timings']['connectNanos'] = connect_nanos
144-
self.populate_dns_timings()
150+
har_entry['timings']['connectNanos'] = connect_nanos
151+
self.populate_dns_timings(flow.server_conn)
145152

146153
def populate_server_ip_address(self, flow, original_error):
147154
if flow.server_conn is not None and flow.server_conn.ip_address is not None:
148-
self.get_har_entry()['serverIPAddress'] = str(
155+
self.get_har_entry(flow.server_conn)['serverIPAddress'] = str(
149156
flow.server_conn.ip_address[0])
150157

151158
def get_resource(self):
152159
return HttpConnectCaptureResource(self)
153160

154-
def proxy_to_server_resolution_failed(self, flow, req_host_port,
155-
original_error):
161+
def proxy_to_server_resolution_failed(self, flow, req_host_port, original_error):
156162
msg = RESOLUTION_FAILED_ERROR_MESSAGE + req_host_port
157-
self.create_har_entry_for_failed_connect(flow.request, msg)
158-
self.populate_dns_timings()
163+
self.create_har_entry_for_failed_connect(flow, msg)
164+
self.populate_dns_timings(flow.server_conn)
159165
self.populate_server_ip_address(flow, original_error)
160166

161-
self.get_har_entry()['time'] = self.calculate_total_elapsed_time()
167+
self.get_har_entry(flow.server_conn)['time'] = self.calculate_total_elapsed_time(flow)
162168

163169
def proxy_to_server_connection_failed(self, flow, original_error):
164170
msg = CONNECTION_FAILED_ERROR_MESSAGE
165-
self.create_har_entry_for_failed_connect(flow.request, msg)
166-
self.populate_timings_for_failed_connect()
171+
self.create_har_entry_for_failed_connect(flow, msg)
172+
self.populate_timings_for_failed_connect(flow)
167173
self.populate_server_ip_address(flow, original_error)
168174

169-
self.get_har_entry()['time'] = self.calculate_total_elapsed_time()
175+
self.get_har_entry(flow.server_conn)['time'] = self.calculate_total_elapsed_time(flow)
170176

171-
def server_to_proxy_response_timed_out(self, flow, req_host_port,
172-
original_error):
177+
def server_to_proxy_response_timed_out(self, flow, req_host_port, original_error):
173178
msg = RESPONSE_TIMED_OUT_ERROR_MESSAGE
174-
self.create_har_entry_for_failed_connect(flow.request, msg)
175-
self.populate_timings_for_failed_connect()
179+
self.create_har_entry_for_failed_connect(flow, msg)
180+
self.populate_timings_for_failed_connect(flow)
176181
self.populate_server_ip_address(flow, original_error)
177182

178183
current_time_nanos = self.now_time_nanos()
179184

185+
har_entry = self.get_har_entry(flow.server_conn)
186+
180187
if self.send_started_nanos > 0 and self.send_finished_nanos == 0:
181-
self.get_har_entry()['timings'][
182-
'sendNanos'] = current_time_nanos - self.send_started_nanos
188+
har_entry['timings']['sendNanos'] = current_time_nanos - self.send_started_nanos
183189

184190
elif self.send_finished_nanos > 0 and self.response_receive_started_nanos == 0:
185-
self.get_har_entry()['timings'][
186-
'waitNanos'] = current_time_nanos - self.send_finished_nanos
191+
har_entry['timings']['waitNanos'] = current_time_nanos - self.send_finished_nanos
187192

188193
elif self.response_receive_started_nanos > 0:
189-
self.get_har_entry()['timings'][
190-
'receiveNanos'] = current_time_nanos - self.response_receive_started_nanos
191-
192-
self.get_har_entry()['time'] = self.calculate_total_elapsed_time()
194+
har_entry['timings']['receiveNanos'] = current_time_nanos - self.response_receive_started_nanos
193195

194-
def create_har_entry_for_failed_connect(self, request, msg):
195-
if not self.get_har_entry():
196-
self.har_dump_addon.create_har_entry_with_default_response(request)
196+
har_entry['time'] = self.calculate_total_elapsed_time(flow)
197197

198-
self.get_har_entry()['response']['_errorMessage'] = msg
198+
def create_har_entry_for_failed_connect(self, flow, msg):
199+
har_entry = self.get_har_entry(flow.server_conn)
200+
har_entry['response']['_errorMessage'] = msg
199201

200-
def calculate_total_elapsed_time(self):
201-
timings = self.get_har_entry()['timings']
202+
def calculate_total_elapsed_time(self, flow):
203+
timings = self.get_har_entry(flow.server_conn)['timings']
202204
result = (0 if timings.get('blockedNanos', -1) == -1 else timings['blockedNanos']) + \
203205
(0 if timings.get('dnsNanos', -1) == -1 else timings['dnsNanos']) + \
204206
(0 if timings.get('connectNanos', -1) == -1 else timings['connectNanos']) + \
@@ -207,8 +209,8 @@ def calculate_total_elapsed_time(self):
207209
(0 if timings.get('receiveNanos', -1) == -1 else timings['receiveNanos'])
208210
return self.nano_to_ms(result)
209211

210-
def get_har_entry(self):
211-
return self.har_dump_addon.har_entry
212+
def get_har_entry(self, server_conn):
213+
return server_conn.currentHarEntry
212214

213215
def get_http_connect_timing(self):
214216
if self.http_connect_timing is None:

browserup-proxy-core/src/main/resources/mitmproxy/init_flow.py

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -44,8 +44,17 @@ def __init__(self):
4444
def get_resource(self):
4545
return InitFlowResource(self)
4646

47+
def http_connect(self, flow):
48+
if not hasattr(flow.server_conn, 'currentHarEntry'):
49+
self.init_har_entry(flow)
50+
4751
def request(self, flow):
48-
self.har_dump_addon.har_entry = None
52+
if not hasattr(flow.server_conn, 'currentHarEntry'):
53+
self.init_har_entry(flow)
54+
55+
def init_har_entry(self, flow):
56+
setattr(flow.server_conn, 'currentHarEntry', self.har_dump_addon.generate_har_entry())
57+
self.har_dump_addon.append_har_entry(flow.server_conn.currentHarEntry)
4958

5059
addons = [
5160
InitFlowAddOn()

0 commit comments

Comments
 (0)