Skip to content
1 change: 1 addition & 0 deletions changelog.d/19212.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add HTTP 413 response when incoming request is too large.
13 changes: 13 additions & 0 deletions synapse/api/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,19 @@
# the max size of a (canonical-json-encoded) event
MAX_PDU_SIZE = 65536

# The maximum allowed size of an HTTP request.
# Other than media uploads, the biggest request we expect to see is a fully-loaded
# /federation/v1/send request.
#
# The main thing in such a request is up to 50 PDUs, and up to 100 EDUs. PDUs are
# limited to 65536 bytes (possibly slightly more if the sender didn't use canonical
# json encoding); there is no specced limit to EDUs (see
# https://github.com/matrix-org/matrix-doc/issues/3121).
#
# in short, we somewhat arbitrarily limit requests to 200 * 64K (about 12.5M)
#
MAX_REQUEST_SIZE = 200 * MAX_PDU_SIZE

# Max/min size of ints in canonical JSON
CANONICALJSON_MAX_INT = (2**53) - 1
CANONICALJSON_MIN_INT = -CANONICALJSON_MAX_INT
Expand Down
14 changes: 2 additions & 12 deletions synapse/app/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
from twisted.web.resource import Resource

import synapse.util.caches
from synapse.api.constants import MAX_PDU_SIZE
from synapse.api.constants import MAX_REQUEST_SIZE
from synapse.app import check_bind_error
from synapse.config import ConfigError
from synapse.config._base import format_config_error
Expand Down Expand Up @@ -843,17 +843,7 @@ def sdnotify(state: bytes) -> None:
def max_request_body_size(config: HomeServerConfig) -> int:
"""Get a suitable maximum size for incoming HTTP requests"""

# Other than media uploads, the biggest request we expect to see is a fully-loaded
# /federation/v1/send request.
#
# The main thing in such a request is up to 50 PDUs, and up to 100 EDUs. PDUs are
# limited to 65536 bytes (possibly slightly more if the sender didn't use canonical
# json encoding); there is no specced limit to EDUs (see
# https://github.com/matrix-org/matrix-doc/issues/3121).
#
# in short, we somewhat arbitrarily limit requests to 200 * 64K (about 12.5M)
#
max_request_size = 200 * MAX_PDU_SIZE
max_request_size = MAX_REQUEST_SIZE
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
max_request_size = MAX_REQUEST_SIZE
# Baseline default for any request that isn't configured in the homeserver config
max_request_size = MAX_REQUEST_SIZE


# if we have a media repo enabled, we may need to allow larger uploads than that
if config.media.can_load_media_repo:
Expand Down
95 changes: 78 additions & 17 deletions synapse/http/site.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#
#
import contextlib
import json
import logging
import time
from http import HTTPStatus
Expand All @@ -36,6 +37,7 @@
from twisted.web.resource import IResource, Resource
from twisted.web.server import Request

from synapse.api.errors import Codes
from synapse.config.server import ListenerConfig
from synapse.http import get_request_user_agent, redact_uri
from synapse.http.proxy import ProxySite
Expand Down Expand Up @@ -146,34 +148,93 @@ def __repr__(self) -> str:

# Twisted machinery: this method is called by the Channel once the full request has
# been received, to dispatch the request to a resource.
#
# We're patching Twisted to bail/abort early when we see someone trying to upload
# `multipart/form-data` so we can avoid Twisted parsing the entire request body into
# in-memory (specific problem of this specific `Content-Type`). This protects us
# from an attacker uploading something bigger than the available RAM and crashing
# the server with a `MemoryError`, or carefully block just enough resources to cause
# all other requests to fail.
#
# FIXME: This can be removed once we Twisted releases a fix and we update to a
# version that is patched
def requestReceived(self, command: bytes, path: bytes, version: bytes) -> None:
# In the case of a Content-Length header being present, and it's value being too
# large, this will make debugging issues due to overly large requests much
# easier. Currently we handle such cases in `handleContentChunk` and abort the
# connection without providing a proper HTTP response.
#
# Attempting to write an HTTP response from within `handleContentChunk` does not
# work, so the code here has been added to at least provide a response in the
# case of the Content-Length header being present.
content_length_headers = self.requestHeaders.getRawHeaders(b"Content-Length")
if content_length_headers is not None:
if len(content_length_headers) != 1:
logger.warning(
"Dropping request from %s because it contains multiple Content-Length headers: %s %s",
self.client,
command.decode("ascii", errors="replace"),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To align with this comment that says they are the same: #19212 (comment)

Suggested change
command.decode("ascii", errors="replace"),
self.get_method(),

self.get_redacted_uri(),
)
self.loseConnection()
Copy link
Contributor

@MadLittleMods MadLittleMods Dec 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be nice to respond with an actual HTTP status code and a good error explaining why.

In the case of multiple Content-Length headers -> 400 Bad Request

In the case of no Content-Length header -> 411 Length Required

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, in the case of multiple headers we can do that.
In the case of no header, we can't jump to that conclusion so easily. It depends on the HTTP version. Required in HTTP 1, not required if there is a Transfer-Encoding: chunked header in the HTTP 1.1, and not required in HTTP 2.
https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Content-Length

We could bake in all these additional checks if you think it doesn't blow this PR up. I'm not sure if twisted already handles these cases or not.

return
Comment on lines +162 to +170
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Explain why we do this.

In the SBG we say this type thing:

element-hq/sbg -> crates/matrix-sbg-module-oauth-par/src/lib.rs#L597-L624

// If there are 0 or multiple `client_id` query parameters, return
// an error. We don't want to even try to pick the right one if
// there are multiple as we could run into problems similar to
// request smuggling vulnerabilities which rely on the mismatch of
// how different systems interpret information.


try:
content_length = int(content_length_headers[0])
Comment on lines +160 to +173
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nicer if this kind of thing was a bit more contained in a block instead of separately validating and then doing a cheeky lookup again.

As an example of how we do this in Rust:
element-hq/sbg -> crates/matrix-sbg-module-oauth-par/src/lib.rs#L597-L624

It could be pulled out into a nested function here 🤷 - Since we should be using this kind of pattern wherever we're looking at headers, it could also be a nice helper utility which raises a certain exception type which can be caught here and we react as we do now here.

if content_length > self._max_request_body_size:
self.method, self.uri = command, path
self.clientproto = version
logger.warning(
"Rejecting request from %s because Content-Length %d exceeds maximum size %d: %s %s",
self.client,
content_length,
self._max_request_body_size,
self.get_method(),
self.get_redacted_uri(),
)

self.code = HTTPStatus.REQUEST_ENTITY_TOO_LARGE.value
self.code_message = bytes(
HTTPStatus.REQUEST_ENTITY_TOO_LARGE.phrase, "ascii"
)

error_response_json = {
"errcode": Codes.TOO_LARGE,
"error": "Request content is too large",
}
error_response_bytes = (json.dumps(error_response_json)).encode()

self.responseHeaders.setRawHeaders(
b"Content-Type", [b"application/json"]
)
self.responseHeaders.setRawHeaders(
b"content-length", [f"{len(error_response_bytes)}"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
b"content-length", [f"{len(error_response_bytes)}"]
b"Content-Length", [f"{len(error_response_bytes)}"]

)
self.write(error_response_bytes)
self.loseConnection()
return
except ValueError:
# Invalid Content-Length header, let it proceed
pass
Comment on lines +207 to +208
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why?


# We're patching Twisted to bail/abort early when we see someone trying to upload
# `multipart/form-data` so we can avoid Twisted parsing the entire request body into
# in-memory (specific problem of this specific `Content-Type`). This protects us
# from an attacker uploading something bigger than the available RAM and crashing
# the server with a `MemoryError`, or carefully block just enough resources to cause
# all other requests to fail.
#
# FIXME: This can be removed once Twisted releases a fix and we update to a
# version that is patched
# See: https://github.com/element-hq/synapse/security/advisories/GHSA-rfq8-j7rh-8hf2
if command == b"POST":
ctype = self.requestHeaders.getRawHeaders(b"content-type")
if ctype and b"multipart/form-data" in ctype[0]:
self.method, self.uri = command, path
self.clientproto = version
logger.warning(
"Aborting connection from %s because `content-type: multipart/form-data` is unsupported: %s %s",
self.client,
self.get_method(),
self.get_redacted_uri(),
)

self.code = HTTPStatus.UNSUPPORTED_MEDIA_TYPE.value
self.code_message = bytes(
HTTPStatus.UNSUPPORTED_MEDIA_TYPE.phrase, "ascii"
)
self.responseHeaders.setRawHeaders(b"content-length", [b"0"])

logger.warning(
"Aborting connection from %s because `content-type: multipart/form-data` is unsupported: %s %s",
self.client,
command,
path,
)
self.responseHeaders.setRawHeaders(b"content-length", [b"0"])
self.write(b"")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should at-least leave a FIXME comment here about returning a better error response like we did above

self.loseConnection()
return
Expand Down
37 changes: 37 additions & 0 deletions tests/http/test_site.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from twisted.internet.address import IPv6Address
from twisted.internet.testing import MemoryReactor, StringTransport

from synapse.app._base import max_request_body_size
from synapse.app.homeserver import SynapseHomeServer
from synapse.server import HomeServer
from synapse.util.clock import Clock
Expand Down Expand Up @@ -143,3 +144,39 @@ def test_content_type_multipart(self) -> None:

# we should get a 415
self.assertRegex(transport.value().decode(), r"^HTTP/1\.1 415 ")

def test_content_length_too_large(self) -> None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should also have a test for oversized_length content but small false Content-Length header

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure what the best way to write a test for this is.
I did a bunch of testing and Synapse (Twisted) handles this by truncating the HTTP request at the specified size of the Content-Length. So the request will go through to the server correctly but the body will be silently truncated. The rest of the bytes are dropped.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(test hasn't been added yet)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did a bunch of testing and Synapse (Twisted) handles this by truncating the HTTP request at the specified size of the Content-Length. So the request will go through to the server correctly but the body will be silently truncated. The rest of the bytes are dropped.

Perhaps some request with a basic JSON body that will be cut-off and we expect M_NOT_JSON 🤷

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After more testing, it seems that Synapse doesn't care if a Content-Length is present and the value is smaller than the actual size of the content. It just deals with it.
The HTTP spec isn't exactly clear what is the correct thing to do in this case. And googling didn't give great results.
This section of the spec (see point 4) could be taken to mean we should be responding with 400 Bad Request in this case.

I can make that change here (to add the assertion).

"""HTTP requests with Content-Length exceeding max size should be rejected with 413"""
self.hs.start_listening()

# find the HTTP server which is configured to listen on port 0
(port, factory, _backlog, interface) = self.reactor.tcpServers[0]
self.assertEqual(interface, "::")
self.assertEqual(port, 0)

# complete the connection and wire it up to a fake transport
client_address = IPv6Address("TCP", "::1", 2345)
protocol = factory.buildProtocol(client_address)
transport = StringTransport()
protocol.makeConnection(transport)

# Send a request with Content-Length header that exceeds the limit.
# Default max is 50MB (from media max_upload_size), so send something larger.
oversized_length = 1 + max_request_body_size(self.hs.config)
protocol.dataReceived(
b"POST / HTTP/1.1\r\n"
b"Connection: close\r\n"
b"Content-Length: " + str(oversized_length).encode() + b"\r\n"
b"\r\n"
b"" + b"x" * oversized_length + b"\r\n"
b"\r\n"
)

# Advance the reactor to process the request
while not transport.disconnecting:
self.reactor.advance(1)

# We should get a 413 Content Too Large
response = transport.value().decode()
self.assertRegex(response, r"^HTTP/1\.1 413 ")
self.assertSubstring("M_TOO_LARGE", response)
6 changes: 0 additions & 6 deletions tests/rest/client/test_login.py
Original file line number Diff line number Diff line change
Expand Up @@ -1728,9 +1728,6 @@ def test_username_picker_use_displayname_avatar_and_email(self) -> None:
content_is_form=True,
custom_headers=[
("Cookie", "username_mapping_session=" + session_id),
# old versions of twisted don't do form-parsing without a valid
# content-length header.
("Content-Length", str(len(content))),
Comment on lines -1731 to -1733
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

make_request already adds a Content-Length header for this same reason. These all result in duplicate headers.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And I'm guessing we caught this thanks to our new asserts 👏

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct. What we had wasn't necessarily incorrect according to the spec (see the last point in that section)
But making the request invalid is more correct now since we weren't comparing the values between the different Content-Length headers before.

],
)
self.assertEqual(chan.code, 302, chan.result)
Expand Down Expand Up @@ -1818,9 +1815,6 @@ def test_username_picker_dont_use_displayname_avatar_or_email(self) -> None:
content_is_form=True,
custom_headers=[
("Cookie", "username_mapping_session=" + session_id),
# old versions of twisted don't do form-parsing without a valid
# content-length header.
("Content-Length", str(len(content))),
],
)
self.assertEqual(chan.code, 302, chan.result)
Expand Down
4 changes: 0 additions & 4 deletions tests/rest/client/test_media.py
Original file line number Diff line number Diff line change
Expand Up @@ -2590,7 +2590,6 @@ def test_authenticated_media(self) -> None:
self.tok,
shorthand=False,
content_type=b"image/png",
custom_headers=[("Content-Length", str(67))],
)
self.assertEqual(channel.code, 200)
res = channel.json_body.get("content_uri")
Expand Down Expand Up @@ -2750,7 +2749,6 @@ def test_authenticated_media_etag(self) -> None:
self.tok,
shorthand=False,
content_type=b"image/png",
custom_headers=[("Content-Length", str(67))],
)
self.assertEqual(channel.code, 200)
res = channel.json_body.get("content_uri")
Expand Down Expand Up @@ -2909,7 +2907,6 @@ def upload_media(self, size: int) -> FakeChannel:
access_token=self.tok,
shorthand=False,
content_type=b"text/plain",
custom_headers=[("Content-Length", str(size))],
)

def test_upload_under_limit(self) -> None:
Expand Down Expand Up @@ -3074,7 +3071,6 @@ def upload_media(self, size: int, tok: str) -> FakeChannel:
access_token=tok,
shorthand=False,
content_type=b"text/plain",
custom_headers=[("Content-Length", str(size))],
)

def test_upload_under_limit(self) -> None:
Expand Down
2 changes: 0 additions & 2 deletions tests/rest/client/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -612,7 +612,6 @@ def upload_media(
filename: The filename of the media to be uploaded
expect_code: The return code to expect from attempting to upload the media
"""
image_length = len(image_data)
path = "/_matrix/media/r0/upload?filename=%s" % (filename,)
channel = make_request(
self.reactor,
Expand All @@ -621,7 +620,6 @@ def upload_media(
path,
content=image_data,
access_token=tok,
custom_headers=[("Content-Length", str(image_length))],
)

assert channel.code == expect_code, "Expected: %d, got: %d, resp: %r" % (
Expand Down
10 changes: 8 additions & 2 deletions tests/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
from twisted.web.resource import IResource
from twisted.web.server import Request, Site

from synapse.api.constants import MAX_REQUEST_SIZE
from synapse.config.database import DatabaseConnectionConfig
from synapse.config.homeserver import HomeServerConfig
from synapse.events.auto_accept_invites import InviteAutoAccepter
Expand Down Expand Up @@ -241,7 +242,6 @@ def writeSequence(self, data: Iterable[bytes]) -> None:

def loseConnection(self) -> None:
self.unregisterProducer()
self.transport.loseConnection()

# Type ignore: mypy doesn't like the fact that producer isn't an IProducer.
def registerProducer(self, producer: IProducer, streaming: bool) -> None:
Expand Down Expand Up @@ -428,7 +428,13 @@ def make_request(

channel = FakeChannel(site, reactor, ip=client_ip)

req = request(channel, site, our_server_name="test_server")
# `max_request_body_size` copied from `synapse/app/_base.py -> max_request_body_size()`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment no longer necessary as we're using MAX_REQUEST_SIZE now

req = request(
channel,
site,
our_server_name="test_server",
max_request_body_size=MAX_REQUEST_SIZE,
)
channel.request = req

req.content = BytesIO(content)
Expand Down
Loading