diff --git a/.config/nextest.toml b/.config/nextest.toml index bd35988a09a..a549d4068f2 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -9,5 +9,10 @@ filter = 'test(::run_in_isolation::)' test-group = 'run-in-isolation' threads-required = 32 +[[profile.default.overrides]] +filter = 'test(::run_in_isolation::)' +test-group = 'run-in-isolation' +threads-required = 32 + [profile.default] -slow-timeout = { period = "20s", terminate-after = 3 } +slow-timeout = { period = "10s", terminate-after = 3 } diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4ea4d92fef5..4d6126a9e77 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -338,4 +338,4 @@ jobs: steps: - uses: actions/checkout@v5 - run: pip install --user codespell[toml] - - run: codespell --ignore-words-list=ans,atmost,crate,inout,ratatui,ser,stayin,swarmin,worl --skip=CHANGELOG.md + - run: codespell --ignore-words-list=ans,atmost,crate,inout,ratatui,ser,stayin,swarmin,worl,keep-alives --skip=CHANGELOG.md diff --git a/Cargo.lock b/Cargo.lock index d39ce4c93b2..1c00b3ccf76 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -16,17 +16,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "aead" -version = "0.6.0-rc.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac8202ab55fcbf46ca829833f347a82a2a4ce0596f0304ac322c2d100030cd56" -dependencies = [ - "bytes", - "crypto-common", - "inout", -] - [[package]] name = "ahash" version = "0.8.12" @@ -102,22 +91,22 @@ dependencies = [ [[package]] name = "anstyle-query" -version = "1.1.4" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] name = "anstyle-wincon" -version = "3.0.10" +version = "3.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -253,9 +242,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "axum" -version = "0.8.6" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a18ed336352031311f4e0b4dd2ff392d4fbb370777c9d18d7fc9d7359f73871" +checksum = "5b098575ebe77cb6d14fc7f32749631a6e44edbef6b796f89b020e99ba20d425" dependencies = [ "axum-core", "axum-macros", @@ -317,9 +306,9 @@ dependencies = [ [[package]] name = "axum-server" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "495c05f60d6df0093e8fb6e74aa5846a0ad06abaf96d76166283720bf740f8ab" +checksum = "c1ab4a3ec9ea8a657c72d99a03a824af695bd0fb5ec639ccbd9cd3543b41a5f9" dependencies = [ "arc-swap", "bytes", @@ -348,12 +337,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "base16ct" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8b59d472eab27ade8d770dcb11da7201c11234bef9f82ce7aa517be028d462b" - [[package]] name = "base32" version = "0.5.1" @@ -426,12 +409,11 @@ dependencies = [ [[package]] name = "block-buffer" -version = "0.11.0-rc.5" +version = "0.11.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9ef36a6fcdb072aa548f3da057640ec10859eb4e91ddf526ee648d50c76a949" +checksum = "96eb4cdd6cf1b31d671e9efe75c5d1ec614776856cefbe109ca373554a6d514f" dependencies = [ "hybrid-array", - "zeroize", ] [[package]] @@ -448,9 +430,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" [[package]] name = "cast" @@ -460,9 +442,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.43" +version = "1.2.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "739eb0f94557554b3ca9a86d2d37bebd49c5e6d0c1d2bda35ba5bdac830befc2" +checksum = "cd405d82c84ff7f35739f175f67d8b9fb7687a0e84ccdc78bd3568839827cf07" dependencies = [ "find-msvc-tools", "shlex", @@ -486,18 +468,6 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" -[[package]] -name = "chacha20" -version = "0.10.0-rc.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bd162f2b8af3e0639d83f28a637e4e55657b7a74508dba5a9bf4da523d5c9e9" -dependencies = [ - "cfg-if", - "cipher", - "cpufeatures", - "zeroize", -] - [[package]] name = "chrono" version = "0.4.42" @@ -537,23 +507,11 @@ dependencies = [ "half", ] -[[package]] -name = "cipher" -version = "0.5.0-rc.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e12a13eb01ded5d32ee9658d94f553a19e804204f2dc811df69ab4d9e0cb8c7" -dependencies = [ - "block-buffer", - "crypto-common", - "inout", - "zeroize", -] - [[package]] name = "clap" -version = "4.5.51" +version = "4.5.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c26d721170e0295f191a69bd9a1f93efcdb0aff38684b61ab5750468972e5f5" +checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8" dependencies = [ "clap_builder", "clap_derive", @@ -561,9 +519,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.51" +version = "4.5.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75835f0c7bf681bfd05abe44e965760fea999a5286c6eb2d59883634fd02011a" +checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00" dependencies = [ "anstream", "anstyle", @@ -795,39 +753,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8235645834fbc6832939736ce2f2d08192652269e11010a6240f61b908a1c6" dependencies = [ "hybrid-array", - "rand_core 0.9.3", -] - -[[package]] -name = "crypto_box" -version = "0.10.0-pre.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bda4de3e070830cf3a27a394de135b6709aefcc54d1e16f2f029271254a6ed9" -dependencies = [ - "aead", - "chacha20", - "crypto_secretbox", - "curve25519-dalek", - "salsa20", - "serdect", - "subtle", - "zeroize", -] - -[[package]] -name = "crypto_secretbox" -version = "0.2.0-pre.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54532aae6546084a52cef855593daf9555945719eeeda9974150e0def854873e" -dependencies = [ - "aead", - "chacha20", - "cipher", - "hybrid-array", - "poly1305", - "salsa20", - 
"subtle", - "zeroize", ] [[package]] @@ -841,7 +766,7 @@ dependencies = [ "curve25519-dalek-derive", "digest", "fiat-crypto", - "rand_core 0.9.3", + "rand_core", "rustc_version", "serde", "subtle", @@ -881,9 +806,9 @@ checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" [[package]] name = "der" -version = "0.8.0-rc.9" +version = "0.8.0-rc.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9d8dd2f26c86b27a2a8ea2767ec7f9df7a89516e4794e54ac01ee618dda3aa4" +checksum = "02c1d73e9668ea6b6a28172aa55f3ebec38507131ce179051c8033b5c6037653" dependencies = [ "const-oid", "pem-rfc7468", @@ -1038,9 +963,9 @@ checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" [[package]] name = "ed25519" -version = "3.0.0-rc.1" +version = "3.0.0-rc.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ef49c0b20c0ad088893ad2a790a29c06a012b3f05bcfc66661fd22a94b32129" +checksum = "594435fe09e345ee388e4e8422072ff7dfeca8729389fbd997b3f5504c44cd47" dependencies = [ "pkcs8", "serde", @@ -1055,7 +980,7 @@ checksum = "ad207ed88a133091f83224265eac21109930db09bedcad05d5252f2af2de20a1" dependencies = [ "curve25519-dalek", "ed25519", - "rand_core 0.9.3", + "rand_core", "serde", "sha2", "signature", @@ -1124,6 +1049,18 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "fastbloom" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18c1ddb9231d8554c2d6bdf4cfaabf0c59251658c68b6c95cd52dd0c513a912a" +dependencies = [ + "getrandom 0.3.4", + "libm", + "rand", + "siphasher", +] + [[package]] name = "fastrand" version = "2.3.0" @@ -1138,9 +1075,9 @@ checksum = "64cd1e32ddd350061ae6edb1b082d7c54915b5c672c389143b9a63403a109f24" [[package]] name = "find-msvc-tools" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" +checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" [[package]] name = "flume" @@ -1159,12 +1096,6 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" -[[package]] -name = "foldhash" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" - [[package]] name = "foldhash" version = "0.2.0" @@ -1192,9 +1123,9 @@ dependencies = [ [[package]] name = "fs-err" -version = "3.1.3" +version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ad492b2cf1d89d568a43508ab24f98501fe03f2f31c01e1d0fe7366a71745d2" +checksum = "62d91fd049c123429b018c47887d3f75a265540dd3c30ba9cb7bae9197edb03a" dependencies = [ "autocfg", "tokio", @@ -1542,9 +1473,9 @@ dependencies = [ [[package]] name = "governor" -version = "0.10.1" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "444405bbb1a762387aa22dd569429533b54a1d8759d35d3b64cb39b0293eaa19" +checksum = "6e23d5986fd4364c2fb7498523540618b4b8d92eec6c36a02e565f66748e2f79" dependencies = [ "cfg-if", "dashmap", @@ -1552,12 +1483,12 @@ dependencies = [ "futures-timer", "futures-util", "getrandom 0.3.4", - "hashbrown 0.15.5", + "hashbrown 0.16.1", "nonzero_ext", "parking_lot", "portable-atomic", "quanta", - "rand 0.9.2", + "rand", "smallvec", "spinning_top", "web-time", @@ -1610,24 +1541,13 @@ 
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" [[package]] name = "hashbrown" -version = "0.15.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" -dependencies = [ - "allocator-api2", - "equivalent", - "foldhash 0.1.5", -] - -[[package]] -name = "hashbrown" -version = "0.16.0" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" dependencies = [ "allocator-api2", "equivalent", - "foldhash 0.2.0", + "foldhash", ] [[package]] @@ -1679,7 +1599,7 @@ dependencies = [ "idna", "ipnet", "once_cell", - "rand 0.9.2", + "rand", "ring", "rustls", "serde", @@ -1704,7 +1624,7 @@ dependencies = [ "moka", "once_cell", "parking_lot", - "rand 0.9.2", + "rand", "resolv-conf", "rustls", "serde", @@ -1823,14 +1743,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f471e0a81b2f90ffc0cb2f951ae04da57de8baa46fa99112b062a5173a5088d0" dependencies = [ "typenum", - "zeroize", ] [[package]] name = "hyper" -version = "1.7.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb3aa54a13a0dfe7fbe3a59e0c76093041720fdc77b110cc0fc260fafb4dc51e" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" dependencies = [ "atomic-waker", "bytes", @@ -1868,9 +1787,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.17" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c6995591a8f1380fcb4ba966a252a4b29188d51d2b89e3a252f5305be65aea8" +checksum = "52e9a2a24dc5c6821e71a7030e1e14b7b632acac55c40e9d2e082c621261bb56" dependencies = [ "base64", "bytes", @@ -1884,7 +1803,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.1", + "socket2 0.5.10", "tokio", "tower-service", "tracing", @@ -1995,6 +1914,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "identity-hash" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfdd7caa900436d8f13b2346fe10257e0c05c1f1f9e351f4f5d57c03bd5f45da" + [[package]] name = "idna" version = "1.1.0" @@ -2031,7 +1956,7 @@ dependencies = [ "hyper", "hyper-util", "log", - "rand 0.9.2", + "rand", "tokio", "url", "xmltree", @@ -2039,19 +1964,19 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.12.0" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" +checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" dependencies = [ "equivalent", - "hashbrown 0.16.0", + "hashbrown 0.16.1", ] [[package]] name = "indicatif" -version = "0.18.2" +version = "0.18.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ade6dfcba0dfb62ad59e59e7241ec8912af34fd29e0e743e3db992bd278e8b65" +checksum = "9375e112e4b463ec1b1c6c011953545c65a30164fbab5b581df32b3abf0dcb88" dependencies = [ "console", "portable-atomic", @@ -2061,15 +1986,6 @@ dependencies = [ "web-time", ] -[[package]] -name = "inout" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7357b6e7aa75618c7864ebd0634b115a7218b0615f4cb1df33ac3eca23943d4" -dependencies = [ - "hybrid-array", -] - [[package]] name 
= "instant" version = "0.1.13" @@ -2105,9 +2021,9 @@ dependencies = [ [[package]] name = "iri-string" -version = "0.7.8" +version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbc5ebe9c3a1a7a5127f920a418f7585e9e758e911d0466ed004f393b0e380b2" +checksum = "4f867b9d1d896b67beb18518eda36fdb77a32ea590de864f1325b294a6d14397" dependencies = [ "memchr", "serde", @@ -2117,14 +2033,12 @@ dependencies = [ name = "iroh" version = "0.95.1" dependencies = [ - "aead", "axum", "backon", "bytes", "cfg_aliases", "clap", "console_error_panic_hook", - "crypto_box", "data-encoding", "derive_more 2.0.1", "ed25519-dalek", @@ -2153,18 +2067,20 @@ dependencies = [ "portmapper", "postcard", "pretty_assertions", - "rand 0.9.2", - "rand_chacha 0.9.0", + "rand", + "rand_chacha", "reqwest", + "rustc-hash", "rustls", "rustls-pki-types", - "rustls-platform-verifier", + "rustls-platform-verifier 0.5.3", "rustls-webpki", "serde", "serde_json", "smallvec", "strum", "swarm-discovery", + "sync_wrapper", "time", "tokio", "tokio-stream", @@ -2191,9 +2107,9 @@ dependencies = [ "n0-error", "postcard", "proptest", - "rand 0.9.2", - "rand_chacha 0.9.0", - "rand_core 0.9.3", + "rand", + "rand_chacha", + "rand_core", "serde", "serde_json", "serde_test", @@ -2214,7 +2130,7 @@ dependencies = [ "iroh-quinn", "n0-error", "n0-future", - "rand 0.9.2", + "rand", "rcgen", "rustls", "tokio", @@ -2248,8 +2164,8 @@ dependencies = [ "n0-error", "n0-future", "pkarr", - "rand 0.9.2", - "rand_chacha 0.9.0", + "rand", + "rand_chacha", "rcgen", "redb", "regex", @@ -2309,8 +2225,7 @@ dependencies = [ [[package]] name = "iroh-quinn" version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cde160ebee7aabede6ae887460cd303c8b809054224815addf1469d54a6fcf7" +source = "git+https://github.com/n0-computer/quinn?branch=main-iroh#238057833af99ad7ae123dd9144436ee37a2d1e7" dependencies = [ "bytes", "cfg_aliases", @@ -2322,6 +2237,7 @@ dependencies = [ "socket2 0.5.10", "thiserror 2.0.17", "tokio", + "tokio-stream", "tracing", "web-time", ] @@ -2329,17 +2245,19 @@ dependencies = [ [[package]] name = "iroh-quinn-proto" version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "929d5d8fa77d5c304d3ee7cae9aede31f13908bd049f9de8c7c0094ad6f7c535" +source = "git+https://github.com/n0-computer/quinn?branch=main-iroh#238057833af99ad7ae123dd9144436ee37a2d1e7" dependencies = [ "bytes", - "getrandom 0.2.16", - "rand 0.8.5", + "fastbloom", + "getrandom 0.3.4", + "identity-hash", + "lru-slab", + "rand", "ring", "rustc-hash", "rustls", "rustls-pki-types", - "rustls-platform-verifier", + "rustls-platform-verifier 0.6.2", "slab", "thiserror 2.0.17", "tinyvec", @@ -2349,16 +2267,14 @@ dependencies = [ [[package]] name = "iroh-quinn-udp" -version = "0.5.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c53afaa1049f7c83ea1331f5ebb9e6ebc5fdd69c468b7a22dd598b02c9bcc973" +version = "0.6.0" +source = "git+https://github.com/n0-computer/quinn?branch=main-iroh#238057833af99ad7ae123dd9144436ee37a2d1e7" dependencies = [ "cfg_aliases", "libc", - "once_cell", "socket2 0.5.10", "tracing", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -2370,7 +2286,6 @@ dependencies = [ "bytes", "cfg_aliases", "clap", - "crypto_box", "dashmap", "data-encoding", "derive_more 2.0.1", @@ -2392,8 +2307,8 @@ dependencies = [ "pkarr", "postcard", "proptest", - "rand 0.9.2", - "rand_chacha 0.9.0", + "rand", + "rand_chacha", "rcgen", 
"reloadable-state", "reqwest", @@ -2469,9 +2384,9 @@ checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" [[package]] name = "js-sys" -version = "0.3.82" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65" +checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" dependencies = [ "once_cell", "wasm-bindgen", @@ -2489,6 +2404,12 @@ version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" +[[package]] +name = "libm" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" + [[package]] name = "libredox" version = "0.1.10" @@ -2563,7 +2484,7 @@ version = "0.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96051b46fc183dc9cd4a223960ef37b9af631b55191852a8274bfef064cda20f" dependencies = [ - "hashbrown 0.16.0", + "hashbrown 0.16.1", ] [[package]] @@ -2572,11 +2493,17 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" +[[package]] +name = "mac-addr" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3d25b0e0b648a86960ac23b7ad4abb9717601dec6f66c165f5b037f3f03065f" + [[package]] name = "mainline" -version = "6.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5be6c12ff79bfbf65bcbec84882a4bf700177df6d83a7b866c6a01cda7db4777" +checksum = "6ff27d378ca495eaf3be8616d5d7319c1c18e93fd60e13698fcdc7e19448f1a4" dependencies = [ "crc", "document-features", @@ -2668,22 +2595,20 @@ dependencies = [ [[package]] name = "n0-error" -version = "0.1.0" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a4839a11b62f1fdd75be912ee20634053c734c2240e867ded41c7f50822c549" +checksum = "c7d5969a2f40e9d9ed121a789c415f4114ac2b28e5731c080bdefee217d3b3fb" dependencies = [ - "derive_more 2.0.1", "n0-error-macros", "spez", ] [[package]] name = "n0-error-macros" -version = "0.1.0" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ed2a7e5ca3cb5729d4a162d7bcab5b338bed299a2fee8457568d7e0a747ed89" +checksum = "9a6908df844696d9af91c7c3950d50e52d67df327d02a95367f95bbf177d6556" dependencies = [ - "heck", "proc-macro2", "quote", "syn", @@ -2691,9 +2616,9 @@ dependencies = [ [[package]] name = "n0-future" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439e746b307c1fd0c08771c3cafcd1746c3ccdb0d9c7b859d3caded366b6da76" +checksum = "8c0709ac8235ce13b82bc4d180ee3c42364b90c1a8a628c3422d991d75a728b5" dependencies = [ "cfg_aliases", "derive_more 1.0.0", @@ -2712,9 +2637,9 @@ dependencies = [ [[package]] name = "n0-watcher" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38acf13c1ddafc60eb7316d52213467f8ccb70b6f02b65e7d97f7799b1f50be4" +checksum = "ba717c22ceec021ace0ff7674bf8fd60c9394605740a8201678fc1cb3a7398f6" dependencies = [ "derive_more 2.0.1", "n0-error", @@ -2723,13 +2648,14 @@ dependencies = [ [[package]] name = "netdev" -version = "0.38.2" +version = "0.39.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "67ab878b4c90faf36dab10ea51d48c69ae9019bcca47c048a7c9b273d5d7a823" +checksum = "35a703aa1a87cd885b9f674922445a42dbb0c0f4f1b28fef21b227ae32375d21" dependencies = [ "dlopen2", "ipnet", "libc", + "mac-addr", "netlink-packet-core", "netlink-packet-route", "netlink-sys", @@ -2789,8 +2715,7 @@ dependencies = [ [[package]] name = "netwatch" version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26f2acd376ef48b6c326abf3ba23c449e0cb8aa5c2511d189dd8a8a3bfac889b" +source = "git+https://github.com/n0-computer/net-tools?branch=quinn-udp-git#7fc6d4483b449739e0e7a630022afba7e9b95c55" dependencies = [ "atomic-waker", "bytes", @@ -2898,6 +2823,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -3021,9 +2947,9 @@ dependencies = [ [[package]] name = "pem-rfc7468" -version = "1.0.0-rc.3" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8e58fab693c712c0d4e88f8eb3087b6521d060bcaf76aeb20cb192d809115ba" +checksum = "a6305423e0e7738146434843d1694d621cce767262b2a86910beab705e4493d9" dependencies = [ "base64ct", ] @@ -3110,9 +3036,9 @@ dependencies = [ [[package]] name = "pkcs8" -version = "0.11.0-rc.7" +version = "0.11.0-rc.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93eac55f10aceed84769df670ea4a32d2ffad7399400d41ee1c13b1cd8e1b478" +checksum = "77089aec8290d0b7bb01b671b091095cf1937670725af4fd73d47249f03b12c0" dependencies = [ "der", "spki", @@ -3146,16 +3072,6 @@ dependencies = [ "plotters-backend", ] -[[package]] -name = "poly1305" -version = "0.9.0-rc.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb78a635f75d76d856374961deecf61031c0b6f928c83dc9c0924ab6c019c298" -dependencies = [ - "cpufeatures", - "universal-hash", -] - [[package]] name = "portable-atomic" version = "1.11.1" @@ -3165,8 +3081,7 @@ checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" [[package]] name = "portmapper" version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b575f975dcf03e258b0c7ab3f81497d7124f508884c37da66a7314aa2a8d467" +source = "git+https://github.com/n0-computer/net-tools?branch=quinn-udp-git#7fc6d4483b449739e0e7a630022afba7e9b95c55" dependencies = [ "base64", "bytes", @@ -3180,7 +3095,7 @@ dependencies = [ "n0-error", "netwatch", "num_enum", - "rand 0.9.2", + "rand", "serde", "smallvec", "socket2 0.6.1", @@ -3289,8 +3204,8 @@ dependencies = [ "bit-vec", "bitflags", "num-traits", - "rand 0.9.2", - "rand_chacha 0.9.0", + "rand", + "rand_chacha", "rand_xorshift", "regex-syntax", "rusty-fork", @@ -3332,7 +3247,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls", - "socket2 0.6.1", + "socket2 0.5.10", "thiserror 2.0.17", "tokio", "tracing", @@ -3348,7 +3263,7 @@ dependencies = [ "bytes", "getrandom 0.3.4", "lru-slab", - "rand 0.9.2", + "rand", "ring", "rustc-hash", "rustls", @@ -3369,16 +3284,16 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.1", + "socket2 0.5.10", "tracing", "windows-sys 0.60.2", ] [[package]] name = "quote" -version = "1.0.41" +version = "1.0.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +checksum = 
"a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" dependencies = [ "proc-macro2", ] @@ -3389,35 +3304,14 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha 0.3.1", - "rand_core 0.6.4", -] - [[package]] name = "rand" version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ - "rand_chacha 0.9.0", - "rand_core 0.9.3", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core 0.6.4", + "rand_chacha", + "rand_core", ] [[package]] @@ -3427,16 +3321,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core 0.9.3", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom 0.2.16", + "rand_core", ] [[package]] @@ -3454,7 +3339,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a" dependencies = [ - "rand_core 0.9.3", + "rand_core", ] [[package]] @@ -3618,9 +3503,9 @@ dependencies = [ [[package]] name = "resolv-conf" -version = "0.7.5" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b3789b30bd25ba102de4beabd95d21ac45b69b1be7d14522bab988c526d6799" +checksum = "1e061d1b48cb8d38042de4ae0a7a6401009d6143dc80d2e2d6f31f0bdd6470c7" [[package]] name = "ring" @@ -3675,9 +3560,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.34" +version = "0.23.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a9586e9ee2b4f8fab52a0048ca7334d7024eef48e2cb9407e3497bb7cab7fa7" +checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" dependencies = [ "log", "once_cell", @@ -3775,6 +3660,27 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "rustls-platform-verifier" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d99feebc72bae7ab76ba994bb5e121b8d83d910ca40b36e0921f53becc41784" +dependencies = [ + "core-foundation 0.10.1", + "core-foundation-sys", + "jni", + "log", + "once_cell", + "rustls", + "rustls-native-certs", + "rustls-platform-verifier-android", + "rustls-webpki", + "security-framework", + "security-framework-sys", + "webpki-root-certs 1.0.4", + "windows-sys 0.61.2", +] + [[package]] name = "rustls-platform-verifier-android" version = "0.1.1" @@ -3816,16 +3722,6 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" -[[package]] -name = "salsa20" -version = "0.11.0-rc.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d3ff3b81c8a6e381bc1673768141383f9328048a60edddcfc752a8291a138443" -dependencies = [ - "cfg-if", - "cipher", -] - [[package]] name = "same-file" version = "1.0.6" @@ -4012,16 +3908,6 @@ dependencies = [ "serde", ] -[[package]] -name = "serdect" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3ef0e35b322ddfaecbc60f34ab448e157e48531288ee49fafbb053696b8ffe2" -dependencies = [ - "base16ct", - "serde", -] - [[package]] name = "sha1" version = "0.11.0-rc.2" @@ -4067,18 +3953,18 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "signal-hook-registry" -version = "1.4.6" +version = "1.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" +checksum = "7664a098b8e616bdfcc2dc0e9ac44eb231eedf41db4e9fe95d8d32ec728dedad" dependencies = [ "libc", ] [[package]] name = "signature" -version = "3.0.0-rc.4" +version = "3.0.0-rc.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc280a6ff65c79fbd6622f64d7127f32b85563bca8c53cd2e9141d6744a9056d" +checksum = "2a0251c9d6468f4ba853b6352b190fb7c1e405087779917c238445eb03993826" [[package]] name = "simdutf8" @@ -4095,6 +3981,12 @@ dependencies = [ "bitflags", ] +[[package]] +name = "siphasher" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" + [[package]] name = "slab" version = "0.4.11" @@ -4254,7 +4146,7 @@ checksum = "790d8444f7db1e88f70aed3234cab8e42c48e05360bfc86ca7dce0d9a5d95d26" dependencies = [ "acto", "hickory-proto", - "rand 0.9.2", + "rand", "socket2 0.5.10", "thiserror 2.0.17", "tokio", @@ -4263,9 +4155,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.108" +version = "2.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" +checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" dependencies = [ "proc-macro2", "quote", @@ -4531,9 +4423,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.16" +version = "0.7.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5" +checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594" dependencies = [ "bytes", "futures-core", @@ -4556,7 +4448,7 @@ dependencies = [ "getrandom 0.3.4", "http 1.3.1", "httparse", - "rand 0.9.2", + "rand", "ring", "rustls-pki-types", "simdutf8", @@ -4821,19 +4713,9 @@ checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" [[package]] name = "unit-prefix" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "323402cff2dd658f39ca17c789b502021b3f18707c91cdf22e3838e1b4023817" - -[[package]] -name = "universal-hash" -version = "0.6.0-rc.2" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a55be643b40a21558f44806b53ee9319595bc7ca6896372e4e08e5d7d83c9cd6" -dependencies = [ - "crypto-common", - "subtle", -] +checksum = "81e544489bf3d8ef66c953931f56617f423cd4b5494be343d9b9d3dda037b9a3" [[package]] name = "untrusted" @@ -4933,9 +4815,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60" +checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" dependencies = [ "cfg-if", "once_cell", @@ -4946,9 +4828,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.55" +version = "0.4.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "551f88106c6d5e7ccc7cd9a16f312dd3b5d36ea8b4954304657d5dfba115d4a0" +checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" dependencies = [ "cfg-if", "js-sys", @@ -4959,9 +4841,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2" +checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -4969,9 +4851,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc" +checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" dependencies = [ "bumpalo", "proc-macro2", @@ -4982,21 +4864,29 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76" +checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" dependencies = [ "unicode-ident", ] [[package]] name = "wasm-bindgen-test" -version = "0.3.55" +version = "0.3.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfc379bfb624eb59050b509c13e77b4eb53150c350db69628141abce842f2373" +checksum = "25e90e66d265d3a1efc0e72a54809ab90b9c0c515915c67cdf658689d2c22c6c" dependencies = [ + "async-trait", + "cast", "js-sys", + "libm", "minicov", + "nu-ansi-term", + "num-traits", + "oorandom", + "serde", + "serde_json", "wasm-bindgen", "wasm-bindgen-futures", "wasm-bindgen-test-macro", @@ -5004,9 +4894,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-test-macro" -version = "0.3.55" +version = "0.3.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "085b2df989e1e6f9620c1311df6c996e83fe16f57792b272ce1e024ac16a90f1" +checksum = "7150335716dce6028bead2b848e72f47b45e7b9422f64cccdc23bedca89affc1" dependencies = [ "proc-macro2", "quote", @@ -5028,9 +4918,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.82" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1" +checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" dependencies = [ "js-sys", "wasm-bindgen", @@ -5052,23 +4942,23 @@ version = "0.26.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75c7f0ef91146ebfb530314f5f1d24528d7f0767efbfd31dce919275413e393e" dependencies = [ - "webpki-root-certs 1.0.3", + "webpki-root-certs 1.0.4", ] [[package]] name = "webpki-root-certs" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05d651ec480de84b762e7be71e6efa7461699c19d9e2c272c8d93455f567786e" +checksum = 
"ee3e3b5f5e80bc89f30ce8d0343bf4e5f12341c51f3e26cbeecbc7c85443e85b" dependencies = [ "rustls-pki-types", ] [[package]] name = "webpki-roots" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32b130c0d2d49f8b6889abc456e795e82525204f27c42cf767cf0d7734e089b8" +checksum = "b2878ef029c47c6e8cf779119f20fcf52bde7ad42a731b2a304bc221df17571e" dependencies = [ "rustls-pki-types", ] @@ -5101,7 +4991,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.48.0", ] [[package]] @@ -5750,18 +5640,18 @@ checksum = "2164e798d9e3d84ee2c91139ace54638059a3b23e361f5c11781c2c6459bde0f" [[package]] name = "zerocopy" -version = "0.8.27" +version = "0.8.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +checksum = "43fa6694ed34d6e57407afbccdeecfa268c470a7d2a5b0cf49ce9fcc345afb90" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.27" +version = "0.8.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +checksum = "c640b22cd9817fae95be82f0d2f90b11f7605f6c319d16705c459b27ac2cbc26" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 0d0681861f5..656cf126d28 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,3 +40,21 @@ unexpected_cfgs = { level = "warn", check-cfg = ["cfg(iroh_docsrs)", "cfg(iroh_l [workspace.lints.clippy] unused-async = "warn" + + +[patch.crates-io] +iroh-quinn = { git = "https://github.com/n0-computer/quinn", branch = "main-iroh" } +iroh-quinn-proto = { git = "https://github.com/n0-computer/quinn", branch = "main-iroh" } +iroh-quinn-udp = { git = "https://github.com/n0-computer/quinn", branch = "main-iroh" } + +netwatch = { git = "https://github.com/n0-computer/net-tools", branch = "quinn-udp-git" } +portmapper = { git = "https://github.com/n0-computer/net-tools", branch = "quinn-udp-git" } + +# iroh-quinn = { path = "../quinn/quinn" } +# iroh-quinn-proto = { path = "../quinn/quinn-proto" } +# iroh-quinn-udp = { path = "../quinn/quinn-udp" } + +# [patch."https://github.com/n0-computer/quinn"] +# iroh-quinn = { path = "../quinn/quinn" } +# iroh-quinn-proto = { path = "../quinn/quinn-proto" } +# iroh-quinn-udp = { path = "../quinn/quinn-udp" } diff --git a/deny.toml b/deny.toml index cc23a1d1440..1e96919b93e 100644 --- a/deny.toml +++ b/deny.toml @@ -27,4 +27,7 @@ ignore = [ ] [sources] -allow-git = [] +allow-git = [ + "https://github.com/n0-computer/quinn", + "https://github.com/n0-computer/net-tools" +] diff --git a/iroh-base/src/endpoint_addr.rs b/iroh-base/src/endpoint_addr.rs index 342f472ec41..76548518dd2 100644 --- a/iroh-base/src/endpoint_addr.rs +++ b/iroh-base/src/endpoint_addr.rs @@ -54,6 +54,18 @@ pub enum TransportAddr { Ip(SocketAddr), } +impl TransportAddr { + /// Whether this is a transport address via a relay server. + pub fn is_relay(&self) -> bool { + matches!(self, Self::Relay(_)) + } + + /// Whether this is an IP transport address. + pub fn is_ip(&self) -> bool { + matches!(self, Self::Ip(_)) + } +} + impl EndpointAddr { /// Creates a new [`EndpointAddr`] with no network level addresses. 
/// diff --git a/iroh-relay/Cargo.toml b/iroh-relay/Cargo.toml index 17428cbffbf..7a88b7539d2 100644 --- a/iroh-relay/Cargo.toml +++ b/iroh-relay/Cargo.toml @@ -42,8 +42,8 @@ postcard = { version = "1", default-features = false, features = [ "use-std", "experimental-derive", ] } -quinn = { package = "iroh-quinn", version = "0.14.0", default-features = false, features = ["rustls-ring"] } -quinn-proto = { package = "iroh-quinn-proto", version = "0.13.0" } +quinn = { package = "iroh-quinn", git = "https://github.com/n0-computer/quinn", branch = "main-iroh", default-features = false, features = ["rustls-ring"] } +quinn-proto = { package = "iroh-quinn-proto", git = "https://github.com/n0-computer/quinn", branch = "main-iroh" } rand = "0.9.2" reqwest = { version = "0.12", default-features = false, features = [ "rustls-tls", @@ -61,7 +61,6 @@ tokio-rustls = { version = "0.26", default-features = false, features = [ "logging", "ring", ] } -sha1 = "0.11.0-rc.2" tokio-util = { version = "0.7", features = ["io-util", "io", "codec", "rt"] } tracing = "0.1" url = { version = "2.5.3", features = ["serde"] } @@ -85,6 +84,7 @@ time = { version = "0.3.37", optional = true } tokio-rustls-acme = { version = "0.8", optional = true } tokio-websockets = { version = "0.12", features = ["rustls-bring-your-own-connector", "ring", "getrandom", "rand", "server"], optional = true } # server-side websocket implementation simdutf8 = { version = "0.1.5", optional = true } # minimal version fix +sha1 = { version = "0.11.0-rc.2", optional = true } toml = { version = "0.9", optional = true } tracing-subscriber = { version = "0.3", features = [ "env-filter", @@ -115,7 +115,6 @@ getrandom = { version = "0.3.2", features = ["wasm_js"] } [dev-dependencies] clap = { version = "4", features = ["derive"] } -crypto_box = { version = "0.10.0-pre.0", features = ["serde", "chacha20"] } proptest = "1.2.0" rand_chacha = "0.9" tokio = { version = "1", features = [ @@ -151,9 +150,9 @@ server = [ "dep:tokio-rustls-acme", "dep:tokio-websockets", "dep:simdutf8", + "dep:sha1", "dep:toml", "dep:tracing-subscriber", - "quinn/log", "quinn/platform-verifier", "quinn/runtime-tokio", "iroh-metrics/service", diff --git a/iroh-relay/src/client.rs b/iroh-relay/src/client.rs index 3bba68cdb41..1a276d27091 100644 --- a/iroh-relay/src/client.rs +++ b/iroh-relay/src/client.rs @@ -288,7 +288,7 @@ impl ClientBuilder { let conn = Conn::new(conn, self.key_cache.clone(), &self.secret_key).await?; event!( - target: "events.net.relay.connected", + target: "iroh::_events::net::relay::connected", Level::DEBUG, url = %self.url, ); @@ -343,7 +343,7 @@ impl ClientBuilder { let conn = Conn::new(ws_stream, self.key_cache.clone(), &self.secret_key).await?; event!( - target: "events.net.relay.connected", + target: "iroh::_events::net::relay::connected", Level::DEBUG, url = %self.url, ); diff --git a/iroh-relay/src/client/conn.rs b/iroh-relay/src/client/conn.rs index a361f8ac967..542980adbe1 100644 --- a/iroh-relay/src/client/conn.rs +++ b/iroh-relay/src/client/conn.rs @@ -10,7 +10,7 @@ use std::{ use iroh_base::SecretKey; use n0_error::{ensure, stack_error}; use n0_future::{Sink, Stream}; -use tracing::debug; +use tracing::trace; use super::KeyCache; #[cfg(not(wasm_browser))] @@ -88,9 +88,9 @@ impl Conn { let mut conn = WsBytesFramed { io }; // exchange information with the server - debug!("server_handshake: started"); + trace!("server_handshake: started"); handshake::clientside(&mut conn, secret_key).await?; - debug!("server_handshake: done"); + 
trace!("server_handshake: done"); Ok(Self { conn, key_cache }) } diff --git a/iroh-relay/src/client/tls.rs b/iroh-relay/src/client/tls.rs index b5c585c4685..b2629cbd6f4 100644 --- a/iroh-relay/src/client/tls.rs +++ b/iroh-relay/src/client/tls.rs @@ -138,7 +138,6 @@ impl MaybeTlsStreamBuilder { async fn dial_url_direct(&self) -> Result { use tokio::net::TcpStream; - debug!(%self.url, "dial url"); let dst_ip = self .dns_resolver .resolve_host(&self.url, self.prefer_ipv6, DNS_TIMEOUT) @@ -147,7 +146,7 @@ impl MaybeTlsStreamBuilder { let port = url_port(&self.url).ok_or_else(|| e!(DialError::InvalidTargetPort))?; let addr = SocketAddr::new(dst_ip, port); - debug!("connecting to {}", addr); + trace!("connecting to {}", addr); let tcp_stream = time::timeout(DIAL_ENDPOINT_TIMEOUT, async move { TcpStream::connect(addr).await }) diff --git a/iroh/Cargo.toml b/iroh/Cargo.toml index 8530b39a20d..1ffcaa59130 100644 --- a/iroh/Cargo.toml +++ b/iroh/Cargo.toml @@ -21,37 +21,29 @@ crate-type = ["lib", "cdylib"] workspace = true [dependencies] -aead = { version = "=0.6.0-rc.2", features = ["bytes"] } backon = { version = "1.4" } bytes = "1.7" -crypto_box = { version = "0.10.0-pre.0", features = ["serde", "chacha20"] } data-encoding = "2.2" -derive_more = { version = "2.0.1", features = [ - "debug", - "display", - "from", - "try_into", - "deref", - "from_str" -] } +derive_more = { version = "2.0.1", features = ["debug", "display", "from", "try_into", "deref", "from_str", "into_iterator"] } ed25519-dalek = { version = "3.0.0-pre.1", features = ["serde", "rand_core", "zeroize", "pkcs8", "pem"] } http = "1" iroh-base = { version = "0.95.1", default-features = false, features = ["key", "relay"], path = "../iroh-base" } iroh-relay = { version = "0.95", path = "../iroh-relay", default-features = false } n0-future = "0.3.0" n0-error = "0.1.0" -n0-watcher = "0.5" +n0-watcher = "0.6" netwatch = { version = "0.12" } pin-project = "1" pkarr = { version = "5", default-features = false, features = ["relays"] } -quinn = { package = "iroh-quinn", version = "0.14.0", default-features = false, features = ["rustls-ring"] } -quinn-proto = { package = "iroh-quinn-proto", version = "0.13.0" } -quinn-udp = { package = "iroh-quinn-udp", version = "0.5.7" } +quinn = { package = "iroh-quinn", git = "https://github.com/n0-computer/quinn", branch = "main-iroh", default-features = false, features = ["rustls-ring"] } +quinn-proto = { package = "iroh-quinn-proto", git = "https://github.com/n0-computer/quinn", branch = "main-iroh" } +quinn-udp = { package = "iroh-quinn-udp", git = "https://github.com/n0-computer/quinn", branch = "main-iroh" } rand = "0.9.2" reqwest = { version = "0.12", default-features = false, features = [ "rustls-tls", "stream", ] } +rustc-hash = "2" rustls = { version = "0.23.33", default-features = false, features = ["ring"] } serde = { version = "1.0.219", features = ["derive", "rc"] } smallvec = "1.11.1" @@ -82,14 +74,15 @@ futures-util = "0.3" # test_utils axum = { version = "0.8", optional = true } +sync_wrapper = { version = "1.0.2", features = ["futures"] } # non-wasm-in-browser dependencies [target.'cfg(not(all(target_family = "wasm", target_os = "unknown")))'.dependencies] hickory-resolver = "0.25.1" igd-next = { version = "0.16", features = ["aio_tokio"] } -netdev = { version = "0.38.1" } +netdev = { version = "0.39.0" } portmapper = { version = "0.12", default-features = false } -quinn = { package = "iroh-quinn", version = "0.14.0", default-features = false, features = ["runtime-tokio", "rustls-ring"] } 
+quinn = { package = "iroh-quinn", git = "https://github.com/n0-computer/quinn", branch = "main-iroh", default-features = false, features = ["runtime-tokio", "rustls-ring"] } tokio = { version = "1", features = [ "io-util", "macros", @@ -135,6 +128,7 @@ tokio = { version = "1", features = [ serde_json = "1" iroh-relay = { path = "../iroh-relay", default-features = false, features = ["test-utils", "server"] } tracing-test = "0.2.5" +# tracing-test = { git = "https://github.com/Frando/tracing-test", branch = "feat/color-and-filter-on-cli", features = ["pretty-log-printing"] } clap = { version = "4", features = ["derive"] } tracing-subscriber = { version = "0.3", features = [ "env-filter", @@ -152,11 +146,13 @@ wasm-bindgen-test = "0.3" cfg_aliases = { version = "0.2.1" } [features] -default = ["metrics"] +default = ["metrics", "fast-apple-datapath"] metrics = ["iroh-metrics/metrics", "iroh-relay/metrics", "portmapper/metrics"] test-utils = ["iroh-relay/test-utils", "iroh-relay/server", "dep:axum"] discovery-local-network = ["dep:swarm-discovery"] discovery-pkarr-dht = ["pkarr/dht"] +# Use private Apple APIs to send multiple packets in a single syscall. +fast-apple-datapath = ["quinn/fast-apple-datapath"] [package.metadata.docs.rs] all-features = true diff --git a/iroh/bench/Cargo.toml b/iroh/bench/Cargo.toml index 6bcaa0e17a1..92796c9cc37 100644 --- a/iroh/bench/Cargo.toml +++ b/iroh/bench/Cargo.toml @@ -8,11 +8,11 @@ publish = false [dependencies] bytes = "1.7" hdrhistogram = { version = "7.2", default-features = false } -iroh = { path = ".." } -iroh-metrics = "0.37" +iroh = { path = "..", default-features = false } +iroh-metrics = { version = "0.37", optional = true } n0-future = "0.3.0" n0-error = "0.1.0" -quinn = { package = "iroh-quinn", version = "0.14" } +quinn = { package = "iroh-quinn", git = "https://github.com/n0-computer/quinn", branch = "main-iroh" } rand = "0.9.2" rcgen = "0.14" rustls = { version = "0.23.33", default-features = false, features = ["ring"] } @@ -28,5 +28,8 @@ tracing-subscriber = { version = "0.3.0", default-features = false, features = [ ] } [features] -default = [] +default = ["metrics", "fast-apple-datapath"] +metrics = ["iroh/metrics", "iroh-metrics"] local-relay = ["iroh/test-utils"] +# Use private Apple APIs to send multiple packets in a single syscall. 
+fast-apple-datapath = ["iroh/fast-apple-datapath", "quinn/fast-apple-datapath"] diff --git a/iroh/bench/src/bin/bulk.rs b/iroh/bench/src/bin/bulk.rs index b38d87e02f3..acc0cf8ce40 100644 --- a/iroh/bench/src/bin/bulk.rs +++ b/iroh/bench/src/bin/bulk.rs @@ -1,9 +1,11 @@ +#[cfg(feature = "metrics")] use std::collections::BTreeMap; use clap::Parser; #[cfg(not(any(target_os = "freebsd", target_os = "openbsd", target_os = "netbsd")))] use iroh_bench::quinn; use iroh_bench::{Commands, Opt, configure_tracing_subscriber, iroh, rt, s2n}; +#[cfg(feature = "metrics")] use iroh_metrics::{MetricValue, MetricsGroup}; use n0_error::Result; @@ -52,6 +54,7 @@ pub fn run_iroh(opt: Opt) -> Result<()> { iroh::server_endpoint(&runtime, &relay_url, &opt) }; + #[cfg(feature = "metrics")] let endpoint_metrics = endpoint.metrics().clone(); let server_thread = std::thread::spawn(move || { @@ -86,6 +89,7 @@ pub fn run_iroh(opt: Opt) -> Result<()> { } } + #[cfg(feature = "metrics")] if opt.metrics { // print metrics println!("\nMetrics:"); @@ -158,6 +162,7 @@ pub fn run_s2n(_opt: s2n::Opt) -> Result<()> { unimplemented!() } +#[cfg(feature = "metrics")] fn collect_and_print(category: &'static str, metrics: &dyn MetricsGroup) { let mut map = BTreeMap::new(); for item in metrics.iter() { diff --git a/iroh/bench/src/iroh.rs b/iroh/bench/src/iroh.rs index 2f1430873ee..8ab164e980e 100644 --- a/iroh/bench/src/iroh.rs +++ b/iroh/bench/src/iroh.rs @@ -6,7 +6,7 @@ use std::{ use bytes::Bytes; use iroh::{ Endpoint, EndpointAddr, RelayMode, RelayUrl, - endpoint::{Connection, ConnectionError, RecvStream, SendStream, TransportConfig}, + endpoint::{Connection, ConnectionError, QuinnTransportConfig, RecvStream, SendStream}, }; use n0_error::{Result, StackResultExt, StdResultExt}; use tracing::{trace, warn}; @@ -34,11 +34,9 @@ pub fn server_endpoint( #[cfg(feature = "local-relay")] { builder = builder.insecure_skip_relay_cert_verify(relay_url.is_some()); - let path_selection = match opt.only_relay { - true => iroh::endpoint::PathSelection::RelayOnly, - false => iroh::endpoint::PathSelection::default(), - }; - builder = builder.path_selection(path_selection); + if opt.only_relay { + builder = builder.clear_ip_transports(); + } } let ep = builder .alpns(vec![ALPN.to_vec()]) @@ -95,11 +93,9 @@ pub async fn connect_client( #[cfg(feature = "local-relay")] { builder = builder.insecure_skip_relay_cert_verify(relay_url.is_some()); - let path_selection = match opt.only_relay { - true => iroh::endpoint::PathSelection::RelayOnly, - false => iroh::endpoint::PathSelection::default(), - }; - builder = builder.path_selection(path_selection); + if opt.only_relay { + builder = builder.clear_ip_transports(); + } } let endpoint = builder .alpns(vec![ALPN.to_vec()]) @@ -126,17 +122,16 @@ pub async fn connect_client( Ok((endpoint, connection)) } -pub fn transport_config(max_streams: usize, initial_mtu: u16) -> TransportConfig { +pub fn transport_config(max_streams: usize, initial_mtu: u16) -> QuinnTransportConfig { // High stream windows are chosen because the amount of concurrent streams // is configurable as a parameter. 
- let mut config = TransportConfig::default(); + let mut config = QuinnTransportConfig::default(); config.max_concurrent_uni_streams(max_streams.try_into().unwrap()); config.initial_mtu(initial_mtu); - // TODO: re-enable when we upgrade quinn version - // let mut acks = quinn::AckFrequencyConfig::default(); - // acks.ack_eliciting_threshold(10u32.into()); - // config.ack_frequency_config(Some(acks)); + let mut acks = quinn::AckFrequencyConfig::default(); + acks.ack_eliciting_threshold(10u32.into()); + config.ack_frequency_config(Some(acks)); config } diff --git a/iroh/bench/src/lib.rs b/iroh/bench/src/lib.rs index ebbc5d8176e..ed14be45208 100644 --- a/iroh/bench/src/lib.rs +++ b/iroh/bench/src/lib.rs @@ -74,6 +74,8 @@ pub struct Opt { #[cfg(feature = "local-relay")] #[clap(long, default_value_t = false)] pub only_relay: bool, + #[clap(long, default_value_t = false)] + pub use_ipv6: bool, } pub enum EndpointSelector { diff --git a/iroh/bench/src/quinn.rs b/iroh/bench/src/quinn.rs index cd97729ecd5..01aed0bf9c6 100644 --- a/iroh/bench/src/quinn.rs +++ b/iroh/bench/src/quinn.rs @@ -1,5 +1,5 @@ use std::{ - net::{IpAddr, Ipv4Addr, SocketAddr}, + net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr}, sync::Arc, time::{Duration, Instant}, }; @@ -32,13 +32,15 @@ pub fn server_endpoint( let mut server_config = quinn::ServerConfig::with_single_cert(cert_chain, key).unwrap(); server_config.transport = Arc::new(transport_config(opt.max_streams, opt.initial_mtu)); + let addr = if opt.use_ipv6 { + IpAddr::V6(Ipv6Addr::LOCALHOST) + } else { + IpAddr::V4(Ipv4Addr::LOCALHOST) + }; + let endpoint = { let _guard = rt.enter(); - quinn::Endpoint::server( - server_config, - SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 0), - ) - .unwrap() + quinn::Endpoint::server(server_config, SocketAddr::new(addr, 0)).unwrap() }; let server_addr = endpoint.local_addr().unwrap(); (server_addr, endpoint) @@ -69,8 +71,13 @@ pub async fn connect_client( server_cert: CertificateDer<'_>, opt: Opt, ) -> Result<(::quinn::Endpoint, Connection)> { - let endpoint = - quinn::Endpoint::client(SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 0)).unwrap(); + let addr = if opt.use_ipv6 { + IpAddr::V6(Ipv6Addr::LOCALHOST) + } else { + IpAddr::V4(Ipv4Addr::LOCALHOST) + }; + + let endpoint = quinn::Endpoint::client(SocketAddr::new(addr, 0)).unwrap(); let mut roots = RootCertStore::empty(); roots.add(server_cert).anyerr()?; @@ -103,11 +110,11 @@ pub fn transport_config(max_streams: usize, initial_mtu: u16) -> TransportConfig let mut config = TransportConfig::default(); config.max_concurrent_uni_streams(max_streams.try_into().unwrap()); config.initial_mtu(initial_mtu); + config.max_concurrent_multipath_paths(16); - // TODO: re-enable when we upgrade quinn version - // let mut acks = quinn::AckFrequencyConfig::default(); - // acks.ack_eliciting_threshold(10u32.into()); - // config.ack_frequency_config(Some(acks)); + let mut acks = quinn::AckFrequencyConfig::default(); + acks.ack_eliciting_threshold(10u32.into()); + config.ack_frequency_config(Some(acks)); config } diff --git a/iroh/examples/auth-hook.rs b/iroh/examples/auth-hook.rs new file mode 100644 index 00000000000..3201f03d53d --- /dev/null +++ b/iroh/examples/auth-hook.rs @@ -0,0 +1,348 @@ +//! Implementation of authentication using iroh hooks +//! +//! This implements an auth protocol that works with iroh hooks. +//! It allows to put authentication in front of iroh protocols. The protocols don't need any special support. +//! 
Authentication is handled prior to establishing the connections, over a separate connection. + +use iroh::{Endpoint, EndpointAddr, protocol::Router}; +use n0_error::{Result, StdResultExt}; + +use crate::echo::Echo; + +#[tokio::main] +async fn main() -> Result<()> { + tracing_subscriber::fmt::init(); + let server_router = accept_side(b"secret!!").await?; + server_router.endpoint().online().await; + let server_addr = server_router.endpoint().addr(); + + println!("-- no --"); + let res = connect_side_no_auth(server_addr.clone()).await; + println!("echo without auth: {:#}", res.unwrap_err()); + + println!("-- wrong --"); + let res = connect_side(server_addr.clone(), b"dunno").await; + println!("echo with wrong auth: {:#}", res.unwrap_err()); + + println!("-- correct --"); + let res = connect_side(server_addr.clone(), b"secret!!").await; + println!("echo with correct auth: {res:?}"); + + server_router.shutdown().await.anyerr()?; + + Ok(()) +} + +async fn connect_side(remote_addr: EndpointAddr, token: &[u8]) -> Result<()> { + let (auth_hook, auth_task) = auth::outgoing(token.to_vec()); + let endpoint = Endpoint::builder().hooks(auth_hook).bind().await?; + let _guard = auth_task.spawn(endpoint.clone()); + Echo::connect(&endpoint, remote_addr, b"hello there!").await +} + +async fn connect_side_no_auth(remote_addr: EndpointAddr) -> Result<()> { + let endpoint = Endpoint::bind().await?; + Echo::connect(&endpoint, remote_addr, b"hello there!").await +} + +async fn accept_side(token: &[u8]) -> Result { + let (auth_hook, auth_protocol) = auth::incoming(token.to_vec()); + let endpoint = Endpoint::builder().hooks(auth_hook).bind().await?; + + let router = Router::builder(endpoint) + .accept(auth::ALPN, auth_protocol) + .accept(echo::ALPN, Echo) + .spawn(); + + Ok(router) +} + +mod echo { + //! A bare-bones protocol with no knowledge of auth whatsoever. + + use iroh::{ + Endpoint, EndpointAddr, + endpoint::Connection, + protocol::{AcceptError, ProtocolHandler}, + }; + use n0_error::{Result, StdResultExt, anyerr}; + + #[derive(Debug, Clone)] + pub struct Echo; + + pub const ALPN: &[u8] = b"iroh-example/echo/0"; + + impl Echo { + pub async fn connect( + endpoint: &Endpoint, + remote: impl Into, + message: &[u8], + ) -> Result<()> { + let conn = endpoint.connect(remote, ALPN).await?; + let (mut send, mut recv) = conn.open_bi().await.anyerr()?; + send.write_all(message).await.anyerr()?; + send.finish().anyerr()?; + let response = recv.read_to_end(1000).await.anyerr()?; + conn.close(0u32.into(), b"bye!"); + if response == message { + Ok(()) + } else { + Err(anyerr!("Received invalid response")) + } + } + } + + impl ProtocolHandler for Echo { + async fn accept(&self, connection: Connection) -> Result<(), AcceptError> { + let (mut send, mut recv) = connection.accept_bi().await?; + tokio::io::copy(&mut recv, &mut send).await?; + send.finish()?; + connection.closed().await; + Ok(()) + } + } +} + +mod auth { + //! 
Authentication hook + + use std::{ + collections::{HashMap, HashSet, hash_map}, + sync::{Arc, Mutex}, + }; + + use iroh::{ + Endpoint, EndpointAddr, EndpointId, + endpoint::{AfterHandshakeOutcome, BeforeConnectOutcome, Connection, EndpointHooks}, + protocol::{AcceptError, ProtocolHandler}, + }; + use n0_error::{AnyError, Result, StackResultExt, StdResultExt, anyerr}; + use n0_future::task::AbortOnDropHandle; + use quinn::ConnectionError; + use tokio::{ + sync::{mpsc, oneshot}, + task::JoinSet, + }; + use tracing::debug; + + pub const ALPN: &[u8] = b"iroh-example/auth/0"; + + const CLOSE_ACCEPTED: u32 = 1; + const CLOSE_DENIED: u32 = 403; + + /// Outgoing side: Use this if you want to pre-auth outgoing connections. + pub fn outgoing(token: Vec) -> (OutgoingAuthHook, OutgoingAuthTask) { + let (tx, rx) = mpsc::channel(16); + let hook = OutgoingAuthHook { tx }; + let connector = OutgoingAuthTask { + token, + rx, + allowed_remotes: Default::default(), + pending_remotes: Default::default(), + tasks: JoinSet::new(), + }; + (hook, connector) + } + + type AuthResult = Result<(), Arc>; + + /// Hook to mount on the endpoint builder. + #[derive(Debug)] + pub struct OutgoingAuthHook { + tx: mpsc::Sender<(EndpointId, oneshot::Sender)>, + } + + impl OutgoingAuthHook { + async fn authenticate(&self, remote_id: EndpointId) -> Result<()> { + let (tx, rx) = oneshot::channel(); + self.tx + .send((remote_id, tx)) + .await + .std_context("authenticator stopped")?; + rx.await + .std_context("authenticator stopped")? + .context("failed to authenticate") + } + } + + impl EndpointHooks for OutgoingAuthHook { + async fn before_connect<'a>( + &'a self, + remote_addr: &'a EndpointAddr, + alpn: &'a [u8], + ) -> BeforeConnectOutcome { + // Don't intercept auth request themsevles + if alpn == ALPN { + BeforeConnectOutcome::Accept + } else { + match self.authenticate(remote_addr.id).await { + Ok(()) => BeforeConnectOutcome::Accept, + Err(err) => { + debug!("authentication denied: {err:#}"); + BeforeConnectOutcome::Reject + } + } + } + } + } + + /// Connector task that initiates pre-auth request. Call [`Self::spawn`] once the endpoint is built. + pub struct OutgoingAuthTask { + token: Vec, + rx: mpsc::Receiver<(EndpointId, oneshot::Sender)>, + allowed_remotes: HashSet, + pending_remotes: HashMap>>, + tasks: JoinSet<(EndpointId, Result<()>)>, + } + + impl OutgoingAuthTask { + pub fn spawn(self, endpoint: Endpoint) -> AbortOnDropHandle<()> { + AbortOnDropHandle::new(tokio::spawn(self.run(endpoint))) + } + + async fn run(mut self, endpoint: Endpoint) { + loop { + tokio::select! 
{ + msg = self.rx.recv() => { + let Some((remote_id, tx)) = msg else { + break; + }; + self.handle_msg(&endpoint, remote_id, tx); + } + Some(res) = self.tasks.join_next(), if !self.tasks.is_empty() => { + let (remote_id, res) = res.expect("connect task panicked"); + let res = res.map_err(Arc::new); + self.handle_task(remote_id, res); + } + } + } + } + + fn handle_msg( + &mut self, + endpoint: &Endpoint, + remote_id: EndpointId, + tx: oneshot::Sender>>, + ) { + if self.allowed_remotes.contains(&remote_id) { + tx.send(Ok(())).ok(); + } else { + match self.pending_remotes.entry(remote_id) { + hash_map::Entry::Occupied(mut entry) => { + entry.get_mut().push(tx); + } + hash_map::Entry::Vacant(entry) => { + let endpoint = endpoint.clone(); + let token = self.token.clone(); + self.tasks.spawn(async move { + let res = Self::connect(endpoint, remote_id, token).await; + (remote_id, res) + }); + entry.insert(vec![tx]); + } + } + } + } + + fn handle_task(&mut self, remote_id: EndpointId, res: Result<(), Arc>) { + if res.is_ok() { + self.allowed_remotes.insert(remote_id); + } + let senders = self.pending_remotes.remove(&remote_id); + for tx in senders.into_iter().flatten() { + tx.send(res.clone()).ok(); + } + } + + async fn connect(endpoint: Endpoint, remote_id: EndpointId, token: Vec) -> Result<()> { + let conn = endpoint.connect(remote_id, ALPN).await?; + let mut stream = conn.open_uni().await.anyerr()?; + stream.write_all(&token).await.anyerr()?; + stream.finish().anyerr()?; + let reason = conn.closed().await; + if let ConnectionError::ApplicationClosed(code) = &reason + && code.error_code.into_inner() as u32 == CLOSE_ACCEPTED + { + Ok(()) + } else if let ConnectionError::ApplicationClosed(code) = &reason + && code.error_code.into_inner() as u32 == CLOSE_DENIED + { + Err(anyerr!("authentication denied by remote")) + } else { + Err(AnyError::from_std(reason)) + } + } + } + + /// Incoming side: Use this if you want to only accept connections from peers with successful pre-auth requests. + pub fn incoming(token: Vec) -> (IncomingAuthHook, AuthProtocol) { + let allowed_remotes: Arc>> = Default::default(); + let hook = IncomingAuthHook { + allowed_remotes: allowed_remotes.clone(), + }; + let protocol = AuthProtocol { + allowed_remotes, + token, + }; + (hook, protocol) + } + + /// Accept-side auth hook: Mount this onto the endpoint. + /// + /// This will reject incoming connections if the remote did not successfully authenticate before. + #[derive(Debug)] + pub struct IncomingAuthHook { + allowed_remotes: Arc>>, + } + + impl EndpointHooks for IncomingAuthHook { + async fn after_handshake<'a>( + &'a self, + conn: &'a iroh::endpoint::ConnectionInfo, + ) -> AfterHandshakeOutcome { + if conn.alpn() == ALPN + || self + .allowed_remotes + .lock() + .expect("poisoned") + .contains(&conn.remote_id()) + { + AfterHandshakeOutcome::Accept + } else { + AfterHandshakeOutcome::Reject { + error_code: 403u32.into(), + reason: b"not authenticated".to_vec(), + } + } + } + } + + /// Accept-side auth protocol. Mount this on the router to accept authentication requests. + #[derive(Debug, Clone)] + pub struct AuthProtocol { + token: Vec, + allowed_remotes: Arc>>, + } + + impl ProtocolHandler for AuthProtocol { + /// The `accept` method is called for each incoming connection for our ALPN. + /// + /// The returned future runs on a newly spawned tokio task, so it can run as long as + /// the connection lasts. 
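+        ///
+        /// The connection is closed with [`CLOSE_ACCEPTED`] or [`CLOSE_DENIED`], which the
+        /// connect side (see [`OutgoingAuthTask::connect`]) maps back to an auth result.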
+ async fn accept(&self, connection: Connection) -> Result<(), AcceptError> { + let mut stream = connection.accept_uni().await?; + let token = stream.read_to_end(256).await.anyerr()?; + let remote_id = connection.remote_id(); + if token == self.token { + self.allowed_remotes + .lock() + .expect("poisoned") + .insert(remote_id); + connection.close(CLOSE_ACCEPTED.into(), b"accepted"); + } else { + connection.close(CLOSE_DENIED.into(), b"rejected"); + } + Ok(()) + } + } +} diff --git a/iroh/examples/monitor-connections.rs b/iroh/examples/monitor-connections.rs new file mode 100644 index 00000000000..465593fdac8 --- /dev/null +++ b/iroh/examples/monitor-connections.rs @@ -0,0 +1,137 @@ +use std::{sync::Arc, time::Duration}; + +use iroh::{ + Endpoint, RelayMode, Watcher, + endpoint::{AfterHandshakeOutcome, ConnectionInfo, EndpointHooks}, +}; +use n0_error::{Result, StackResultExt, StdResultExt, ensure_any}; +use n0_future::task::AbortOnDropHandle; +use tokio::{ + sync::mpsc::{UnboundedReceiver, UnboundedSender}, + task::JoinSet, +}; +use tracing::{Instrument, info, info_span}; + +const ALPN: &[u8] = b"iroh/test"; + +#[tokio::main] +async fn main() -> Result { + tracing_subscriber::fmt() + .with_env_filter( + tracing_subscriber::EnvFilter::try_from_default_env().unwrap_or_else(|_| "info".into()), + ) + .init(); + + let monitor = Monitor::new(); + let server = Endpoint::empty_builder(RelayMode::Disabled) + .alpns(vec![ALPN.to_vec()]) + .hooks(monitor.clone()) + .bind() + .instrument(info_span!("server")) + .await?; + let server_addr = server.addr(); + + let count = 2; + + let client_task = tokio::spawn( + async move { + let client = Endpoint::empty_builder(RelayMode::Disabled) + .bind() + .instrument(info_span!("client")) + .await?; + for _i in 0..count { + let conn = client.connect(server_addr.clone(), ALPN).await?; + let mut s = conn.accept_uni().await.anyerr()?; + let data = s.read_to_end(2).await.anyerr()?; + ensure_any!(data == b"hi", "unexpected data"); + conn.close(23u32.into(), b"bye"); + } + client.close().await; + n0_error::Ok(client) + } + .instrument(info_span!("client")), + ); + + let server_task = tokio::spawn( + async move { + for _i in 0..count { + let conn = server + .accept() + .await + .context("server endpoint closed")? + .await?; + let mut s = conn.open_uni().await.anyerr()?; + s.write_all(b"hi").await.anyerr()?; + s.finish().anyerr()?; + conn.closed().await; + } + server.close().await; + n0_error::Ok(()) + } + .instrument(info_span!("server")), + ); + client_task.await.std_context("client")?.context("client")?; + server_task.await.std_context("server")?.context("server")?; + tokio::time::sleep(Duration::from_secs(1)).await; + drop(monitor); + Ok(()) +} + +/// Our connection monitor impl. +/// +/// This here only logs connection open and close events via tracing. +/// It could also maintain a datastructure of all connections, or send the stats to some metrics service. 
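+///
+/// A minimal wiring sketch (using only the types from this example, mirroring what
+/// `main` above does):
+///
+/// ```ignore
+/// let monitor = Monitor::new();
+/// let endpoint = Endpoint::empty_builder(RelayMode::Disabled)
+///     .hooks(monitor.clone())
+///     .bind()
+///     .await?;
+/// ```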
+#[derive(Clone, Debug)] +struct Monitor { + tx: UnboundedSender, + _task: Arc>, +} + +impl EndpointHooks for Monitor { + async fn after_handshake(&self, conn: &ConnectionInfo) -> AfterHandshakeOutcome { + self.tx.send(conn.clone()).ok(); + AfterHandshakeOutcome::Accept + } +} + +impl Monitor { + fn new() -> Self { + let (tx, rx) = tokio::sync::mpsc::unbounded_channel(); + let task = tokio::spawn(Self::run(rx).instrument(info_span!("watcher"))); + Self { + tx, + _task: Arc::new(AbortOnDropHandle::new(task)), + } + } + + async fn run(mut rx: UnboundedReceiver) { + let mut tasks = JoinSet::new(); + loop { + tokio::select! { + Some(conn) = rx.recv() => { + let alpn = String::from_utf8_lossy(conn.alpn()).to_string(); + let remote = conn.remote_id().fmt_short(); + let rtt = conn.paths().peek().iter().map(|p| p.stats().rtt).min(); + info!(%remote, %alpn, ?rtt, "new connection"); + tasks.spawn(async move { + match conn.closed().await { + Some((close_reason, stats)) => { + // We have access to the final stats of the connection! + info!(%remote, %alpn, ?close_reason, udp_rx=stats.udp_rx.bytes, udp_tx=stats.udp_tx.bytes, "connection closed"); + } + None => { + // The connection was closed before we could register our stats-on-close listener. + info!(%remote, %alpn, "connection closed before tracking started"); + } + } + }.instrument(tracing::Span::current())); + } + Some(res) = tasks.join_next(), if !tasks.is_empty() => res.expect("conn close task panicked"), + else => break, + } + while let Some(res) = tasks.join_next().await { + res.expect("conn close task panicked"); + } + } + } +} diff --git a/iroh/examples/remote-info.rs b/iroh/examples/remote-info.rs new file mode 100644 index 00000000000..6b9010cab98 --- /dev/null +++ b/iroh/examples/remote-info.rs @@ -0,0 +1,439 @@ +//! Example for using an iroh hook to collect information about remote endpoints. +//! +//! This implements a [`RemoteMap`] which collects information about all connections and paths from an iroh endpoint. +//! The remote map can be cloned and inspected from other tasks at any time. It contains both data about all +//! currently active connections, and an aggregate status for each remote that remains available even after +//! all connections to the endpoint have been closed. + +use std::time::{Duration, SystemTime}; + +use iroh::{Endpoint, EndpointAddr}; +use n0_error::{Result, StackResultExt, StdResultExt, ensure_any}; +use n0_future::IterExt; +use tracing::{Instrument, info, info_span}; + +use crate::remote_map::RemoteMap; + +const ALPN: &[u8] = b"iroh/test"; + +#[tokio::main(flavor = "multi_thread")] +async fn main() -> Result { + tracing_subscriber::fmt() + .with_env_filter( + tracing_subscriber::EnvFilter::try_from_default_env().unwrap_or_else(|_| "info".into()), + ) + .init(); + + // Create the remote map and hook. + let (hook, remote_map) = RemoteMap::new(); + + // Bind our endpoint and install the remote map hook. + let server = Endpoint::builder() + .alpns(vec![ALPN.to_vec()]) + .hooks(hook) + .bind() + .instrument(info_span!("server")) + .await?; + // Wait for our endpoint to be fully online. + server.online().await; + let server_addr = server.addr(); + + // Spawn a task that creates `count` client endpoints that each connect to our server. + let count = 3; + let client_task = tokio::spawn(run_clients(server_addr, count)); + + // Spawn a task that prints info from the remote map while some connections are active. + // You can use this info to make decisions about remotes. 
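+    // For example (a hypothetical policy, not used below): prefer dialing a remote if we
+    // have already observed a direct IP path to it:
+    //
+    //     let prefer_direct = remote_map
+    //         .read()
+    //         .get(&remote_id)
+    //         .map(|info| info.aggregate().ip_path)
+    //         .unwrap_or(false);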
+ let _inspect_task = tokio::task::spawn({ + let remote_map = remote_map.clone(); + async move { + // Wait a bit. + tokio::time::sleep(Duration::from_millis(500)).await; + println!("== while connections are active == "); + log_active(&remote_map); + log_aggregate(&remote_map); + println!(); + } + }); + + // Let the server accept `count` connections in parallel. + // The server keeps all connections open for at least 500 milliseconds. + std::iter::repeat_with(async || { + let conn = server + .accept() + .await + .context("server endpoint closed")? + .await?; + info!("accepted"); + let mut s = conn.open_uni().await.anyerr()?; + // wait a bit. + tokio::time::sleep(Duration::from_millis(500)).await; + s.write_all(b"hi").await.anyerr()?; + s.finish().anyerr()?; + conn.closed().await; + info!("closed"); + n0_error::Ok(()) + }) + .take(count) + .enumerate() + .map(|(i, fut)| fut.instrument(info_span!("server-conn", %i))) + .try_join_all() + .await?; + + // Print the remote map again. + println!("== all connections closed =="); + log_active(&remote_map); + log_aggregate(&remote_map); + + server.close().await; + client_task.await.std_context("client")?.context("client")?; + + Ok(()) +} + +/// Uses the current connection info to print info about a remote. +/// +/// Uses the info about *currently active* connections, which return `None` if no connections are active. +fn log_active(remote_map: &RemoteMap) { + println!("current remote state:"); + for (id, info) in remote_map.read().iter() { + println!( + "[{}] is_active {}, connections {}, ip_path {:?}, relay_path {:?}, current_min_rtt {:?}", + id.fmt_short(), + info.is_active(), + info.connections().count(), + info.has_ip_path(), + info.has_relay_path(), + info.current_min_rtt() + ); + } +} + +/// Uses the aggregated info to print info about a remote. +/// +/// The aggregated info is updated for all connection and path changes, and stays at the latest values +/// even if all connections are closed. +fn log_aggregate(remote_map: &RemoteMap) { + println!("aggregate remote state:"); + for (id, info) in remote_map.read().iter() { + let aggregate = info.aggregate(); + println!( + "[{}] min_rtt {:?}, max_rtt {:?}, ip_path {:?}, relay_path {}, last_update {:?} ago", + id.fmt_short(), + aggregate.rtt_min, + aggregate.rtt_max, + aggregate.ip_path, + aggregate.relay_path, + SystemTime::now() + .duration_since(aggregate.last_update) + .unwrap() + ); + } +} + +async fn run_clients(server_addr: EndpointAddr, count: usize) -> Result { + std::iter::repeat_with(async || { + let client = Endpoint::builder() + .bind() + .instrument(info_span!("client")) + .await?; + let conn = client.connect(server_addr.clone(), ALPN).await?; + info!("connected"); + let mut s = conn.accept_uni().await.anyerr()?; + let data = s.read_to_end(2).await.anyerr()?; + ensure_any!(data == b"hi", "unexpected data"); + conn.close(23u32.into(), b"bye"); + info!("closed"); + client.close().await; + n0_error::Ok(()) + }) + .take(count) + .enumerate() + .map(|(i, fut)| fut.instrument(info_span!("client", %i))) + .try_join_all() + .await?; + Ok(()) +} + +mod remote_map { + //! Implementation of a remote map and hook to track information about all remote endpoints to which an iroh endpoint + //! has connections with. 
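+    //!
+    //! A minimal usage sketch (mirroring `main` in this example; error handling omitted):
+    //!
+    //! ```ignore
+    //! let (hook, remote_map) = RemoteMap::new();
+    //! let endpoint = Endpoint::builder().hooks(hook).bind().await?;
+    //! // ...connect or accept as usual, then inspect the map at any time:
+    //! for (id, info) in remote_map.read().iter() {
+    //!     println!("{}: active={}", id.fmt_short(), info.is_active());
+    //! }
+    //! ```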
+ + use std::{ + collections::HashMap, + sync::{Arc, RwLock, RwLockReadGuard}, + time::{Duration, SystemTime}, + }; + + use iroh::{ + EndpointId, Watcher, + endpoint::{AfterHandshakeOutcome, ConnectionInfo, EndpointHooks, PathInfo}, + }; + use n0_future::task::AbortOnDropHandle; + use tokio::{sync::mpsc, task::JoinSet}; + use tokio_stream::StreamExt; + use tracing::{Instrument, debug, info, info_span}; + + /// Information about a remote info. + #[derive(Debug, Default)] + pub struct RemoteInfo { + aggregate: Aggregate, + connections: HashMap, + } + + /// Aggregate information about a remote info. + #[derive(Debug)] + pub struct Aggregate { + /// Minimal RTT observed over all paths to this remote. + pub rtt_min: Duration, + /// Maximal RTT observed over all paths to this remote. + pub rtt_max: Duration, + /// Whether we ever had an IP path to this remote. + pub ip_path: bool, + /// Whether we ever had a relay path to this remote. + pub relay_path: bool, + /// Time this aggregate was last updated. + pub last_update: SystemTime, + } + + impl Default for Aggregate { + fn default() -> Self { + Self { + rtt_min: Duration::MAX, + rtt_max: Duration::ZERO, + ip_path: false, + relay_path: false, + last_update: SystemTime::UNIX_EPOCH, + } + } + } + + impl Aggregate { + fn update(&mut self, path: &PathInfo) { + self.last_update = SystemTime::now(); + if path.is_ip() { + self.ip_path = true; + } + if path.is_relay() { + self.relay_path = true; + } + let stats = path.stats(); + debug!("path update addr {:?} {stats:?}", path.remote_addr()); + self.rtt_min = self.rtt_min.min(stats.rtt); + self.rtt_max = self.rtt_max.max(stats.rtt); + } + } + + impl RemoteInfo { + /// Returns an aggregate of stats for this remote. + /// + /// This includes info from closed connections. + pub fn aggregate(&self) -> &Aggregate { + &self.aggregate + } + + /// Returns the minimal RTT of all currently active paths. + /// + /// Returns `None` if there are no active connections. + pub fn current_min_rtt(&self) -> Option { + self.connections() + .flat_map(|c| c.paths().get()) + .map(|path| path.stats().rtt) + .min() + } + + /// Returns whether any active connection to the remote has an active IP path. + /// + /// Returns `None` if there are no active connections. + pub fn has_ip_path(&self) -> Option { + self.connections() + .flat_map(|c| c.paths().get()) + .filter(|path| path.is_ip()) + .map(|_| true) + .next() + } + + /// Returns whether any active connection to the remote has an active relay path. + /// + /// Returns `None` if there are no active connections. + pub fn has_relay_path(&self) -> Option { + self.connections() + .flat_map(|c| c.paths().get()) + .filter(|path| path.is_relay()) + .map(|_| true) + .next() + } + + /// Returns `true` if there are active connections to this node. + pub fn is_active(&self) -> bool { + !self.connections.is_empty() + } + + /// Returns an iterator over [`ConnectionInfo`] for currently active connections to this remote. + pub fn connections(&self) -> impl Iterator { + self.connections.values() + } + } + + type RemoteMapInner = Arc>>; + + /// Contains information about remote nodes our endpoint has or had connections with. + #[derive(Clone, Debug)] + pub struct RemoteMap { + map: RemoteMapInner, + _task: Arc>, + } + + /// Hook to collect information about remote endpoints from an endpoint. 
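+    ///
+    /// Created together with a [`RemoteMap`] via [`RemoteMap::new`]; install it on an
+    /// endpoint with `Endpoint::builder().hooks(hook)`.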
+ #[derive(Debug)] + pub struct RemoteMapHook { + tx: mpsc::Sender, + } + + impl EndpointHooks for RemoteMapHook { + async fn after_handshake(&self, conn: &ConnectionInfo) -> AfterHandshakeOutcome { + info!(remote=%conn.remote_id().fmt_short(), "after_handshake"); + self.tx.send(conn.clone()).await.ok(); + AfterHandshakeOutcome::Accept + } + } + + impl RemoteMap { + /// Creates a new [`RemoteMapHook`] and [`RemoteMap`]. + pub fn new() -> (RemoteMapHook, Self) { + Self::with_max_retention(Duration::from_secs(60 * 5)) + } + + /// Creates a new [`RemoteMapHook`] and [`RemoteMap`] and configure the retention time. + /// + /// `retention_time` is the time entries for remote endpoints remain in the map after the last connection has closed. + pub fn with_max_retention(retention_time: Duration) -> (RemoteMapHook, Self) { + let (tx, rx) = mpsc::channel(8); + let map = RemoteMapInner::default(); + let task = tokio::spawn( + Self::run(rx, map.clone(), retention_time) + .instrument(info_span!("remote-map-task")), + ); + let map = Self { + map, + _task: Arc::new(AbortOnDropHandle::new(task)), + }; + let hook = RemoteMapHook { tx }; + (hook, map) + } + + /// Read the current state of the remote map. + /// + /// Returns a [`RwLockReadGuard`] with the actual remote map. Don't hold over await points! + pub fn read(&self) -> RwLockReadGuard<'_, HashMap> { + self.map.read().expect("poisoned") + } + + async fn run( + mut rx: mpsc::Receiver, + map: RemoteMapInner, + retention_time: Duration, + ) { + let mut tasks = JoinSet::new(); + let mut conn_id = 0; + + // Spawn a task to clear expired entries. + let expiry_task = tasks.spawn(Self::clear_expired(retention_time, map.clone())); + + // Main loop + loop { + tokio::select! { + conn = rx.recv() => { + match conn { + Some(conn) => { + conn_id += 1; + Self::on_connection(&mut tasks, map.clone(), conn_id, conn); + }, + None => break, + } + } + Some(res) = tasks.join_next(), if !tasks.is_empty() => { + res.expect("conn close task panicked"); + } + } + } + + // Abort expiry task and join remaining tasks. + expiry_task.abort(); + while let Some(res) = tasks.join_next().await { + if let Err(err) = &res + && !err.is_cancelled() + { + res.expect("conn close task panicked"); + } + } + } + + fn on_connection( + tasks: &mut JoinSet<()>, + map: RemoteMapInner, + conn_id: u64, + conn: ConnectionInfo, + ) { + // Store conn info for full introspection possibility. + { + let mut inner = map.write().expect("poisoned"); + inner + .entry(conn.remote_id()) + .or_default() + .connections + .insert(conn_id, conn.clone()); + } + + // Track connection closing to clear up the map. + tasks.spawn({ + let conn = conn.clone(); + let map = map.clone(); + async move { + conn.closed().await; + { + let mut inner = map.write().expect("poisoned"); + let info = inner.entry(conn.remote_id()).or_default(); + info.connections.remove(&conn_id); + info.aggregate.last_update = SystemTime::now(); + } + } + .instrument(tracing::Span::current()) + }); + + // Track path changes to update stats aggregate. 
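+            // `conn.paths()` is a watcher over the connection's path list; every item the
+            // `.stream()` yields is the full, updated list, which is folded into the
+            // aggregate below.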
+ tasks.spawn({ + async move { + let mut path_updates = conn.paths().stream(); + while let Some(paths) = path_updates.next().await { + { + let mut inner = map.write().expect("poisoned"); + let info = inner.entry(conn.remote_id()).or_default(); + for path in paths { + info.aggregate.update(&path); + } + } + } + } + .instrument(tracing::Span::current()) + }); + } + + async fn clear_expired( + retention_time: Duration, + map: Arc>>, + ) { + let mut interval = tokio::time::interval(retention_time); + loop { + interval.tick().await; + let now = SystemTime::now(); + let mut inner = map.write().expect("poisoned"); + inner.retain(|_remote, info| { + info.is_active() + || now.duration_since(info.aggregate().last_update).unwrap() + < retention_time + }); + } + } + } +} diff --git a/iroh/examples/transfer.rs b/iroh/examples/transfer.rs index 00cb52c9588..3b6c54eb7bf 100644 --- a/iroh/examples/transfer.rs +++ b/iroh/examples/transfer.rs @@ -10,16 +10,16 @@ use data_encoding::HEXLOWER; use indicatif::HumanBytes; use iroh::{ Endpoint, EndpointAddr, EndpointId, RelayMap, RelayMode, RelayUrl, SecretKey, TransportAddr, + Watcher, discovery::{ dns::DnsDiscovery, pkarr::{N0_DNS_PKARR_RELAY_PROD, N0_DNS_PKARR_RELAY_STAGING, PkarrPublisher}, }, dns::{DnsResolver, N0_DNS_ENDPOINT_ORIGIN_PROD, N0_DNS_ENDPOINT_ORIGIN_STAGING}, - endpoint::ConnectionError, + endpoint::{ConnectionError, PathInfoList}, }; use n0_error::{Result, StackResultExt, StdResultExt}; use n0_future::task::AbortOnDropHandle; -use n0_watcher::Watcher as _; use tokio_stream::StreamExt; use tracing::{info, warn}; use url::Url; @@ -242,16 +242,7 @@ impl EndpointArgs { } if self.relay_only { - #[cfg(feature = "test-utils")] - { - builder = builder.path_selection(iroh::endpoint::PathSelection::RelayOnly) - } - #[cfg(not(feature = "test-utils"))] - { - n0_error::bail_any!( - "Must have the `discovery-local-network` enabled when using the `--mdns` flag" - ); - } + builder = builder.clear_ip_transports(); } if let Some(host) = self.dns_server { @@ -280,7 +271,7 @@ impl EndpointArgs { #[cfg(not(feature = "discovery-local-network"))] { n0_error::bail_any!( - "Must have the `test-utils` feature enabled when using the `--relay-only` flag" + "Must have the `discovery-local-network` enabled when using the `--mdns` flag" ); } } @@ -337,7 +328,6 @@ async fn provide(endpoint: Endpoint, size: u64) -> Result<()> { } }; // spawn a task to handle reading and writing off of the connection - let endpoint_clone = endpoint.clone(); tokio::spawn(async move { let conn = accepting.await.anyerr()?; let endpoint_id = conn.remote_id(); @@ -350,7 +340,7 @@ async fn provide(endpoint: Endpoint, size: u64) -> Result<()> { println!("[{remote}] Connected"); // Spawn a background task that prints connection type changes. Will be aborted on drop. - let _guard = watch_conn_type(&endpoint_clone, endpoint_id); + let _guard = watch_conn_type(conn.remote_id(), conn.paths()); // accept a bi-directional QUIC connection // use the `quinn` APIs to send and recv content @@ -404,7 +394,7 @@ async fn fetch(endpoint: Endpoint, remote_addr: EndpointAddr) -> Result<()> { let conn = endpoint.connect(remote_addr, TRANSFER_ALPN).await?; println!("Connected to {}", remote_id); // Spawn a background task that prints connection type changes. Will be aborted on drop. - let _guard = watch_conn_type(&endpoint, remote_id); + let _guard = watch_conn_type(conn.remote_id(), conn.paths()); // Use the Quinn API to send and recv content. 
let (mut send, mut recv) = conn.open_bi().await.anyerr()?; @@ -521,14 +511,36 @@ fn parse_byte_size(s: &str) -> std::result::Result { cfg.parse_size(s) } -fn watch_conn_type(endpoint: &Endpoint, endpoint_id: EndpointId) -> AbortOnDropHandle<()> { - let mut stream = endpoint.conn_type(endpoint_id).unwrap().stream(); +fn watch_conn_type( + endpoint_id: EndpointId, + paths_watcher: impl Watcher + Send + Unpin + 'static, +) -> AbortOnDropHandle<()> { + let id = endpoint_id.fmt_short(); let task = tokio::task::spawn(async move { - while let Some(conn_type) = stream.next().await { - println!( - "[{}] Connection type changed to: {conn_type}", - endpoint_id.fmt_short() - ); + let mut stream = paths_watcher.stream(); + let mut previous = None; + while let Some(paths) = stream.next().await { + if let Some(path) = paths.iter().find(|p| p.is_selected()) { + // We can get path updates without the selected path changing. We don't want to log again in that case. + if Some(path) == previous.as_ref() { + continue; + } + println!( + "[{id}] Connection type changed to: {:?} (RTT: {:?})", + path.remote_addr(), + path.rtt() + ); + previous = Some(path.clone()); + } else if !paths.is_empty() { + println!( + "[{id}] Connection type changed to: mixed ({} paths)", + paths.len() + ); + previous = None; + } else { + println!("[{id}] Connection type changed to none (no active transmission paths)",); + previous = None; + } } }); AbortOnDropHandle::new(task) diff --git a/iroh/src/disco.rs b/iroh/src/disco.rs deleted file mode 100644 index 0e648b28db5..00000000000 --- a/iroh/src/disco.rs +++ /dev/null @@ -1,624 +0,0 @@ -//! Contains the discovery message types. -//! -//! A discovery message is: -//! -//! Header: -//! -//! ```ignore -//! magic: [u8; 6] // “TS💬” (0x54 53 f0 9f 92 ac) -//! sender_disco_pub: [u8; 32] // nacl public key -//! nonce: [u8; 24] -//! ```` -//! The recipient then decrypts the bytes following (the nacl secretbox) -//! and then the inner payload structure is: -//! -//! ```ignore -//! message_type: u8 // (the MessageType constants below) -//! message_version: u8 // (0 for now; but always ignore bytes at the end) -//! message_payload: &[u8] -//! ``` - -use std::{ - fmt::{self, Display}, - net::{IpAddr, SocketAddr}, -}; - -use data_encoding::HEXLOWER; -use iroh_base::{PublicKey, RelayUrl}; -use n0_error::{e, ensure, stack_error}; -use rand::Rng; -use serde::{Deserialize, Serialize}; -use url::Url; - -use crate::magicsock::transports; - -// TODO: custom magicn -/// The 6 byte header of all discovery messages. -pub const MAGIC: &str = "TS💬"; // 6 bytes: 0x54 53 f0 9f 92 ac -pub const MAGIC_LEN: usize = MAGIC.len(); - -/// Current Version. -const V0: u8 = 0; - -pub(crate) const KEY_LEN: usize = 32; -const TX_LEN: usize = 12; - -// Sizes for the inner message structure. 
- -/// Header: Type | Version -const HEADER_LEN: usize = 2; - -const PING_LEN: usize = TX_LEN + iroh_base::PublicKey::LENGTH; -const EP_LENGTH: usize = 16 + 2; // 16 byte IP address + 2 byte port - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -#[repr(u8)] -pub enum MessageType { - Ping = 0x01, - Pong = 0x02, - CallMeMaybe = 0x03, -} - -impl TryFrom for MessageType { - type Error = u8; - - fn try_from(value: u8) -> std::result::Result { - match value { - 0x01 => Ok(MessageType::Ping), - 0x02 => Ok(MessageType::Pong), - 0x03 => Ok(MessageType::CallMeMaybe), - _ => Err(value), - } - } -} - -const MESSAGE_HEADER_LEN: usize = MAGIC_LEN + KEY_LEN; - -pub fn encode_message(sender: &PublicKey, seal: Vec) -> Vec { - let mut out = Vec::with_capacity(MESSAGE_HEADER_LEN); - out.extend_from_slice(MAGIC.as_bytes()); - out.extend_from_slice(sender.as_bytes()); - out.extend(seal); - - out -} - -/// Reports whether p looks like it's a packet containing an encrypted disco message. -pub fn looks_like_disco_wrapper(p: &[u8]) -> bool { - if p.len() < MESSAGE_HEADER_LEN { - return false; - } - - &p[..MAGIC_LEN] == MAGIC.as_bytes() -} - -/// If `p` looks like a disco message it returns the slice of `p` that represents the disco public key source, -/// and the part that is the box. -pub fn source_and_box(p: &[u8]) -> Option<(PublicKey, &[u8])> { - if !looks_like_disco_wrapper(p) { - return None; - } - - let source = &p[MAGIC_LEN..MAGIC_LEN + KEY_LEN]; - let sender = PublicKey::try_from(source).ok()?; - let sealed_box = &p[MAGIC_LEN + KEY_LEN..]; - Some((sender, sealed_box)) -} - -/// A discovery message. -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum Message { - Ping(Ping), - Pong(Pong), - CallMeMaybe(CallMeMaybe), -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Ping { - /// Random client-generated per-ping transaction ID. - pub tx_id: TransactionId, - - /// Allegedly the ping sender's wireguard public key. - /// It shouldn't be trusted by itself, but can be combined with - /// netmap data to reduce the discokey:endpointkey relation from 1:N to 1:1. - pub endpoint_key: PublicKey, -} - -/// A response a Ping. -/// -/// It includes the sender's source IP + port, so it's effectively a STUN response. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Pong { - pub tx_id: TransactionId, - /// The observed address off the ping sender. - /// - /// 18 bytes (16+2) on the wire; v4-mapped ipv6 for IPv4. - pub ping_observed_addr: SendAddr, -} - -/// Addresses to which we can send. This is either a UDP or a relay address. -#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] -pub enum SendAddr { - /// UDP, the ip addr. - Udp(SocketAddr), - /// Relay Url. - Relay(RelayUrl), -} - -impl SendAddr { - /// Returns if this is a `relay` addr. - pub fn is_relay(&self) -> bool { - matches!(self, Self::Relay(_)) - } - - /// Returns the `Some(Url)` if it is a relay addr. 
- pub fn relay_url(&self) -> Option { - match self { - Self::Relay(url) => Some(url.clone()), - Self::Udp(_) => None, - } - } -} - -impl From for SendAddr { - fn from(addr: transports::Addr) -> Self { - match addr { - transports::Addr::Ip(addr) => SendAddr::Udp(addr), - transports::Addr::Relay(url, _) => SendAddr::Relay(url), - } - } -} - -impl From for SendAddr { - fn from(source: SocketAddr) -> Self { - SendAddr::Udp(source) - } -} - -impl From for SendAddr { - fn from(source: RelayUrl) -> Self { - SendAddr::Relay(source) - } -} - -impl PartialEq for SendAddr { - fn eq(&self, other: &SocketAddr) -> bool { - match self { - Self::Relay(_) => false, - Self::Udp(addr) => addr.eq(other), - } - } -} - -impl Display for SendAddr { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - SendAddr::Relay(id) => write!(f, "Relay({id})"), - SendAddr::Udp(addr) => write!(f, "UDP({addr})"), - } - } -} - -/// Message sent only over the relay to request that the recipient try -/// to open up a magicsock path back to the sender. -/// -/// The sender should've already sent UDP packets to the peer to open -/// up the stateful firewall mappings inbound. -/// -/// The recipient may choose to not open a path back, if it's already happy with its path. -/// But usually it will. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct CallMeMaybe { - /// What the peer believes its endpoints are. - pub my_numbers: Vec, -} - -impl Ping { - fn from_bytes(p: &[u8]) -> Result { - // Deliberately lax on longer-than-expected messages, for future compatibility. - ensure!(p.len() >= PING_LEN, ParseError::TooShort); - let tx_id: [u8; TX_LEN] = p[..TX_LEN].try_into().expect("length checked"); - let raw_key = &p[TX_LEN..TX_LEN + iroh_base::PublicKey::LENGTH]; - let endpoint_key = - PublicKey::try_from(raw_key).map_err(|_| e!(ParseError::InvalidEncoding))?; - let tx_id = TransactionId::from(tx_id); - - Ok(Ping { - tx_id, - endpoint_key, - }) - } - - fn as_bytes(&self) -> Vec { - let header = msg_header(MessageType::Ping, V0); - let mut out = vec![0u8; PING_LEN + HEADER_LEN]; - - out[..HEADER_LEN].copy_from_slice(&header); - out[HEADER_LEN..HEADER_LEN + TX_LEN].copy_from_slice(&self.tx_id); - out[HEADER_LEN + TX_LEN..].copy_from_slice(self.endpoint_key.as_ref()); - - out - } -} - -#[allow(missing_docs)] -#[stack_error(derive, add_meta)] -#[non_exhaustive] -pub enum ParseError { - #[error("message is too short")] - TooShort, - #[error("invalid encoding")] - InvalidEncoding, - #[error("unknown format")] - UnknownFormat, -} - -fn send_addr_from_bytes(p: &[u8]) -> Result { - ensure!(p.len() > 2, ParseError::TooShort); - match p[0] { - 0u8 => { - let bytes: [u8; EP_LENGTH] = p[1..].try_into().map_err(|_| e!(ParseError::TooShort))?; - let addr = socket_addr_from_bytes(bytes); - Ok(SendAddr::Udp(addr)) - } - 1u8 => { - let s = std::str::from_utf8(&p[1..]).map_err(|_| e!(ParseError::InvalidEncoding))?; - let u: Url = s.parse().map_err(|_| e!(ParseError::InvalidEncoding))?; - Ok(SendAddr::Relay(u.into())) - } - _ => Err(e!(ParseError::UnknownFormat)), - } -} - -fn send_addr_to_vec(addr: &SendAddr) -> Vec { - match addr { - SendAddr::Relay(url) => { - let mut out = vec![1u8]; - out.extend_from_slice(url.to_string().as_bytes()); - out - } - SendAddr::Udp(ip) => { - let mut out = vec![0u8]; - out.extend_from_slice(&socket_addr_as_bytes(ip)); - out - } - } -} - -// Assumes p.len() == EP_LENGTH -fn socket_addr_from_bytes(p: [u8; EP_LENGTH]) -> SocketAddr { - debug_assert_eq!(EP_LENGTH, 16 + 2); - - let 
raw_src_ip: [u8; 16] = p[..16].try_into().expect("array long enough"); - let raw_port: [u8; 2] = p[16..].try_into().expect("array long enough"); - - let src_ip = IpAddr::from(raw_src_ip).to_canonical(); - let src_port = u16::from_le_bytes(raw_port); - - SocketAddr::new(src_ip, src_port) -} - -fn socket_addr_as_bytes(addr: &SocketAddr) -> [u8; EP_LENGTH] { - let mut out = [0u8; EP_LENGTH]; - let ipv6 = match addr.ip() { - IpAddr::V4(v4) => v4.to_ipv6_mapped(), - IpAddr::V6(v6) => v6, - }; - out[..16].copy_from_slice(&ipv6.octets()); - out[16..].copy_from_slice(&addr.port().to_le_bytes()); - - out -} - -impl Pong { - fn from_bytes(p: &[u8]) -> Result { - let tx_id: [u8; TX_LEN] = p[..TX_LEN] - .try_into() - .map_err(|_| e!(ParseError::TooShort))?; - - let tx_id = TransactionId::from(tx_id); - let src = send_addr_from_bytes(&p[TX_LEN..])?; - - Ok(Pong { - tx_id, - ping_observed_addr: src, - }) - } - - fn as_bytes(&self) -> Vec { - let header = msg_header(MessageType::Pong, V0); - let mut out = header.to_vec(); - out.extend_from_slice(&self.tx_id); - - let src_bytes = send_addr_to_vec(&self.ping_observed_addr); - out.extend(src_bytes); - out - } -} - -impl CallMeMaybe { - fn from_bytes(p: &[u8]) -> Result { - ensure!(p.len() % EP_LENGTH == 0, ParseError::InvalidEncoding); - - let num_entries = p.len() / EP_LENGTH; - let mut m = CallMeMaybe { - my_numbers: Vec::with_capacity(num_entries), - }; - - for chunk in p.chunks_exact(EP_LENGTH) { - let bytes: [u8; EP_LENGTH] = chunk - .try_into() - .map_err(|_| e!(ParseError::InvalidEncoding))?; - let src = socket_addr_from_bytes(bytes); - m.my_numbers.push(src); - } - - Ok(m) - } - - fn as_bytes(&self) -> Vec { - let header = msg_header(MessageType::CallMeMaybe, V0); - let mut out = vec![0u8; HEADER_LEN + self.my_numbers.len() * EP_LENGTH]; - out[..HEADER_LEN].copy_from_slice(&header); - - for (m, chunk) in self - .my_numbers - .iter() - .zip(out[HEADER_LEN..].chunks_exact_mut(EP_LENGTH)) - { - let raw = socket_addr_as_bytes(m); - chunk.copy_from_slice(&raw); - } - - out - } -} - -impl Message { - /// Parses the encrypted part of the message from inside the nacl secretbox. - pub fn from_bytes(p: &[u8]) -> Result { - ensure!(p.len() >= 2, ParseError::TooShort); - - let t = MessageType::try_from(p[0]).map_err(|_| e!(ParseError::UnknownFormat))?; - let version = p[1]; - ensure!(version == V0, ParseError::UnknownFormat); - - let p = &p[2..]; - match t { - MessageType::Ping => { - let ping = Ping::from_bytes(p)?; - Ok(Message::Ping(ping)) - } - MessageType::Pong => { - let pong = Pong::from_bytes(p)?; - Ok(Message::Pong(pong)) - } - MessageType::CallMeMaybe => { - let cm = CallMeMaybe::from_bytes(p)?; - Ok(Message::CallMeMaybe(cm)) - } - } - } - - /// Serialize this message to bytes. 
- pub fn as_bytes(&self) -> Vec { - match self { - Message::Ping(ping) => ping.as_bytes(), - Message::Pong(pong) => pong.as_bytes(), - Message::CallMeMaybe(cm) => cm.as_bytes(), - } - } -} - -impl Display for Message { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Message::Ping(ping) => { - write!(f, "Ping(tx={})", HEXLOWER.encode(&ping.tx_id)) - } - Message::Pong(pong) => { - write!(f, "Pong(tx={})", HEXLOWER.encode(&pong.tx_id)) - } - Message::CallMeMaybe(_) => { - write!(f, "CallMeMaybe") - } - } - } -} - -const fn msg_header(t: MessageType, ver: u8) -> [u8; HEADER_LEN] { - [t as u8, ver] -} - -const TRANSACTION_ID_SIZE: usize = 12; - -/// The transaction ID is a 96-bit identifier -/// -/// It is used to uniquely identify STUN transactions. -/// It primarily serves to correlate requests with responses, -/// though it also plays a small role in helping to prevent -/// certain types of attacks. The server also uses the transaction ID as -/// a key to identify each transaction uniquely across all clients. -#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub(crate) struct TransactionId([u8; TRANSACTION_ID_SIZE]); - -impl fmt::Debug for TransactionId { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "TransactionId(0x")?; - fmt_transcation_id(self.as_ref(), f) - } -} - -impl fmt::Display for TransactionId { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "transaction id (0x")?; - fmt_transcation_id(self.as_ref(), f) - } -} - -fn fmt_transcation_id(bytes: &[u8], f: &mut fmt::Formatter) -> fmt::Result { - for byte in bytes { - write!(f, "{:02X}", byte)?; - } - write!(f, ")") -} - -impl std::ops::Deref for TransactionId { - type Target = [u8]; - - fn deref(&self) -> &[u8] { - &self.0 - } -} - -impl AsRef<[u8]> for TransactionId { - fn as_ref(&self) -> &[u8] { - &self.0[..] - } -} - -impl From<&[u8; TRANSACTION_ID_SIZE]> for TransactionId { - fn from(buff: &[u8; TRANSACTION_ID_SIZE]) -> Self { - Self(*buff) - } -} - -impl From<[u8; TRANSACTION_ID_SIZE]> for TransactionId { - fn from(buff: [u8; TRANSACTION_ID_SIZE]) -> Self { - Self(buff) - } -} - -impl rand::distr::Distribution for rand::distr::StandardUniform { - fn sample(&self, rng: &mut R) -> TransactionId { - let mut buffer = [0u8; TRANSACTION_ID_SIZE]; - rng.fill_bytes(&mut buffer); - TransactionId::from(buffer) - } -} - -impl Default for TransactionId { - /// Creates a cryptographically random transaction ID chosen from the interval 0 .. 2**96-1. 
- fn default() -> Self { - let mut rng = rand::rng(); - rng.random() - } -} - -#[cfg(test)] -mod tests { - use iroh_base::SecretKey; - use rand::SeedableRng; - - use super::*; - use crate::key::{SharedSecret, public_ed_box, secret_ed_box}; - - #[test] - fn test_to_from_bytes() { - struct Test { - name: &'static str, - m: Message, - want: &'static str, - } - let tests = [ - Test { - name: "ping_with_endpointkey_src", - m: Message::Ping(Ping { - tx_id: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12].into(), - endpoint_key: PublicKey::try_from( - &[ - 190, 243, 65, 104, 37, 102, 175, 75, 243, 22, 69, 200, 167, 107, 24, - 63, 216, 140, 120, 43, 4, 112, 16, 62, 117, 155, 45, 215, 72, 175, 40, - 189, - ][..], - ) - .unwrap(), - }), - want: "01 00 01 02 03 04 05 06 07 08 09 0a 0b 0c be f3 41 68 25 66 af 4b f3 16 45 c8 a7 6b 18 3f d8 8c 78 2b 04 70 10 3e 75 9b 2d d7 48 af 28 bd", - }, - Test { - name: "pong", - m: Message::Pong(Pong { - tx_id: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12].into(), - ping_observed_addr: SendAddr::Udp("2.3.4.5:1234".parse().unwrap()), - }), - want: "02 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 00 00 00 00 00 00 00 00 00 00 00 ff ff 02 03 04 05 d2 04", - }, - Test { - name: "pongv6", - m: Message::Pong(Pong { - tx_id: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12].into(), - ping_observed_addr: SendAddr::Udp("[fed0::12]:6666".parse().unwrap()), - }), - want: "02 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 00 fe d0 00 00 00 00 00 00 00 00 00 00 00 00 00 12 0a 1a", - }, - Test { - name: "call_me_maybe", - m: Message::CallMeMaybe(CallMeMaybe { - my_numbers: Vec::new(), - }), - want: "03 00", - }, - Test { - name: "call_me_maybe_endpoints", - m: Message::CallMeMaybe(CallMeMaybe { - my_numbers: vec![ - "1.2.3.4:567".parse().unwrap(), - "[2001::3456]:789".parse().unwrap(), - ], - }), - want: "03 00 00 00 00 00 00 00 00 00 00 00 ff ff 01 02 03 04 37 02 20 01 00 00 00 00 00 00 00 00 00 00 00 00 34 56 15 03", - }, - ]; - for test in tests { - println!("{}", test.name); - - let got = test.m.as_bytes(); - assert_eq!( - got, - data_encoding::HEXLOWER - .decode(test.want.replace(' ', "").as_bytes()) - .unwrap(), - "wrong as_bytes" - ); - - let back = Message::from_bytes(&got).expect("failed to parse"); - assert_eq!(test.m, back, "wrong from_bytes"); - } - } - - #[test] - fn test_extraction() { - let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0u64); - let sender_key = SecretKey::generate(&mut rng); - let recv_key = SecretKey::generate(&mut rng); - - let msg = Message::Ping(Ping { - tx_id: TransactionId::default(), - endpoint_key: sender_key.public(), - }); - - let sender_secret = secret_ed_box(&sender_key); - let shared = SharedSecret::new(&sender_secret, &public_ed_box(&recv_key.public())); - let mut seal = msg.as_bytes(); - shared.seal(&mut seal); - - let bytes = encode_message(&sender_key.public(), seal.clone()); - - assert!(looks_like_disco_wrapper(&bytes)); - assert_eq!(source_and_box(&bytes).unwrap().0, sender_key.public()); - - let (raw_key, seal_back) = source_and_box(&bytes).unwrap(); - assert_eq!(raw_key, sender_key.public()); - assert_eq!(seal_back, seal); - - let recv_secret = secret_ed_box(&recv_key); - let shared_recv = SharedSecret::new(&recv_secret, &public_ed_box(&sender_key.public())); - let mut open_seal = seal_back.to_vec(); - shared_recv - .open(&mut open_seal) - .expect("failed to open seal_back"); - let msg_back = Message::from_bytes(&open_seal).unwrap(); - assert_eq!(msg_back, msg); - } -} diff --git a/iroh/src/discovery.rs b/iroh/src/discovery.rs index f99dc771fb0..aa6902e70ac 
100644 --- a/iroh/src/discovery.rs +++ b/iroh/src/discovery.rs @@ -113,22 +113,14 @@ use std::sync::{Arc, RwLock}; use iroh_base::{EndpointAddr, EndpointId}; -use n0_error::{AnyError, e, ensure, stack_error}; -use n0_future::{ - boxed::BoxStream, - stream::StreamExt, - task::{self, AbortOnDropHandle}, - time::{self, Duration}, -}; -use tokio::sync::oneshot; -use tracing::{Instrument, debug, error_span, warn}; +use n0_error::{AnyError, e, stack_error}; +use n0_future::boxed::BoxStream; use crate::Endpoint; pub use crate::endpoint_info::{EndpointData, EndpointInfo, ParseError, UserData}; #[cfg(not(wasm_browser))] pub mod dns; - #[cfg(feature = "discovery-local-network")] pub mod mdns; pub mod pkarr; @@ -219,15 +211,16 @@ impl IntoDiscoveryError { #[allow(missing_docs)] #[stack_error(derive, add_meta)] #[non_exhaustive] +#[derive(Clone)] pub enum DiscoveryError { #[error("No discovery service configured")] NoServiceConfigured, - #[error("Discovery produced no results for {}", endpoint_id.fmt_short())] - NoResults { endpoint_id: EndpointId }, + #[error("Discovery produced no results")] + NoResults, #[error("Service '{provenance}' error")] User { provenance: &'static str, - source: AnyError, + source: Arc, }, } @@ -238,10 +231,7 @@ impl DiscoveryError { provenance: &'static str, source: T, ) -> Self { - e!(DiscoveryError::User { - provenance, - source: AnyError::from_std(source) - }) + Self::from_err_any(provenance, AnyError::from_std(source)) } /// Creates a new user error from an arbitrary boxed error type. @@ -250,10 +240,7 @@ impl DiscoveryError { provenance: &'static str, source: Box, ) -> Self { - e!(DiscoveryError::User { - provenance, - source: AnyError::from_std_box(source) - }) + Self::from_err_any(provenance, AnyError::from_std_box(source)) } /// Creates a new user error from an arbitrary error type that can be converted into [`AnyError`]. @@ -261,7 +248,7 @@ impl DiscoveryError { pub fn from_err_any(provenance: &'static str, source: impl Into) -> Self { e!(DiscoveryError::User { provenance, - source: source.into() + source: Arc::new(source.into()) }) } } @@ -503,163 +490,18 @@ impl Discovery for ConcurrentDiscovery { } } -/// Maximum duration since the last control or data message received from an endpoint to make us -/// start a discovery task. -const MAX_AGE: Duration = Duration::from_secs(10); - -/// A wrapper around a tokio task which runs an endpoint discovery. -pub(super) struct DiscoveryTask { - on_first_rx: oneshot::Receiver>, - _task: AbortOnDropHandle<()>, -} - -impl DiscoveryTask { - /// Starts a discovery task. - pub(super) fn start(ep: Endpoint, endpoint_id: EndpointId) -> Result { - ensure!( - !ep.discovery().is_empty(), - DiscoveryError::NoServiceConfigured - ); - let (on_first_tx, on_first_rx) = oneshot::channel(); - let me = ep.id(); - let task = task::spawn( - async move { Self::run(ep, endpoint_id, on_first_tx).await }.instrument( - error_span!("discovery", me = %me.fmt_short(), endpoint = %endpoint_id.fmt_short()), - ), - ); - Ok(Self { - _task: AbortOnDropHandle::new(task), - on_first_rx, - }) - } - - /// Starts a discovery task after a delay and only if no path to the endpoint was recently active. - /// - /// This returns `None` if we received data or control messages from the remote endpoint - /// recently enough. If not it returns a [`DiscoveryTask`]. - /// - /// If `delay` is set, the [`DiscoveryTask`] will first wait for `delay` and then check again - /// if we recently received messages from remote endpoint. If true, the task will abort. 
- /// Otherwise, or if no `delay` is set, the discovery will be started. - pub(super) fn maybe_start_after_delay( - ep: &Endpoint, - endpoint_id: EndpointId, - delay: Option, - ) -> Result, DiscoveryError> { - // If discovery is not needed, don't even spawn a task. - if !ep.needs_discovery(endpoint_id, MAX_AGE) { - return Ok(None); - } - ensure!( - !ep.discovery().is_empty(), - DiscoveryError::NoServiceConfigured - ); - let (on_first_tx, on_first_rx) = oneshot::channel(); - let ep = ep.clone(); - let me = ep.id(); - let task = task::spawn( - async move { - // If delay is set, wait and recheck if discovery is needed. If not, early-exit. - if let Some(delay) = delay { - time::sleep(delay).await; - if !ep.needs_discovery(endpoint_id, MAX_AGE) { - debug!("no discovery needed, abort"); - on_first_tx.send(Ok(())).ok(); - return; - } - } - Self::run(ep, endpoint_id, on_first_tx).await - } - .instrument( - error_span!("discovery", me = %me.fmt_short(), endpoint = %endpoint_id.fmt_short()), - ), - ); - Ok(Some(Self { - _task: AbortOnDropHandle::new(task), - on_first_rx, - })) - } - - /// Waits until the discovery task produced at least one result. - pub(super) async fn first_arrived(&mut self) -> Result<(), DiscoveryError> { - let fut = &mut self.on_first_rx; - fut.await.expect("sender dropped")?; - Ok(()) - } - - fn create_stream( - ep: &Endpoint, - endpoint_id: EndpointId, - ) -> Result>, DiscoveryError> { - ensure!( - !ep.discovery().is_empty(), - DiscoveryError::NoServiceConfigured - ); - let stream = ep - .discovery() - .resolve(endpoint_id) - .ok_or_else(|| e!(DiscoveryError::NoResults { endpoint_id }))?; - Ok(stream) - } - - async fn run( - ep: Endpoint, - endpoint_id: EndpointId, - on_first_tx: oneshot::Sender>, - ) { - let mut stream = match Self::create_stream(&ep, endpoint_id) { - Ok(stream) => stream, - Err(err) => { - on_first_tx.send(Err(err)).ok(); - return; - } - }; - let mut on_first_tx = Some(on_first_tx); - debug!("starting"); - loop { - match stream.next().await { - Some(Ok(r)) => { - let provenance = r.provenance; - let endpoint_addr = r.to_endpoint_addr(); - if endpoint_addr.is_empty() { - debug!(%provenance, "empty address found"); - continue; - } - debug!(%provenance, addr = ?endpoint_addr, "new address found"); - let source = crate::magicsock::Source::Discovery { - name: provenance.to_string(), - }; - ep.add_endpoint_addr(endpoint_addr, source).ok(); - - if let Some(tx) = on_first_tx.take() { - tx.send(Ok(())).ok(); - } - } - Some(Err(err)) => { - warn!(?err, "discovery service produced error"); - break; - } - None => break, - } - } - if let Some(tx) = on_first_tx.take() { - tx.send(Err(e!(DiscoveryError::NoResults { endpoint_id }))) - .ok(); - } - } -} - #[cfg(test)] mod tests { use std::{ collections::HashMap, net::SocketAddr, sync::{Arc, Mutex}, - time::SystemTime, + time::{Duration, SystemTime}, }; use iroh_base::{EndpointAddr, SecretKey, TransportAddr}; use n0_error::{AnyError as Error, Result, StackResultExt}; + use n0_future::{StreamExt, time}; use quinn::{IdleTimeout, TransportConfig}; use rand::{CryptoRng, Rng, SeedableRng}; use tokio_util::task::AbortOnDropHandle; diff --git a/iroh/src/endpoint.rs b/iroh/src/endpoint.rs index 02561829016..48e0edb45f6 100644 --- a/iroh/src/endpoint.rs +++ b/iroh/src/endpoint.rs @@ -24,36 +24,42 @@ use n0_watcher::Watcher; use tracing::{debug, instrument, trace, warn}; use url::Url; +use self::hooks::EndpointHooksList; +pub use super::magicsock::{ + DirectAddr, DirectAddrType, PathInfo, + remote_map::{PathInfoList, Source}, +}; 
#[cfg(wasm_browser)] use crate::discovery::pkarr::PkarrResolver; #[cfg(not(wasm_browser))] use crate::dns::DnsResolver; use crate::{ - discovery::{ - ConcurrentDiscovery, DiscoveryError, DiscoveryTask, DynIntoDiscovery, IntoDiscovery, - UserData, - }, + discovery::{ConcurrentDiscovery, DiscoveryError, DynIntoDiscovery, IntoDiscovery, UserData}, endpoint::presets::Preset, - magicsock::{self, EndpointIdMappedAddr, Handle}, + magicsock::{ + self, HEARTBEAT_INTERVAL, Handle, MAX_MULTIPATH_PATHS, PATH_MAX_IDLE_TIMEOUT, + RemoteStateActorStoppedError, mapped_addrs::MappedAddr, + }, metrics::EndpointMetrics, net_report::Report, tls::{self, DEFAULT_MAX_TLS_TICKETS}, }; mod connection; +pub(crate) mod hooks; pub mod presets; -mod rtt_actor; +pub use hooks::{AfterHandshakeOutcome, BeforeConnectOutcome, EndpointHooks}; // Missing still: SendDatagram and ConnectionClose::frame_type's Type. pub use quinn::{ AcceptBi, AcceptUni, AckFrequencyConfig, ApplicationClose, Chunk, ClosedStream, ConnectionClose, ConnectionError, ConnectionStats, MtuDiscoveryConfig, OpenBi, OpenUni, - ReadDatagram, ReadError, ReadExactError, ReadToEndError, RecvStream, ResetError, RetryError, - SendDatagramError, SendStream, ServerConfig, StoppedError, StreamId, TransportConfig, VarInt, - WeakConnectionHandle, WriteError, + PathStats, ReadDatagram, ReadError, ReadExactError, ReadToEndError, RecvStream, ResetError, + RetryError, SendDatagramError, SendStream, ServerConfig, StoppedError, StreamId, + TransportConfig as QuinnTransportConfig, VarInt, WeakConnectionHandle, WriteError, }; pub use quinn_proto::{ - FrameStats, PathStats, TransportError, TransportErrorCode, UdpStats, Written, + FrameStats, TransportError, TransportErrorCode, UdpStats, Written, congestion::{Controller, ControllerFactory}, crypto::{ AeadKey, CryptoError, ExportKeyingMaterialError, HandshakeTokenKey, @@ -63,32 +69,11 @@ pub use quinn_proto::{ pub use self::connection::{ Accept, Accepting, AlpnError, AuthenticationError, Connecting, ConnectingError, Connection, - ConnectionState, HandshakeCompleted, Incoming, IncomingZeroRtt, IncomingZeroRttConnection, - OutgoingZeroRtt, OutgoingZeroRttConnection, RemoteEndpointIdError, ZeroRttStatus, + ConnectionInfo, ConnectionState, HandshakeCompleted, Incoming, IncomingZeroRtt, + IncomingZeroRttConnection, OutgoingZeroRtt, OutgoingZeroRttConnection, RemoteEndpointIdError, + ZeroRttStatus, }; -pub use super::magicsock::{ - AddEndpointAddrError, ConnectionType, ControlMsg, DirectAddr, DirectAddrInfo, DirectAddrType, - Source, -}; - -/// The delay to fall back to discovery when direct addresses fail. -/// -/// When a connection is attempted with an [`EndpointAddr`] containing direct addresses the -/// [`Endpoint`] assumes one of those addresses probably works. If after this delay there -/// is still no connection the configured [`crate::discovery::Discovery`] will be used however. -const DISCOVERY_WAIT_PERIOD: Duration = Duration::from_millis(500); - -/// Defines the mode of path selection for all traffic flowing through -/// the endpoint. -#[cfg(any(test, feature = "test-utils"))] -#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)] -pub enum PathSelection { - /// Uses all available paths - #[default] - All, - /// Forces all traffic to go exclusively through relays - RelayOnly, -} +pub use crate::magicsock::transports::TransportConfig; /// Builder for [`Endpoint`]. 
/// @@ -99,7 +84,6 @@ pub enum PathSelection { #[derive(Debug)] pub struct Builder { secret_key: Option, - relay_mode: RelayMode, alpn_protocols: Vec>, transport_config: quinn::TransportConfig, keylog: bool, @@ -110,11 +94,24 @@ pub struct Builder { dns_resolver: Option, #[cfg(any(test, feature = "test-utils"))] insecure_skip_relay_cert_verify: bool, - addr_v4: Option, - addr_v6: Option, - #[cfg(any(test, feature = "test-utils"))] - path_selection: PathSelection, + transports: Vec, max_tls_tickets: usize, + hooks: EndpointHooksList, +} + +impl From for Option { + fn from(mode: RelayMode) -> Self { + match mode { + RelayMode::Disabled => None, + RelayMode::Default => Some(TransportConfig::Relay { + relay_map: mode.relay_map(), + }), + RelayMode::Staging => Some(TransportConfig::Relay { + relay_map: mode.relay_map(), + }), + RelayMode::Custom(relay_map) => Some(TransportConfig::Relay { relay_map }), + } + } } impl Builder { @@ -138,11 +135,20 @@ impl Builder { pub fn empty(relay_mode: RelayMode) -> Self { let mut transport_config = quinn::TransportConfig::default(); transport_config.keep_alive_interval(Some(Duration::from_secs(1))); + + let mut transports = vec![ + #[cfg(not(wasm_browser))] + TransportConfig::default_ipv4(), + #[cfg(not(wasm_browser))] + TransportConfig::default_ipv6(), + ]; + if let Some(relay) = relay_mode.into() { + transports.push(relay); + } Self { secret_key: Default::default(), - relay_mode, alpn_protocols: Default::default(), - transport_config, + transport_config: quinn::TransportConfig::default(), keylog: Default::default(), discovery: Default::default(), discovery_user_data: Default::default(), @@ -151,23 +157,33 @@ impl Builder { dns_resolver: None, #[cfg(any(test, feature = "test-utils"))] insecure_skip_relay_cert_verify: false, - addr_v4: None, - addr_v6: None, - #[cfg(any(test, feature = "test-utils"))] - path_selection: PathSelection::default(), max_tls_tickets: DEFAULT_MAX_TLS_TICKETS, + transports, + hooks: Default::default(), } } // # The final constructor that everyone needs. /// Binds the magic endpoint. - pub async fn bind(self) -> Result { + pub async fn bind(mut self) -> Result { let mut rng = rand::rng(); - let relay_map = self.relay_mode.relay_map(); let secret_key = self .secret_key .unwrap_or_else(move || SecretKey::generate(&mut rng)); + + // Override some transport config settings. 
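+        // Per-path keep-alives and a per-path idle timeout are applied, and the number of
+        // concurrent multipath paths / remote NAT traversal addresses is bounded by
+        // `MAX_MULTIPATH_PATHS`.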
+ self.transport_config + .keep_alive_interval(Some(HEARTBEAT_INTERVAL)); + self.transport_config + .default_path_keep_alive_interval(Some(HEARTBEAT_INTERVAL)); + self.transport_config + .default_path_max_idle_timeout(Some(PATH_MAX_IDLE_TIMEOUT)); + self.transport_config + .max_concurrent_multipath_paths(MAX_MULTIPATH_PATHS + 1); + self.transport_config + .set_max_remote_nat_traversal_addresses(MAX_MULTIPATH_PATHS as u8); + let static_config = StaticConfig { transport_config: Arc::new(self.transport_config), tls_config: tls::TlsConfig::new(secret_key.clone(), self.max_tls_tickets), @@ -181,10 +197,8 @@ impl Builder { let metrics = EndpointMetrics::default(); let msock_opts = magicsock::Options { - addr_v4: self.addr_v4, - addr_v6: self.addr_v6, + transports: self.transports, secret_key, - relay_map, discovery_user_data: self.discovery_user_data, proxy_url: self.proxy_url, #[cfg(not(wasm_browser))] @@ -192,19 +206,16 @@ impl Builder { server_config, #[cfg(any(test, feature = "test-utils"))] insecure_skip_relay_cert_verify: self.insecure_skip_relay_cert_verify, - #[cfg(any(test, feature = "test-utils"))] - path_selection: self.path_selection, metrics, + hooks: self.hooks, }; let msock = magicsock::MagicSock::spawn(msock_opts).await?; trace!("created magicsock"); debug!(version = env!("CARGO_PKG_VERSION"), "iroh Endpoint created"); - let metrics = msock.metrics.magicsock.clone(); let ep = Endpoint { msock, - rtt_actor: Arc::new(rtt_actor::RttHandle::new(metrics)), static_config: Arc::new(static_config), }; @@ -219,25 +230,46 @@ impl Builder { // # The very common methods everyone basically needs. - /// Sets the IPv4 bind address. + /// Adds an IP transport, binding to the provided IPv4 address. + /// + /// If you want to remove the default transports, make sure to call `clear_ip` first. /// /// Setting the port to `0` will use a random port. /// If the port specified is already in use, it will fallback to choosing a random port. - /// - /// By default will use `0.0.0.0:0` to bind to. - pub fn bind_addr_v4(mut self, addr: SocketAddrV4) -> Self { - self.addr_v4.replace(addr); + #[cfg(not(wasm_browser))] + pub fn bind_addr_v4(mut self, bind_addr: SocketAddrV4) -> Self { + self.transports.push(TransportConfig::Ip { + bind_addr: bind_addr.into(), + }); self } - /// Sets the IPv6 bind address. + /// Adds an IP transport, binding to the provided IPv6 address. + /// + /// If you want to remove the default transports, make sure to call `clear_ip` first. /// /// Setting the port to `0` will use a random port. /// If the port specified is already in use, it will fallback to choosing a random port. - /// - /// By default will use `[::]:0` to bind to. - pub fn bind_addr_v6(mut self, addr: SocketAddrV6) -> Self { - self.addr_v6.replace(addr); + #[cfg(not(wasm_browser))] + pub fn bind_addr_v6(mut self, bind_addr: SocketAddrV6) -> Self { + self.transports.push(TransportConfig::Ip { + bind_addr: bind_addr.into(), + }); + self + } + + /// Removes all IP based transports + #[cfg(not(wasm_browser))] + pub fn clear_ip_transports(mut self) -> Self { + self.transports + .retain(|t| !matches!(t, TransportConfig::Ip { .. })); + self + } + + /// Removes all relay based transports + pub fn clear_relay_transports(mut self) -> Self { + self.transports + .retain(|t| !matches!(t, TransportConfig::Relay { .. 
})); self } @@ -283,7 +315,24 @@ impl Builder { /// [crate docs]: crate /// [number 0]: https://n0.computer pub fn relay_mode(mut self, relay_mode: RelayMode) -> Self { - self.relay_mode = relay_mode; + let transport: Option<_> = relay_mode.into(); + match transport { + Some(transport) => { + if let Some(og) = self + .transports + .iter_mut() + .find(|t| matches!(t, TransportConfig::Relay { .. })) + { + *og = transport; + } else { + self.transports.push(transport); + } + } + None => { + self.transports + .retain(|t| !matches!(t, TransportConfig::Relay { .. })); + } + } self } @@ -403,14 +452,6 @@ impl Builder { self } - /// This implies we only use the relay to communicate - /// and do not attempt to do any hole punching. - #[cfg(any(test, feature = "test-utils"))] - pub fn path_selection(mut self, path_selection: PathSelection) -> Self { - self.path_selection = path_selection; - self - } - /// Set the maximum number of TLS tickets to cache. /// /// Set this to a larger value if you want to do 0rtt connections to a large @@ -421,6 +462,21 @@ impl Builder { self.max_tls_tickets = n; self } + + /// Install hooks onto the endpoint. + /// + /// Endpoint hooks intercept the connection establishment process of an [`Endpoint`]. + /// + /// You can install multiple [`EndpointHooks`] by calling this function multiple times. + /// Order matters: hooks are invoked in the order they were installed onto the endpoint + /// builder. Once a hook returns reject, further processing + /// is aborted and other hooks won't be invoked. + /// + /// See [`EndpointHooks`] for details on the possible interception points in the connection lifecycle. + pub fn hooks(mut self, hooks: impl EndpointHooks + 'static) -> Self { + self.hooks.push(hooks); + self + } } /// Configuration for a [`quinn::Endpoint`] that cannot be changed at runtime. @@ -472,9 +528,7 @@ impl StaticConfig { #[derive(Clone, Debug)] pub struct Endpoint { /// Handle to the magicsocket/actor - msock: Handle, - /// Handle to the actor that resets the quinn RTT estimator - rtt_actor: Arc, + pub(crate) msock: Handle, /// Configuration structs for quinn, holds the transport config, certificate setup, secret key etc. static_config: Arc, } @@ -482,18 +536,24 @@ pub struct Endpoint { #[allow(missing_docs)] #[stack_error(derive, add_meta, from_sources)] #[non_exhaustive] +#[allow(private_interfaces)] pub enum ConnectWithOptsError { - #[error(transparent)] - AddEndpointAddr { source: AddEndpointAddrError }, #[error("Connecting to ourself is not supported")] SelfConnect, #[error("No addressing information available")] - NoAddress { source: GetMappingAddressError }, + NoAddress { source: DiscoveryError }, #[error("Unable to connect to remote")] Quinn { #[error(std_err)] source: quinn_proto::ConnectError, }, + #[error("Internal consistency error")] + InternalConsistencyError { + /// Private source type, cannot be created publicly. 
+ source: RemoteStateActorStoppedError, + }, + #[error("Connection was rejected locally")] + LocallyRejected, } #[allow(missing_docs)] @@ -525,18 +585,6 @@ pub enum BindError { }, } -#[allow(missing_docs)] -#[stack_error(derive, add_meta)] -#[non_exhaustive] -pub enum GetMappingAddressError { - #[error("Discovery service required due to missing addressing information")] - DiscoveryStart { source: DiscoveryError }, - #[error("Discovery service failed")] - Discover { source: DiscoveryError }, - #[error("No addressing information found")] - NoAddress, -} - impl Endpoint { // The ordering of public methods is reflected directly in the documentation. This is // roughly ordered by what is most commonly needed by users, but grouped in similar @@ -664,31 +712,26 @@ impl Endpoint { options: ConnectOptions, ) -> Result { let endpoint_addr: EndpointAddr = endpoint_addr.into(); - tracing::Span::current().record( - "remote", - tracing::field::display(endpoint_addr.id.fmt_short()), - ); + if let BeforeConnectOutcome::Reject = + self.msock.hooks.before_connect(&endpoint_addr, alpn).await + { + return Err(e!(ConnectWithOptsError::LocallyRejected)); + } + let endpoint_id = endpoint_addr.id; + + tracing::Span::current().record("remote", tracing::field::display(endpoint_id.fmt_short())); // Connecting to ourselves is not supported. - ensure!( - endpoint_addr.id != self.id(), - ConnectWithOptsError::SelfConnect + ensure!(endpoint_id != self.id(), ConnectWithOptsError::SelfConnect); + + trace!( + dst_endpoint_id = %endpoint_id.fmt_short(), + relay_url = ?endpoint_addr.relay_urls().next().cloned(), + ip_addresses = ?endpoint_addr.ip_addrs().cloned().collect::>(), + "connecting", ); - if !endpoint_addr.is_empty() { - self.add_endpoint_addr(endpoint_addr.clone(), Source::App)?; - } - let endpoint_id = endpoint_addr.id; - let ip_addresses: Vec<_> = endpoint_addr.ip_addrs().cloned().collect(); - let relay_url = endpoint_addr.relay_urls().next().cloned(); - - // Get the mapped IPv6 address from the magic socket. Quinn will connect to this - // address. Start discovery for this endpoint if it's enabled and we have no valid or - // verified address information for this endpoint. Dropping the discovery cancels any - // still running task. - let (mapped_addr, _discovery_drop_guard) = self - .get_mapping_addr_and_maybe_start_discovery(endpoint_addr) - .await?; + let mapped_addr = self.msock.resolve_remote(endpoint_addr).await??; let transport_config = options .transport_config @@ -697,12 +740,6 @@ impl Endpoint { // Start connecting via quinn. This will time out after 10 seconds if no reachable // address is available. - debug!( - ?mapped_addr, - ?ip_addresses, - ?relay_url, - "Attempting connection..." - ); let client_config = { let mut alpn_protocols = vec![alpn.to_vec()]; alpn_protocols.extend(options.additional_alpns); @@ -715,19 +752,14 @@ impl Endpoint { client_config }; + let dest_addr = mapped_addr.private_socket_addr(); let server_name = &tls::name::encode(endpoint_id); - let connect = self.msock.endpoint().connect_with( - client_config, - mapped_addr.private_socket_addr(), - server_name, - )?; + let connect = self + .msock + .endpoint() + .connect_with(client_config, dest_addr, server_name)?; - Ok(Connecting::new( - connect, - self.clone(), - endpoint_id, - _discovery_drop_guard, - )) + Ok(Connecting::new(connect, self.clone(), endpoint_id)) } /// Accepts an incoming connection on the endpoint. @@ -745,43 +777,6 @@ impl Endpoint { } } - // # Methods for manipulating the internal state about other endpoints. 
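For orientation, a minimal sketch of how the reworked transport configuration on the builder composes. The builder methods (clear_ip_transports, bind_addr_v4, relay_mode, alpns, bind) are the ones introduced or kept by this change; the import paths, the fixed port and the ALPN string are illustrative assumptions.

use std::net::{Ipv4Addr, SocketAddrV4};

use iroh::{Endpoint, RelayMode};
use n0_error::Result;

async fn bind_custom_endpoint() -> Result<Endpoint> {
    let endpoint = Endpoint::builder()
        // Drop the default wildcard IPv4/IPv6 transports added by the builder.
        .clear_ip_transports()
        // Add a single explicit IPv4 transport instead (port chosen for illustration).
        .bind_addr_v4(SocketAddrV4::new(Ipv4Addr::UNSPECIFIED, 11204))
        // Keep a relay transport; with the new builder this replaces the default one.
        .relay_mode(RelayMode::Default)
        .alpns(vec![b"my/alpn/0".to_vec()])
        .bind()
        .await?;
    Ok(endpoint)
}

Calling relay_mode(RelayMode::Disabled) instead would remove the relay transport entirely, leaving only the explicitly added IP transport.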
- - /// Informs this [`Endpoint`] about addresses of the iroh endpoint. - /// - /// This updates the local state for the remote endpoint. If the provided [`EndpointAddr`] - /// contains a [`RelayUrl`] this will be used as the new relay server for this endpoint. If - /// it contains any new IP endpoints they will also be stored and tried when next - /// connecting to this endpoint. Any address that matches this endpoint's direct addresses will be - /// silently ignored. - /// - /// The *source* is used for logging exclusively and will not be stored. - /// - /// # Using endpoint discovery instead - /// - /// It is strongly advised to use endpoint discovery using the [`StaticProvider`] instead. - /// This provides more flexibility and future proofing. - /// - /// # Errors - /// - /// Will return an error if we attempt to add our own [`EndpointId`] to the endpoint map or - /// if the direct addresses are a subset of ours. - /// - /// [`StaticProvider`]: crate::discovery::static_provider::StaticProvider - /// [`RelayUrl`]: crate::RelayUrl - pub(crate) fn add_endpoint_addr( - &self, - endpoint_addr: EndpointAddr, - source: Source, - ) -> Result<(), AddEndpointAddrError> { - // Connecting to ourselves is not supported. - ensure!( - endpoint_addr.id != self.id(), - AddEndpointAddrError::OwnAddress - ); - self.msock.add_endpoint_addr(endpoint_addr, source) - } - // # Getter methods for properties of this Endpoint itself. /// Returns the secret_key of this endpoint. @@ -854,8 +849,6 @@ impl Endpoint { let endpoint_id = self.id(); watch_addrs.or(watch_relay).map(move |(addrs, relays)| { - debug_assert!(!addrs.is_empty(), "direct addresses must never be empty"); - EndpointAddr::from_parts( endpoint_id, relays @@ -952,40 +945,6 @@ impl Endpoint { // // Partially they return things passed into the builder. - /// Returns a [`Watcher`] that reports the current connection type and any changes for - /// given remote endpoint. - /// - /// This watcher allows observing a stream of [`ConnectionType`] items by calling - /// [`Watcher::stream()`]. If the underlying connection to a remote endpoint changes, it will - /// yield a new item. These connection changes are when the connection switches between - /// using the Relay server and a direct connection. - /// - /// Note that this does not guarantee each connection change is yielded in the stream. - /// If the connection type changes several times before this stream is polled, only the - /// last recorded state is returned. This can be observed e.g. right at the start of a - /// connection when the switch from a relayed to a direct connection can be so fast that - /// the relayed state is never exposed. - /// - /// If there is currently a connection with the remote endpoint, then using [`Watcher::get`] - /// will immediately return either [`ConnectionType::Relay`], [`ConnectionType::Direct`] - /// or [`ConnectionType::Mixed`]. - /// - /// It is possible for the connection type to be [`ConnectionType::None`] if you've - /// recently connected to this endpoint id but previous methods of reaching the endpoint have - /// become inaccessible. - /// - /// Will return `None` if we do not have any address information for the given `endpoint_id`. - pub fn conn_type(&self, endpoint_id: EndpointId) -> Option> { - self.msock.conn_type(endpoint_id) - } - - /// Returns the currently lowest latency for this endpoint. - /// - /// Will return `None` if we do not have any address information for the given `endpoint_id`. 
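With conn_type() and latency() removed, per-remote path state is now observed on the connection itself. A rough sketch of the replacement pattern, using the Connection::paths() watcher exercised by the tests further down; the helper name and import paths are assumptions.

use iroh::endpoint::Connection;
use n0_future::StreamExt;
use n0_watcher::Watcher;

// Hypothetical helper: waits until the connection has at least one direct (IP) path,
// mirroring what the new tests do with `conn.paths().stream()`.
async fn wait_for_direct_path(conn: &Connection) {
    let mut paths = conn.paths().stream();
    while let Some(infos) = paths.next().await {
        // Each update carries the current list of paths; `is_ip()` marks a direct
        // path, `is_relay()` a path via the relay server.
        if infos.iter().any(|info| info.is_ip()) {
            break;
        }
    }
}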
- pub fn latency(&self, endpoint_id: EndpointId) -> Option { - self.msock.latency(endpoint_id) - } - /// Returns the DNS resolver used in this [`Endpoint`]. /// /// See [`Builder::dns_resolver`]. @@ -1199,92 +1158,6 @@ impl Endpoint { // # Remaining private methods - /// Checks if the given `EndpointId` needs discovery. - pub(crate) fn needs_discovery(&self, endpoint_id: EndpointId, max_age: Duration) -> bool { - match self.msock.remote_info(endpoint_id) { - // No info means no path to endpoint -> start discovery. - None => true, - Some(info) => { - match ( - info.last_received(), - info.relay_url.as_ref().and_then(|r| r.last_alive), - ) { - // No path to endpoint -> start discovery. - (None, None) => true, - // If we haven't received on direct addresses or the relay for MAX_AGE, - // start discovery. - (Some(elapsed), Some(elapsed_relay)) => { - elapsed > max_age && elapsed_relay > max_age - } - (Some(elapsed), _) | (_, Some(elapsed)) => elapsed > max_age, - } - } - } - } - - /// Return the quic mapped address for this `endpoint_id` and possibly start discovery - /// services if discovery is enabled on this magic endpoint. - /// - /// This will launch discovery in all cases except if: - /// 1) we do not have discovery enabled - /// 2) we have discovery enabled, but already have at least one verified, unexpired - /// addresses for this `endpoint_id` - /// - /// # Errors - /// - /// This method may fail if we have no way of dialing the endpoint. This can occur if - /// we were given no dialing information in the [`EndpointAddr`] and no discovery - /// services were configured or if discovery failed to fetch any dialing information. - async fn get_mapping_addr_and_maybe_start_discovery( - &self, - endpoint_addr: EndpointAddr, - ) -> Result<(EndpointIdMappedAddr, Option), GetMappingAddressError> { - let endpoint_id = endpoint_addr.id; - - // Only return a mapped addr if we have some way of dialing this endpoint, in other - // words, we have either a relay URL or at least one direct address. - let addr = if self.msock.has_send_address(endpoint_id) { - self.msock.get_mapping_addr(endpoint_id) - } else { - None - }; - match addr { - Some(addr) => { - // We have some way of dialing this endpoint, but that doesn't actually mean - // we can actually connect to any of these addresses. - // Therefore, we will invoke the discovery service if we haven't received from the - // endpoint on any of the existing paths recently. - // If the user provided addresses in this connect call, we will add a delay - // followed by a recheck before starting the discovery, to give the magicsocket a - // chance to test the newly provided addresses. - let delay = (!endpoint_addr.is_empty()).then_some(DISCOVERY_WAIT_PERIOD); - let discovery = DiscoveryTask::maybe_start_after_delay(self, endpoint_id, delay) - .ok() - .flatten(); - Ok((addr, discovery)) - } - - None => { - // We have no known addresses or relay URLs for this endpoint. - // So, we start a discovery task and wait for the first result to arrive, and - // only then continue, because otherwise we wouldn't have any - // path to the remote endpoint. 
- let res = DiscoveryTask::start(self.clone(), endpoint_id); - let mut discovery = - res.map_err(|err| e!(GetMappingAddressError::DiscoveryStart, err))?; - discovery - .first_arrived() - .await - .map_err(|err| e!(GetMappingAddressError::Discover, err))?; - if let Some(addr) = self.msock.get_mapping_addr(endpoint_id) { - Ok((addr, Some(discovery))) - } else { - Err(e!(GetMappingAddressError::NoAddress)) - } - } - } - } - #[cfg(test)] pub(crate) fn magic_sock(&self) -> Handle { self.msock.clone() @@ -1298,7 +1171,7 @@ impl Endpoint { /// Options for the [`Endpoint::connect_with_opts`] function. #[derive(Default, Debug, Clone)] pub struct ConnectOptions { - transport_config: Option>, + transport_config: Option>, additional_alpns: Vec>, } @@ -1312,7 +1185,7 @@ impl ConnectOptions { } /// Sets the QUIC transport config options for this connection. - pub fn with_transport_config(mut self, transport_config: Arc) -> Self { + pub fn with_transport_config(mut self, transport_config: Arc) -> Self { self.transport_config = Some(transport_config); self } @@ -1386,6 +1259,7 @@ fn proxy_url_from_env() -> Option { #[derive(Debug, Clone, PartialEq, Eq)] pub enum RelayMode { /// Disable relay servers completely. + /// This means that neither listening nor dialing relays will be available. Disabled, /// Use the default relay map, with production relay servers from n0. /// @@ -1445,18 +1319,19 @@ mod tests { use iroh_base::{EndpointAddr, EndpointId, SecretKey, TransportAddr}; use n0_error::{AnyError as Error, Result, StdResultExt}; - use n0_future::{BufferedStreamExt, StreamExt, stream, task::AbortOnDropHandle}; + use n0_future::{BufferedStreamExt, StreamExt, stream, time}; use n0_watcher::Watcher; use quinn::ConnectionError; use rand::SeedableRng; - use tracing::{Instrument, error_span, info, info_span}; + use tokio::sync::oneshot; + use tracing::{Instrument, error_span, info, info_span, instrument}; use tracing_test::traced_test; use super::Endpoint; use crate::{ - RelayMode, + RelayMap, RelayMode, discovery::static_provider::StaticProvider, - endpoint::{ConnectOptions, Connection, ConnectionType}, + endpoint::{ConnectOptions, Connection}, protocol::{AcceptError, ProtocolHandler, Router}, test_utils::{run_relay_server, run_relay_server_with}, }; @@ -1476,6 +1351,7 @@ mod tests { assert!(res.is_err()); let err = res.err().unwrap(); assert!(err.to_string().starts_with("Connecting to ourself")); + Ok(()) } @@ -1729,6 +1605,370 @@ mod tests { Ok(()) } + #[tokio::test] + #[traced_test] + async fn endpoint_two_direct_only() -> Result { + // Connect two endpoints on the same network, without a relay server, without + // discovery. + let ep1 = { + let span = info_span!("server"); + let _guard = span.enter(); + Endpoint::builder() + .alpns(vec![TEST_ALPN.to_vec()]) + .relay_mode(RelayMode::Disabled) + .bind() + .await? + }; + let ep2 = { + let span = info_span!("client"); + let _guard = span.enter(); + Endpoint::builder() + .alpns(vec![TEST_ALPN.to_vec()]) + .relay_mode(RelayMode::Disabled) + .bind() + .await? 
+ }; + let ep1_nodeaddr = ep1.addr(); + + #[instrument(name = "client", skip_all)] + async fn connect(ep: Endpoint, dst: EndpointAddr) -> Result { + info!(me = %ep.id().fmt_short(), "client starting"); + let conn = ep.connect(dst, TEST_ALPN).await?; + let mut send = conn.open_uni().await.anyerr()?; + send.write_all(b"hello").await.anyerr()?; + send.finish().anyerr()?; + Ok(conn.closed().await) + } + + #[instrument(name = "server", skip_all)] + async fn accept(ep: Endpoint, src: EndpointId) -> Result { + info!(me = %ep.id().fmt_short(), "server starting"); + let conn = ep.accept().await.anyerr()?.await.anyerr()?; + let node_id = conn.remote_id(); + assert_eq!(node_id, src); + let mut recv = conn.accept_uni().await.anyerr()?; + let msg = recv.read_to_end(100).await.anyerr()?; + assert_eq!(msg, b"hello"); + // Dropping the connection closes it just fine. + Ok(()) + } + + let ep1_accept = tokio::spawn(accept(ep1.clone(), ep2.id())); + let ep2_connect = tokio::spawn(connect(ep2.clone(), ep1_nodeaddr)); + + ep1_accept.await.anyerr()??; + let conn_closed = dbg!(ep2_connect.await.anyerr()??); + assert!(matches!( + conn_closed, + ConnectionError::ApplicationClosed(quinn::ApplicationClose { .. }) + )); + + Ok(()) + } + + #[tokio::test] + #[traced_test] + async fn endpoint_two_relay_only_becomes_direct() -> Result { + // Connect two endpoints on the same network, via a relay server, without + // discovery. Wait until there is a direct connection. + let (relay_map, _relay_url, _relay_server_guard) = run_relay_server().await?; + let (node_addr_tx, node_addr_rx) = oneshot::channel(); + + #[instrument(name = "client", skip_all)] + async fn connect( + relay_map: RelayMap, + node_addr_rx: oneshot::Receiver, + ) -> Result { + let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0u64); + let secret = SecretKey::generate(&mut rng); + let ep = Endpoint::builder() + .secret_key(secret) + .alpns(vec![TEST_ALPN.to_vec()]) + .insecure_skip_relay_cert_verify(true) + .relay_mode(RelayMode::Custom(relay_map)) + .bind() + .await?; + info!(me = %ep.id().fmt_short(), "client starting"); + let dst = node_addr_rx.await.anyerr()?; + + info!(me = %ep.id().fmt_short(), "client connecting"); + let conn = ep.connect(dst, TEST_ALPN).await?; + let mut send = conn.open_uni().await.anyerr()?; + send.write_all(b"hello").await.anyerr()?; + let mut paths = conn.paths().stream(); + info!("Waiting for direct connection"); + while let Some(infos) = paths.next().await { + info!(?infos, "new PathInfos"); + if infos.iter().any(|info| info.is_ip()) { + break; + } + } + info!("Have direct connection"); + send.write_all(b"close please").await.anyerr()?; + send.finish().anyerr()?; + Ok(conn.closed().await) + } + + #[instrument(name = "server", skip_all)] + async fn accept( + relay_map: RelayMap, + node_addr_tx: oneshot::Sender, + ) -> Result { + let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(1u64); + let secret = SecretKey::generate(&mut rng); + let ep = Endpoint::builder() + .secret_key(secret) + .alpns(vec![TEST_ALPN.to_vec()]) + .insecure_skip_relay_cert_verify(true) + .relay_mode(RelayMode::Custom(relay_map)) + .bind() + .await?; + ep.online().await; + let mut node_addr = ep.addr(); + node_addr.addrs.retain(|addr| addr.is_relay()); + node_addr_tx.send(node_addr).unwrap(); + + info!(me = %ep.id().fmt_short(), "server starting"); + let conn = ep.accept().await.anyerr()?.await.anyerr()?; + // let node_id = conn.remote_node_id()?; + // assert_eq!(node_id, src); + let mut recv = conn.accept_uni().await.anyerr()?; + let mut msg = [0u8; 5]; 
+ recv.read_exact(&mut msg).await.anyerr()?; + assert_eq!(&msg, b"hello"); + info!("received hello"); + let msg = recv.read_to_end(100).await.anyerr()?; + assert_eq!(msg, b"close please"); + info!("received 'close please'"); + // Dropping the connection closes it just fine. + Ok(()) + } + + let server_task = tokio::spawn(accept(relay_map.clone(), node_addr_tx)); + let client_task = tokio::spawn(connect(relay_map, node_addr_rx)); + + server_task.await.anyerr()??; + let conn_closed = dbg!(client_task.await.anyerr()??); + assert!(matches!( + conn_closed, + ConnectionError::ApplicationClosed(quinn::ApplicationClose { .. }) + )); + + Ok(()) + } + + #[tokio::test] + #[traced_test] + async fn endpoint_two_relay_only_no_ip() -> Result { + // Connect two endpoints on the same network, via a relay server, without + // discovery. + let (relay_map, _relay_url, _relay_server_guard) = run_relay_server().await?; + let (node_addr_tx, node_addr_rx) = oneshot::channel(); + + #[instrument(name = "client", skip_all)] + async fn connect( + relay_map: RelayMap, + node_addr_rx: oneshot::Receiver, + ) -> Result { + let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0u64); + let secret = SecretKey::generate(&mut rng); + let ep = Endpoint::builder() + .secret_key(secret) + .alpns(vec![TEST_ALPN.to_vec()]) + .insecure_skip_relay_cert_verify(true) + .relay_mode(RelayMode::Custom(relay_map)) + .clear_ip_transports() // disable direct + .bind() + .await?; + info!(me = %ep.id().fmt_short(), "client starting"); + let dst = node_addr_rx.await.anyerr()?; + + info!(me = %ep.id().fmt_short(), "client connecting"); + let conn = ep.connect(dst, TEST_ALPN).await?; + let mut send = conn.open_uni().await.anyerr()?; + send.write_all(b"hello").await.anyerr()?; + let mut paths = conn.paths().stream(); + info!("Waiting for connection"); + 'outer: while let Some(infos) = paths.next().await { + info!(?infos, "new PathInfos"); + for info in infos { + if info.is_ip() { + panic!("should not happen: {:?}", info); + } + if info.is_relay() { + break 'outer; + } + } + } + info!("Have relay connection"); + send.write_all(b"close please").await.anyerr()?; + send.finish().anyerr()?; + Ok(conn.closed().await) + } + + #[instrument(name = "server", skip_all)] + async fn accept( + relay_map: RelayMap, + node_addr_tx: oneshot::Sender, + ) -> Result { + let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(1u64); + let secret = SecretKey::generate(&mut rng); + let ep = Endpoint::builder() + .secret_key(secret) + .alpns(vec![TEST_ALPN.to_vec()]) + .insecure_skip_relay_cert_verify(true) + .relay_mode(RelayMode::Custom(relay_map)) + .clear_ip_transports() + .bind() + .await?; + ep.online().await; + let node_addr = ep.addr(); + node_addr_tx.send(node_addr).unwrap(); + + info!(me = %ep.id().fmt_short(), "server starting"); + let conn = ep.accept().await.anyerr()?.await.anyerr()?; + // let node_id = conn.remote_node_id()?; + // assert_eq!(node_id, src); + let mut recv = conn.accept_uni().await.anyerr()?; + let mut msg = [0u8; 5]; + recv.read_exact(&mut msg).await.anyerr()?; + assert_eq!(&msg, b"hello"); + info!("received hello"); + let msg = recv.read_to_end(100).await.anyerr()?; + assert_eq!(msg, b"close please"); + info!("received 'close please'"); + // Dropping the connection closes it just fine. 
+ Ok(()) + } + + let server_task = tokio::spawn(accept(relay_map.clone(), node_addr_tx)); + let client_task = tokio::spawn(connect(relay_map, node_addr_rx)); + + server_task.await.anyerr()??; + let conn_closed = dbg!(client_task.await.anyerr()??); + assert!(matches!( + conn_closed, + ConnectionError::ApplicationClosed(quinn::ApplicationClose { .. }) + )); + + Ok(()) + } + + #[tokio::test] + #[traced_test] + async fn endpoint_two_direct_add_relay() -> Result { + // Connect two endpoints on the same network, without relay server and without + // discovery. Add a relay connection later. + let (relay_map, _relay_url, _relay_server_guard) = run_relay_server().await?; + let (node_addr_tx, node_addr_rx) = oneshot::channel(); + + #[instrument(name = "client", skip_all)] + async fn connect( + relay_map: RelayMap, + node_addr_rx: oneshot::Receiver, + ) -> Result<()> { + let secret = SecretKey::from([0u8; 32]); + let ep = Endpoint::builder() + .secret_key(secret) + .alpns(vec![TEST_ALPN.to_vec()]) + .insecure_skip_relay_cert_verify(true) + .relay_mode(RelayMode::Custom(relay_map)) + .bind() + .await?; + info!(me = %ep.id().fmt_short(), "client starting"); + let dst = node_addr_rx.await.anyerr()?; + + info!(me = %ep.id().fmt_short(), "client connecting"); + let conn = ep.connect(dst, TEST_ALPN).await?; + info!(me = %ep.id().fmt_short(), "client connected"); + + // We should be connected via IP, because it is faster than the relay server. + // TODO: Maybe not panic if this is not true? + let path_info = conn.paths().get(); + assert_eq!(path_info.len(), 1); + assert!(path_info.iter().next().unwrap().is_ip()); + + let mut paths = conn.paths().stream(); + time::timeout(Duration::from_secs(5), async move { + while let Some(infos) = paths.next().await { + info!(?infos, "new PathInfos"); + if infos.iter().any(|info| info.is_relay()) { + info!("client has a relay path"); + break; + } + } + }) + .await + .anyerr()?; + + // wait for the server to signal it has the relay connection + let mut stream = conn.accept_uni().await.anyerr()?; + stream.read_to_end(100).await.anyerr()?; + + info!("client closing"); + conn.close(0u8.into(), b""); + ep.close().await; + Ok(()) + } + + #[instrument(name = "server", skip_all)] + async fn accept( + relay_map: RelayMap, + node_addr_tx: oneshot::Sender, + ) -> Result { + let secret = SecretKey::from([1u8; 32]); + let ep = Endpoint::builder() + .secret_key(secret) + .alpns(vec![TEST_ALPN.to_vec()]) + .insecure_skip_relay_cert_verify(true) + .relay_mode(RelayMode::Custom(relay_map)) + .bind() + .await?; + ep.online().await; + let node_addr = ep.addr(); + node_addr_tx.send(node_addr).unwrap(); + + info!(me = %ep.id().fmt_short(), "server starting"); + let conn = ep.accept().await.anyerr()?.await.anyerr()?; + info!(me = %ep.id().fmt_short(), "server accepted connection"); + + // Wait for a relay connection to be added. Client does all the asserting here, + // we just want to wait so we get to see all the mechanics of the connection + // being added on this side too. 
+ let mut paths = conn.paths().stream(); + time::timeout(Duration::from_secs(5), async move { + while let Some(infos) = paths.next().await { + info!(?infos, "new PathInfos"); + if infos.iter().any(|path| path.is_relay()) { + info!("server has a relay path"); + break; + } + } + }) + .await + .anyerr()?; + + let mut stream = conn.open_uni().await.anyerr()?; + stream.write_all(b"have relay").await.anyerr()?; + stream.finish().anyerr()?; + info!("waiting conn.closed()"); + + Ok(conn.closed().await) + } + + let server_task = tokio::spawn(accept(relay_map.clone(), node_addr_tx)); + let client_task = tokio::spawn(connect(relay_map, node_addr_rx)); + + client_task.await.anyerr()??; + let conn_closed = dbg!(server_task.await.anyerr()??); + assert!(matches!( + conn_closed, + ConnectionError::ApplicationClosed(quinn::ApplicationClose { .. }) + )); + + Ok(()) + } + #[tokio::test] #[traced_test] async fn endpoint_relay_map_change() -> Result { @@ -1936,92 +2176,6 @@ mod tests { Ok(()) } - #[tokio::test] - #[traced_test] - async fn endpoint_conn_type_becomes_direct() -> Result { - const TIMEOUT: Duration = std::time::Duration::from_secs(15); - let (relay_map, _relay_url, _relay_guard) = run_relay_server().await?; - let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(42); - let ep1_secret_key = SecretKey::generate(&mut rng); - let ep2_secret_key = SecretKey::generate(&mut rng); - let ep1 = Endpoint::empty_builder(RelayMode::Custom(relay_map.clone())) - .secret_key(ep1_secret_key) - .insecure_skip_relay_cert_verify(true) - .alpns(vec![TEST_ALPN.to_vec()]) - .bind() - .await?; - let ep2 = Endpoint::empty_builder(RelayMode::Custom(relay_map)) - .secret_key(ep2_secret_key) - .insecure_skip_relay_cert_verify(true) - .alpns(vec![TEST_ALPN.to_vec()]) - .bind() - .await?; - - async fn wait_for_conn_type_direct(ep: &Endpoint, endpoint_id: EndpointId) -> Result { - let mut stream = ep - .conn_type(endpoint_id) - .expect("connection exists") - .stream(); - let src = ep.id().fmt_short(); - let dst = endpoint_id.fmt_short(); - while let Some(conn_type) = stream.next().await { - tracing::info!(me = %src, dst = %dst, conn_type = ?conn_type); - if matches!(conn_type, ConnectionType::Direct(_)) { - return Ok(()); - } - } - n0_error::bail_any!("conn_type stream ended before `ConnectionType::Direct`"); - } - - async fn accept(ep: &Endpoint) -> Result { - let incoming = ep.accept().await.expect("ep closed"); - let conn = incoming.await.anyerr()?; - let endpoint_id = conn.remote_id(); - tracing::info!(endpoint_id=%endpoint_id.fmt_short(), "accepted connection"); - Ok(conn) - } - - let ep1_endpointid = ep1.id(); - let ep2_endpointid = ep2.id(); - - let ep1_endpointaddr = ep1.addr(); - tracing::info!( - "endpoint id 1 {ep1_endpointid}, relay URL {:?}", - ep1_endpointaddr.relay_urls().next() - ); - tracing::info!("endpoint id 2 {ep2_endpointid}"); - - let ep1_side = tokio::time::timeout(TIMEOUT, async move { - let conn = accept(&ep1).await?; - let mut send = conn.open_uni().await.anyerr()?; - wait_for_conn_type_direct(&ep1, ep2_endpointid).await?; - send.write_all(b"Conn is direct").await.anyerr()?; - send.finish().anyerr()?; - conn.closed().await; - Ok::<(), Error>(()) - }); - - let ep2_side = tokio::time::timeout(TIMEOUT, async move { - let conn = ep2.connect(ep1_endpointaddr, TEST_ALPN).await?; - let mut recv = conn.accept_uni().await.anyerr()?; - wait_for_conn_type_direct(&ep2, ep1_endpointid).await?; - let read = recv.read_to_end(100).await.anyerr()?; - assert_eq!(read, b"Conn is direct".to_vec()); - 
conn.close(0u32.into(), b"done"); - conn.closed().await; - Ok::<(), Error>(()) - }); - - let res_ep1 = AbortOnDropHandle::new(tokio::spawn(ep1_side)); - let res_ep2 = AbortOnDropHandle::new(tokio::spawn(ep2_side)); - - let (r1, r2) = tokio::try_join!(res_ep1, res_ep2).anyerr()?; - r1.anyerr()??; - r2.anyerr()??; - - Ok(()) - } - #[tokio::test] #[traced_test] async fn test_direct_addresses_no_qad_relay() -> Result { @@ -2082,7 +2236,7 @@ mod tests { #[tokio::test] #[traced_test] async fn metrics_smoke() -> Result { - use iroh_metrics::{MetricsSource, Registry}; + use iroh_metrics::Registry; let secret_key = SecretKey::from_bytes(&[0u8; 32]); let client = Endpoint::empty_builder(RelayMode::Disabled) @@ -2112,17 +2266,17 @@ mod tests { let server = server_task.await.anyerr()??; let m = client.metrics(); - assert_eq!(m.magicsock.num_direct_conns_added.get(), 1); - assert_eq!(m.magicsock.connection_became_direct.get(), 1); - assert_eq!(m.magicsock.connection_handshake_success.get(), 1); - assert_eq!(m.magicsock.endpoints_contacted_directly.get(), 1); + // assert_eq!(m.magicsock.num_direct_conns_added.get(), 1); + // assert_eq!(m.magicsock.connection_became_direct.get(), 1); + // assert_eq!(m.magicsock.connection_handshake_success.get(), 1); + // assert_eq!(m.magicsock.endpoints_contacted_directly.get(), 1); assert!(m.magicsock.recv_datagrams.get() > 0); let m = server.metrics(); - assert_eq!(m.magicsock.num_direct_conns_added.get(), 1); - assert_eq!(m.magicsock.connection_became_direct.get(), 1); - assert_eq!(m.magicsock.endpoints_contacted_directly.get(), 1); - assert_eq!(m.magicsock.connection_handshake_success.get(), 1); + // assert_eq!(m.magicsock.num_direct_conns_added.get(), 1); + // assert_eq!(m.magicsock.connection_became_direct.get(), 1); + // assert_eq!(m.magicsock.endpoints_contacted_directly.get(), 1); + // assert_eq!(m.magicsock.connection_handshake_success.get(), 1); assert!(m.magicsock.recv_datagrams.get() > 0); // test openmetrics encoding with labeled subregistries per endpoint @@ -2134,9 +2288,9 @@ mod tests { let mut registry = Registry::default(); register_endpoint(&mut registry, &client); register_endpoint(&mut registry, &server); - let s = registry.encode_openmetrics_to_string().anyerr()?; - assert!(s.contains(r#"magicsock_endpoints_contacted_directly_total{id="3b6a27bcce"} 1"#)); - assert!(s.contains(r#"magicsock_endpoints_contacted_directly_total{id="8a88e3dd74"} 1"#)); + // let s = registry.encode_openmetrics_to_string().anyerr()?; + // assert!(s.contains(r#"magicsock_endpoints_contacted_directly_total{id="3b6a27bcce"} 1"#)); + // assert!(s.contains(r#"magicsock_endpoints_contacted_directly_total{id="8a88e3dd74"} 1"#)); Ok(()) } diff --git a/iroh/src/endpoint/connection.rs b/iroh/src/endpoint/connection.rs index 6a4593adc08..6f59d7bec20 100644 --- a/iroh/src/endpoint/connection.rs +++ b/iroh/src/endpoint/connection.rs @@ -29,16 +29,23 @@ use ed25519_dalek::{VerifyingKey, pkcs8::DecodePublicKey}; use futures_util::{FutureExt, future::Shared}; use iroh_base::EndpointId; use n0_error::{e, stack_error}; -use n0_future::time::Duration; +use n0_future::{TryFutureExt, future::Boxed as BoxFuture, time::Duration}; use n0_watcher::Watcher; use pin_project::pin_project; use quinn::{ AcceptBi, AcceptUni, ConnectionError, ConnectionStats, OpenBi, OpenUni, ReadDatagram, - RetryError, SendDatagramError, ServerConfig, VarInt, + RetryError, SendDatagramError, ServerConfig, Side, VarInt, WeakConnectionHandle, }; use tracing::warn; -use crate::{Endpoint, discovery::DiscoveryTask, 
endpoint::rtt_actor::RttMessage}; +use crate::{ + Endpoint, + endpoint::AfterHandshakeOutcome, + magicsock::{ + RemoteStateActorStoppedError, + remote_map::{PathInfoList, PathsWatcher}, + }, +}; /// Future produced by [`Endpoint::accept`]. #[derive(derive_more::Debug)] @@ -149,41 +156,27 @@ impl IntoFuture for Incoming { type IntoFuture = IncomingFuture; fn into_future(self) -> Self::IntoFuture { - IncomingFuture { - inner: self.inner.into_future(), - ep: self.ep, - } + IncomingFuture(Box::pin(async move { + let quinn_conn = self.inner.into_future().await?; + let conn = conn_from_quinn_conn(quinn_conn, &self.ep)?.await?; + Ok(conn) + })) } } /// Adaptor to let [`Incoming`] be `await`ed like a [`Connecting`]. -#[derive(Debug)] -#[pin_project] -pub struct IncomingFuture { - #[pin] - inner: quinn::IncomingFuture, - ep: Endpoint, -} +#[derive(derive_more::Debug)] +#[debug("IncomingFuture")] +pub struct IncomingFuture(BoxFuture>); impl Future for IncomingFuture { type Output = Result; - fn poll(self: Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> Poll { - let this = self.project(); - match this.inner.poll(cx) { - Poll::Pending => Poll::Pending, - Poll::Ready(Err(err)) => Poll::Ready(Err(err.into())), - Poll::Ready(Ok(inner)) => { - let conn = match conn_from_quinn_conn(inner) { - Ok(conn) => conn, - Err(err) => return Poll::Ready(Err(err.into())), - }; - try_send_rtt_msg(conn.quinn_connection(), this.ep, conn.remote_id()); - Poll::Ready(Ok(conn)) - } - } + fn poll(mut self: Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> Poll { + self.0.poll_unpin(cx) } } + /// Extracts the ALPN protocol from the peer's handshake data. fn alpn_from_quinn_conn(conn: &quinn::Connection) -> Option> { let data = conn.handshake_data()?; @@ -207,37 +200,75 @@ async fn alpn_from_quinn_connecting(conn: &mut quinn::Connecting) -> Result Result { - if let Some(reason) = conn.close_reason() { - return Err(e!(AuthenticationError::ConnectionError { source: reason })); - } - Ok(Connection { - info: HandshakeCompletedData { - endpoint_id: remote_id_from_quinn_conn(&conn)?, - alpn: alpn_from_quinn_conn(&conn).ok_or_else(|| e!(AuthenticationError::NoAlpn))?, - }, - inner: conn, +/// Otherwise returns a future that completes once the connection has been registered with the +/// magicsock. This future can return an [`RemoteStateActorStoppedError`], which will only be +/// emitted if the endpoint is closing. +/// +/// The returned future is `'static`, so it can be stored without being lifetime-bound on `&ep`. +fn conn_from_quinn_conn( + conn: quinn::Connection, + ep: &Endpoint, +) -> Result< + impl Future> + Send + 'static, + ConnectingError, +> { + let info = match static_info_from_conn(&conn) { + Ok(val) => val, + Err(auth_err) => { + // If the authentication error raced with a connection error, the connection + // error wins. + if let Some(conn_err) = conn.close_reason() { + return Err(e!(ConnectingError::ConnectionError { source: conn_err })); + } else { + return Err(e!(ConnectingError::HandshakeFailure { source: auth_err })); + } + } + }; + + // Register this connection with the magicsock. 
+ let fut = ep + .msock + .register_connection(info.endpoint_id, conn.weak_handle()); + + // Check hooks + let msock = ep.msock.clone(); + Ok(async move { + let paths = fut.await?; + let conn = Connection { + data: HandshakeCompletedData { info, paths }, + inner: conn, + }; + + if let AfterHandshakeOutcome::Reject { error_code, reason } = + msock.hooks.after_handshake(&conn.to_info()).await + { + conn.close(error_code, &reason); + return Err(e!(ConnectingError::LocallyRejected)); + } + + Ok(conn) }) } +fn static_info_from_conn(conn: &quinn::Connection) -> Result { + let endpoint_id = remote_id_from_quinn_conn(conn)?; + let alpn = alpn_from_quinn_conn(conn).ok_or_else(|| e!(AuthenticationError::NoAlpn))?; + Ok(StaticInfo { endpoint_id, alpn }) +} + /// Returns the [`EndpointId`] from the peer's TLS certificate. /// /// The [`PublicKey`] of an endpoint is also known as an [`EndpointId`]. This [`PublicKey`] is @@ -284,24 +315,31 @@ fn remote_id_from_quinn_conn( /// /// This future resolves to a [`Connection`] once the handshake completes. #[derive(derive_more::Debug)] -#[pin_project] pub struct Connecting { - #[pin] inner: quinn::Connecting, + /// Future to register the connection with the magicsock. + /// + /// This is set and polled after `inner` completes. We are using an option instead of an enum + /// because we need infallible access to `inner` in some methods. + #[debug("{}", register_with_magicsock.as_ref().map(|_| "Some(RegisterWithMagicsockFut)").unwrap_or("None"))] + register_with_magicsock: Option, ep: Endpoint, /// `Some(remote_id)` if this is an outgoing connection, `None` if this is an incoming conn remote_endpoint_id: EndpointId, - /// We run discovery as long as we haven't established a connection yet. - #[debug("Option")] - _discovery_drop_guard: Option, } +type RegisterWithMagicsockFut = BoxFuture>; + /// In-progress connection attempt future #[derive(derive_more::Debug)] -#[pin_project] pub struct Accepting { - #[pin] inner: quinn::Connecting, + /// Future to register the connection with the magicsock. + /// + /// This is set and polled after `inner` completes. We are using an option instead of an enum + /// because we need infallible access to `inner` in some methods. + #[debug("{}", register_with_magicsock.as_ref().map(|_| "Some(RegisterWithMagicsockFut)").unwrap_or("None"))] + register_with_magicsock: Option, ep: Endpoint, } @@ -323,6 +361,8 @@ pub enum AlpnError { #[stack_error(add_meta, derive, from_sources)] #[allow(missing_docs)] #[non_exhaustive] +#[derive(Clone)] +#[allow(private_interfaces)] pub enum ConnectingError { #[error(transparent)] ConnectionError { @@ -331,6 +371,13 @@ pub enum ConnectingError { }, #[error("Failure finalizing the handshake")] HandshakeFailure { source: AuthenticationError }, + #[error("internal consistency error")] + InternalConsistencyError { + /// Private source type, cannot be created publicly. 
+ source: RemoteStateActorStoppedError, + }, + #[error("Connection was rejected locally")] + LocallyRejected, } impl Connecting { @@ -338,13 +385,12 @@ impl Connecting { inner: quinn::Connecting, ep: Endpoint, remote_endpoint_id: EndpointId, - _discovery_drop_guard: Option, ) -> Self { Self { inner, ep, remote_endpoint_id, - _discovery_drop_guard, + register_with_magicsock: None, } } @@ -386,30 +432,25 @@ impl Connecting { #[allow(clippy::result_large_err)] pub fn into_0rtt(self) -> Result { match self.inner.into_0rtt() { - Ok((inner, zrtt_accepted)) => { - let accepted = ZeroRttAccepted { - inner: zrtt_accepted, - _discovery_drop_guard: self._discovery_drop_guard, - } - .shared(); - // This call is why `self.remote_endpoint_id` was introduced. - // When we `Connecting::into_0rtt`, then we don't yet have `handshake_data` - // in our `Connection`, thus `try_send_rtt_msg` won't be able to pick up - // `Connection::remote_endpoint_id`. - // Instead, we provide `self.remote_endpoint_id` here - we know it in advance, - // after all. - try_send_rtt_msg(&inner, &self.ep, self.remote_endpoint_id); + Ok((quinn_conn, zrtt_accepted)) => { + let accepted: BoxFuture<_> = Box::pin({ + let quinn_conn = quinn_conn.clone(); + async move { + let accepted = zrtt_accepted.await; + let conn = conn_from_quinn_conn(quinn_conn, &self.ep)?.await?; + Ok(match accepted { + true => ZeroRttStatus::Accepted(conn), + false => ZeroRttStatus::Rejected(conn), + }) + } + }); + let accepted = accepted.shared(); Ok(Connection { - info: OutgoingZeroRttData { accepted }, - inner, + inner: quinn_conn, + data: OutgoingZeroRttData { accepted }, }) } - Err(inner) => Err(Self { - inner, - ep: self.ep, - remote_endpoint_id: self.remote_endpoint_id, - _discovery_drop_guard: self._discovery_drop_guard, - }), + Err(inner) => Err(Self { inner, ..self }), } } @@ -432,21 +473,14 @@ impl Connecting { impl Future for Connecting { type Output = Result; - fn poll(self: Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> Poll { - let this = self.project(); - match this.inner.poll(cx) { - Poll::Pending => Poll::Pending, - Poll::Ready(Err(err)) => Poll::Ready(Err(err.into())), - Poll::Ready(Ok(inner)) => { - let conn = match conn_from_quinn_conn(inner) { - Ok(conn) => conn, - Err(err) => { - return Poll::Ready(Err(err.into())); - } - }; - - try_send_rtt_msg(conn.quinn_connection(), this.ep, conn.remote_id()); - Poll::Ready(Ok(conn)) + fn poll(mut self: Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> Poll { + loop { + if let Some(fut) = &mut self.register_with_magicsock { + return fut.poll_unpin(cx).map_err(Into::into); + } else { + let quinn_conn = std::task::ready!(self.inner.poll_unpin(cx)?); + let fut = conn_from_quinn_conn(quinn_conn, &self.ep)?; + self.register_with_magicsock = Some(Box::pin(fut.err_into())); } } } @@ -454,7 +488,11 @@ impl Future for Connecting { impl Accepting { pub(crate) fn new(inner: quinn::Connecting, ep: Endpoint) -> Self { - Self { inner, ep } + Self { + inner, + ep, + register_with_magicsock: None, + } } /// Converts this [`Accepting`] into a 0-RTT or 0.5-RTT connection at the cost of weakened @@ -485,18 +523,24 @@ impl Accepting { /// /// [`RecvStream::is_0rtt`]: quinn::RecvStream::is_0rtt pub fn into_0rtt(self) -> IncomingZeroRttConnection { - let (inner, accepted) = self + let (quinn_conn, zrtt_accepted) = self .inner .into_0rtt() .expect("incoming connections can always be converted to 0-RTT"); - let accepted = ZeroRttAccepted { - inner: accepted, - _discovery_drop_guard: None, - } - .shared(); - Connection 
{ - info: IncomingZeroRttData { accepted }, - inner, + + let accepted: BoxFuture<_> = Box::pin({ + let quinn_conn = quinn_conn.clone(); + async move { + let _ = zrtt_accepted.await; + let conn = conn_from_quinn_conn(quinn_conn, &self.ep)?.await?; + Ok(conn) + } + }); + let accepted = accepted.shared(); + + IncomingZeroRttConnection { + inner: quinn_conn, + data: IncomingZeroRttData { accepted }, } } @@ -514,49 +558,21 @@ impl Accepting { impl Future for Accepting { type Output = Result; - fn poll(self: Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> Poll { - let this = self.project(); - match this.inner.poll(cx) { - Poll::Pending => Poll::Pending, - Poll::Ready(Err(err)) => Poll::Ready(Err(err.into())), - Poll::Ready(Ok(inner)) => { - let conn = match conn_from_quinn_conn(inner) { - Ok(conn) => conn, - Err(err) => return Poll::Ready(Err(err.into())), + fn poll(mut self: Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> Poll { + loop { + if let Some(fut) = &mut self.register_with_magicsock { + return fut.poll_unpin(cx).map_err(Into::into); + } else { + let quinn_conn = std::task::ready!(self.inner.poll_unpin(cx)?); + match conn_from_quinn_conn(quinn_conn, &self.ep) { + Err(err) => return Poll::Ready(Err(err)), + Ok(fut) => self.register_with_magicsock = Some(Box::pin(fut.err_into())), }; - - try_send_rtt_msg(conn.quinn_connection(), this.ep, conn.remote_id()); - Poll::Ready(Ok(conn)) } } } } -/// Future that completes when a connection is fully established. -/// -/// For clients, the resulting value indicates if 0-RTT was accepted. For servers, the resulting -/// value is meaningless. -#[derive(derive_more::Debug)] -#[debug("ZeroRttAccepted")] -struct ZeroRttAccepted { - inner: quinn::ZeroRttAccepted, - /// When we call `Connecting::into_0rtt`, we don't want to stop discovery, so we transfer the task - /// to this future. - /// When `quinn::ZeroRttAccepted` resolves, we've successfully received data from the remote. - /// Thus, that's the right time to drop discovery to preserve the behaviour similar to - /// `Connecting` -> `Connection` without 0-RTT. - /// Should we eventually decide to keep the discovery task alive for the duration of the whole - /// `Connection`, then this task should be transferred to the `Connection` instead of here. - _discovery_drop_guard: Option, -} - -impl Future for ZeroRttAccepted { - type Output = bool; - fn poll(mut self: Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> Poll { - Pin::new(&mut self.inner).poll(cx) - } -} - /// The client side of a 0-RTT connection. /// /// This is created using [`Connecting::into_0rtt`]. @@ -571,7 +587,7 @@ impl Future for ZeroRttAccepted { pub type OutgoingZeroRttConnection = Connection; /// Returned from [`OutgoingZeroRttConnection::handshake_completed`]. -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum ZeroRttStatus { /// If the 0-RTT data was accepted, you can continue to use any streams /// that were created before the handshake was completed. @@ -615,16 +631,23 @@ pub type IncomingZeroRttConnection = Connection; /// connection without losing application data. /// /// May be cloned to obtain another handle to the same connection. -#[derive(derive_more::Debug, Clone)] +#[derive(Debug, Clone)] pub struct Connection { inner: quinn::Connection, /// State-specific information - info: State::Data, + data: State::Data, } #[doc(hidden)] #[derive(Debug, Clone)] pub struct HandshakeCompletedData { + info: StaticInfo, + paths: PathsWatcher, +} + +/// Static info from a completed TLS handshake. 
+#[derive(Debug, Clone)] +struct StaticInfo { endpoint_id: EndpointId, alpn: Vec, } @@ -632,13 +655,13 @@ pub struct HandshakeCompletedData { #[doc(hidden)] #[derive(Debug, Clone)] pub struct IncomingZeroRttData { - accepted: Shared, + accepted: Shared>>, } #[doc(hidden)] #[derive(Debug, Clone)] pub struct OutgoingZeroRttData { - accepted: Shared, + accepted: Shared>>, } mod sealed { @@ -671,6 +694,7 @@ impl sealed::Sealed for IncomingZeroRtt {} impl ConnectionState for IncomingZeroRtt { type Data = IncomingZeroRttData; } + impl sealed::Sealed for OutgoingZeroRtt {} impl ConnectionState for OutgoingZeroRtt { type Data = OutgoingZeroRttData; @@ -679,13 +703,10 @@ impl ConnectionState for OutgoingZeroRtt { #[allow(missing_docs)] #[stack_error(add_meta, derive)] #[error("Protocol error: no remote id available")] +#[derive(Clone)] pub struct RemoteEndpointIdError; impl Connection { - fn quinn_connection(&self) -> &quinn::Connection { - &self.inner - } - /// Initiates a new outgoing unidirectional stream. /// /// Streams are cheap and instantaneous to open unless blocked by flow control. As a @@ -941,7 +962,7 @@ impl Connection { impl Connection { /// Extracts the ALPN protocol from the peer's handshake data. pub fn alpn(&self) -> &[u8] { - &self.info.alpn + &self.data.info.alpn } /// Returns the [`EndpointId`] from the peer's TLS certificate. @@ -953,7 +974,41 @@ impl Connection { /// /// [`PublicKey`]: iroh_base::PublicKey pub fn remote_id(&self) -> EndpointId { - self.info.endpoint_id + self.data.info.endpoint_id + } + + /// Returns a [`Watcher`] for the network paths of this connection. + /// + /// A connection can have several network paths to the remote endpoint, commonly there + /// will be a path via the relay server and a holepunched path. + /// + /// The watcher is updated whenever a path is opened or closed, or when the path selected + /// for transmission changes (see [`PathInfo::is_selected`]). + /// + /// The [`PathInfoList`] returned from the watcher contains a [`PathInfo`] for each + /// transmission path. + /// + /// [`PathInfo::is_selected`]: crate::magicsock::PathInfo::is_selected + /// [`PathInfo`]: crate::magicsock::PathInfo + pub fn paths(&self) -> impl Watcher + Unpin + Send + Sync + 'static { + self.data.paths.clone() + } + + /// Returns the side of the connection (client or server). + pub fn side(&self) -> Side { + self.inner.side() + } + + /// Returns a connection info struct. + /// + /// A [`ConnectionInfo`] is a weak handle to the connection that does not keep the connection alive, + /// but does allow to access some information about the connection and to wait for the connection to be closed. + pub fn to_info(&self) -> ConnectionInfo { + ConnectionInfo { + data: self.data.clone(), + inner: self.inner.weak_handle(), + side: self.side(), + } } } @@ -965,19 +1020,18 @@ impl Connection { /// Waits until the full handshake occurs and then returns a [`Connection`]. /// - /// This may fail with [`AuthenticationError::ConnectionError`], if there was + /// This may fail with [`ConnectingError::ConnectionError`], if there was /// some general failure with the connection, such as a network timeout since /// we accepted the connection. /// - /// This may fail with other [`AuthenticationError`]s, if the other side + /// This may fail with [`ConnectingError::HandshakeFailure`], if the other side /// doesn't use the right TLS authentication, which usually every iroh endpoint /// uses and requires. 
/// /// Thus, those errors should only occur if someone connects to you with a /// modified iroh endpoint or with a plain QUIC client. - pub async fn handshake_completed(self) -> Result { - let _ = self.info.accepted.clone().await; - conn_from_quinn_conn(self.inner) + pub async fn handshake_completed(&self) -> Result { + self.data.accepted.clone().await } } @@ -996,58 +1050,101 @@ impl Connection { /// the handshake will error and any data sent should be re-sent on a /// new stream. /// - /// This may fail with [`AuthenticationError::ConnectionError`], if there was + /// This may fail with [`ConnectingError::ConnectionError`], if there was /// some general failure with the connection, such as a network timeout since /// we initiated the connection. /// - /// This may fail with other [`AuthenticationError`]s, if the other side + /// This may fail with [`ConnectingError::HandshakeFailure`], if the other side /// doesn't use the right TLS authentication, which usually every iroh endpoint /// uses and requires. /// /// Thus, those errors should only occur if someone connects to you with a /// modified iroh endpoint or with a plain QUIC client. - pub async fn handshake_completed(&self) -> Result { - let accepted = self.info.accepted.clone().await; - let conn = conn_from_quinn_conn(self.inner.clone())?; - - Ok(match accepted { - true => ZeroRttStatus::Accepted(conn), - false => ZeroRttStatus::Rejected(conn), - }) + pub async fn handshake_completed(&self) -> Result { + self.data.accepted.clone().await } } -/// Try send a message to the rtt-actor. +/// Information about a connection. /// -/// If we can't notify the actor that will impact performance a little, but we can still -/// function. -fn try_send_rtt_msg(conn: &quinn::Connection, ep: &Endpoint, remote_id: EndpointId) { - let Some(conn_type_changes) = ep.conn_type(remote_id) else { - warn!(?conn, "failed to create conn_type stream"); - return; - }; - let rtt_msg = RttMessage::NewConnection { - connection: conn.weak_handle(), - conn_type_changes: conn_type_changes.stream(), - endpoint_id: remote_id, - }; - if let Err(err) = ep.rtt_actor.msg_tx.try_send(rtt_msg) { - warn!(?conn, "rtt-actor not reachable: {err:#}"); +/// A [`ConnectionInfo`] is a weak handle to a connection that exposes some information about the connection, +/// but does not keep the connection alive. +#[derive(Debug, Clone)] +pub struct ConnectionInfo { + side: Side, + data: HandshakeCompletedData, + inner: WeakConnectionHandle, +} + +#[allow(missing_docs)] +impl ConnectionInfo { + pub fn alpn(&self) -> &[u8] { + &self.data.info.alpn + } + + pub fn remote_id(&self) -> EndpointId { + self.data.info.endpoint_id + } + + pub fn is_alive(&self) -> bool { + self.inner.upgrade().is_some() + } + + /// Returns a [`Watcher`] for the network paths of this connection. + /// + /// A connection can have several network paths to the remote endpoint, commonly there + /// will be a path via the relay server and a holepunched path. + /// + /// The watcher is updated whenever a path is opened or closed, or when the path selected + /// for transmission changes (see [`PathInfo::is_selected`]). + /// + /// The [`PathInfoList`] returned from the watcher contains a [`PathInfo`] for each + /// transmission path. + /// + /// [`PathInfo::is_selected`]: crate::magicsock::PathInfo::is_selected + /// [`PathInfo`]: crate::magicsock::PathInfo + pub fn paths(&self) -> impl Watcher + Unpin + Send + Sync + 'static { + self.data.paths.clone() + } + + /// Returns connection statistics. 
+ /// + /// Returns `None` if the connection has been dropped. + pub fn stats(&self) -> Option { + self.inner.upgrade().map(|conn| conn.stats()) + } + + /// Returns the side of the connection (client or server). + pub fn side(&self) -> Side { + self.side + } + + /// Waits for the connection to be closed, and returns the close reason and final connection stats. + /// + /// Returns `None` if the connection has been dropped already before this call. + pub async fn closed(&self) -> Option<(ConnectionError, ConnectionStats)> { + let fut = self.inner.upgrade()?.on_closed(); + Some(fut.await) } } #[cfg(test)] mod tests { + use std::time::Duration; + use iroh_base::{EndpointAddr, SecretKey}; use n0_error::{Result, StackResultExt, StdResultExt}; + use n0_future::StreamExt; + use n0_watcher::Watcher; use rand::SeedableRng; - use tracing::{Instrument, info_span, trace_span}; + use tracing::{Instrument, error_span, info, info_span, trace_span}; use tracing_test::traced_test; use super::Endpoint; use crate::{ RelayMode, - endpoint::{ConnectOptions, Incoming, ZeroRttStatus}, + endpoint::{ConnectOptions, Incoming, PathInfo, PathInfoList, ZeroRttStatus}, + test_utils::run_relay_server, }; const TEST_ALPN: &[u8] = b"n0/iroh/test"; @@ -1257,4 +1354,94 @@ mod tests { tokio::join!(client.close(), server.close()); Ok(()) } + + #[tokio::test] + #[traced_test] + async fn test_paths_watcher() -> Result { + const ALPN: &[u8] = b"test"; + let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0u64); + let (relay_map, _relay_map, _guard) = run_relay_server().await?; + let server = Endpoint::empty_builder(RelayMode::Custom(relay_map.clone())) + .secret_key(SecretKey::generate(&mut rng)) + .insecure_skip_relay_cert_verify(true) + .alpns(vec![ALPN.to_vec()]) + .bind() + .await?; + + let client = Endpoint::empty_builder(RelayMode::Custom(relay_map.clone())) + .secret_key(SecretKey::generate(&mut rng)) + .insecure_skip_relay_cert_verify(true) + .bind() + .await?; + + server.online().await; + let server_addr = server.addr(); + info!("server addr: {server_addr:?}"); + + let (conn_client, conn_server) = tokio::join!( + async { client.connect(server_addr, ALPN).await.unwrap() }, + async { server.accept().await.unwrap().await.unwrap() } + ); + info!("connected"); + let mut paths_client = conn_client.paths().stream(); + let mut paths_server = conn_server.paths().stream(); + + /// Advances the path stream until at least one IP and one relay paths are available. + /// + /// Panics if the path stream finishes before that happens. + async fn wait_for_paths( + stream: &mut n0_watcher::Stream + Unpin>, + ) { + loop { + let paths = stream.next().await.expect("paths stream ended"); + info!(?paths, "paths"); + if paths.len() >= 2 + && paths.iter().any(PathInfo::is_relay) + && paths.iter().any(PathInfo::is_ip) + { + info!("break"); + return; + } + } + } + + // Verify that both connections are notified of path changes and get an IP and a relay path. + tokio::join!( + async { + tokio::time::timeout(Duration::from_secs(1), wait_for_paths(&mut paths_server)) + .instrument(error_span!("paths-server")) + .await + .unwrap() + }, + async { + tokio::time::timeout(Duration::from_secs(1), wait_for_paths(&mut paths_client)) + .instrument(error_span!("paths-client")) + .await + .unwrap() + } + ); + + // Close the client connection. + info!("close client conn"); + conn_client.close(0u32.into(), b""); + + // Verify that the path watch streams close. 
+ assert_eq!( + tokio::time::timeout(Duration::from_secs(1), paths_client.next()) + .await + .unwrap(), + None + ); + assert_eq!( + tokio::time::timeout(Duration::from_secs(1), paths_server.next()) + .await + .unwrap(), + None + ); + + server.close().await; + client.close().await; + + Ok(()) + } } diff --git a/iroh/src/endpoint/hooks.rs b/iroh/src/endpoint/hooks.rs new file mode 100644 index 00000000000..a60dc5860c5 --- /dev/null +++ b/iroh/src/endpoint/hooks.rs @@ -0,0 +1,168 @@ +use std::pin::Pin; + +use iroh_base::EndpointAddr; +use quinn::VarInt; + +use crate::endpoint::connection::ConnectionInfo; + +type BoxFuture<'a, T> = Pin + Send + 'a>>; + +/// Outcome of [`EndpointHooks::before_connect`] +#[derive(Debug)] +pub enum BeforeConnectOutcome { + /// Accept the connect attempt. + Accept, + /// Reject the connect attempt. + Reject, +} + +/// Outcome of [`EndpointHooks::after_handshake`] +#[derive(Debug)] +pub enum AfterHandshakeOutcome { + /// Accept the connection. + Accept, + /// Reject and close the connection. + /// + /// See [`Connection::close`] for details on `error_code` and `reason`. + /// + /// [`Connection::close`]: crate::endpoint::Connection::close + Reject { + /// Error code to send with the connection close frame. + error_code: VarInt, + /// Close reason to send with the connection close frame. + reason: Vec, + }, +} + +impl AfterHandshakeOutcome { + /// Returns [`Self::Accept`]. + pub fn accept() -> Self { + Self::Accept + } + + /// Returns [`Self::Reject`]. + pub fn reject(&self, error_code: VarInt, reason: &[u8]) -> Self { + Self::Reject { + error_code, + reason: reason.to_vec(), + } + } +} + +/// EndpointHooks intercept the connection establishment process of an [`Endpoint`]. +/// +/// Use [`Builder::hooks`] to install hooks onto an endpoint. +/// +/// For each hook, all installed hooks are invoked in the order they were installed on +/// the endpoint builder. If a hook returns `Accept`, processing continues with the next +/// hook. If a hook returns `Reject`, processing is aborted and further hooks +/// are not invoked for this hook. +/// +/// ## Notes to implementers +/// +/// As hooks are stored on the endpoint, you must make sure to never store an [`Endpoint`] +/// on the hook struct itself, as this would create reference counting loop and cause the +/// endpoint to never be dropped, leaking memory. +/// +/// [`Endpoint`]: crate::Endpoint +/// [`Builder::hooks`]: crate::endpoint::Builder::hooks +pub trait EndpointHooks: std::fmt::Debug + Send + Sync { + /// Intercept outgoing connections before they are started. + /// + /// This is called whenever a new outgoing connection is initiated via [`Endpoint::connect`] + /// or [`Endpoint::connect_with_opts`]. + /// + /// If any hook returns [`BeforeConnectOutcome::Reject`], the connection attempt is aborted + /// before any packets are sent to the remote. + /// + /// [`Endpoint::connect`]: crate::Endpoint::connect + /// [`Endpoint::connect_with_opts`]: crate::Endpoint::connect_with_opts + fn before_connect<'a>( + &'a self, + _remote_addr: &'a EndpointAddr, + _alpn: &'a [u8], + ) -> impl Future + Send + 'a { + async { BeforeConnectOutcome::Accept } + } + + /// Intercept both incoming and outgoing connections once the TLS handshake has completed. + /// + /// At this point in time, we know the remote's endpoint id and ALPN. If any hook returns + /// [`AfterHandshakeOutcome::Reject`], the connection is closed with the provided error code + /// and reason. 
+ fn after_handshake<'a>( + &'a self, + _conn: &'a ConnectionInfo, + ) -> impl Future + Send + 'a { + async { AfterHandshakeOutcome::accept() } + } +} + +pub(crate) trait DynEndpointHooks: std::fmt::Debug + Send + Sync { + fn before_connect<'a>( + &'a self, + remote_addr: &'a EndpointAddr, + alpn: &'a [u8], + ) -> BoxFuture<'a, BeforeConnectOutcome>; + fn after_handshake<'a>( + &'a self, + conn: &'a ConnectionInfo, + ) -> BoxFuture<'a, AfterHandshakeOutcome>; +} + +impl DynEndpointHooks for T { + fn before_connect<'a>( + &'a self, + remote_addr: &'a EndpointAddr, + alpn: &'a [u8], + ) -> BoxFuture<'a, BeforeConnectOutcome> { + Box::pin(EndpointHooks::before_connect(self, remote_addr, alpn)) + } + + fn after_handshake<'a>( + &'a self, + conn: &'a ConnectionInfo, + ) -> BoxFuture<'a, AfterHandshakeOutcome> { + Box::pin(EndpointHooks::after_handshake(self, conn)) + } +} + +#[derive(Debug, Default)] +pub(crate) struct EndpointHooksList { + inner: Vec>, +} + +impl EndpointHooksList { + pub(super) fn push(&mut self, hook: impl EndpointHooks + 'static) { + let hook: Box = Box::new(hook); + self.inner.push(hook); + } + + pub(super) async fn before_connect( + &self, + remote_addr: &EndpointAddr, + alpn: &[u8], + ) -> BeforeConnectOutcome { + for hook in self.inner.iter() { + match hook.before_connect(remote_addr, alpn).await { + BeforeConnectOutcome::Accept => continue, + reject @ BeforeConnectOutcome::Reject => { + return reject; + } + } + } + BeforeConnectOutcome::Accept + } + + pub(super) async fn after_handshake(&self, conn: &ConnectionInfo) -> AfterHandshakeOutcome { + for hook in self.inner.iter() { + match hook.after_handshake(conn).await { + AfterHandshakeOutcome::Accept => continue, + reject @ AfterHandshakeOutcome::Reject { .. } => { + return reject; + } + } + } + AfterHandshakeOutcome::Accept + } +} diff --git a/iroh/src/endpoint/rtt_actor.rs b/iroh/src/endpoint/rtt_actor.rs deleted file mode 100644 index 4dddc7955af..00000000000 --- a/iroh/src/endpoint/rtt_actor.rs +++ /dev/null @@ -1,171 +0,0 @@ -//! Actor which coordinates the congestion controller for the magic socket - -use std::{pin::Pin, sync::Arc, task::Poll}; - -use iroh_base::EndpointId; -use n0_future::{ - MergeUnbounded, Stream, StreamExt, - task::{self, AbortOnDropHandle}, -}; -use tokio::sync::mpsc; -use tracing::{Instrument, debug, info_span}; - -use crate::{magicsock::ConnectionType, metrics::MagicsockMetrics}; - -#[derive(Debug)] -pub(super) struct RttHandle { - // We should and some point use this to propagate panics and errors. - pub(super) _handle: AbortOnDropHandle<()>, - pub(super) msg_tx: mpsc::Sender, -} - -impl RttHandle { - pub(super) fn new(metrics: Arc) -> Self { - let mut actor = RttActor { - connection_events: Default::default(), - metrics, - }; - let (msg_tx, msg_rx) = mpsc::channel(16); - let handle = task::spawn( - async move { - actor.run(msg_rx).await; - } - .instrument(info_span!("rtt-actor")), - ); - Self { - _handle: AbortOnDropHandle::new(handle), - msg_tx, - } - } -} - -/// Messages to send to the [`RttActor`]. -#[derive(Debug)] -pub(super) enum RttMessage { - /// Informs the [`RttActor`] of a new connection is should monitor. - NewConnection { - /// The connection. - connection: quinn::WeakConnectionHandle, - /// Path changes for this connection from the magic socket. - conn_type_changes: n0_watcher::Stream>, - /// For reporting-only, the Endpoint ID of this connection. - endpoint_id: EndpointId, - }, -} - -/// Actor to coordinate congestion controller state with magic socket state. 
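// For context, a minimal sketch of an application-defined hook, assuming the
// `EndpointHooks` trait and `BeforeConnectOutcome` introduced in hooks.rs above are
// importable from `iroh::endpoint` (path assumed); `AlpnAllowlist` and its field are
// illustrative names, not part of this change.
use std::future::Future;

use iroh::endpoint::{BeforeConnectOutcome, EndpointHooks};
use iroh_base::EndpointAddr;

#[derive(Debug)]
struct AlpnAllowlist {
    allowed: Vec<Vec<u8>>,
}

impl EndpointHooks for AlpnAllowlist {
    fn before_connect<'a>(
        &'a self,
        _remote_addr: &'a EndpointAddr,
        alpn: &'a [u8],
    ) -> impl Future<Output = BeforeConnectOutcome> + Send + 'a {
        async move {
            // Reject outgoing connects whose ALPN is not on the allowlist; the default
            // `after_handshake` implementation (accept) is left in place.
            if self.allowed.iter().any(|a| a.as_slice() == alpn) {
                BeforeConnectOutcome::Accept
            } else {
                BeforeConnectOutcome::Reject
            }
        }
    }
}
// Installed via the endpoint builder (`Builder::hooks`, as referenced in the trait docs),
// such a hook runs before any packet is sent for an outgoing connection attempt.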
-/// -/// The magic socket can change the underlying network path, between two endpoints. If we can -/// inform the QUIC congestion controller of this event it will work much more efficiently. -#[derive(derive_more::Debug)] -struct RttActor { - /// Stream of connection type changes. - #[debug("MergeUnbounded>")] - connection_events: MergeUnbounded, - metrics: Arc, -} - -#[derive(Debug)] -struct MappedStream { - stream: n0_watcher::Stream>, - endpoint_id: EndpointId, - /// Reference to the connection. - connection: quinn::WeakConnectionHandle, - /// This an indiciator of whether this connection was direct before. - /// This helps establish metrics on number of connections that became direct. - was_direct_before: bool, -} - -struct ConnectionEvent { - became_direct: bool, -} - -impl Stream for MappedStream { - type Item = ConnectionEvent; - - /// Performs the congestion controller reset for a magic socket path change. - /// - /// Regardless of which kind of path we are changed to, the congestion controller needs - /// resetting. Even when switching to mixed we should reset the state as e.g. switching - /// from direct to mixed back to direct should be a rare exception and is a bug if this - /// happens commonly. - fn poll_next( - mut self: Pin<&mut Self>, - cx: &mut std::task::Context<'_>, - ) -> Poll> { - match Pin::new(&mut self.stream).poll_next(cx) { - Poll::Ready(Some(new_conn_type)) => { - let mut became_direct = false; - if self.connection.network_path_changed() { - debug!( - endpoint_id = %self.endpoint_id.fmt_short(), - new_type = ?new_conn_type, - "Congestion controller state reset", - ); - if !self.was_direct_before && matches!(new_conn_type, ConnectionType::Direct(_)) - { - self.was_direct_before = true; - became_direct = true - } - }; - Poll::Ready(Some(ConnectionEvent { became_direct })) - } - Poll::Ready(None) => Poll::Ready(None), - Poll::Pending => Poll::Pending, - } - } -} - -impl RttActor { - /// Runs the actor main loop. - /// - /// The main loop will finish when the sender is dropped. - async fn run(&mut self, mut msg_rx: mpsc::Receiver) { - loop { - tokio::select! { - biased; - msg = msg_rx.recv() => { - match msg { - Some(msg) => self.handle_msg(msg), - None => break, - } - } - event = self.connection_events.next(), if !self.connection_events.is_empty() => { - if event.map(|e| e.became_direct).unwrap_or(false) { - self.metrics.connection_became_direct.inc(); - } - } - } - } - debug!("rtt-actor finished"); - } - - /// Handle actor messages. - fn handle_msg(&mut self, msg: RttMessage) { - match msg { - RttMessage::NewConnection { - connection, - conn_type_changes, - endpoint_id, - } => { - self.handle_new_connection(connection, conn_type_changes, endpoint_id); - } - } - } - - /// Handles the new connection message. - fn handle_new_connection( - &mut self, - connection: quinn::WeakConnectionHandle, - conn_type_changes: n0_watcher::Stream>, - endpoint_id: EndpointId, - ) { - self.connection_events.push(MappedStream { - stream: conn_type_changes, - connection, - endpoint_id, - was_direct_before: false, - }); - self.metrics.connection_handshake_success.inc(); - } -} diff --git a/iroh/src/key.rs b/iroh/src/key.rs deleted file mode 100644 index c6b9f6ee483..00000000000 --- a/iroh/src/key.rs +++ /dev/null @@ -1,158 +0,0 @@ -//! The private and public keys of an endpoint. 
- -use std::fmt::Debug; - -use aead::{AeadCore, AeadInOut, Buffer}; -use iroh_base::{PublicKey, SecretKey}; -use n0_error::{e, ensure, stack_error}; - -pub(crate) const NONCE_LEN: usize = 24; - -const AEAD_DATA: &[u8] = &[]; - -pub(super) fn public_ed_box(key: &PublicKey) -> crypto_box::PublicKey { - let key = key.as_verifying_key(); - crypto_box::PublicKey::from(key.to_montgomery()) -} - -pub(super) fn secret_ed_box(key: &SecretKey) -> crypto_box::SecretKey { - let key = key.as_signing_key(); - crypto_box::SecretKey::from(key.to_scalar()) -} - -/// Shared Secret. -pub struct SharedSecret(crypto_box::ChaChaBox); - -/// Errors that can occur during [`SharedSecret::open`]. -#[stack_error(derive, add_meta, from_sources, std_sources)] -#[non_exhaustive] -pub enum DecryptionError { - /// The nonce had the wrong size. - #[error("Invalid nonce")] - InvalidNonce, - /// AEAD decryption failed. - #[error("Aead error")] - Aead { - #[error(std_err)] - source: aead::Error, - }, -} - -impl Debug for SharedSecret { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "SharedSecret(crypto_box::ChaChaBox)") - } -} - -impl SharedSecret { - pub fn new(this: &crypto_box::SecretKey, other: &crypto_box::PublicKey) -> Self { - SharedSecret(crypto_box::ChaChaBox::new_from_clamped(other, this)) - } - - /// Seals the provided cleartext. - pub fn seal(&self, buffer: &mut dyn Buffer) { - let nonce = crypto_box::ChaChaBox::try_generate_nonce_with_rng(&mut rand::rng()) - .expect("not enough randomness"); - - self.0 - .encrypt_in_place(&nonce, AEAD_DATA, buffer) - .expect("encryption failed"); - - buffer.extend_from_slice(&nonce).expect("buffer too small"); - } - - /// Opens the ciphertext, which must have been created using `Self::seal`, and places the clear text into the provided buffer. - pub fn open(&self, buffer: &mut dyn Buffer) -> Result<(), DecryptionError> { - ensure!(buffer.len() >= NONCE_LEN, DecryptionError::InvalidNonce); - - let offset = buffer.len() - NONCE_LEN; - let nonce: [u8; NONCE_LEN] = buffer.as_ref()[offset..] 
- .try_into() - .map_err(|_| e!(DecryptionError::InvalidNonce))?; - - buffer.truncate(offset); - self.0.decrypt_in_place(&nonce.into(), AEAD_DATA, buffer)?; - - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use rand::SeedableRng; - - use super::*; - - fn shared(this: &iroh_base::SecretKey, other: &iroh_base::PublicKey) -> SharedSecret { - let secret_key = secret_ed_box(this); - let public_key = public_ed_box(other); - - SharedSecret::new(&secret_key, &public_key) - } - - #[test] - fn test_seal_open_roundtrip() { - let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0u64); - let key_a = iroh_base::SecretKey::generate(&mut rng); - let key_b = iroh_base::SecretKey::generate(&mut rng); - - println!("a -> a"); - seal_open_roundtrip(&key_a, &key_a); - println!("b -> b"); - seal_open_roundtrip(&key_b, &key_b); - - println!("a -> b"); - seal_open_roundtrip(&key_a, &key_b); - println!("b -> a"); - seal_open_roundtrip(&key_b, &key_a); - } - - fn seal_open_roundtrip(key_a: &iroh_base::SecretKey, key_b: &iroh_base::SecretKey) { - let msg = b"super secret message!!!!".to_vec(); - let shared_a = shared(key_a, &key_b.public()); - let mut sealed_message = msg.clone(); - shared_a.seal(&mut sealed_message); - - let shared_b = shared(key_b, &key_a.public()); - let mut decrypted_message = sealed_message.clone(); - shared_b.open(&mut decrypted_message).unwrap(); - assert_eq!(&msg[..], &decrypted_message); - } - - #[test] - fn test_roundtrip_public_key() { - let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0u64); - let key = crypto_box::SecretKey::generate(&mut rng); - let public_bytes = *key.public_key().as_bytes(); - let public_key_back = crypto_box::PublicKey::from(public_bytes); - assert_eq!(key.public_key(), public_key_back); - } - - #[test] - fn test_same_public_key_api() { - let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0u64); - let key = iroh_base::SecretKey::generate(&mut rng); - let public_key1: crypto_box::PublicKey = public_ed_box(&key.public()); - let public_key2: crypto_box::PublicKey = secret_ed_box(&key).public_key(); - - assert_eq!(public_key1, public_key2); - } - - #[test] - fn test_same_public_key_low_level() { - let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0u64); - let key = ed25519_dalek::SigningKey::generate(&mut rng); - let public_key1 = { - let m = key.verifying_key().to_montgomery(); - crypto_box::PublicKey::from(m) - }; - - let public_key2 = { - let s = key.to_scalar(); - let cs = crypto_box::SecretKey::from(s); - cs.public_key() - }; - - assert_eq!(public_key1, public_key2); - } -} diff --git a/iroh/src/lib.rs b/iroh/src/lib.rs index b6504381d6b..a2be352d20b 100644 --- a/iroh/src/lib.rs +++ b/iroh/src/lib.rs @@ -253,8 +253,6 @@ #![cfg_attr(not(test), deny(clippy::unwrap_used))] #![cfg_attr(iroh_docsrs, feature(doc_cfg))] -mod disco; -mod key; mod magicsock; mod tls; diff --git a/iroh/src/magicsock.rs b/iroh/src/magicsock.rs index 68fbdb15411..e4d3ad11a09 100644 --- a/iroh/src/magicsock.rs +++ b/iroh/src/magicsock.rs @@ -16,101 +16,109 @@ //! however, read any packets that come off the UDP sockets. 
use std::{ - collections::{BTreeMap, BTreeSet, HashMap}, + collections::{BTreeMap, BTreeSet}, fmt::Display, io, - net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6}, - pin::Pin, + net::{IpAddr, SocketAddr}, sync::{ Arc, Mutex, RwLock, - atomic::{AtomicBool, AtomicU64, Ordering}, + atomic::{AtomicBool, Ordering}, }, - task::{Context, Poll}, }; -use bytes::Bytes; -use data_encoding::HEXLOWER; use iroh_base::{EndpointAddr, EndpointId, PublicKey, RelayUrl, SecretKey, TransportAddr}; use iroh_relay::{RelayConfig, RelayMap}; -use n0_error::{e, stack_error}; +use n0_error::{bail, e, stack_error}; use n0_future::{ task::{self, AbortOnDropHandle}, time::{self, Duration, Instant}, }; use n0_watcher::{self, Watchable, Watcher}; -use netwatch::netmon; #[cfg(not(wasm_browser))] -use netwatch::{UdpSocket, ip::LocalAddresses}; -use quinn::{AsyncUdpSocket, ServerConfig}; +use netwatch::ip::LocalAddresses; +use netwatch::netmon; +use quinn::{ServerConfig, WeakConnectionHandle}; use rand::Rng; -use smallvec::SmallVec; -use tokio::sync::{Mutex as AsyncMutex, mpsc}; +use tokio::sync::{Mutex as AsyncMutex, mpsc, oneshot}; use tokio_util::sync::CancellationToken; -use tracing::{ - Instrument, Level, debug, error, event, info, info_span, instrument, trace, trace_span, warn, -}; -use transports::LocalAddrsWatch; +use tracing::{Instrument, Level, debug, event, info_span, instrument, trace, warn}; +use transports::{LocalAddrsWatch, MagicTransport, TransportConfig}; use url::Url; -#[cfg(not(wasm_browser))] -use self::transports::IpTransport; use self::{ - endpoint_map::{EndpointMap, PingAction, PingRole, SendPing}, metrics::Metrics as MagicsockMetrics, - transports::{RelayActorConfig, RelayTransport, Transports, UdpSender}, + remote_map::{RemoteMap, RemoteStateMessage}, + transports::{RelayActorConfig, Transports}, }; #[cfg(not(wasm_browser))] use crate::dns::DnsResolver; -#[cfg(any(test, feature = "test-utils"))] -use crate::endpoint::PathSelection; #[cfg(not(wasm_browser))] -use crate::net_report::{IpMappedAddr, QuicConfig}; +use crate::net_report::QuicConfig; use crate::{ defaults::timeouts::NET_REPORT_TIMEOUT, - disco::{self, SendAddr, TransactionId}, - discovery::{ConcurrentDiscovery, Discovery, EndpointData, UserData}, - key::{DecryptionError, SharedSecret, public_ed_box, secret_ed_box}, - magicsock::endpoint_map::RemoteInfo, + discovery::{ConcurrentDiscovery, Discovery, DiscoveryError, EndpointData, UserData}, + endpoint::hooks::EndpointHooksList, + magicsock::remote_map::PathsWatcher, metrics::EndpointMetrics, - net_report::{self, IfStateDetails, IpMappedAddresses, Report}, + net_report::{self, IfStateDetails, Report}, }; -mod endpoint_map; mod metrics; +pub(crate) mod mapped_addrs; +pub(crate) mod remote_map; pub(crate) mod transports; -pub use endpoint_map::Source; - -pub use self::{ - endpoint_map::{ConnectionType, ControlMsg, DirectAddrInfo}, - metrics::Metrics, +use self::{ + mapped_addrs::{EndpointIdMappedAddr, MappedAddr}, + transports::Addr, }; +pub use self::{metrics::Metrics, remote_map::PathInfo}; + +// TODO: Use this +// /// How long we consider a QAD-derived endpoint valid for. UDP NAT mappings typically +// /// expire at 30 seconds, so this is a few seconds shy of that. +// const ENDPOINTS_FRESH_ENOUGH_DURATION: Duration = Duration::from_secs(27); -/// How long we consider a QAD-derived endpoint valid for. UDP NAT mappings typically -/// expire at 30 seconds, so this is a few seconds shy of that. 
-const ENDPOINTS_FRESH_ENOUGH_DURATION: Duration = Duration::from_secs(27); +/// The duration in which we send keep-alives. +/// +/// If a path is idle for this long, a PING frame will be sent to keep the connection +/// alive. +pub(crate) const HEARTBEAT_INTERVAL: Duration = Duration::from_secs(5); -const HEARTBEAT_INTERVAL: Duration = Duration::from_secs(5); +/// The maximum time a path can stay idle before being closed. +/// +/// This is [`HEARTBEAT_INTERVAL`] + 1.5s. This gives us a chance to send a PING frame and +/// some retries. +pub(crate) const PATH_MAX_IDLE_TIMEOUT: Duration = Duration::from_millis(6500); + +/// Maximum number of concurrent QUIC multipath paths per connection. +/// +/// Pretty arbitrary and high right now. +pub(crate) const MAX_MULTIPATH_PATHS: u32 = 12; + +/// Error returned when the endpoint state actor stopped while waiting for a reply. +#[stack_error(add_meta, derive)] +#[error("endpoint state actor stopped")] +#[derive(Clone)] +pub(crate) struct RemoteStateActorStoppedError; + +impl From> for RemoteStateActorStoppedError { + #[track_caller] + fn from(_value: mpsc::error::SendError) -> Self { + Self::new() + } +} /// Contains options for `MagicSock::listen`. #[derive(derive_more::Debug)] pub(crate) struct Options { - /// The IPv4 address to listen on. - /// - /// If set to `None` it will choose a random port and listen on `0.0.0.0:0`. - pub(crate) addr_v4: Option, - /// The IPv6 address to listen on. - /// - /// If set to `None` it will choose a random port and listen on `[::]:0`. - pub(crate) addr_v6: Option, + /// The configuration for the different transports. + pub(crate) transports: Vec, /// Secret key for this endpoint. pub(crate) secret_key: SecretKey, - /// The [`RelayMap`] to use, leave empty to not use a relay server. - pub(crate) relay_map: RelayMap, - /// Optional user-defined discovery data. pub(crate) discovery_user_data: Option, @@ -132,12 +140,8 @@ pub(crate) struct Options { /// May only be used in tests. #[cfg(any(test, feature = "test-utils"))] pub(crate) insecure_skip_relay_cert_verify: bool, - - /// Configuration for what path selection to use - #[cfg(any(test, feature = "test-utils"))] - pub(crate) path_selection: PathSelection, - pub(crate) metrics: EndpointMetrics, + pub(crate) hooks: EndpointHooksList, } /// Handle for [`MagicSock`]. @@ -186,9 +190,8 @@ pub(crate) struct MagicSock { /// If the last net_report report, reports IPv6 to be available. ipv6_reported: Arc, /// Tracks the networkmap endpoint entity for each endpoint discovery key. 
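// A sanity-check sketch of the relationship documented for the two timing constants
// above (idle timeout = keep-alive interval + 1.5 s of headroom); the module and test
// names are illustrative, not part of this change.
#[cfg(test)]
mod timing_constants_sketch {
    use n0_future::time::Duration;

    use super::{HEARTBEAT_INTERVAL, PATH_MAX_IDLE_TIMEOUT};

    #[test]
    fn idle_timeout_leaves_headroom_for_keep_alives() {
        // 5 s keep-alive + 1.5 s headroom = 6.5 s (6500 ms) idle timeout.
        assert_eq!(
            PATH_MAX_IDLE_TIMEOUT,
            HEARTBEAT_INTERVAL + Duration::from_millis(1500)
        );
        assert!(PATH_MAX_IDLE_TIMEOUT > HEARTBEAT_INTERVAL);
    }
}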
- endpoint_map: EndpointMap, - /// Tracks the mapped IP addresses - ip_mapped_addrs: IpMappedAddresses, + pub(crate) remote_map: RemoteMap, + /// Local addresses local_addrs_watch: LocalAddrsWatch, /// Currently bound IP addresses of all sockets @@ -199,9 +202,6 @@ pub(crate) struct MagicSock { dns_resolver: DnsResolver, relay_map: RelayMap, - /// Disco - disco: DiscoState, - // - Discovery /// Optional discovery service discovery: ConcurrentDiscovery, @@ -210,22 +210,11 @@ pub(crate) struct MagicSock { /// Metrics pub(crate) metrics: EndpointMetrics, -} - -#[allow(missing_docs)] -#[stack_error(derive, add_meta)] -#[non_exhaustive] -pub enum AddEndpointAddrError { - #[error("Empty addressing info")] - Empty, - #[error("Empty addressing info, {pruned} direct address have been pruned")] - EmptyPruned { pruned: usize }, - #[error("Adding our own address is not supported")] - OwnAddress, + pub(crate) hooks: EndpointHooksList, } impl MagicSock { - /// Creates a magic [`MagicSock`] listening on [`Options::addr_v4`] and [`Options::addr_v6`]. + /// Creates a magic [`MagicSock`] listening. pub(crate) async fn spawn(opts: Options) -> Result { Handle::new(opts).await } @@ -256,6 +245,30 @@ impl MagicSock { self.local_addrs_watch.clone().get() } + /// Registers the connection in the `RemoteStateActor`. + /// + /// The actor is responsible for holepunching and opening additional paths to this + /// connection. + /// + /// Returns a future that resolves to [`PathsWatcher`]. + /// + /// The returned future is `'static`, so it can be stored without being liftetime-bound to `&self`. + pub(crate) fn register_connection( + &self, + remote: EndpointId, + conn: WeakConnectionHandle, + ) -> impl Future> + Send + 'static + { + let (tx, rx) = oneshot::channel(); + let sender = self.remote_map.remote_state_actor(remote); + async move { + sender + .send(RemoteStateMessage::AddConnection(conn, tx)) + .await?; + rx.await.map_err(|_| RemoteStateActorStoppedError::new()) + } + } + #[cfg(not(wasm_browser))] fn ip_bind_addrs(&self) -> &[SocketAddr] { &self.ip_bind_addrs @@ -267,22 +280,37 @@ impl MagicSock { .filter_map(|addr| addr.into_socket_addr()) } - /// Returns `true` if we have at least one candidate address where we can send packets to. - pub(crate) fn has_send_address(&self, endpoint_key: PublicKey) -> bool { - self.remote_info(endpoint_key) - .map(|info| info.has_send_address()) - .unwrap_or(false) - } - - /// Return the [`RemoteInfo`]s of all endpoints in the endpoint map. - #[cfg(test)] - pub(crate) fn list_remote_infos(&self) -> Vec { - self.endpoint_map.list_remote_infos(Instant::now()) - } - - /// Return the [`RemoteInfo`] for a single endpoint in the endpoint map. - pub(crate) fn remote_info(&self, endpoint_id: EndpointId) -> Option { - self.endpoint_map.remote_info(endpoint_id) + /// Resolves an [`EndpointAddr`] to an [`EndpointIdMappedAddr`] to connect to via [`Handle::endpoint`]. + /// + /// This starts a `RemoteStateActor` for the remote if not running already, and then checks + /// if the actor has any known paths to the remote. If not, it starts discovery and waits for + /// at least one result to arrive. + /// + /// Returns `Ok(Ok(EndpointIdMappedAddr))` if there is a known path or discovery produced + /// at least one result. This does not mean there is a working path, only that we have at least + /// one transport address we can try to connect to. + /// + /// Returns `Ok(Err(discovery_error))` if there are no known paths to the remote and discovery + /// failed or produced no results. 
This means that we don't have any transport address for + /// the remote, thus there is no point in trying to connect over the quinn endpoint. + /// + /// Returns `Err(RemoteStateActorStoppedError)` if the `RemoteStateActor` for the remote has stopped, + /// which may never happen and thus is a bug if it does. + pub(crate) async fn resolve_remote( + &self, + addr: EndpointAddr, + ) -> Result, RemoteStateActorStoppedError> { + let EndpointAddr { id, addrs } = addr; + let actor = self.remote_map.remote_state_actor(id); + let (tx, rx) = oneshot::channel(); + actor + .send(RemoteStateMessage::ResolveRemote(addrs, tx)) + .await?; + match rx.await { + Ok(Ok(())) => Ok(Ok(self.remote_map.endpoint_mapped_addr(id))), + Ok(Err(err)) => Ok(Err(err)), + Err(_) => Err(RemoteStateActorStoppedError::new()), + } } pub(crate) async fn insert_relay( @@ -363,59 +391,6 @@ impl MagicSock { }) } - /// Returns a [`n0_watcher::Direct`] that reports the [`ConnectionType`] we have to the - /// given `endpoint_id`. - /// - /// This gets us a copy of the [`n0_watcher::Direct`] for the [`Watchable`] with a [`ConnectionType`] - /// that the `EndpointMap` stores for each `endpoint_id`'s endpoint. - /// - /// # Errors - /// - /// Will return `None` if there is no address information known about the - /// given `endpoint_id`. - pub(crate) fn conn_type( - &self, - endpoint_id: EndpointId, - ) -> Option> { - self.endpoint_map.conn_type(endpoint_id) - } - - pub(crate) fn latency(&self, endpoint_id: EndpointId) -> Option { - self.endpoint_map.latency(endpoint_id) - } - - /// Returns the socket address which can be used by the QUIC layer to dial this endpoint. - pub(crate) fn get_mapping_addr(&self, endpoint_id: EndpointId) -> Option { - self.endpoint_map - .get_quic_mapped_addr_for_endpoint_key(endpoint_id) - } - - /// Add addresses for an endpoint to the magic socket's addresbook. - #[instrument(skip_all)] - pub(crate) fn add_endpoint_addr( - &self, - mut addr: EndpointAddr, - source: endpoint_map::Source, - ) -> Result<(), AddEndpointAddrError> { - let mut pruned: usize = 0; - for my_addr in self.direct_addrs.sockaddrs() { - if addr.addrs.remove(&TransportAddr::Ip(my_addr)) { - warn!( endpoint_id=%addr.id.fmt_short(), %my_addr, %source, "not adding our addr for endpoint"); - pruned += 1; - } - } - if !addr.is_empty() { - let have_ipv6 = self.ipv6_reported.load(Ordering::Relaxed); - self.endpoint_map - .add_endpoint_addr(addr, source, have_ipv6, &self.metrics.magicsock); - Ok(()) - } else if pruned != 0 { - Err(e!(AddEndpointAddrError::EmptyPruned { pruned })) - } else { - Err(e!(AddEndpointAddrError::Empty)) - } - } - /// Stores a new set of direct addresses. 
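// A sketch (function name and error handling assumed) of how the nested result of
// `MagicSock::resolve_remote`, documented above, might be consumed on the dial path.
async fn resolve_for_connect_sketch(
    msock: &MagicSock,
    addr: EndpointAddr,
) -> Result<EndpointIdMappedAddr, String> {
    match msock.resolve_remote(addr).await {
        // At least one transport address is known or was discovered: hand the mapped
        // address to the quinn endpoint and attempt the connection.
        Ok(Ok(mapped_addr)) => Ok(mapped_addr),
        // No known path and discovery produced nothing: there is no address to dial.
        Ok(Err(discovery_err)) => Err(format!("no address for remote: {discovery_err}")),
        // The per-remote state actor stopped; per the docs above this should never happen.
        Err(actor_stopped) => Err(format!("remote state actor stopped: {actor_stopped}")),
    }
}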
/// /// If the direct addresses have changed from the previous set, they are published to @@ -423,8 +398,6 @@ impl MagicSock { pub(super) fn store_direct_addresses(&self, addrs: BTreeSet) { let updated = self.direct_addrs.update(addrs); if updated { - self.endpoint_map - .on_direct_addr_discovered(self.direct_addrs.sockaddrs().collect()); self.publish_my_addr(); } } @@ -468,18 +441,18 @@ impl MagicSock { #[cfg_attr(windows, allow(dead_code))] fn normalized_local_addr(&self) -> io::Result { - let addrs = self.local_addrs_watch.clone().get(); + let addrs = self.local_addrs_watch.peek(); let mut ipv4_addr = None; for addr in addrs { - let Some(addr) = addr.into_socket_addr() else { - continue; - }; - if addr.is_ipv6() { - return Ok(addr); - } - if addr.is_ipv4() && ipv4_addr.is_none() { - ipv4_addr.replace(addr); + match addr { + Addr::Ip(addr @ SocketAddr::V6(_)) => { + return Ok(*addr); + } + Addr::Ip(addr @ SocketAddr::V4(_)) if ipv4_addr.is_none() => { + ipv4_addr.replace(*addr); + } + _ => {} } } match ipv4_addr { @@ -488,97 +461,14 @@ impl MagicSock { } } - /// Searches the `endpoint_map` to determine the current transports to be used. - #[instrument(skip_all)] - fn prepare_send( - &self, - udp_sender: &UdpSender, - transmit: &quinn_udp::Transmit, - ) -> io::Result> { - self.metrics - .magicsock - .send_data - .inc_by(transmit.contents.len() as _); - - if self.is_closed() { - self.metrics - .magicsock - .send_data_network_down - .inc_by(transmit.contents.len() as _); - return Err(io::Error::new( - io::ErrorKind::NotConnected, - "connection closed", - )); - } - - let mut active_paths = SmallVec::<[_; 3]>::new(); - - match MappedAddr::from(transmit.destination) { - MappedAddr::None(dest) => { - error!(%dest, "Cannot convert to a mapped address."); - } - MappedAddr::EndpointId(dest) => { - trace!( - dst = %dest, - src = ?transmit.src_ip, - len = %transmit.contents.len(), - "sending", - ); - - // Get the endpoint's relay address and best direct address, as well - // as any pings that need to be sent for hole-punching purposes. - match self.endpoint_map.get_send_addrs( - dest, - self.ipv6_reported.load(Ordering::Relaxed), - &self.metrics.magicsock, - ) { - Some((endpoint_id, udp_addr, relay_url, ping_actions)) => { - if !ping_actions.is_empty() { - self.try_send_ping_actions(udp_sender, ping_actions).ok(); - } - if let Some(addr) = udp_addr { - active_paths.push(transports::Addr::from(addr)); - } - if let Some(url) = relay_url { - active_paths.push(transports::Addr::Relay(url, endpoint_id)); - } - } - None => { - error!(%dest, "no EndpointState for mapped address"); - } - } - } - #[cfg(not(wasm_browser))] - MappedAddr::Ip(dest) => { - trace!( - dst = %dest, - src = ?transmit.src_ip, - len = %transmit.contents.len(), - "sending", - ); - - // Check if this is a known IpMappedAddr, and if so, send over UDP - // Get the socket addr - match self.ip_mapped_addrs.get_ip_addr(&dest) { - Some(addr) => { - active_paths.push(transports::Addr::from(addr)); - } - None => { - error!(%dest, "unknown mapped address"); - } - } - } - } - - Ok(active_paths) - } - - /// Process datagrams received from UDP sockets. + /// Process datagrams received from all the transports. /// /// All the `bufs` and `metas` should have initialized packets in them. /// - /// This fixes up the datagrams to use the correct [`EndpointIdMappedAddr`] and extracts DISCO - /// packets, processing them inside the magic socket. 
+ /// This fixes up the datagrams to use the correct [`MultipathMappedAddr`] and extracts + /// DISCO packets, processing them inside the magic socket. + /// + /// [`MultipathMappedAddr`]: mapped_addrs::MultipathMappedAddr fn process_datagrams( &self, bufs: &mut [io::IoSliceMut<'_>], @@ -599,8 +489,8 @@ impl MagicSock { // result in the wrong address family and Windows trips up on that. // // What should be done is that this dst_ip from the RecvMeta is stored in the - // EndpointState/PathState. Then on the send path it should be retrieved from the - // EndpointState/PathSate together with the send address and substituted at send time. + // RemoteState/PathState. Then on the send path it should be retrieved from the + // RemoteState/PathState together with the send address and substituted at send time. // This is relevant for IPv6 link-local addresses where the OS otherwise does not // know which interface to send from. #[cfg(not(windows))] @@ -610,534 +500,58 @@ impl MagicSock { #[cfg(windows)] let dst_ip = None; - let mut quic_packets_total = 0; - - for ((quinn_meta, buf), source_addr) in metas - .iter_mut() - .zip(bufs.iter_mut()) - .zip(source_addrs.iter()) - { - let mut buf_contains_quic_datagrams = false; - let mut quic_datagram_count = 0; - if quinn_meta.len > quinn_meta.stride { - trace!(%quinn_meta.len, %quinn_meta.stride, "GRO datagram received"); - self.metrics.magicsock.recv_gro_datagrams.inc(); - } - - // Chunk through the datagrams in this GRO payload to find disco - // packets and forward them to the actor - for datagram in buf[..quinn_meta.len].chunks_mut(quinn_meta.stride) { - if datagram.len() < quinn_meta.stride { - trace!( - len = %datagram.len(), - %quinn_meta.stride, - "Last GRO datagram smaller than stride", - ); - } - - // Detect DISCO datagrams and process them. Overwrite the first - // byte of those packets with zero to make Quinn ignore the packet. This - // relies on quinn::EndpointConfig::grease_quic_bit being set to `false`, - // which we do in Endpoint::bind. - if let Some((sender, sealed_box)) = disco::source_and_box(datagram) { - trace!(src = ?source_addr, len = %quinn_meta.stride, "UDP recv: disco packet"); - self.handle_disco_message(sender, sealed_box, source_addr); - datagram[0] = 0u8; - } else { - trace!(src = ?source_addr, len = %quinn_meta.stride, "UDP recv: quic packet"); - match source_addr { - transports::Addr::Ip(SocketAddr::V4(..)) => { - self.metrics - .magicsock - .recv_data_ipv4 - .inc_by(datagram.len() as _); - } - transports::Addr::Ip(SocketAddr::V6(..)) => { - self.metrics - .magicsock - .recv_data_ipv6 - .inc_by(datagram.len() as _); - } - transports::Addr::Relay(..) => { - self.metrics - .magicsock - .recv_data_relay - .inc_by(datagram.len() as _); - } - } - - quic_datagram_count += 1; - buf_contains_quic_datagrams = true; - } - } - - if buf_contains_quic_datagrams { - match source_addr { - #[cfg(wasm_browser)] - transports::Addr::Ip(_addr) => { - panic!("cannot use IP based addressing in the browser"); - } - #[cfg(not(wasm_browser))] - transports::Addr::Ip(addr) => { - // UDP - - // Update the EndpointMap and remap RecvMeta to the EndpointIdMappedAddr. 
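// A tiny worked example (values illustrative) of the GRO arithmetic used above, where
// the datagram count is `len.div_ceil(stride)`:
#[test]
fn gro_datagram_count_example() {
    // A 3800-byte receive with a 1500-byte stride carries ceil(3800 / 1500) = 3
    // datagrams: two full 1500-byte segments plus an 800-byte tail.
    assert_eq!(3800_usize.div_ceil(1500), 3);
}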
- match self.endpoint_map.receive_udp(*addr) { - None => { - // Check if this address is mapped to an IpMappedAddr - if let Some(ip_mapped_addr) = - self.ip_mapped_addrs.get_mapped_addr(addr) - { - trace!( - src = %addr, - count = %quic_datagram_count, - len = quinn_meta.len, - "UDP recv QUIC address discovery packets", - ); - quic_packets_total += quic_datagram_count; - quinn_meta.addr = ip_mapped_addr.private_socket_addr(); - } else { - warn!( - src = %addr, - count = %quic_datagram_count, - len = quinn_meta.len, - "UDP recv quic packets: no endpoint state found, skipping", - ); - // If we have no endpoint state for the from addr, set len to 0 to make - // quinn skip the buf completely. - quinn_meta.len = 0; - } - } - Some((endpoint_id, quic_mapped_addr)) => { - trace!( - src = %addr, - endpoint = %endpoint_id.fmt_short(), - count = %quic_datagram_count, - len = quinn_meta.len, - "UDP recv quic packets", - ); - quic_packets_total += quic_datagram_count; - quinn_meta.addr = quic_mapped_addr.private_socket_addr(); - } - } - } - transports::Addr::Relay(src_url, src_endpoint) => { - // Relay - let quic_mapped_addr = - self.endpoint_map.receive_relay(src_url, *src_endpoint); - quinn_meta.addr = quic_mapped_addr.private_socket_addr(); - } - } - } else { - // If all datagrams in this buf are DISCO, set len to zero to make - // Quinn skip the buf completely. - quinn_meta.len = 0; - } - // Normalize local_ip - quinn_meta.dst_ip = dst_ip; - } + // zip is slow :( + for i in 0..metas.len() { + let quinn_meta = &mut metas[i]; + let source_addr = &source_addrs[i]; - if quic_packets_total > 0 { + let datagram_count = quinn_meta.len.div_ceil(quinn_meta.stride); self.metrics .magicsock .recv_datagrams - .inc_by(quic_packets_total as _); - trace!("UDP recv: {} packets", quic_packets_total); - } - } - - /// Handles a discovery message. - #[instrument("disco_in", skip_all, fields(endpoint = %sender.fmt_short(), ?src))] - fn handle_disco_message(&self, sender: PublicKey, sealed_box: &[u8], src: &transports::Addr) { - trace!("handle_disco_message start"); - if self.is_closed() { - return; - } - - if let transports::Addr::Relay(_, endpoint_id) = src { - if endpoint_id != &sender { - // TODO: return here? - warn!( - "Received relay disco message from connection for {}, but with message from {}", - endpoint_id.fmt_short(), - sender.fmt_short() + .inc_by(datagram_count as _); + if quinn_meta.len > quinn_meta.stride { + trace!( + src = ?source_addr, + len = quinn_meta.len, + stride = %quinn_meta.stride, + datagram_count = quinn_meta.len.div_ceil(quinn_meta.stride), + "GRO datagram received", ); + self.metrics.magicsock.recv_gro_datagrams.inc(); + } else { + trace!(src = ?source_addr, len = quinn_meta.len, "datagram received"); } - } - - // We're now reasonably sure we're expecting communication from - // this endpoint, do the heavy crypto lifting to see what they want. - let dm = match self.disco.unseal_and_decode(sender, sealed_box) { - Ok(dm) => dm, - Err(DiscoBoxError::Open { source, .. }) => { - warn!(?source, "failed to open disco box"); - self.metrics.magicsock.recv_disco_bad_key.inc(); - return; - } - Err(DiscoBoxError::Parse { source, .. }) => { - // Couldn't parse it, but it was inside a correctly - // signed box, so just ignore it, assuming it's from a - // newer version of Tailscale that we don't - // understand. Not even worth logging about, lest it - // be too spammy for old clients. 
- - self.metrics.magicsock.recv_disco_bad_parse.inc(); - debug!(?source, "failed to parse disco message"); - return; - } - }; - - if src.is_relay() { - self.metrics.magicsock.recv_disco_relay.inc(); - } else { - self.metrics.magicsock.recv_disco_udp.inc(); - } - - let span = trace_span!("handle_disco", ?dm); - let _guard = span.enter(); - trace!("receive disco message"); - match dm { - disco::Message::Ping(ping) => { - self.metrics.magicsock.recv_disco_ping.inc(); - self.handle_ping(ping, sender, src); - } - disco::Message::Pong(pong) => { - self.metrics.magicsock.recv_disco_pong.inc(); - self.endpoint_map - .handle_pong(sender, src, pong, &self.metrics.magicsock); - } - disco::Message::CallMeMaybe(cm) => { - self.metrics.magicsock.recv_disco_call_me_maybe.inc(); - match src { - transports::Addr::Relay(url, _) => { - event!( - target: "iroh::_events::call-me-maybe::recv", - Level::DEBUG, - remote_endpoint = %sender.fmt_short(), - via = ?url, - their_addrs = ?cm.my_numbers, - ); - } - _ => { - warn!("call-me-maybe packets should only come via relay"); - return; - } - } - let ping_actions = - self.endpoint_map - .handle_call_me_maybe(sender, cm, &self.metrics.magicsock); - for action in ping_actions { - match action { - PingAction::SendCallMeMaybe { .. } => { - warn!("Unexpected CallMeMaybe as response of handling a CallMeMaybe"); - } - PingAction::SendPing(ping) => { - self.send_ping_queued(ping); - } - } - } - } - } - trace!("disco message handled"); - } - - /// Handle a ping message. - fn handle_ping(&self, dm: disco::Ping, sender: EndpointId, src: &transports::Addr) { - // Insert the ping into the endpoint map, and return whether a ping with this tx_id was already - // received. - let addr: SendAddr = src.clone().into(); - let handled = self - .endpoint_map - .handle_ping(sender, addr.clone(), dm.tx_id); - match handled.role { - PingRole::Duplicate => { - debug!(?src, tx = %HEXLOWER.encode(&dm.tx_id), "received ping: path already confirmed, skip"); - return; - } - PingRole::LikelyHeartbeat => {} - PingRole::NewPath => { - debug!(?src, tx = %HEXLOWER.encode(&dm.tx_id), "received ping: new path"); - } - PingRole::Activate => { - debug!(?src, tx = %HEXLOWER.encode(&dm.tx_id), "received ping: path active"); - } - } - - // Send a pong. 
- debug!(tx = %HEXLOWER.encode(&dm.tx_id), %addr, dstkey = %sender.fmt_short(), - "sending pong"); - let pong = disco::Message::Pong(disco::Pong { - tx_id: dm.tx_id, - ping_observed_addr: addr.clone(), - }); - event!( - target: "iroh::_events::pong::sent", - Level::DEBUG, - remote_endpoint = %sender.fmt_short(), - dst = ?addr, - txn = ?dm.tx_id, - ); - - if !self.disco.try_send(addr.clone(), sender, pong) { - warn!(%addr, "failed to queue pong"); - } - - if let Some(ping) = handled.needs_ping_back { - debug!( - %addr, - dstkey = %sender.fmt_short(), - "sending direct ping back", - ); - self.send_ping_queued(ping); - } - } - - fn send_ping_queued(&self, ping: SendPing) { - let SendPing { - id, - dst, - dst_endpoint, - tx_id, - purpose, - } = ping; - let msg = disco::Message::Ping(disco::Ping { - tx_id, - endpoint_key: self.public_key, - }); - let sent = self.disco.try_send(dst.clone(), dst_endpoint, msg); - if sent { - let msg_sender = self.actor_sender.clone(); - trace!(%dst, tx = %HEXLOWER.encode(&tx_id), ?purpose, "ping sent (queued)"); - self.endpoint_map - .notify_ping_sent(id, dst, tx_id, purpose, msg_sender); - } else { - warn!(dst = ?dst, tx = %HEXLOWER.encode(&tx_id), ?purpose, "failed to send ping: queues full"); - } - } - - /// Send the given ping actions out. - async fn send_ping_actions(&self, sender: &UdpSender, msgs: Vec) -> io::Result<()> { - for msg in msgs { - // Abort sending as soon as we know we are shutting down. - if self.is_closing() || self.is_closed() { - return Ok(()); - } - match msg { - PingAction::SendCallMeMaybe { - relay_url, - dst_endpoint, - } => { - // Sends the call-me-maybe DISCO message, queuing if addresses are too stale. - // - // To send the call-me-maybe message, we need to know our current direct addresses. If - // this information is too stale, the call-me-maybe is queued while a net_report run is - // scheduled. Once this run finishes, the call-me-maybe will be sent. - match self.direct_addrs.fresh_enough() { - Ok(()) => { - let msg = disco::Message::CallMeMaybe( - self.direct_addrs.to_call_me_maybe_message(), - ); - if !self.disco.try_send( - SendAddr::Relay(relay_url.clone()), - dst_endpoint, - msg.clone(), - ) { - warn!(dstkey = %dst_endpoint.fmt_short(), %relay_url, "relay channel full, dropping call-me-maybe"); - } else { - debug!(dstkey = %dst_endpoint.fmt_short(), %relay_url, "call-me-maybe sent"); - } - } - Err(last_refresh_ago) => { - debug!( - ?last_refresh_ago, - "want call-me-maybe but direct addrs stale; queuing after restun", - ); - self.actor_sender - .try_send(ActorMessage::ScheduleDirectAddrUpdate( - UpdateReason::RefreshForPeering, - Some((dst_endpoint, relay_url)), - )) - .ok(); - } - } + match source_addr { + transports::Addr::Ip(SocketAddr::V4(..)) => { + self.metrics + .magicsock + .recv_data_ipv4 + .inc_by(quinn_meta.len as _); } - PingAction::SendPing(SendPing { - id, - dst, - dst_endpoint, - tx_id, - purpose, - }) => { - let msg = disco::Message::Ping(disco::Ping { - tx_id, - endpoint_key: self.public_key, - }); - - self.send_disco_message(sender, dst.clone(), dst_endpoint, msg) - .await?; - debug!(%dst, tx = %HEXLOWER.encode(&tx_id), ?purpose, "ping sent"); - let msg_sender = self.actor_sender.clone(); - self.endpoint_map - .notify_ping_sent(id, dst, tx_id, purpose, msg_sender); - } - } - } - Ok(()) - } - - /// Sends out a disco message. 
- async fn send_disco_message( - &self, - sender: &UdpSender, - dst: SendAddr, - dst_key: PublicKey, - msg: disco::Message, - ) -> io::Result<()> { - let dst = match dst { - SendAddr::Udp(addr) => transports::Addr::Ip(addr), - SendAddr::Relay(url) => transports::Addr::Relay(url, dst_key), - }; - - trace!(?dst, %msg, "send disco message (UDP)"); - if self.is_closed() { - return Err(io::Error::new( - io::ErrorKind::NotConnected, - "connection closed", - )); - } - - let pkt = self.disco.encode_and_seal(self.public_key, dst_key, &msg); - - let transmit = transports::Transmit { - contents: &pkt, - ecn: None, - segment_size: None, - }; - - let dst2 = dst.clone(); - match sender.send(&dst2, None, &transmit).await { - Ok(()) => { - trace!(?dst, %msg, "sent disco message"); - self.metrics.magicsock.sent_disco_udp.inc(); - disco_message_sent(&msg, &self.metrics.magicsock); - Ok(()) - } - Err(err) => { - warn!(?dst, ?msg, ?err, "failed to send disco message"); - Err(err) - } - } - } - /// Tries to send out the given ping actions out. - fn try_send_ping_actions(&self, sender: &UdpSender, msgs: Vec) -> io::Result<()> { - for msg in msgs { - // Abort sending as soon as we know we are shutting down. - if self.is_closing() || self.is_closed() { - return Ok(()); - } - match msg { - PingAction::SendCallMeMaybe { - relay_url, - dst_endpoint, - } => { - // Sends the call-me-maybe DISCO message, queuing if addresses are too stale. - // - // To send the call-me-maybe message, we need to know our current direct addresses. If - // this information is too stale, the call-me-maybe is queued while a net_report run is - // scheduled. Once this run finishes, the call-me-maybe will be sent. - match self.direct_addrs.fresh_enough() { - Ok(()) => { - let msg = disco::Message::CallMeMaybe( - self.direct_addrs.to_call_me_maybe_message(), - ); - if !self.disco.try_send( - SendAddr::Relay(relay_url.clone()), - dst_endpoint, - msg.clone(), - ) { - warn!(dstkey = %dst_endpoint.fmt_short(), %relay_url, "relay channel full, dropping call-me-maybe"); - } else { - debug!(dstkey = %dst_endpoint.fmt_short(), %relay_url, "call-me-maybe sent"); - } - } - Err(last_refresh_ago) => { - debug!( - ?last_refresh_ago, - "want call-me-maybe but direct addrs stale; queuing after restun", - ); - self.actor_sender - .try_send(ActorMessage::ScheduleDirectAddrUpdate( - UpdateReason::RefreshForPeering, - Some((dst_endpoint, relay_url)), - )) - .ok(); - } - } + transports::Addr::Ip(SocketAddr::V6(..)) => { + self.metrics + .magicsock + .recv_data_ipv6 + .inc_by(quinn_meta.len as _); } - PingAction::SendPing(SendPing { - id, - dst, - dst_endpoint, - tx_id, - purpose, - }) => { - let msg = disco::Message::Ping(disco::Ping { - tx_id, - endpoint_key: self.public_key, - }); - - self.try_send_disco_message(sender, dst.clone(), dst_endpoint, msg)?; - debug!(%dst, tx = %HEXLOWER.encode(&tx_id), ?purpose, "ping sent"); - let msg_sender = self.actor_sender.clone(); - self.endpoint_map - .notify_ping_sent(id, dst, tx_id, purpose, msg_sender); + transports::Addr::Relay(src_url, src_node) => { + self.metrics + .magicsock + .recv_data_relay + .inc_by(quinn_meta.len as _); + + // Fill in the correct mapped address + let mapped_addr = self + .remote_map + .relay_mapped_addrs + .get(&(src_url.clone(), *src_node)); + quinn_meta.addr = mapped_addr.private_socket_addr(); } } - } - Ok(()) - } - - /// Tries to send out a disco message. 
- fn try_send_disco_message( - &self, - sender: &UdpSender, - dst: SendAddr, - dst_key: PublicKey, - msg: disco::Message, - ) -> io::Result<()> { - let dst = match dst { - SendAddr::Udp(addr) => transports::Addr::Ip(addr), - SendAddr::Relay(url) => transports::Addr::Relay(url, dst_key), - }; - - trace!(?dst, %msg, "send disco message (UDP)"); - if self.is_closed() { - return Err(io::Error::new( - io::ErrorKind::NotConnected, - "connection closed", - )); - } - - let pkt = self.disco.encode_and_seal(self.public_key, dst_key, &msg); - - let transmit = transports::Transmit { - contents: &pkt, - ecn: None, - segment_size: None, - }; - let dst2 = dst.clone(); - match sender.inner_try_send(&dst2, None, &transmit) { - Ok(()) => { - trace!(?dst, %msg, "sent disco message"); - self.metrics.magicsock.sent_disco_udp.inc(); - disco_message_sent(&msg, &self.metrics.magicsock); - Ok(()) - } - Err(err) => { - warn!(?dst, ?msg, ?err, "failed to send disco message"); - Err(err) - } + // Normalize local_ip + quinn_meta.dst_ip = dst_ip; } } @@ -1170,32 +584,6 @@ impl MagicSock { } } -#[derive(Clone, Debug)] -enum MappedAddr { - EndpointId(EndpointIdMappedAddr), - #[cfg(not(wasm_browser))] - Ip(IpMappedAddr), - None(SocketAddr), -} - -impl From for MappedAddr { - fn from(value: SocketAddr) -> Self { - match value.ip() { - IpAddr::V4(_) => MappedAddr::None(value), - IpAddr::V6(addr) => { - if let Ok(endpoint_id_mapped_addr) = EndpointIdMappedAddr::try_from(addr) { - return MappedAddr::EndpointId(endpoint_id_mapped_addr); - } - #[cfg(not(wasm_browser))] - if let Ok(ip_mapped_addr) = IpMappedAddr::try_from(addr) { - return MappedAddr::Ip(ip_mapped_addr); - } - MappedAddr::None(value) - } - } - } -} - /// Manages currently running direct addr discovery, aka net_report runs. /// /// Invariants: @@ -1220,7 +608,6 @@ enum UpdateReason { /// Initial state #[default] None, - RefreshForPeering, Periodic, PortmapUpdated, LinkChangeMajor, @@ -1344,16 +731,16 @@ pub enum CreateHandleError { CreateNetmonMonitor { source: netmon::Error }, #[error("Failed to subscribe netmon monitor")] SubscribeNetmonMonitor { source: netmon::Error }, + #[error("Invalid transport configuration")] + InvalidTransportConfig, } impl Handle { - /// Creates a magic [`MagicSock`] listening on [`Options::addr_v4`] and [`Options::addr_v6`]. + /// Creates a magic [`MagicSock`]. async fn new(opts: Options) -> Result { let Options { - addr_v4, - addr_v6, secret_key, - relay_map, + transports: transport_configs, discovery_user_data, #[cfg(not(wasm_browser))] dns_resolver, @@ -1361,79 +748,111 @@ impl Handle { server_config, #[cfg(any(test, feature = "test-utils"))] insecure_skip_relay_cert_verify, - #[cfg(any(test, feature = "test-utils"))] - path_selection, metrics, + hooks, } = opts; let discovery = ConcurrentDiscovery::default(); - - let addr_v4 = addr_v4.unwrap_or_else(|| SocketAddrV4::new(Ipv4Addr::UNSPECIFIED, 0)); - #[cfg(not(wasm_browser))] - let (ip_transports, port_mapper) = bind_ip(addr_v4, addr_v6, &metrics) - .map_err(|err| e!(CreateHandleError::BindSockets, err))?; + let port_mapper = + portmapper::Client::with_metrics(Default::default(), metrics.portmapper.clone()); - let ip_mapped_addrs = IpMappedAddresses::default(); - - let (actor_sender, actor_receiver) = mpsc::channel(256); - - let ipv6_reported = false; + let relay_transport_configs: Vec<_> = transport_configs + .iter() + .filter(|t| matches!(t, TransportConfig::Relay { .. 
})) + .collect(); - // load the endpoint data - let endpoint_map = EndpointMap::load_from_vec( - Vec::new(), - #[cfg(any(test, feature = "test-utils"))] - path_selection, - ipv6_reported, - &metrics.magicsock, - ); + // Currently we only support a single relay transport + if relay_transport_configs.len() > 1 { + dbg!(&transport_configs, &relay_transport_configs); + bail!(CreateHandleError::InvalidTransportConfig); + } + let relay_map = relay_transport_configs + .iter() + .filter_map(|t| { + #[allow(irrefutable_let_patterns)] + if let TransportConfig::Relay { relay_map } = t { + Some(relay_map.clone()) + } else { + None + } + }) + .next() + .unwrap_or_else(RelayMap::empty); let my_relay = Watchable::new(None); - let ipv6_reported = Arc::new(AtomicBool::new(ipv6_reported)); + let ipv6_reported = Arc::new(AtomicBool::new(false)); + let relay_actor_config = RelayActorConfig { + my_relay: my_relay.clone(), + secret_key: secret_key.clone(), + #[cfg(not(wasm_browser))] + dns_resolver: dns_resolver.clone(), + proxy_url: proxy_url.clone(), + ipv6_reported: ipv6_reported.clone(), + #[cfg(any(test, feature = "test-utils"))] + insecure_skip_relay_cert_verify, + metrics: metrics.magicsock.clone(), + }; let shutdown_token = CancellationToken::new(); - let relay_transport = RelayTransport::new( - RelayActorConfig { - my_relay: my_relay.clone(), - secret_key: secret_key.clone(), - #[cfg(not(wasm_browser))] - dns_resolver: dns_resolver.clone(), - proxy_url: proxy_url.clone(), - ipv6_reported: ipv6_reported.clone(), - #[cfg(any(test, feature = "test-utils"))] - insecure_skip_relay_cert_verify, - metrics: metrics.magicsock.clone(), - }, + let transports = Transports::bind( + &transport_configs, + relay_actor_config, + &metrics, shutdown_token.child_token(), - ); - let relay_transports = vec![relay_transport]; + ) + .map_err(|err| e!(CreateHandleError::BindSockets, err))?; - let secret_encryption_key = secret_ed_box(&secret_key); #[cfg(not(wasm_browser))] - let ipv6 = ip_transports.iter().any(|t| t.bind_addr().is_ipv6()); + { + if let Some(v4_port) = transports.local_addrs().into_iter().find_map(|t| { + if let transports::Addr::Ip(SocketAddr::V4(addr)) = t { + Some(addr.port()) + } else { + None + } + }) { + // NOTE: we can end up with a zero port if `netwatch::UdpSocket::socket_addr` fails + match v4_port.try_into() { + Ok(non_zero_port) => { + port_mapper.update_local_port(non_zero_port); + } + Err(_zero_port) => debug!("Skipping port mapping with zero local port"), + } + } + } + + let (actor_sender, actor_receiver) = mpsc::channel(256); #[cfg(not(wasm_browser))] - let transports = Transports::new(ip_transports, relay_transports); - #[cfg(wasm_browser)] - let transports = Transports::new(relay_transports); + let ipv6 = transports + .ip_bind_addrs() + .into_iter() + .any(|addr| addr.is_ipv6()); - let (disco, disco_receiver) = DiscoState::new(secret_encryption_key); + let direct_addrs = DiscoveredDirectAddrs::default(); + + let remote_map = { + RemoteMap::new( + secret_key.public(), + metrics.magicsock.clone(), + direct_addrs.addrs.watch(), + discovery.clone(), + ) + }; let msock = Arc::new(MagicSock { public_key: secret_key.public(), closing: AtomicBool::new(false), closed: AtomicBool::new(false), - disco, actor_sender: actor_sender.clone(), ipv6_reported, - endpoint_map, - ip_mapped_addrs: ip_mapped_addrs.clone(), + remote_map, discovery, relay_map: relay_map.clone(), discovery_user_data: RwLock::new(discovery_user_data), - direct_addrs: DiscoveredDirectAddrs::default(), + direct_addrs, net_report: 
Watchable::new((None, UpdateReason::None)), #[cfg(not(wasm_browser))] dns_resolver: dns_resolver.clone(), @@ -1441,6 +860,7 @@ impl Handle { local_addrs_watch: transports.local_addrs_watch(), #[cfg(not(wasm_browser))] ip_bind_addrs: transports.ip_bind_addrs(), + hooks, }); let mut endpoint_config = quinn::EndpointConfig::default(); @@ -1451,17 +871,13 @@ impl Handle { // the packet if grease_quic_bit is set to false. endpoint_config.grease_quic_bit(false); - let sender = transports.create_sender(msock.clone()); let local_addrs_watch = transports.local_addrs_watch(); let network_change_sender = transports.create_network_change_sender(); let endpoint = quinn::Endpoint::new_with_abstract_socket( endpoint_config, Some(server_config), - Box::new(MagicUdpSocket { - socket: msock.clone(), - transports, - }), + Box::new(MagicTransport::new(msock.clone(), transports)), #[cfg(not(wasm_browser))] Arc::new(quinn::TokioRuntime), #[cfg(wasm_browser)] @@ -1500,8 +916,6 @@ impl Handle { let net_reporter = net_report::Client::new( #[cfg(not(wasm_browser))] dns_resolver, - #[cfg(not(wasm_browser))] - Some(ip_mapped_addrs), relay_map.clone(), net_report_config, metrics.net_report.clone(), @@ -1529,8 +943,6 @@ impl Handle { direct_addr_update_state, network_change_sender, direct_addr_done_rx, - pending_call_me_maybes: Default::default(), - disco_receiver, }; // Initialize addresses #[cfg(not(wasm_browser))] @@ -1538,7 +950,7 @@ impl Handle { let actor_task = task::spawn( actor - .run(shutdown_token.child_token(), local_addrs_watch, sender) + .run(shutdown_token.child_token(), local_addrs_watch) .instrument(info_span!("actor")), ); @@ -1559,9 +971,11 @@ impl Handle { /// Closes the connection. /// - /// Only the first close does anything. Any later closes return nil. - /// Polling the socket ([`AsyncUdpSocket::poll_recv`]) will return [`Poll::Pending`] - /// indefinitely after this call. + /// Only the first close does anything. Any later closes return nil. Polling the socket + /// ([`quinn::AsyncUdpSocket::poll_recv`]) will return [`Poll::Pending`] indefinitely + /// after this call. + /// + /// [`Poll::Pending`]: std::task::Poll::Pending #[instrument(skip_all)] pub(crate) async fn close(&self) { trace!(me = ?self.public_key, "magicsock closing..."); @@ -1630,146 +1044,9 @@ fn default_quic_client_config() -> rustls::ClientConfig { } #[derive(Debug)] -struct DiscoState { - /// Encryption key for this endpoint. - secret_encryption_key: crypto_box::SecretKey, - /// The state for an active DiscoKey. 
- secrets: Mutex>, - /// Disco (ping) queue - sender: mpsc::Sender<(SendAddr, PublicKey, disco::Message)>, -} - -impl DiscoState { - fn new( - secret_encryption_key: crypto_box::SecretKey, - ) -> (Self, mpsc::Receiver<(SendAddr, PublicKey, disco::Message)>) { - let (disco_sender, disco_receiver) = mpsc::channel(256); - - ( - Self { - secret_encryption_key, - secrets: Default::default(), - sender: disco_sender, - }, - disco_receiver, - ) - } - - fn try_send(&self, dst: SendAddr, endpoint_id: PublicKey, msg: disco::Message) -> bool { - self.sender.try_send((dst, endpoint_id, msg)).is_ok() - } - - fn encode_and_seal( - &self, - this_endpoint_id: EndpointId, - other_endpoint_id: EndpointId, - msg: &disco::Message, - ) -> Bytes { - let mut seal = msg.as_bytes(); - self.get_secret(other_endpoint_id, |secret| secret.seal(&mut seal)); - disco::encode_message(&this_endpoint_id, seal).into() - } - - fn unseal_and_decode( - &self, - endpoint_id: PublicKey, - sealed_box: &[u8], - ) -> Result { - let mut sealed_box = sealed_box.to_vec(); - self.get_secret(endpoint_id, |secret| secret.open(&mut sealed_box)) - .map_err(|source| e!(DiscoBoxError::Open { source }))?; - disco::Message::from_bytes(&sealed_box) - .map_err(|source| e!(DiscoBoxError::Parse { source })) - } - - fn get_secret(&self, endpoint_id: PublicKey, cb: F) -> T - where - F: FnOnce(&mut SharedSecret) -> T, - { - let mut inner = self.secrets.lock().expect("poisoned"); - let x = inner.entry(endpoint_id).or_insert_with(|| { - let public_key = public_ed_box(&endpoint_id); - SharedSecret::new(&self.secret_encryption_key, &public_key) - }); - cb(x) - } -} - -#[allow(missing_docs)] -#[stack_error(derive, add_meta)] -#[non_exhaustive] -enum DiscoBoxError { - #[error("Failed to open crypto box")] - Open { source: DecryptionError }, - #[error("Failed to parse disco message")] - Parse { source: disco::ParseError }, -} - -#[derive(Debug)] -struct MagicUdpSocket { - socket: Arc, - transports: Transports, -} - -impl AsyncUdpSocket for MagicUdpSocket { - fn create_sender(&self) -> Pin> { - Box::pin(self.transports.create_sender(self.socket.clone())) - } - - /// NOTE: Receiving on a closed socket will return [`Poll::Pending`] indefinitely. - fn poll_recv( - &mut self, - cx: &mut Context, - bufs: &mut [io::IoSliceMut<'_>], - metas: &mut [quinn_udp::RecvMeta], - ) -> Poll> { - self.transports.poll_recv(cx, bufs, metas, &self.socket) - } - - #[cfg(not(wasm_browser))] - fn local_addr(&self) -> io::Result { - let addrs: Vec<_> = self - .transports - .local_addrs() - .into_iter() - .filter_map(|addr| { - let addr: SocketAddr = addr.into_socket_addr()?; - Some(addr) - }) - .collect(); - - if let Some(addr) = addrs.iter().find(|addr| addr.is_ipv6()) { - return Ok(*addr); - } - if let Some(SocketAddr::V4(addr)) = addrs.first() { - // Pretend to be IPv6, because our `MappedAddr`s need to be IPv6. - let ip = addr.ip().to_ipv6_mapped().into(); - return Ok(SocketAddr::new(ip, addr.port())); - } - - Err(io::Error::other("no valid address available")) - } - - #[cfg(wasm_browser)] - fn local_addr(&self) -> io::Result { - // Again, we need to pretend we're IPv6, because of our `MappedAddr`s. 
- Ok(SocketAddr::new(std::net::Ipv6Addr::LOCALHOST.into(), 0)) - } - - fn max_receive_segments(&self) -> usize { - self.transports.max_receive_segments() - } - - fn may_fragment(&self) -> bool { - self.transports.may_fragment() - } -} - -#[derive(Debug)] +#[allow(clippy::enum_variant_names)] enum ActorMessage { - EndpointPingExpired(usize, TransactionId), NetworkChange, - ScheduleDirectAddrUpdate(UpdateReason, Option<(EndpointId, RelayUrl)>), RelayMapChange, #[cfg(test)] ForceNetworkChange(bool), @@ -1787,61 +1064,6 @@ struct Actor { /// Indicates the direct addr update state. direct_addr_update_state: DirectAddrUpdateState, direct_addr_done_rx: mpsc::Receiver<()>, - - /// List of CallMeMaybe disco messages that should be sent out after - /// the next endpoint update completes - pending_call_me_maybes: HashMap, - disco_receiver: mpsc::Receiver<(SendAddr, PublicKey, disco::Message)>, -} - -#[cfg(not(wasm_browser))] -fn bind_ip( - addr_v4: SocketAddrV4, - addr_v6: Option, - metrics: &EndpointMetrics, -) -> io::Result<(Vec, portmapper::Client)> { - let port_mapper = - portmapper::Client::with_metrics(Default::default(), metrics.portmapper.clone()); - - let v4 = Arc::new(bind_with_fallback(SocketAddr::V4(addr_v4))?); - let ip4_port = v4.local_addr()?.port(); - let ip6_port = ip4_port.checked_add(1).unwrap_or(ip4_port - 1); - - let addr_v6 = - addr_v6.unwrap_or_else(|| SocketAddrV6::new(Ipv6Addr::UNSPECIFIED, ip6_port, 0, 0)); - - let v6 = match bind_with_fallback(SocketAddr::V6(addr_v6)) { - Ok(sock) => Some(Arc::new(sock)), - Err(err) => { - info!("bind ignoring IPv6 bind failure: {:?}", err); - None - } - }; - - let port = v4.local_addr().map_or(0, |p| p.port()); - - let mut ip = vec![IpTransport::new( - addr_v4.into(), - v4, - metrics.magicsock.clone(), - )]; - if let Some(v6) = v6 { - ip.push(IpTransport::new( - addr_v6.into(), - v6, - metrics.magicsock.clone(), - )) - } - - // NOTE: we can end up with a zero port if `netwatch::UdpSocket::socket_addr` fails - match port.try_into() { - Ok(non_zero_port) => { - port_mapper.update_local_port(non_zero_port); - } - Err(_zero_port) => debug!("Skipping port mapping with zero local port"), - } - - Ok((ip, port_mapper)) } impl Actor { @@ -1849,14 +1071,10 @@ impl Actor { mut self, shutdown_token: CancellationToken, mut watcher: impl Watcher> + Send + Sync, - sender: UdpSender, ) { // Setup network monitoring let mut current_netmon_state = self.netmon_watcher.get(); - #[cfg(not(wasm_browser))] - let mut direct_addr_heartbeat_timer = time::interval(HEARTBEAT_INTERVAL); - #[cfg(not(wasm_browser))] let mut portmap_watcher = self .direct_addr_update_state @@ -1872,6 +1090,9 @@ impl Actor { // ensure we are doing an initial publish of our addresses self.msock.publish_my_addr(); + // Interval timer to remove closed `RemoteStateActor` handles from the endpoint map. + let mut remote_map_gc = time::interval(remote_map::REMOTE_MAP_GC_INTERVAL); + loop { self.msock.metrics.magicsock.actor_tick_main.inc(); #[cfg(not(wasm_browser))] @@ -1879,11 +1100,6 @@ impl Actor { #[cfg(wasm_browser)] let portmap_watcher_changed = n0_future::future::pending(); - #[cfg(not(wasm_browser))] - let direct_addr_heartbeat_timer_tick = direct_addr_heartbeat_timer.tick(); - #[cfg(wasm_browser)] - let direct_addr_heartbeat_timer_tick = n0_future::future::pending(); - tokio::select! 
{ _ = shutdown_token.cancelled() => { debug!("shutting down"); @@ -1966,22 +1182,6 @@ impl Actor { #[cfg(wasm_browser)] let _unused_in_browsers = change; }, - _ = direct_addr_heartbeat_timer_tick => { - #[cfg(not(wasm_browser))] - { - trace!( - "tick: direct addr heartbeat {} direct addrs", - self.msock.endpoint_map.endpoint_count(), - ); - self.msock.metrics.magicsock.actor_tick_direct_addr_heartbeat.inc(); - // TODO: this might trigger too many packets at once, pace this - - self.msock.endpoint_map.prune_inactive(); - let have_v6 = self.netmon_watcher.clone().get().have_v6; - let msgs = self.msock.endpoint_map.endpoints_stayin_alive(have_v6); - self.handle_ping_actions(&sender, msgs).await; - } - } state = self.netmon_watcher.updated() => { let Ok(state) = state else { trace!("tick: link change receiver closed"); @@ -1999,10 +1199,8 @@ impl Actor { self.msock.metrics.magicsock.actor_link_change.inc(); self.handle_network_change(is_major).await; } - Some((dst, dst_key, msg)) = self.disco_receiver.recv() => { - if let Err(err) = self.msock.send_disco_message(&sender, dst.clone(), dst_key, msg).await { - warn!(%dst, endpoint = %dst_key.fmt_short(), ?err, "failed to send disco message (UDP)"); - } + _ = remote_map_gc.tick() => { + self.msock.remote_map.remove_closed_remote_state_actors(); } } } @@ -2019,7 +1217,6 @@ impl Actor { #[cfg(not(wasm_browser))] self.msock.dns_resolver.reset().await; self.re_stun(UpdateReason::LinkChangeMajor); - self.reset_endpoint_states(); } else { self.re_stun(UpdateReason::LinkChangeMinor); } @@ -2035,36 +1232,14 @@ impl Actor { .schedule_run(why, state.into()); } - #[instrument(skip_all)] - async fn handle_ping_actions(&mut self, sender: &UdpSender, msgs: Vec) { - if let Err(err) = self.msock.send_ping_actions(sender, msgs).await { - warn!("Failed to send ping actions: {err:#}"); - } - } - /// Processes an incoming actor message. /// /// Returns `true` if it was a shutdown. async fn handle_actor_message(&mut self, msg: ActorMessage) { match msg { - ActorMessage::EndpointPingExpired(id, txid) => { - self.msock.endpoint_map.notify_ping_timeout( - id, - txid, - &self.msock.metrics.magicsock, - ); - } ActorMessage::NetworkChange => { self.network_monitor.network_change().await.ok(); } - ActorMessage::ScheduleDirectAddrUpdate(why, data) => { - if let Some((endpoint, url)) = data { - self.pending_call_me_maybes.insert(endpoint, url); - } - let state = self.netmon_watcher.get(); - self.direct_addr_update_state - .schedule_run(why, state.into()); - } ActorMessage::RelayMapChange => { self.handle_relay_map_change(); } @@ -2151,7 +1326,6 @@ impl Actor { }) .collect(), ); - self.send_queued_call_me_maybes(); } #[cfg(not(wasm_browser))] @@ -2218,22 +1392,6 @@ impl Actor { } } - fn send_queued_call_me_maybes(&mut self) { - let msg = self.msock.direct_addrs.to_call_me_maybe_message(); - let msg = disco::Message::CallMeMaybe(msg); - // allocate, to minimize locking duration - - for (public_key, url) in self.pending_call_me_maybes.drain() { - if !self - .msock - .disco - .try_send(SendAddr::Relay(url), public_key, msg.clone()) - { - warn!(endpoint = %public_key.fmt_short(), "relay channel full, dropping call-me-maybe"); - } - } - } - fn handle_net_report_report(&mut self, mut report: Option) { if let Some(ref mut r) = report { self.msock.ipv6_reported.store(r.udp_v6, Ordering::Relaxed); @@ -2250,15 +1408,6 @@ impl Actor { #[cfg(not(wasm_browser))] self.update_direct_addresses(report.as_ref()); } - - /// Resets the preferred address for all endpoints. 
- /// This is called when connectivity changes enough that we no longer trust the old routes. - #[instrument(skip_all)] - fn reset_endpoint_states(&mut self) { - self.msock - .endpoint_map - .reset_endpoint_states(&self.msock.metrics.magicsock) - } } fn new_re_stun_timer(initial_delay: bool) -> time::Interval { @@ -2278,31 +1427,6 @@ fn new_re_stun_timer(initial_delay: bool) -> time::Interval { } } -#[cfg(not(wasm_browser))] -fn bind_with_fallback(mut addr: SocketAddr) -> io::Result { - debug!(%addr, "binding"); - - // First try binding a preferred port, if specified - match UdpSocket::bind_full(addr) { - Ok(socket) => { - let local_addr = socket.local_addr()?; - debug!(%addr, %local_addr, "successfully bound"); - return Ok(socket); - } - Err(err) => { - debug!(%addr, "failed to bind: {err:#}"); - // If that was already the fallback port, then error out - if addr.port() == 0 { - return Err(err); - } - } - } - - // Otherwise, try binding with port 0 - addr.set_port(0); - UdpSocket::bind_full(addr) -} - /// The discovered direct addresses of this [`MagicSock`]. /// /// These are all the [`DirectAddr`]s that this [`MagicSock`] is aware of for itself. @@ -2337,130 +1461,6 @@ impl DiscoveredDirectAddrs { fn sockaddrs(&self) -> impl Iterator { self.addrs.get().into_iter().map(|da| da.addr) } - - /// Whether the direct addr information is considered "fresh". - /// - /// If not fresh you should probably update the direct addresses before using this info. - /// - /// Returns `Ok(())` if fresh enough and `Err(elapsed)` if not fresh enough. - /// `elapsed` is the time elapsed since the direct addresses were last updated. - /// - /// If there is no direct address information `Err(Duration::ZERO)` is returned. - fn fresh_enough(&self) -> Result<(), Duration> { - match *self.updated_at.read().expect("poisoned") { - None => Err(Duration::ZERO), - Some(time) => { - let elapsed = time.elapsed(); - if elapsed <= ENDPOINTS_FRESH_ENOUGH_DURATION { - Ok(()) - } else { - Err(elapsed) - } - } - } - } - - fn to_call_me_maybe_message(&self) -> disco::CallMeMaybe { - let my_numbers = self.addrs.get().into_iter().map(|da| da.addr).collect(); - disco::CallMeMaybe { my_numbers } - } -} - -/// The fake address used by the QUIC layer to address an endpoint. -/// -/// You can consider this as nothing more than a lookup key for an endpoint the [`MagicSock`] knows -/// about. -/// -/// [`MagicSock`] can reach an endpoint by several real socket addresses, or maybe even via the relay -/// endpoint. The QUIC layer however needs to address an endpoint by a stable [`SocketAddr`] so -/// that normal socket APIs can function. Thus when a new endpoint is introduced to a [`MagicSock`] -/// it is given a new fake address. This is the type of that address. -/// -/// It is but a newtype. And in our QUIC-facing socket APIs like [`AsyncUdpSocket`] it -/// comes in as the inner [`Ipv6Addr`], in those interfaces we have to be careful to do -/// the conversion to this type. -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] -pub(crate) struct EndpointIdMappedAddr(Ipv6Addr); - -/// Can occur when converting a [`SocketAddr`] to an [`EndpointIdMappedAddr`] -#[stack_error(derive, add_meta)] -#[error("Failed to convert")] -pub struct EndpointIdMappedAddrError; - -/// Counter to always generate unique addresses for [`EndpointIdMappedAddr`]. -static ENDPOINT_ID_ADDR_COUNTER: AtomicU64 = AtomicU64::new(1); - -impl EndpointIdMappedAddr { - /// The Prefix/L of our Unique Local Addresses. 
- const ADDR_PREFIXL: u8 = 0xfd; - /// The Global ID used in our Unique Local Addresses. - const ADDR_GLOBAL_ID: [u8; 5] = [21, 7, 10, 81, 11]; - /// The Subnet ID used in our Unique Local Addresses. - const ADDR_SUBNET: [u8; 2] = [0; 2]; - - /// The dummy port used for all [`EndpointIdMappedAddr`]s. - const ENDPOINT_ID_MAPPED_PORT: u16 = 12345; - - /// Generates a globally unique fake UDP address. - /// - /// This generates and IPv6 Unique Local Address according to RFC 4193. - pub(crate) fn generate() -> Self { - let mut addr = [0u8; 16]; - addr[0] = Self::ADDR_PREFIXL; - addr[1..6].copy_from_slice(&Self::ADDR_GLOBAL_ID); - addr[6..8].copy_from_slice(&Self::ADDR_SUBNET); - - let counter = ENDPOINT_ID_ADDR_COUNTER.fetch_add(1, Ordering::Relaxed); - addr[8..16].copy_from_slice(&counter.to_be_bytes()); - - Self(Ipv6Addr::from(addr)) - } - - /// Returns a consistent [`SocketAddr`] for the [`EndpointIdMappedAddr`]. - /// - /// This socket address does not have a routable IP address. - /// - /// This uses a made-up port number, since the port does not play a role in looking up - /// the endpoint in the [`EndpointMap`]. This socket address is only to be used to pass into - /// Quinn. - pub(crate) fn private_socket_addr(&self) -> SocketAddr { - SocketAddr::new(IpAddr::from(self.0), Self::ENDPOINT_ID_MAPPED_PORT) - } -} - -impl TryFrom for EndpointIdMappedAddr { - type Error = EndpointIdMappedAddrError; - - fn try_from(value: Ipv6Addr) -> Result { - let octets = value.octets(); - if octets[0] == Self::ADDR_PREFIXL - && octets[1..6] == Self::ADDR_GLOBAL_ID - && octets[6..8] == Self::ADDR_SUBNET - { - return Ok(Self(value)); - } - Err(e!(EndpointIdMappedAddrError)) - } -} - -impl std::fmt::Display for EndpointIdMappedAddr { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "EndpointIdMappedAddr({})", self.0) - } -} - -fn disco_message_sent(msg: &disco::Message, metrics: &MagicsockMetrics) { - match msg { - disco::Message::Ping(_) => { - metrics.sent_disco_ping.inc(); - } - disco::Message::Pong(_) => { - metrics.sent_disco_pong.inc(); - } - disco::Message::CallMeMaybe(_) => { - metrics.sent_disco_call_me_maybe.inc(); - } - } } /// A *direct address* on which an iroh-endpoint might be contactable. @@ -2468,6 +1468,9 @@ fn disco_message_sent(msg: &disco::Message, metrics: &MagicsockMetrics) { /// Direct addresses are UDP socket addresses on which an iroh endpoint could potentially be /// contacted. These can come from various sources depending on the network topology of the /// iroh endpoint, see [`DirectAddrType`] for the several kinds of sources. +/// +/// This is essentially a combination of our local addresses combined with any reflexive +/// transport addresses we discovered using QAD. #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct DirectAddr { /// The address. 
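// Illustrative sketch (not part of this patch): the RFC 4193 layout that the
// removed `EndpointIdMappedAddr::generate` above produces. The constants mirror
// the ones deleted in this hunk; the type itself is now pulled from
// `magicsock::mapped_addrs` by the updated test imports below. This is a
// standalone re-statement for reference, not the crate's API.

use std::net::Ipv6Addr;
use std::sync::atomic::{AtomicU64, Ordering};

static COUNTER: AtomicU64 = AtomicU64::new(1);

fn generate_mapped_addr() -> Ipv6Addr {
    let mut octets = [0u8; 16];
    octets[0] = 0xfd;                                   // ULA prefix with the L bit set
    octets[1..6].copy_from_slice(&[21, 7, 10, 81, 11]); // fixed global ID
    octets[6..8].copy_from_slice(&[0, 0]);              // subnet ID
    let n = COUNTER.fetch_add(1, Ordering::Relaxed);
    octets[8..16].copy_from_slice(&n.to_be_bytes());    // unique per call
    Ipv6Addr::from(octets)
}

fn main() {
    // Each call yields a distinct, non-routable fake address to hand to Quinn.
    assert_ne!(generate_mapped_addr(), generate_mapped_addr());
    assert_eq!(generate_mapped_addr().octets()[0], 0xfd);
}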
@@ -2519,26 +1522,28 @@ impl Display for DirectAddrType { #[cfg(test)] mod tests { - use std::{collections::BTreeSet, net::SocketAddr, sync::Arc, time::Duration}; + use std::{net::SocketAddrV4, sync::Arc, time::Duration}; use data_encoding::HEXLOWER; - use iroh_base::{EndpointAddr, EndpointId, PublicKey, TransportAddr}; + use iroh_base::{EndpointAddr, EndpointId, TransportAddr}; use n0_error::{Result, StackResultExt, StdResultExt}; - use n0_future::{StreamExt, time}; + use n0_future::{MergeBounded, StreamExt, time}; use n0_watcher::Watcher; use quinn::ServerConfig; use rand::{CryptoRng, Rng, RngCore, SeedableRng}; - use tokio::task::JoinSet; use tokio_util::task::AbortOnDropHandle; use tracing::{Instrument, error, info, info_span, instrument}; use tracing_test::traced_test; - use super::{EndpointIdMappedAddr, Options}; + use super::Options; use crate::{ - Endpoint, RelayMap, RelayMode, SecretKey, + Endpoint, RelayMode, SecretKey, + discovery::static_provider::StaticProvider, dns::DnsResolver, - endpoint::{PathSelection, Source}, - magicsock::{Handle, MagicSock, endpoint_map}, + magicsock::{ + Handle, MagicSock, TransportConfig, + mapped_addrs::{EndpointIdMappedAddr, MappedAddr}, + }, tls::{self, DEFAULT_MAX_TLS_TICKETS}, }; @@ -2548,19 +1553,20 @@ mod tests { let secret_key = SecretKey::generate(rng); let server_config = make_default_server_config(&secret_key); Options { - addr_v4: None, - addr_v6: None, + transports: vec![ + TransportConfig::default_ipv4(), + TransportConfig::default_ipv6(), + ], secret_key, - relay_map: RelayMap::empty(), proxy_url: None, dns_resolver: DnsResolver::new(), server_config, #[cfg(any(test, feature = "test-utils"))] insecure_skip_relay_cert_verify: false, #[cfg(any(test, feature = "test-utils"))] - path_selection: PathSelection::default(), discovery_user_data: None, metrics: Default::default(), + hooks: Default::default(), } } @@ -2574,137 +1580,10 @@ mod tests { server_config } - impl MagicSock { - #[track_caller] - pub fn add_test_addr(&self, endpoint_addr: EndpointAddr) { - self.add_endpoint_addr( - endpoint_addr, - Source::NamedApp { - name: "test".into(), - }, - ) - .unwrap() - } - } - - /// Magicsock plus wrappers for sending packets - #[derive(Clone)] - struct MagicStack { - secret_key: SecretKey, - endpoint: Endpoint, - } - - impl MagicStack { - async fn new(rng: &mut R, relay_mode: RelayMode) -> Self { - let secret_key = SecretKey::generate(rng); - - let mut transport_config = quinn::TransportConfig::default(); - transport_config.max_idle_timeout(Some(Duration::from_secs(10).try_into().unwrap())); - - let endpoint = Endpoint::empty_builder(relay_mode) - .secret_key(secret_key.clone()) - .transport_config(transport_config) - .alpns(vec![ALPN.to_vec()]) - .bind() - .await - .unwrap(); - - Self { - secret_key, - endpoint, - } - } - - fn tracked_endpoints(&self) -> Vec { - self.endpoint - .magic_sock() - .list_remote_infos() - .into_iter() - .map(|ep| ep.endpoint_id) - .collect() - } - - fn public(&self) -> PublicKey { - self.secret_key.public() - } - } - - /// Monitors endpoint changes and plumbs things together. - /// - /// This is a way of connecting endpoints without a relay server. Whenever the local - /// endpoints of a magic endpoint change this address is added to the other magic - /// sockets. This function will await until the endpoints are connected the first time - /// before returning. - /// - /// When the returned drop guard is dropped, the tasks doing this updating are stopped. 
- #[instrument(skip_all)] - async fn mesh_stacks(stacks: Vec) -> Result> { - /// Registers endpoint addresses of an endpoint to all other endpoints. - fn update_direct_addrs( - stacks: &[MagicStack], - my_idx: usize, - new_addrs: BTreeSet, - ) { - let me = &stacks[my_idx]; - for (i, m) in stacks.iter().enumerate() { - if i == my_idx { - continue; - } - - let addr = EndpointAddr::from_parts( - me.public(), - new_addrs.iter().copied().map(TransportAddr::Ip), - ); - m.endpoint.magic_sock().add_test_addr(addr); - } - } - - // For each endpoint, start a task which monitors its local endpoints and registers them - // with the other endpoints as local endpoints become known. - let mut tasks = JoinSet::new(); - for (my_idx, m) in stacks.iter().enumerate() { - let m = m.clone(); - let stacks = stacks.clone(); - tasks.spawn(async move { - let me = m.endpoint.id().fmt_short(); - let mut stream = m.endpoint.watch_addr().stream(); - while let Some(addr) = stream.next().await { - info!(%me, "conn{} endpoints update: {:?}", my_idx + 1, addr.ip_addrs().collect::>()); - update_direct_addrs(&stacks, my_idx, addr.ip_addrs().copied().collect()); - } - }); - } - - // Wait for all endpoints to be registered with each other. - time::timeout(Duration::from_secs(10), async move { - let all_endpoint_ids: Vec<_> = stacks.iter().map(|ms| ms.endpoint.id()).collect(); - loop { - let mut ready = Vec::with_capacity(stacks.len()); - for ms in stacks.iter() { - let endpoints = ms.tracked_endpoints(); - let my_endpoint_id = ms.endpoint.id(); - let all_endpoints_meshed = all_endpoint_ids - .iter() - .filter(|endpoint_id| **endpoint_id != my_endpoint_id) - .all(|endpoint_id| endpoints.contains(endpoint_id)); - ready.push(all_endpoints_meshed); - } - if ready.iter().all(|meshed| *meshed) { - break; - } - time::sleep(Duration::from_millis(200)).await; - } - }) - .await - .std_context("timeout")?; - info!("all endpoints meshed"); - Ok(tasks) - } - - #[instrument(skip_all, fields(me = %ep.endpoint.id().fmt_short()))] - async fn echo_receiver(ep: MagicStack, loss: ExpectedLoss) -> Result { + #[instrument(skip_all, fields(me = %ep.id().fmt_short()))] + async fn echo_receiver(ep: Endpoint, loss: ExpectedLoss) -> Result { info!("accepting conn"); - let conn = ep.endpoint.accept().await.expect("no conn"); + let conn = ep.accept().await.expect("no conn"); info!("accepting"); let conn = conn.await.context("accepting")?; @@ -2728,32 +1607,34 @@ mod tests { let stats = conn.stats(); info!("stats: {:#?}", stats); - // TODO: ensure panics in this function are reported ok if matches!(loss, ExpectedLoss::AlmostNone) { - assert!( - stats.path.lost_packets < 10, - "[receiver] should not loose many packets", - ); + for info in conn.paths().get().iter() { + assert!( + info.stats().lost_packets < 10, + "[receiver] path {:?} should not loose many packets", + info.remote_addr() + ); + } } - info!("close"); - conn.close(0u32.into(), b"done"); - info!("wait idle"); - ep.endpoint.endpoint().wait_idle().await; + conn.closed().await; + info!("closed"); + ep.endpoint().wait_idle().await; + info!("idle"); Ok(()) } - #[instrument(skip_all, fields(me = %ep.endpoint.id().fmt_short()))] + #[instrument(skip_all, fields(me = %ep.id().fmt_short()))] async fn echo_sender( - ep: MagicStack, - dest_id: PublicKey, + ep: Endpoint, + dest_id: EndpointId, msg: &[u8], loss: ExpectedLoss, ) -> Result { info!("connecting to {}", dest_id.fmt_short()); let dest = EndpointAddr::new(dest_id); - let conn = ep.endpoint.connect(dest, ALPN).await?; + let conn = ep.connect(dest, 
ALPN).await?; info!("opening bi"); let (mut send_bi, mut recv_bi) = conn.open_bi().await.std_context("open bi")?; @@ -2781,16 +1662,19 @@ mod tests { let stats = conn.stats(); info!("stats: {:#?}", stats); if matches!(loss, ExpectedLoss::AlmostNone) { - assert!( - stats.path.lost_packets < 10, - "[sender] should not loose many packets", - ); + for info in conn.paths().get() { + assert!( + info.stats().lost_packets < 10, + "[sender] path {:?} should not loose many packets", + info.remote_addr() + ); + } } - info!("close"); conn.close(0u32.into(), b"done"); - info!("wait idle"); - ep.endpoint.endpoint().wait_idle().await; + info!("closed"); + ep.endpoint().wait_idle().await; + info!("idle"); Ok(()) } @@ -2802,35 +1686,35 @@ mod tests { /// Runs a roundtrip between the [`echo_sender`] and [`echo_receiver`]. async fn run_roundtrip( - sender: MagicStack, - receiver: MagicStack, + sender: Endpoint, + receiver: Endpoint, payload: &[u8], loss: ExpectedLoss, ) { - let send_endpoint_id = sender.endpoint.id(); - let recv_endpoint_id = receiver.endpoint.id(); + let send_endpoint_id = sender.id(); + let recv_endpoint_id = receiver.id(); info!("\nroundtrip: {send_endpoint_id:#} -> {recv_endpoint_id:#}"); - let receiver_task = tokio::spawn(echo_receiver(receiver, loss)); + let receiver_task = AbortOnDropHandle::new(tokio::spawn(echo_receiver(receiver, loss))); let sender_res = echo_sender(sender, recv_endpoint_id, payload, loss).await; let sender_is_err = match sender_res { Ok(()) => false, Err(err) => { - eprintln!("[sender] Error:\n{err:#?}"); + error!("[sender] Error:\n{err:#?}"); true } }; let receiver_is_err = match receiver_task.await { Ok(Ok(())) => false, Ok(Err(err)) => { - eprintln!("[receiver] Error:\n{err:#?}"); + error!("[receiver] Error:\n{err:#?}"); true } Err(joinerr) => { if joinerr.is_panic() { std::panic::resume_unwind(joinerr.into_panic()); } else { - eprintln!("[receiver] Error:\n{joinerr:#?}"); + error!("[receiver] Error:\n{joinerr:#?}"); } true } @@ -2840,14 +1724,46 @@ mod tests { } } + /// Returns a pair of endpoints with a shared [`StaticDiscovery`]. + /// + /// The endpoints do not use a relay server but can connect to each other via local + /// addresses. Dialing by [`EndpointId`] is possible, and the addresses get updated even if + /// the endpoints rebind. 
+ async fn endpoint_pair() -> (AbortOnDropHandle<()>, Endpoint, Endpoint) { + let discovery = StaticProvider::new(); + let ep1 = Endpoint::builder() + .relay_mode(RelayMode::Disabled) + .alpns(vec![ALPN.to_vec()]) + .discovery(discovery.clone()) + .bind() + .await + .unwrap(); + let ep2 = Endpoint::builder() + .relay_mode(RelayMode::Disabled) + .alpns(vec![ALPN.to_vec()]) + .discovery(discovery.clone()) + .bind() + .await + .unwrap(); + discovery.add_endpoint_info(ep1.addr()); + discovery.add_endpoint_info(ep2.addr()); + + let ep1_addr_stream = ep1.watch_addr().stream(); + let ep2_addr_stream = ep2.watch_addr().stream(); + let mut addr_stream = MergeBounded::from_iter([ep1_addr_stream, ep2_addr_stream]); + let task = tokio::spawn(async move { + while let Some(addr) = addr_stream.next().await { + discovery.add_endpoint_info(addr); + } + }); + + (AbortOnDropHandle::new(task), ep1, ep2) + } + #[tokio::test(flavor = "multi_thread")] #[traced_test] async fn test_two_devices_roundtrip_quinn_magic() -> Result { - let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0u64); - let m1 = MagicStack::new(&mut rng, RelayMode::Disabled).await; - let m2 = MagicStack::new(&mut rng, RelayMode::Disabled).await; - - let _guard = mesh_stacks(vec![m1.clone(), m2.clone()]).await?; + let (_guard, m1, m2) = endpoint_pair().await; for i in 0..5 { info!("\n-- round {i}"); @@ -2868,9 +1784,11 @@ mod tests { info!("\n-- larger data"); let mut data = vec![0u8; 10 * 1024]; + let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0u64); rng.fill_bytes(&mut data); run_roundtrip(m1.clone(), m2.clone(), &data, ExpectedLoss::AlmostNone).await; run_roundtrip(m2.clone(), m1.clone(), &data, ExpectedLoss::AlmostNone).await; + info!("\n-- round {i} finished"); } Ok(()) @@ -2879,18 +1797,14 @@ mod tests { #[tokio::test] #[traced_test] async fn test_regression_network_change_rebind_wakes_connection_driver() -> Result { - let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0u64); - let m1 = MagicStack::new(&mut rng, RelayMode::Disabled).await; - let m2 = MagicStack::new(&mut rng, RelayMode::Disabled).await; + let (_guard, m1, m2) = endpoint_pair().await; println!("Net change"); - m1.endpoint.magic_sock().force_network_change(true).await; + m1.magic_sock().force_network_change(true).await; tokio::time::sleep(Duration::from_secs(1)).await; // wait for socket rebinding - let _guard = mesh_stacks(vec![m1.clone(), m2.clone()]).await?; - let _handle = AbortOnDropHandle::new(tokio::spawn({ - let endpoint = m2.endpoint.clone(); + let endpoint = m2.clone(); async move { while let Some(incoming) = endpoint.accept().await { println!("Incoming first conn!"); @@ -2903,7 +1817,7 @@ mod tests { })); println!("first conn!"); - let conn = m1.endpoint.connect(m2.endpoint.addr(), ALPN).await?; + let conn = m1.connect(m2.addr(), ALPN).await?; println!("Closing first conn"); conn.close(0u32.into(), b"bye lolz"); conn.closed().await; @@ -2927,10 +1841,7 @@ mod tests { /// with (simulated) network changes. 
async fn test_two_devices_roundtrip_network_change_impl() -> Result { let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0u64); - let m1 = MagicStack::new(&mut rng, RelayMode::Disabled).await; - let m2 = MagicStack::new(&mut rng, RelayMode::Disabled).await; - - let _guard = mesh_stacks(vec![m1.clone(), m2.clone()]).await?; + let (_guard, m1, m2) = endpoint_pair().await; let offset = |rng: &mut rand_chacha::ChaCha8Rng| { let delay = rng.random_range(10..=500); @@ -2945,7 +1856,7 @@ mod tests { let task = tokio::spawn(async move { loop { println!("[m1] network change"); - m1.endpoint.magic_sock().force_network_change(true).await; + m1.magic_sock().force_network_change(true).await; time::sleep(offset(&mut rng)).await; } }); @@ -2973,7 +1884,7 @@ mod tests { let task = tokio::spawn(async move { loop { println!("[m2] network change"); - m2.endpoint.magic_sock().force_network_change(true).await; + m2.magic_sock().force_network_change(true).await; time::sleep(offset(&mut rng)).await; } }); @@ -3001,9 +1912,9 @@ mod tests { let mut rng = rng.clone(); let task = tokio::spawn(async move { println!("-- [m1] network change"); - m1.endpoint.magic_sock().force_network_change(true).await; + m1.magic_sock().force_network_change(true).await; println!("-- [m2] network change"); - m2.endpoint.magic_sock().force_network_change(true).await; + m2.magic_sock().force_network_change(true).await; time::sleep(offset(&mut rng)).await; }); AbortOnDropHandle::new(task) @@ -3028,20 +1939,16 @@ mod tests { #[tokio::test(flavor = "multi_thread")] #[traced_test] async fn test_two_devices_setup_teardown() -> Result { - let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0u64); for i in 0..10 { - println!("-- round {i}"); - println!("setting up magic stack"); - let m1 = MagicStack::new(&mut rng, RelayMode::Disabled).await; - let m2 = MagicStack::new(&mut rng, RelayMode::Disabled).await; - - let _guard = mesh_stacks(vec![m1.clone(), m2.clone()]).await?; + info!("-- round {i}"); + info!("setting up magic stack"); + let (_guard, m1, m2) = endpoint_pair().await; - println!("closing endpoints"); - let msock1 = m1.endpoint.magic_sock(); - let msock2 = m2.endpoint.magic_sock(); - m1.endpoint.close().await; - m2.endpoint.close().await; + info!("closing endpoints"); + let msock1 = m1.magic_sock(); + let msock2 = m2.magic_sock(); + m1.close().await; + m2.close().await; assert!(msock1.msock.is_closed()); assert!(msock2.msock.is_closed()); @@ -3081,17 +1988,18 @@ mod tests { let dns_resolver = DnsResolver::new(); let opts = Options { - addr_v4: None, - addr_v6: None, + transports: vec![ + TransportConfig::default_ipv4(), + TransportConfig::default_ipv6(), + ], secret_key: secret_key.clone(), - relay_map: RelayMap::empty(), discovery_user_data: None, dns_resolver, proxy_url: None, server_config, insecure_skip_relay_cert_verify: false, - path_selection: PathSelection::default(), metrics: Default::default(), + hooks: Default::default(), }; let msock = MagicSock::spawn(opts).await?; Ok(msock) @@ -3165,7 +2073,7 @@ mod tests { let msock_1 = magicsock_ep(secret_key_1.clone()).await.unwrap(); - // Generate an address not present in the EndpointMap. + // Generate an address not present in the RemoteMap. let bad_addr = EndpointIdMappedAddr::generate(); // 500ms is rather fast here. 
Running this locally it should always be the correct @@ -3218,15 +2126,11 @@ mod tests { .into_iter() .map(|x| TransportAddr::Ip(x.addr)); let endpoint_addr_2 = EndpointAddr::from_parts(endpoint_id_2, addrs); - msock_1 - .add_endpoint_addr( - endpoint_addr_2, - Source::NamedApp { - name: "test".into(), - }, - ) + let addr = msock_1 + .resolve_remote(endpoint_addr_2) + .await + .unwrap() .unwrap(); - let addr = msock_1.get_mapping_addr(endpoint_id_2).unwrap(); let res = tokio::time::timeout( Duration::from_secs(10), magicsock_connect( @@ -3286,16 +2190,15 @@ mod tests { }); let _accept_task = AbortOnDropHandle::new(accept_task); - // Add an empty entry in the EndpointMap of ep_1 - msock_1.endpoint_map.add_endpoint_addr( - EndpointAddr::from_parts(endpoint_id_2, []), - Source::NamedApp { - name: "test".into(), - }, - true, - &msock_1.metrics.magicsock, + // Add an entry in the RemoteMap of ep_1 with an invalid socket address + let empty_addr_2 = EndpointAddr::from_parts( + endpoint_id_2, + [TransportAddr::Ip( + // Reserved IP range for documentation (unreachable) + SocketAddrV4::new([192, 0, 2, 1].into(), 12345).into(), + )], ); - let addr_2 = msock_1.get_mapping_addr(endpoint_id_2).unwrap(); + let addr_2 = msock_1.resolve_remote(empty_addr_2).await.unwrap().unwrap(); // Set a low max_idle_timeout so quinn gives up on this quickly and our test does // not take forever. You need to check the log output to verify this is really @@ -3321,19 +2224,20 @@ mod tests { info!("first connect timed out as expected"); // Provide correct addressing information - let addrs = msock_2 - .ip_addrs() - .get() - .into_iter() - .map(|x| TransportAddr::Ip(x.addr)); - msock_1.endpoint_map.add_endpoint_addr( - EndpointAddr::from_parts(endpoint_id_2, addrs), - Source::NamedApp { - name: "test".into(), - }, - true, - &msock_1.metrics.magicsock, + let correct_addr_2 = EndpointAddr::from_parts( + endpoint_id_2, + msock_2 + .ip_addrs() + .get() + .into_iter() + .map(|x| TransportAddr::Ip(x.addr)), ); + let addr_2a = msock_1 + .resolve_remote(correct_addr_2) + .await + .unwrap() + .unwrap(); + assert_eq!(addr_2, addr_2a); // We can now connect tokio::time::timeout(Duration::from_secs(10), async move { @@ -3357,66 +2261,6 @@ mod tests { .expect("connection timed out"); // TODO: could remove the addresses again, send, add it back and see it recover. - // But we don't have that much private access to the EndpointMap. This will do for now. 
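// Illustrative aside (not part of this patch): the 192.0.2.1 address used above
// comes from TEST-NET-1 (192.0.2.0/24, RFC 5737), a block reserved for
// documentation and never routed on the public internet, which is what makes it
// a safe "well-formed but unreachable" destination for this timeout test. A
// minimal helper, with a made-up name, could look like this:

use std::net::{Ipv4Addr, SocketAddrV4};

/// Returns a syntactically valid socket address that is guaranteed unreachable.
fn unreachable_doc_addr(port: u16) -> SocketAddrV4 {
    SocketAddrV4::new(Ipv4Addr::new(192, 0, 2, 1), port)
}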
- } - - #[tokio::test] - async fn test_add_endpoint_addr() -> Result { - let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0u64); - let stack = MagicStack::new(&mut rng, RelayMode::Default).await; - - assert_eq!(stack.endpoint.magic_sock().endpoint_map.endpoint_count(), 0); - - // Empty - let empty_addr = EndpointAddr::new(SecretKey::generate(&mut rng).public()); - - let err = stack - .endpoint - .magic_sock() - .add_endpoint_addr(empty_addr, endpoint_map::Source::App) - .unwrap_err(); - assert!( - err.to_string() - .to_lowercase() - .contains("empty addressing info") - ); - - // relay url only - let addr = EndpointAddr::from_parts( - SecretKey::generate(&mut rng).public(), - [TransportAddr::Relay("http://my-relay.com".parse().unwrap())], - ); - stack - .endpoint - .magic_sock() - .add_endpoint_addr(addr, endpoint_map::Source::App)?; - assert_eq!(stack.endpoint.magic_sock().endpoint_map.endpoint_count(), 1); - - // addrs only - let addr = EndpointAddr::from_parts( - SecretKey::generate(&mut rng).public(), - [TransportAddr::Ip("127.0.0.1:1234".parse().unwrap())], - ); - stack - .endpoint - .magic_sock() - .add_endpoint_addr(addr, endpoint_map::Source::App)?; - assert_eq!(stack.endpoint.magic_sock().endpoint_map.endpoint_count(), 2); - - // both - let addr = EndpointAddr::from_parts( - SecretKey::generate(&mut rng).public(), - [ - TransportAddr::Relay("http://my-relay.com".parse().unwrap()), - TransportAddr::Ip("127.0.0.1:1234".parse().unwrap()), - ], - ); - stack - .endpoint - .magic_sock() - .add_endpoint_addr(addr, endpoint_map::Source::App)?; - assert_eq!(stack.endpoint.magic_sock().endpoint_map.endpoint_count(), 3); - - Ok(()) + // But we don't have that much private access to the RemoteMap. This will do for now. } } diff --git a/iroh/src/magicsock/endpoint_map.rs b/iroh/src/magicsock/endpoint_map.rs deleted file mode 100644 index e23da332321..00000000000 --- a/iroh/src/magicsock/endpoint_map.rs +++ /dev/null @@ -1,956 +0,0 @@ -use std::{ - collections::{BTreeSet, HashMap, hash_map::Entry}, - hash::Hash, - net::{IpAddr, SocketAddr}, - sync::Mutex, - time::Duration, -}; - -use iroh_base::{EndpointAddr, EndpointId, PublicKey, RelayUrl}; -use n0_future::time::Instant; -use serde::{Deserialize, Serialize}; -use tracing::{debug, info, instrument, trace, warn}; - -use self::endpoint_state::{EndpointState, Options, PingHandled}; -use super::{ActorMessage, EndpointIdMappedAddr, metrics::Metrics, transports}; -use crate::disco::{CallMeMaybe, Pong, SendAddr, TransactionId}; -#[cfg(any(test, feature = "test-utils"))] -use crate::endpoint::PathSelection; - -mod endpoint_state; -mod path_state; -mod path_validity; -mod udp_paths; - -pub use endpoint_state::{ConnectionType, ControlMsg, DirectAddrInfo}; -pub(super) use endpoint_state::{DiscoPingPurpose, PingAction, PingRole, RemoteInfo, SendPing}; - -/// Number of endpoints that are inactive for which we keep info about. This limit is enforced -/// periodically via [`EndpointMap::prune_inactive`]. -const MAX_INACTIVE_ENDPOINTS: usize = 30; - -/// Map of the [`EndpointState`] information for all the known endpoints. -/// -/// The endpoints can be looked up by: -/// -/// - The endpoint's ID in this map, only useful if you know the ID from an insert or lookup. -/// This is static and never changes. -/// -/// - The [`EndpointIdMappedAddr`] which internally identifies the endpoint to the QUIC stack. This -/// is static and never changes. -/// -/// - The endpoints's public key, aka `PublicKey` or "endpoint_key". 
This is static and never changes, -/// however an endpoint could be added when this is not yet known. -/// -/// - A public socket address on which they are reachable on the internet, known as ip-port. -/// These come and go as the endpoint moves around on the internet -/// -/// An index of endpointInfos by endpoint key, EndpointIdMappedAddr, and discovered ip:port endpoints. -#[derive(Debug, Default)] -pub(super) struct EndpointMap { - inner: Mutex, -} - -#[derive(Default, Debug)] -pub(super) struct EndpointMapInner { - by_endpoint_key: HashMap, - by_ip_port: HashMap, - by_quic_mapped_addr: HashMap, - by_id: HashMap, - next_id: usize, - #[cfg(any(test, feature = "test-utils"))] - path_selection: PathSelection, -} - -/// Identifier to look up a [`EndpointState`] in the [`EndpointMap`]. -/// -/// You can look up entries in [`EndpointMap`] with various keys, depending on the context you -/// have for the endpoint. These are all the keys the [`EndpointMap`] can use. -#[derive(Debug, Clone)] -enum EndpointStateKey { - Idx(usize), - EndpointId(EndpointId), - EndpointIdMappedAddr(EndpointIdMappedAddr), - IpPort(IpPort), -} - -/// The origin or *source* through which an address associated with a remote endpoint -/// was discovered. -/// -/// An aggregate of the [`Source`]s of all the addresses of an endpoint describe the -/// [`Source`]s of the endpoint itself. -/// -/// A [`Source`] helps track how and where an address was learned. Multiple -/// sources can be associated with a single address, if we have discovered this -/// address through multiple means. -/// -/// Each time a [`EndpointAddr`] is added to the endpoint map a [`Source`] must be supplied to indicate -/// how the address was obtained. -/// -/// A [`Source`] can describe a variety of places that an address or endpoint was -/// discovered, such as a configured discovery service, the network itself -/// (if another endpoint has reached out to us), or as a user supplied [`EndpointAddr`]. - -#[derive(Serialize, Deserialize, strum::Display, Debug, Clone, Eq, PartialEq, Hash)] -#[strum(serialize_all = "kebab-case")] -pub enum Source { - /// Address was loaded from the fs. - Saved, - /// An endpoint communicated with us first via UDP. - Udp, - /// An endpoint communicated with us first via relay. - Relay, - /// Application layer added the address directly. - App, - /// The address was discovered by a discovery service. - #[strum(serialize = "{name}")] - Discovery { - /// The name of the discovery service that discovered the address. - name: String, - }, - /// Application layer with a specific name added the endpoint directly. - #[strum(serialize = "{name}")] - NamedApp { - /// The name of the application that added the endpoint - name: String, - }, -} - -impl EndpointMap { - /// Create a new [`EndpointMap`] from a list of [`EndpointAddr`]s. - pub(super) fn load_from_vec( - endpoints: Vec, - #[cfg(any(test, feature = "test-utils"))] path_selection: PathSelection, - have_ipv6: bool, - metrics: &Metrics, - ) -> Self { - Self::from_inner(EndpointMapInner::load_from_vec( - endpoints, - #[cfg(any(test, feature = "test-utils"))] - path_selection, - have_ipv6, - metrics, - )) - } - - fn from_inner(inner: EndpointMapInner) -> Self { - Self { - inner: Mutex::new(inner), - } - } - - /// Add the contact information for an endpoint. 
- pub(super) fn add_endpoint_addr( - &self, - endpoint_addr: EndpointAddr, - source: Source, - have_v6: bool, - metrics: &Metrics, - ) { - self.inner.lock().expect("poisoned").add_endpoint_addr( - endpoint_addr, - source, - have_v6, - metrics, - ) - } - - /// Number of endpoints currently listed. - pub(super) fn endpoint_count(&self) -> usize { - self.inner.lock().expect("poisoned").endpoint_count() - } - - #[cfg(not(wasm_browser))] - pub(super) fn receive_udp( - &self, - udp_addr: SocketAddr, - ) -> Option<(PublicKey, EndpointIdMappedAddr)> { - self.inner.lock().expect("poisoned").receive_udp(udp_addr) - } - - pub(super) fn receive_relay( - &self, - relay_url: &RelayUrl, - src: EndpointId, - ) -> EndpointIdMappedAddr { - self.inner - .lock() - .expect("poisoned") - .receive_relay(relay_url, src) - } - - pub(super) fn notify_ping_sent( - &self, - id: usize, - dst: SendAddr, - tx_id: TransactionId, - purpose: DiscoPingPurpose, - msg_sender: tokio::sync::mpsc::Sender, - ) { - if let Some(ep) = self - .inner - .lock() - .expect("poisoned") - .get_mut(EndpointStateKey::Idx(id)) - { - ep.ping_sent(dst, tx_id, purpose, msg_sender); - } - } - - pub(super) fn notify_ping_timeout(&self, id: usize, tx_id: TransactionId, metrics: &Metrics) { - if let Some(ep) = self - .inner - .lock() - .expect("poisoned") - .get_mut(EndpointStateKey::Idx(id)) - { - ep.ping_timeout(tx_id, Instant::now(), metrics); - } - } - - pub(super) fn get_quic_mapped_addr_for_endpoint_key( - &self, - endpoint_key: EndpointId, - ) -> Option { - self.inner - .lock() - .expect("poisoned") - .get(EndpointStateKey::EndpointId(endpoint_key)) - .map(|ep| *ep.quic_mapped_addr()) - } - - /// Insert a received ping into the endpoint map, and return whether a ping with this tx_id was already - /// received. 
- pub(super) fn handle_ping( - &self, - sender: PublicKey, - src: SendAddr, - tx_id: TransactionId, - ) -> PingHandled { - self.inner - .lock() - .expect("poisoned") - .handle_ping(sender, src, tx_id) - } - - pub(super) fn handle_pong( - &self, - sender: PublicKey, - src: &transports::Addr, - pong: Pong, - metrics: &Metrics, - ) { - self.inner - .lock() - .expect("poisoned") - .handle_pong(sender, src, pong, metrics) - } - - #[must_use = "actions must be handled"] - pub(super) fn handle_call_me_maybe( - &self, - sender: PublicKey, - cm: CallMeMaybe, - metrics: &Metrics, - ) -> Vec { - self.inner - .lock() - .expect("poisoned") - .handle_call_me_maybe(sender, cm, metrics) - } - - #[allow(clippy::type_complexity)] - pub(super) fn get_send_addrs( - &self, - addr: EndpointIdMappedAddr, - have_ipv6: bool, - metrics: &Metrics, - ) -> Option<( - PublicKey, - Option, - Option, - Vec, - )> { - let mut inner = self.inner.lock().expect("poisoned"); - let ep = inner.get_mut(EndpointStateKey::EndpointIdMappedAddr(addr))?; - let public_key = *ep.public_key(); - trace!(dest = %addr, endpoint_id = %public_key.fmt_short(), "dst mapped to EndpointId"); - let (udp_addr, relay_url, ping_actions) = ep.get_send_addrs(have_ipv6, metrics); - Some((public_key, udp_addr, relay_url, ping_actions)) - } - - pub(super) fn reset_endpoint_states(&self, metrics: &Metrics) { - let now = Instant::now(); - let mut inner = self.inner.lock().expect("poisoned"); - for (_, ep) in inner.endpoint_states_mut() { - ep.note_connectivity_change(now, metrics); - } - } - - pub(super) fn endpoints_stayin_alive(&self, have_ipv6: bool) -> Vec { - let mut inner = self.inner.lock().expect("poisoned"); - inner - .endpoint_states_mut() - .flat_map(|(_idx, endpoint_state)| endpoint_state.stayin_alive(have_ipv6)) - .collect() - } - - /// Returns the [`RemoteInfo`]s for each endpoint in the endpoint map. - #[cfg(test)] - pub(super) fn list_remote_infos(&self, now: Instant) -> Vec { - // NOTE: calls to this method will often call `into_iter` (or similar methods). Note that - // we can't avoid `collect` here since it would hold a lock for an indefinite time. Even if - // we were to find this acceptable, dealing with the lifetimes of the mutex's guard and the - // internal iterator will be a hassle, if possible at all. - self.inner - .lock() - .expect("poisoned") - .remote_infos_iter(now) - .collect() - } - - /// Returns a [`n0_watcher::Direct`] for given endpoint's [`ConnectionType`]. - /// - /// # Errors - /// - /// Will return `None` if there is not an entry in the [`EndpointMap`] for - /// the `endpoint_id` - pub(super) fn conn_type( - &self, - endpoint_id: EndpointId, - ) -> Option> { - self.inner.lock().expect("poisoned").conn_type(endpoint_id) - } - - pub(super) fn latency(&self, endpoint_id: EndpointId) -> Option { - self.inner.lock().expect("poisoned").latency(endpoint_id) - } - - /// Get the [`RemoteInfo`]s for the endpoint identified by [`EndpointId`]. - pub(super) fn remote_info(&self, endpoint_id: EndpointId) -> Option { - self.inner - .lock() - .expect("poisoned") - .remote_info(endpoint_id) - } - - /// Prunes endpoints without recent activity so that at most [`MAX_INACTIVE_ENDPOINTS`] are kept. 
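// Illustrative sketch (not part of this patch) of the pruning policy named in
// the doc comment above and implemented further down in
// `EndpointMapInner::prune_inactive`: among endpoints that are currently
// inactive, keep at most MAX_INACTIVE_ENDPOINTS and drop the least recently
// used first. The names below are made up for the example.

use std::time::Instant;

fn select_endpoints_to_prune(
    mut inactive: Vec<(usize /* map id */, Option<Instant> /* last_used */)>,
    max_inactive: usize,
) -> Vec<usize> {
    let overflow = inactive.len().saturating_sub(max_inactive);
    if overflow == 0 {
        return Vec::new(); // within limits, nothing to prune
    }
    // `None` (never used) sorts before any `Some(_)`, so never-used entries go first.
    inactive.sort_unstable_by_key(|(_, last_used)| *last_used);
    inactive.truncate(overflow);
    inactive.into_iter().map(|(id, _)| id).collect()
}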
- pub(super) fn prune_inactive(&self) { - self.inner.lock().expect("poisoned").prune_inactive(); - } - - pub(crate) fn on_direct_addr_discovered(&self, discovered: BTreeSet) { - self.inner - .lock() - .expect("poisoned") - .on_direct_addr_discovered(discovered, Instant::now()); - } -} - -impl EndpointMapInner { - /// Create a new [`EndpointMap`] from a list of [`EndpointAddr`]s. - fn load_from_vec( - endpoints: Vec, - #[cfg(any(test, feature = "test-utils"))] path_selection: PathSelection, - have_ipv6: bool, - metrics: &Metrics, - ) -> Self { - let mut me = Self { - #[cfg(any(test, feature = "test-utils"))] - path_selection, - ..Default::default() - }; - for endpoint_addr in endpoints { - me.add_endpoint_addr(endpoint_addr, Source::Saved, have_ipv6, metrics); - } - me - } - - /// Add the contact information for an endpoint. - #[instrument(skip_all, fields(endpoint = %endpoint_addr.id.fmt_short()))] - fn add_endpoint_addr( - &mut self, - endpoint_addr: EndpointAddr, - source: Source, - have_ipv6: bool, - metrics: &Metrics, - ) { - let source0 = source.clone(); - let endpoint_id = endpoint_addr.id; - let relay_url = endpoint_addr.relay_urls().next().cloned(); - #[cfg(any(test, feature = "test-utils"))] - let path_selection = self.path_selection; - let endpoint_state = - self.get_or_insert_with(EndpointStateKey::EndpointId(endpoint_id), || Options { - endpoint_id, - relay_url, - active: false, - source, - #[cfg(any(test, feature = "test-utils"))] - path_selection, - }); - endpoint_state.update_from_endpoint_addr( - endpoint_addr.relay_urls().next(), - endpoint_addr.ip_addrs().copied(), - source0, - have_ipv6, - metrics, - ); - let id = endpoint_state.id(); - for addr in endpoint_addr.ip_addrs() { - self.set_endpoint_state_for_ip_port(*addr, id); - } - } - - /// Prunes direct addresses from endpoints that claim to share an address we know points to us. - pub(super) fn on_direct_addr_discovered( - &mut self, - discovered: BTreeSet, - now: Instant, - ) { - for addr in discovered { - self.remove_by_ipp(addr.into(), now, "matches our local addr") - } - } - - /// Removes a direct address from an endpoint. 
- fn remove_by_ipp(&mut self, ipp: IpPort, now: Instant, why: &'static str) { - if let Some(id) = self.by_ip_port.remove(&ipp) { - if let Entry::Occupied(mut entry) = self.by_id.entry(id) { - let endpoint = entry.get_mut(); - endpoint.remove_direct_addr(&ipp, now, why); - if endpoint.ip_addrs().count() == 0 { - let endpoint_id = endpoint.public_key(); - let mapped_addr = endpoint.quic_mapped_addr(); - self.by_endpoint_key.remove(endpoint_id); - self.by_quic_mapped_addr.remove(mapped_addr); - debug!(endpoint_id=%endpoint_id.fmt_short(), why, "removing endpoint"); - entry.remove(); - } - } - } - } - - fn get_id(&self, id: EndpointStateKey) -> Option { - match id { - EndpointStateKey::Idx(id) => Some(id), - EndpointStateKey::EndpointId(endpoint_key) => { - self.by_endpoint_key.get(&endpoint_key).copied() - } - EndpointStateKey::EndpointIdMappedAddr(addr) => { - self.by_quic_mapped_addr.get(&addr).copied() - } - EndpointStateKey::IpPort(ipp) => self.by_ip_port.get(&ipp).copied(), - } - } - - fn get_mut(&mut self, id: EndpointStateKey) -> Option<&mut EndpointState> { - self.get_id(id).and_then(|id| self.by_id.get_mut(&id)) - } - - fn get(&self, id: EndpointStateKey) -> Option<&EndpointState> { - self.get_id(id).and_then(|id| self.by_id.get(&id)) - } - - fn get_or_insert_with( - &mut self, - id: EndpointStateKey, - f: impl FnOnce() -> Options, - ) -> &mut EndpointState { - let id = self.get_id(id); - match id { - None => self.insert_endpoint(f()), - Some(id) => self.by_id.get_mut(&id).expect("is not empty"), - } - } - - /// Number of endpoints currently listed. - fn endpoint_count(&self) -> usize { - self.by_id.len() - } - - /// Marks the endpoint we believe to be at `ipp` as recently used. - #[cfg(not(wasm_browser))] - fn receive_udp(&mut self, udp_addr: SocketAddr) -> Option<(EndpointId, EndpointIdMappedAddr)> { - let ip_port: IpPort = udp_addr.into(); - let Some(endpoint_state) = self.get_mut(EndpointStateKey::IpPort(ip_port)) else { - trace!(src=%udp_addr, "receive_udp: no endpoint_state found for addr, ignore"); - return None; - }; - endpoint_state.receive_udp(ip_port, Instant::now()); - Some(( - *endpoint_state.public_key(), - *endpoint_state.quic_mapped_addr(), - )) - } - - #[instrument(skip_all, fields(src = %src.fmt_short()))] - fn receive_relay(&mut self, relay_url: &RelayUrl, src: EndpointId) -> EndpointIdMappedAddr { - #[cfg(any(test, feature = "test-utils"))] - let path_selection = self.path_selection; - let endpoint_state = self.get_or_insert_with(EndpointStateKey::EndpointId(src), || { - trace!("packets from unknown endpoint, insert into endpoint map"); - Options { - endpoint_id: src, - relay_url: Some(relay_url.clone()), - active: true, - source: Source::Relay, - #[cfg(any(test, feature = "test-utils"))] - path_selection, - } - }); - endpoint_state.receive_relay(relay_url, src, Instant::now()); - *endpoint_state.quic_mapped_addr() - } - - #[cfg(test)] - fn endpoint_states(&self) -> impl Iterator { - self.by_id.iter() - } - - fn endpoint_states_mut(&mut self) -> impl Iterator { - self.by_id.iter_mut() - } - - /// Get the [`RemoteInfo`]s for all endpoints. - #[cfg(test)] - fn remote_infos_iter(&self, now: Instant) -> impl Iterator + '_ { - self.endpoint_states().map(move |(_, ep)| ep.info(now)) - } - - /// Get the [`RemoteInfo`]s for each endpoint. - fn remote_info(&self, endpoint_id: EndpointId) -> Option { - self.get(EndpointStateKey::EndpointId(endpoint_id)) - .map(|ep| ep.info(Instant::now())) - } - - /// Returns a stream of [`ConnectionType`]. 
- /// - /// Sends the current [`ConnectionType`] whenever any changes to the - /// connection type for `public_key` has occurred. - /// - /// # Errors - /// - /// Will return `None` if there is not an entry in the [`EndpointMap`] for - /// the `public_key` - fn conn_type(&self, endpoint_id: EndpointId) -> Option> { - self.get(EndpointStateKey::EndpointId(endpoint_id)) - .map(|ep| ep.conn_type()) - } - - fn latency(&self, endpoint_id: EndpointId) -> Option { - self.get(EndpointStateKey::EndpointId(endpoint_id)) - .and_then(|ep| ep.latency()) - } - - fn handle_pong( - &mut self, - sender: EndpointId, - src: &transports::Addr, - pong: Pong, - metrics: &Metrics, - ) { - if let Some(ns) = self.get_mut(EndpointStateKey::EndpointId(sender)).as_mut() { - let insert = ns.handle_pong(&pong, src.clone().into(), metrics); - if let Some((src, key)) = insert { - self.set_endpoint_key_for_ip_port(src, &key); - } - trace!(?insert, "received pong") - } else { - warn!("received pong: endpoint unknown, ignore") - } - } - - #[must_use = "actions must be handled"] - fn handle_call_me_maybe( - &mut self, - sender: EndpointId, - cm: CallMeMaybe, - metrics: &Metrics, - ) -> Vec { - let ns_id = EndpointStateKey::EndpointId(sender); - if let Some(id) = self.get_id(ns_id.clone()) { - for number in &cm.my_numbers { - // ensure the new addrs are known - self.set_endpoint_state_for_ip_port(*number, id); - } - } - match self.get_mut(ns_id) { - None => { - debug!("received call-me-maybe: ignore, endpoint is unknown"); - metrics.recv_disco_call_me_maybe_bad_disco.inc(); - vec![] - } - Some(ns) => { - debug!(endpoints = ?cm.my_numbers, "received call-me-maybe"); - - ns.handle_call_me_maybe(cm, metrics) - } - } - } - - fn handle_ping( - &mut self, - sender: EndpointId, - src: SendAddr, - tx_id: TransactionId, - ) -> PingHandled { - #[cfg(any(test, feature = "test-utils"))] - let path_selection = self.path_selection; - let endpoint_state = self.get_or_insert_with(EndpointStateKey::EndpointId(sender), || { - debug!("received ping: endpoint unknown, add to endpoint map"); - let source = if src.is_relay() { - Source::Relay - } else { - Source::Udp - }; - Options { - endpoint_id: sender, - relay_url: src.relay_url(), - active: true, - source, - #[cfg(any(test, feature = "test-utils"))] - path_selection, - } - }); - - let handled = endpoint_state.handle_ping(src.clone(), tx_id); - if let SendAddr::Udp(ref addr) = src { - if matches!(handled.role, PingRole::NewPath) { - self.set_endpoint_key_for_ip_port(*addr, &sender); - } - } - handled - } - - /// Inserts a new endpoint into the [`EndpointMap`]. - fn insert_endpoint(&mut self, options: Options) -> &mut EndpointState { - info!( - endpoint = %options.endpoint_id.fmt_short(), - relay_url = ?options.relay_url, - source = %options.source, - "inserting new endpoint in EndpointMap", - ); - let id = self.next_id; - self.next_id = self.next_id.wrapping_add(1); - let endpoint_state = EndpointState::new(id, options); - - // update indices - self.by_quic_mapped_addr - .insert(*endpoint_state.quic_mapped_addr(), id); - self.by_endpoint_key - .insert(*endpoint_state.public_key(), id); - - self.by_id.insert(id, endpoint_state); - self.by_id.get_mut(&id).expect("just inserted") - } - - /// Makes future endpoint lookups by ipp return the same endpoint as a lookup by nk. - /// - /// This should only be called with a fully verified mapping of ipp to - /// nk, because calling this function defines the endpoint we hand to - /// WireGuard for packets received from ipp. 
- fn set_endpoint_key_for_ip_port(&mut self, ipp: impl Into, nk: &PublicKey) { - let ipp = ipp.into(); - if let Some(id) = self.by_ip_port.get(&ipp) { - if !self.by_endpoint_key.contains_key(nk) { - self.by_endpoint_key.insert(*nk, *id); - } - self.by_ip_port.remove(&ipp); - } - if let Some(id) = self.by_endpoint_key.get(nk) { - trace!("insert ip -> id: {:?} -> {}", ipp, id); - self.by_ip_port.insert(ipp, *id); - } - } - - fn set_endpoint_state_for_ip_port(&mut self, ipp: impl Into, id: usize) { - let ipp = ipp.into(); - trace!(?ipp, ?id, "set endpoint for ip:port"); - self.by_ip_port.insert(ipp, id); - } - - /// Prunes endpoints without recent activity so that at most [`MAX_INACTIVE_ENDPOINTS`] are kept. - fn prune_inactive(&mut self) { - let now = Instant::now(); - let mut prune_candidates: Vec<_> = self - .by_id - .values() - .filter(|endpoint| !endpoint.is_active(&now)) - .map(|endpoint| (*endpoint.public_key(), endpoint.last_used())) - .collect(); - - let prune_count = prune_candidates - .len() - .saturating_sub(MAX_INACTIVE_ENDPOINTS); - if prune_count == 0 { - // within limits - return; - } - - prune_candidates.sort_unstable_by_key(|(_pk, last_used)| *last_used); - prune_candidates.truncate(prune_count); - for (public_key, last_used) in prune_candidates.into_iter() { - let endpoint = public_key.fmt_short(); - match last_used.map(|instant| instant.elapsed()) { - Some(last_used) => trace!(%endpoint, ?last_used, "pruning inactive"), - None => trace!(%endpoint, last_used=%"never", "pruning inactive"), - } - - let Some(id) = self.by_endpoint_key.remove(&public_key) else { - debug_assert!(false, "missing by_endpoint_key entry for pk in by_id"); - continue; - }; - - let Some(ep) = self.by_id.remove(&id) else { - debug_assert!(false, "missing by_id entry for id in by_endpoint_key"); - continue; - }; - - for ip_port in ep.ip_addrs() { - self.by_ip_port.remove(&ip_port); - } - - self.by_quic_mapped_addr.remove(ep.quic_mapped_addr()); - } - } -} - -/// An (Ip, Port) pair. -/// -/// NOTE: storing an [`IpPort`] is safer than storing a [`SocketAddr`] because for IPv6 socket -/// addresses include fields that can't be assumed consistent even within a single connection. -#[derive(Debug, derive_more::Display, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] -#[display("{}", SocketAddr::from(*self))] -pub struct IpPort { - ip: IpAddr, - port: u16, -} - -impl From for IpPort { - fn from(socket_addr: SocketAddr) -> Self { - Self { - ip: socket_addr.ip(), - port: socket_addr.port(), - } - } -} - -impl From for SocketAddr { - fn from(ip_port: IpPort) -> Self { - let IpPort { ip, port } = ip_port; - (ip, port).into() - } -} - -impl IpPort { - pub fn ip(&self) -> &IpAddr { - &self.ip - } - - pub fn port(&self) -> u16 { - self.port - } -} - -#[cfg(test)] -mod tests { - use std::net::Ipv4Addr; - - use iroh_base::{SecretKey, TransportAddr}; - use rand::SeedableRng; - use tracing_test::traced_test; - - use super::{endpoint_state::MAX_INACTIVE_DIRECT_ADDRESSES, *}; - - impl EndpointMap { - #[track_caller] - fn add_test_addr(&self, endpoint_addr: EndpointAddr) { - self.add_endpoint_addr( - endpoint_addr, - Source::NamedApp { - name: "test".into(), - }, - true, - &Default::default(), - ) - } - } - - /// Test persisting and loading of known endpoints. 
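// Illustrative sketch (not part of this patch), expanding on the NOTE above the
// removed `IpPort` type: for IPv6, `SocketAddr` equality also compares
// `flowinfo` and `scope_id`, so the same ip:port seen with a different scope id
// would not match as a map key, while an `IpPort` built from it would. The
// `IpPort` below is a standalone re-statement for the example, not the crate's type.

use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6};

#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
struct IpPort {
    ip: IpAddr,
    port: u16,
}

impl From<SocketAddr> for IpPort {
    fn from(addr: SocketAddr) -> Self {
        Self { ip: addr.ip(), port: addr.port() }
    }
}

fn main() {
    let a = SocketAddr::V6(SocketAddrV6::new(Ipv6Addr::LOCALHOST, 4433, 0, 0));
    let b = SocketAddr::V6(SocketAddrV6::new(Ipv6Addr::LOCALHOST, 4433, 0, 7)); // different scope_id
    assert_ne!(a, b);                             // SocketAddr keys would diverge
    assert_eq!(IpPort::from(a), IpPort::from(b)); // IpPort keys stay consistent
}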
- #[tokio::test] - #[traced_test] - async fn restore_from_vec() { - let endpoint_map = EndpointMap::default(); - - let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0u64); - let endpoint_a = SecretKey::generate(&mut rng).public(); - let endpoint_b = SecretKey::generate(&mut rng).public(); - let endpoint_c = SecretKey::generate(&mut rng).public(); - let endpoint_d = SecretKey::generate(&mut rng).public(); - - let relay_x: RelayUrl = "https://my-relay-1.com".parse().unwrap(); - let relay_y: RelayUrl = "https://my-relay-2.com".parse().unwrap(); - - let ip_addresses_a = [TransportAddr::Ip(addr(4000)), TransportAddr::Ip(addr(4001))]; - let ip_addresses_c = [TransportAddr::Ip(addr(5000))]; - - let addrs_a = std::iter::once(TransportAddr::Relay(relay_x)).chain(ip_addresses_a); - let endpoint_addr_a = EndpointAddr::new(endpoint_a).with_addrs(addrs_a); - let endpoint_addr_b = EndpointAddr::new(endpoint_b).with_relay_url(relay_y); - let endpoint_addr_c = EndpointAddr::new(endpoint_c).with_addrs(ip_addresses_c); - let endpoint_addr_d = EndpointAddr::new(endpoint_d); - - endpoint_map.add_test_addr(endpoint_addr_a); - endpoint_map.add_test_addr(endpoint_addr_b); - endpoint_map.add_test_addr(endpoint_addr_c); - endpoint_map.add_test_addr(endpoint_addr_d); - - let mut addrs: Vec = endpoint_map - .list_remote_infos(Instant::now()) - .into_iter() - .filter_map(|info| { - let addr: EndpointAddr = info.into(); - if addr.is_empty() { - return None; - } - Some(addr) - }) - .collect(); - let loaded_endpoint_map = EndpointMap::load_from_vec( - addrs.clone(), - PathSelection::default(), - true, - &Default::default(), - ); - - let mut loaded: Vec = loaded_endpoint_map - .list_remote_infos(Instant::now()) - .into_iter() - .filter_map(|info| { - let addr: EndpointAddr = info.into(); - if addr.is_empty() { - return None; - } - Some(addr) - }) - .collect(); - - loaded.sort_unstable(); - addrs.sort_unstable(); - - // compare the endpoint maps via their known endpoints - assert_eq!(addrs, loaded); - } - - fn addr(port: u16) -> SocketAddr { - (std::net::IpAddr::V4(Ipv4Addr::LOCALHOST), port).into() - } - - #[test] - #[traced_test] - fn test_prune_direct_addresses() { - let endpoint_map = EndpointMap::default(); - let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0u64); - let public_key = SecretKey::generate(&mut rng).public(); - let id = endpoint_map - .inner - .lock() - .unwrap() - .insert_endpoint(Options { - endpoint_id: public_key, - relay_url: None, - active: false, - source: Source::NamedApp { - name: "test".into(), - }, - path_selection: PathSelection::default(), - }) - .id(); - - const LOCALHOST: IpAddr = IpAddr::V4(std::net::Ipv4Addr::LOCALHOST); - - // add [`MAX_INACTIVE_DIRECT_ADDRESSES`] active direct addresses and double - // [`MAX_INACTIVE_DIRECT_ADDRESSES`] that are inactive - - info!("Adding active addresses"); - for i in 0..MAX_INACTIVE_DIRECT_ADDRESSES { - let addr = SocketAddr::new(LOCALHOST, 5000 + i as u16); - let endpoint_addr = EndpointAddr::new(public_key).with_ip_addr(addr); - // add address - endpoint_map.add_test_addr(endpoint_addr); - // make it active - endpoint_map.inner.lock().unwrap().receive_udp(addr); - } - - info!("Adding offline/inactive addresses"); - for i in 0..MAX_INACTIVE_DIRECT_ADDRESSES * 2 { - let addr = SocketAddr::new(LOCALHOST, 6000 + i as u16); - let endpoint_addr = EndpointAddr::new(public_key).with_ip_addr(addr); - endpoint_map.add_test_addr(endpoint_addr); - } - - let mut endpoint_map_inner = endpoint_map.inner.lock().unwrap(); - let endpoint = 
endpoint_map_inner.by_id.get_mut(&id).unwrap(); - - info!("Adding alive addresses"); - for i in 0..MAX_INACTIVE_DIRECT_ADDRESSES { - let addr = SendAddr::Udp(SocketAddr::new(LOCALHOST, 7000 + i as u16)); - let txid = TransactionId::from([i as u8; 12]); - // Note that this already invokes .prune_direct_addresses() because these are - // new UDP paths. - endpoint.handle_ping(addr, txid); - } - - info!("Pruning addresses"); - endpoint.prune_direct_addresses(Instant::now()); - - // Half the offline addresses should have been pruned. All the active and alive - // addresses should have been kept. - assert_eq!( - endpoint.ip_addrs().count(), - MAX_INACTIVE_DIRECT_ADDRESSES * 3 - ); - - // We should have both offline and alive addresses which are not active. - assert_eq!( - endpoint - .ip_addr_states() - .filter(|(_addr, state)| !state.is_active()) - .count(), - MAX_INACTIVE_DIRECT_ADDRESSES * 2 - ) - } - - #[test] - fn test_prune_inactive() { - let endpoint_map = EndpointMap::default(); - let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0u64); - - // add one active endpoint and more than MAX_INACTIVE_ENDPOINTS inactive endpoints - let active_endpoint = SecretKey::generate(&mut rng).public(); - let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 167); - endpoint_map.add_test_addr(EndpointAddr::new(active_endpoint).with_ip_addr(addr)); - endpoint_map - .inner - .lock() - .unwrap() - .receive_udp(addr) - .expect("registered"); - - for _ in 0..MAX_INACTIVE_ENDPOINTS + 1 { - let endpoint = SecretKey::generate(&mut rng).public(); - endpoint_map.add_test_addr(EndpointAddr::new(endpoint)); - } - - assert_eq!(endpoint_map.endpoint_count(), MAX_INACTIVE_ENDPOINTS + 2); - endpoint_map.prune_inactive(); - assert_eq!(endpoint_map.endpoint_count(), MAX_INACTIVE_ENDPOINTS + 1); - endpoint_map - .inner - .lock() - .unwrap() - .get(EndpointStateKey::EndpointId(active_endpoint)) - .expect("should not be pruned"); - } -} diff --git a/iroh/src/magicsock/endpoint_map/endpoint_state.rs b/iroh/src/magicsock/endpoint_map/endpoint_state.rs deleted file mode 100644 index ab55c0a6093..00000000000 --- a/iroh/src/magicsock/endpoint_map/endpoint_state.rs +++ /dev/null @@ -1,1746 +0,0 @@ -use std::{ - collections::{BTreeSet, HashMap, btree_map::Entry}, - hash::Hash, - net::{IpAddr, SocketAddr}, - sync::atomic::AtomicBool, -}; - -use data_encoding::HEXLOWER; -use iroh_base::{EndpointAddr, EndpointId, PublicKey, RelayUrl, TransportAddr}; -use n0_future::{ - task::{self, AbortOnDropHandle}, - time::{self, Duration, Instant}, -}; -use n0_watcher::Watchable; -use serde::{Deserialize, Serialize}; -use tokio::sync::mpsc; -use tracing::{Level, debug, event, info, instrument, trace, warn}; - -use super::{ - IpPort, Source, - path_state::{PathState, summarize_endpoint_paths}, - udp_paths::{EndpointUdpPaths, UdpSendAddr}, -}; -#[cfg(any(test, feature = "test-utils"))] -use crate::endpoint::PathSelection; -use crate::{ - disco::{self, SendAddr, TransactionId}, - magicsock::{ - ActorMessage, EndpointIdMappedAddr, HEARTBEAT_INTERVAL, MagicsockMetrics, - endpoint_map::path_validity::PathValidity, - }, -}; - -/// Number of addresses that are not active that we keep around per endpoint. -/// -/// See [`EndpointState::prune_direct_addresses`]. -pub(super) const MAX_INACTIVE_DIRECT_ADDRESSES: usize = 20; - -/// How long since an endpoint path was last alive before it might be pruned. -const LAST_ALIVE_PRUNE_DURATION: Duration = Duration::from_secs(120); - -/// How long we wait for a pong reply before assuming it's never coming. 
-const PING_TIMEOUT_DURATION: Duration = Duration::from_secs(5);
-
-/// The latency at or under which we don't try to upgrade to a better path.
-const GOOD_ENOUGH_LATENCY: Duration = Duration::from_millis(5);
-
-/// How long after the last activity we try to keep an established endpoint peering alive.
-/// It's also the idle time at which we stop doing QAD queries to keep NAT mappings alive.
-pub(super) const SESSION_ACTIVE_TIMEOUT: Duration = Duration::from_secs(45);
-
-/// How often we try to upgrade to a better path, even if we have some non-relay route that works.
-const UPGRADE_INTERVAL: Duration = Duration::from_secs(60);
-
-/// How long until we send a stayin alive ping.
-const STAYIN_ALIVE_MIN_ELAPSED: Duration = Duration::from_secs(2);
-
-#[derive(Debug)]
-pub(in crate::magicsock) enum PingAction {
-    SendCallMeMaybe {
-        relay_url: RelayUrl,
-        dst_endpoint: EndpointId,
-    },
-    SendPing(SendPing),
-}
-
-#[derive(Debug)]
-pub(in crate::magicsock) struct SendPing {
-    pub id: usize,
-    pub dst: SendAddr,
-    pub dst_endpoint: EndpointId,
-    pub tx_id: TransactionId,
-    pub purpose: DiscoPingPurpose,
-}
-
-/// Indicates that an [`EndpointState`] has handled a ping.
-#[derive(Debug)]
-pub struct PingHandled {
-    /// What this ping did to the [`EndpointState`].
-    pub role: PingRole,
-    /// Whether the sender path should also be pinged.
-    ///
-    /// This is the case if an [`EndpointState`] does not yet have a direct path, i.e. it has no
-    /// best_addr. In this case we want to ping right back to open the direct path in this
-    /// direction as well.
-    pub needs_ping_back: Option<SendPing>,
-}
-
-#[derive(Debug)]
-pub enum PingRole {
-    Duplicate,
-    NewPath,
-    LikelyHeartbeat,
-    Activate,
-}
-
-/// An iroh endpoint, which we can have connections with.
-///
-/// The whole point of the magicsock is that we can have multiple **paths** to a particular
-/// endpoint. One of these paths is via the endpoint's home relay, but as we establish a
-/// connection we'll hopefully discover more direct paths.
-#[derive(Debug)]
-pub(super) struct EndpointState {
-    /// The ID used as index in the [`EndpointMap`].
-    ///
-    /// [`EndpointMap`]: super::EndpointMap
-    id: usize,
-    /// The UDP address used on the QUIC-layer to address this endpoint.
-    quic_mapped_addr: EndpointIdMappedAddr,
-    /// The global identifier for this endpoint.
-    endpoint_id: EndpointId,
-    /// The last time we pinged all endpoints.
-    last_full_ping: Option<Instant>,
-    /// The URL of the relay server we can use to communicate with this endpoint.
-    ///
-    /// The fallback/bootstrap path, if set (set for well-behaved clients).
-    relay_url: Option<(RelayUrl, PathState)>,
-    udp_paths: EndpointUdpPaths,
-    sent_pings: HashMap<TransactionId, SentPing>,
-    /// Last time this endpoint was used.
-    ///
-    /// An endpoint is marked as in use when sending datagrams to it, or when having received
-    /// datagrams from it, regardless of whether the datagrams are payload or DISCO, and whether
-    /// they go via UDP or the relay.
-    ///
-    /// Note that sending datagrams to an endpoint does not mean the endpoint receives them.
-    last_used: Option<Instant>,
-    /// Last time we sent a call-me-maybe.
-    ///
-    /// When we do not have a direct connection and we try to send some data, we will try to
-    /// do a full ping + call-me-maybe. Usually each side only needs to send one
-    /// call-me-maybe to the other for holes to be punched in both directions, however. So
-    /// we only try and send one per [`HEARTBEAT_INTERVAL`].
Each [`HEARTBEAT_INTERVAL`] - /// the [`EndpointState::stayin_alive`] function is called, which will trigger new - /// call-me-maybe messages as backup. - last_call_me_maybe: Option, - /// The type of connection we have to the endpoint, either direct, relay, mixed, or none. - conn_type: Watchable, - /// Whether the conn_type was ever observed to be `Direct` at some point. - /// - /// Used for metric reporting. - has_been_direct: AtomicBool, - /// Configuration for what path selection to use - #[cfg(any(test, feature = "test-utils"))] - path_selection: PathSelection, -} - -/// Options for creating a new [`EndpointState`]. -#[derive(Debug)] -pub(super) struct Options { - pub(super) endpoint_id: EndpointId, - pub(super) relay_url: Option, - /// Is this endpoint currently active (sending data)? - pub(super) active: bool, - pub(super) source: super::Source, - #[cfg(any(test, feature = "test-utils"))] - pub(super) path_selection: PathSelection, -} - -impl EndpointState { - pub(super) fn new(id: usize, options: Options) -> Self { - let quic_mapped_addr = EndpointIdMappedAddr::generate(); - - // TODO(frando): I don't think we need to track the `num_relay_conns_added` - // metric here. We do so in `Self::addr_for_send`. - // if options.relay_url.is_some() { - // // we potentially have a relay connection to the endpoint - // inc!(MagicsockMetrics, num_relay_conns_added); - // } - - let now = Instant::now(); - - EndpointState { - id, - quic_mapped_addr, - endpoint_id: options.endpoint_id, - last_full_ping: None, - relay_url: options.relay_url.map(|url| { - ( - url.clone(), - PathState::new( - options.endpoint_id, - SendAddr::Relay(url), - options.source, - now, - ), - ) - }), - udp_paths: EndpointUdpPaths::new(), - sent_pings: HashMap::new(), - last_used: options.active.then(Instant::now), - last_call_me_maybe: None, - conn_type: Watchable::new(ConnectionType::None), - has_been_direct: AtomicBool::new(false), - #[cfg(any(test, feature = "test-utils"))] - path_selection: options.path_selection, - } - } - - pub(super) fn public_key(&self) -> &PublicKey { - &self.endpoint_id - } - - pub(super) fn quic_mapped_addr(&self) -> &EndpointIdMappedAddr { - &self.quic_mapped_addr - } - - pub(super) fn id(&self) -> usize { - self.id - } - - pub(super) fn conn_type(&self) -> n0_watcher::Direct { - self.conn_type.watch() - } - - pub(super) fn latency(&self) -> Option { - match self.conn_type.get() { - ConnectionType::Direct(addr) => self - .udp_paths - .paths() - .get(&addr.into()) - .and_then(|state| state.latency()), - ConnectionType::Relay(ref url) => self - .relay_url - .as_ref() - .filter(|(relay_url, _)| relay_url == url) - .and_then(|(_, state)| state.latency()), - ConnectionType::Mixed(addr, ref url) => { - let addr_latency = self - .udp_paths - .paths() - .get(&addr.into()) - .and_then(|state| state.latency()); - let relay_latency = self - .relay_url - .as_ref() - .filter(|(relay_url, _)| relay_url == url) - .and_then(|(_, state)| state.latency()); - addr_latency.min(relay_latency) - } - ConnectionType::None => None, - } - } - - /// Returns info about this endpoint. 
- pub(super) fn info(&self, now: Instant) -> RemoteInfo { - let conn_type = self.conn_type.get(); - let latency = self.latency(); - - let addrs = self - .udp_paths - .paths() - .iter() - .map(|(addr, path_state)| DirectAddrInfo { - addr: SocketAddr::from(*addr), - latency: path_state.validity.latency(), - last_control: path_state.last_control_msg(now), - last_payload: path_state - .last_payload_msg - .as_ref() - .map(|instant| now.duration_since(*instant)), - last_alive: path_state - .last_alive() - .map(|instant| now.duration_since(instant)), - sources: path_state - .sources - .iter() - .map(|(source, instant)| (source.clone(), now.duration_since(*instant))) - .collect(), - }) - .collect(); - - RemoteInfo { - endpoint_id: self.endpoint_id, - relay_url: self.relay_url.clone().map(|r| r.into()), - addrs, - conn_type, - latency, - last_used: self.last_used.map(|instant| now.duration_since(instant)), - } - } - - /// Returns the relay url of this endpoint - pub(super) fn relay_url(&self) -> Option { - self.relay_url.as_ref().map(|(url, _state)| url.clone()) - } - - /// Returns the address(es) that should be used for sending the next packet. - /// - /// This may return to send on one, both or no paths. - fn addr_for_send( - &self, - have_ipv6: bool, - metrics: &MagicsockMetrics, - ) -> (Option, Option) { - #[cfg(any(test, feature = "test-utils"))] - if self.path_selection == PathSelection::RelayOnly { - debug!( - "in `RelayOnly` mode, giving the relay address as the only viable address for this endpoint" - ); - return (None, self.relay_url()); - } - let (best_addr, relay_url) = match self.udp_paths.send_addr(have_ipv6) { - UdpSendAddr::Valid(addr) => { - // If we have a valid address we use it. - trace!(%addr, ?have_ipv6, "UdpSendAddr is valid, use it"); - (Some(*addr), None) - } - UdpSendAddr::Outdated(addr) => { - // If the address is outdated we use it, but send via relay at the same time. - // We also send disco pings so that it will become valid again if it still - // works (i.e. we don't need to holepunch again). - trace!(%addr, ?have_ipv6, "UdpSendAddr is outdated, use it together with relay"); - (Some(*addr), self.relay_url()) - } - UdpSendAddr::Unconfirmed(addr) => { - trace!(%addr, ?have_ipv6, "UdpSendAddr is unconfirmed, use it together with relay"); - (Some(*addr), self.relay_url()) - } - UdpSendAddr::None => { - trace!(?have_ipv6, "No UdpSendAddr, use relay"); - (None, self.relay_url()) - } - }; - let typ = match (best_addr, relay_url.clone()) { - (Some(best_addr), Some(relay_url)) => ConnectionType::Mixed(best_addr, relay_url), - (Some(best_addr), None) => ConnectionType::Direct(best_addr), - (None, Some(relay_url)) => ConnectionType::Relay(relay_url), - (None, None) => ConnectionType::None, - }; - if matches!(&typ, ConnectionType::Direct(_)) { - let before = self - .has_been_direct - .swap(true, std::sync::atomic::Ordering::Relaxed); - if !before { - metrics.endpoints_contacted_directly.inc(); - } - } - if let Ok(prev_typ) = self.conn_type.set(typ.clone()) { - // The connection type has changed. 
- event!( - target: "iroh::_events::conn_type::changed", - Level::DEBUG, - remote_endpoint = %self.endpoint_id.fmt_short(), - conn_type = ?typ, - ); - info!(%typ, "new connection type"); - - // Update some metrics - match (prev_typ, typ) { - (ConnectionType::Relay(_), ConnectionType::Direct(_)) - | (ConnectionType::Mixed(_, _), ConnectionType::Direct(_)) => { - metrics.num_direct_conns_added.inc(); - metrics.num_relay_conns_removed.inc(); - } - (ConnectionType::Direct(_), ConnectionType::Relay(_)) - | (ConnectionType::Direct(_), ConnectionType::Mixed(_, _)) => { - metrics.num_direct_conns_removed.inc(); - metrics.num_relay_conns_added.inc(); - } - (ConnectionType::None, ConnectionType::Direct(_)) => { - metrics.num_direct_conns_added.inc(); - } - (ConnectionType::Direct(_), ConnectionType::None) => { - metrics.num_direct_conns_removed.inc(); - } - (ConnectionType::None, ConnectionType::Relay(_)) - | (ConnectionType::None, ConnectionType::Mixed(_, _)) => { - metrics.num_relay_conns_added.inc(); - } - (ConnectionType::Relay(_), ConnectionType::None) - | (ConnectionType::Mixed(_, _), ConnectionType::None) => { - metrics.num_relay_conns_removed.inc(); - } - _ => (), - } - } - (best_addr, relay_url) - } - - /// Removes a direct address for this endpoint. - /// - /// If this is also the best address, it will be cleared as well. - pub(super) fn remove_direct_addr(&mut self, ip_port: &IpPort, now: Instant, why: &'static str) { - let Some(state) = self.udp_paths.access_mut(now).paths().remove(ip_port) else { - return; - }; - - match state.last_alive().map(|instant| instant.elapsed()) { - Some(last_alive) => debug!(%ip_port, ?last_alive, why, "pruning address"), - None => debug!(%ip_port, last_seen=%"never", why, "pruning address"), - } - } - - /// Whether we need to send another call-me-maybe to the endpoint. - /// - /// Basically we need to send a call-me-maybe if we need to find a better path. Maybe - /// we only have a relay path, or our path is expired. - /// - /// When a call-me-maybe message is sent we also need to send pings to all known paths - /// of the endpoint. The [`EndpointState::send_call_me_maybe`] function takes care of this. - #[cfg(not(wasm_browser))] - #[instrument("want_call_me_maybe", skip_all)] - fn want_call_me_maybe(&self, now: &Instant, have_ipv6: bool) -> bool { - trace!("full ping: wanted?"); - let Some(last_full_ping) = self.last_full_ping else { - debug!("no previous full ping: need full ping"); - return true; - }; - match &self.udp_paths.send_addr(have_ipv6) { - UdpSendAddr::None | UdpSendAddr::Unconfirmed(_) => { - debug!("best addr not set: need full ping"); - true - } - UdpSendAddr::Outdated(_) => { - debug!("best addr expired: need full ping"); - true - } - UdpSendAddr::Valid(addr) => { - let latency = self - .udp_paths - .paths() - .get(&(*addr).into()) - .expect("send path not tracked?") - .latency() - .expect("send_addr marked valid incorrectly"); - if latency > GOOD_ENOUGH_LATENCY && *now - last_full_ping >= UPGRADE_INTERVAL { - debug!( - "full ping interval expired and latency is only {}ms: need full ping", - latency.as_millis() - ); - true - } else { - trace!(?now, "best_addr valid: not needed"); - false - } - } - } - } - - #[cfg(wasm_browser)] - fn want_call_me_maybe(&self, _now: &Instant, _have_ipv6: bool) -> bool { - trace!("full ping: skipped in browser"); - false - } - - /// Cleanup the expired ping for the passed in txid. 
- #[instrument("disco", skip_all, fields(endpoint = %self.endpoint_id.fmt_short()))] - pub(super) fn ping_timeout( - &mut self, - txid: TransactionId, - now: Instant, - metrics: &MagicsockMetrics, - ) { - if let Some(sp) = self.sent_pings.remove(&txid) { - debug!(tx = %HEXLOWER.encode(&txid), addr = %sp.to, "pong not received in timeout"); - match sp.to { - SendAddr::Udp(addr) => { - if let Some(path_state) = - self.udp_paths.access_mut(now).paths().get_mut(&addr.into()) - { - path_state.last_ping = None; - let consider_alive = path_state - .last_alive() - .map(|last_alive| last_alive.elapsed() <= PING_TIMEOUT_DURATION) - .unwrap_or(false); - if !consider_alive { - // If there was no sign of life from this path during the time - // which we should have received the pong, clear best addr and - // pong. Both are used to select this path again, but we know - // it's not a usable path now. - path_state.validity = PathValidity::empty(); - metrics.path_ping_failures.inc(); - - path_state.validity.record_metrics(metrics); - metrics.path_marked_outdated.inc(); - } - } - } - SendAddr::Relay(ref url) => { - if let Some((home_relay, relay_state)) = self.relay_url.as_mut() { - if home_relay == url { - // lost connectivity via relay - relay_state.last_ping = None; - } - } - } - } - } - } - - #[must_use = "pings must be handled"] - fn start_ping(&self, dst: SendAddr, purpose: DiscoPingPurpose) -> Option { - #[cfg(any(test, feature = "test-utils"))] - if self.path_selection == PathSelection::RelayOnly && !dst.is_relay() { - // don't attempt any hole punching in relay only mode - warn!("in `RelayOnly` mode, ignoring request to start a hole punching attempt."); - return None; - } - #[cfg(wasm_browser)] - if !dst.is_relay() { - return None; // Similar to `RelayOnly` mode, we don't send UDP pings for hole-punching. - } - - let tx_id = TransactionId::default(); - trace!(tx = %HEXLOWER.encode(&tx_id), %dst, ?purpose, - dst = %self.endpoint_id.fmt_short(), "start ping"); - event!( - target: "iroh::_events::ping::sent", - Level::DEBUG, - remote_endpoint = %self.endpoint_id.fmt_short(), - ?dst, - txn = ?tx_id, - ?purpose, - ); - Some(SendPing { - id: self.id, - dst, - dst_endpoint: self.endpoint_id, - tx_id, - purpose, - }) - } - - /// Record the fact that a ping has been sent out. - pub(super) fn ping_sent( - &mut self, - to: SendAddr, - tx_id: TransactionId, - purpose: DiscoPingPurpose, - sender: mpsc::Sender, - ) { - trace!(%to, tx = %HEXLOWER.encode(&tx_id), ?purpose, "record ping sent"); - - let now = Instant::now(); - let mut path_found = false; - match to { - SendAddr::Udp(addr) => { - if let Some(st) = self.udp_paths.access_mut(now).paths().get_mut(&addr.into()) { - st.last_ping.replace(now); - st.validity.record_ping_sent(); - path_found = true - } - } - SendAddr::Relay(ref url) => { - if let Some((home_relay, relay_state)) = self.relay_url.as_mut() { - if home_relay == url { - relay_state.last_ping.replace(now); - path_found = true - } - } - } - } - if !path_found { - // Shouldn't happen. But don't ping an endpoint that's not active for us. 
- warn!(%to, ?purpose, "unexpected attempt to ping no longer live path"); - return; - } - - let id = self.id; - let _expiry_task = AbortOnDropHandle::new(task::spawn(async move { - time::sleep(PING_TIMEOUT_DURATION).await; - sender - .send(ActorMessage::EndpointPingExpired(id, tx_id)) - .await - .ok(); - })); - self.sent_pings.insert( - tx_id, - SentPing { - to, - at: now, - purpose, - _expiry_task, - }, - ); - } - - /// Send a DISCO call-me-maybe message to the peer. - /// - /// This takes care of sending the needed pings beforehand. This ensures that we open - /// our firewall's port so that when the receiver sends us DISCO pings in response to - /// our call-me-maybe they will reach us and the other side establishes a direct - /// connection upon our subsequent pong response. - /// - /// For [`SendCallMeMaybe::IfNoRecent`], **no** paths will be pinged if there already - /// was a recent call-me-maybe sent. - /// - /// The caller is responsible for sending the messages. - #[must_use = "actions must be handled"] - fn send_call_me_maybe(&mut self, now: Instant, always: SendCallMeMaybe) -> Vec { - match always { - SendCallMeMaybe::Always => (), - SendCallMeMaybe::IfNoRecent => { - let had_recent_call_me_maybe = self - .last_call_me_maybe - .map(|when| when.elapsed() < HEARTBEAT_INTERVAL) - .unwrap_or(false); - if had_recent_call_me_maybe { - trace!("skipping call-me-maybe, still recent"); - return Vec::new(); - } - } - } - // We send pings regardless of whether we have a RelayUrl. If we were given any - // direct address paths to contact but no RelayUrl, we still need to send a DISCO - // ping to the direct address paths so that the other endpoint will learn about us and - // accepts the connection. - let mut msgs = self.send_pings(now); - - if let Some(url) = self.relay_url() { - debug!(%url, "queue call-me-maybe"); - msgs.push(PingAction::SendCallMeMaybe { - relay_url: url, - dst_endpoint: self.endpoint_id, - }); - self.last_call_me_maybe = Some(now); - } else { - debug!("can not send call-me-maybe, no relay URL"); - } - - msgs - } - - /// Send DISCO Pings to all the paths of this endpoint. - /// - /// Any paths to the endpoint which have not been recently pinged will be sent a disco - /// ping. - /// - /// The caller is responsible for sending the messages. - #[must_use = "actions must be handled"] - fn send_pings(&mut self, now: Instant) -> Vec { - // We allocate +1 in case the caller wants to add a call-me-maybe message. 
- let mut ping_msgs = Vec::with_capacity(self.udp_paths.paths().len() + 1); - - if let Some((url, state)) = self.relay_url.as_ref() { - if state.needs_ping(&now) { - debug!(%url, "relay path needs ping"); - if let Some(msg) = - self.start_ping(SendAddr::Relay(url.clone()), DiscoPingPurpose::Discovery) - { - ping_msgs.push(PingAction::SendPing(msg)) - } - } - } - - #[cfg(any(test, feature = "test-utils"))] - if self.path_selection == PathSelection::RelayOnly { - warn!("in `RelayOnly` mode, ignoring request to respond to a hole punching attempt."); - return ping_msgs; - } - - self.prune_direct_addresses(now); - let mut ping_dsts = String::from("["); - self.udp_paths - .paths() - .iter() - .filter_map(|(ipp, state)| state.needs_ping(&now).then_some(*ipp)) - .filter_map(|ipp| { - self.start_ping(SendAddr::Udp(ipp.into()), DiscoPingPurpose::Discovery) - }) - .for_each(|msg| { - use std::fmt::Write; - write!(&mut ping_dsts, " {} ", msg.dst).ok(); - ping_msgs.push(PingAction::SendPing(msg)); - }); - ping_dsts.push(']'); - debug!( - %ping_dsts, - dst = %self.endpoint_id.fmt_short(), - paths = %summarize_endpoint_paths(self.udp_paths.paths()), - "sending pings to endpoint", - ); - self.last_full_ping.replace(now); - ping_msgs - } - - pub(super) fn update_from_endpoint_addr( - &mut self, - new_relay_url: Option<&RelayUrl>, - new_addrs: impl Iterator, - source: super::Source, - have_ipv6: bool, - metrics: &MagicsockMetrics, - ) { - if matches!( - self.udp_paths.send_addr(have_ipv6), - UdpSendAddr::None | UdpSendAddr::Unconfirmed(_) - ) { - // we do not have a direct connection, so changing the relay information may - // have an effect on our connection status - if self.relay_url.is_none() && new_relay_url.is_some() { - // we did not have a relay connection before, but now we do - metrics.num_relay_conns_added.inc(); - } else if self.relay_url.is_some() && new_relay_url.is_none() { - // we had a relay connection before but do not have one now - metrics.num_relay_conns_removed.inc(); - } - } - - let now = Instant::now(); - - if new_relay_url.is_some() && new_relay_url != self.relay_url().as_ref() { - debug!( - "Changing relay endpoint from {:?} to {:?}", - self.relay_url, new_relay_url - ); - self.relay_url = new_relay_url.map(|url| { - ( - url.clone(), - PathState::new(self.endpoint_id, url.clone().into(), source.clone(), now), - ) - }); - } - - let mut access = self.udp_paths.access_mut(now); - let mut new_addrs_list = Vec::new(); - for addr in new_addrs { - access - .paths() - .entry(addr.into()) - .and_modify(|path_state| { - path_state.add_source(source.clone(), now); - }) - .or_insert_with(|| { - PathState::new(self.endpoint_id, SendAddr::from(addr), source.clone(), now) - }); - new_addrs_list.push(addr); - } - drop(access); - let paths = summarize_endpoint_paths(self.udp_paths.paths()); - debug!(new = ?new_addrs_list , %paths, "added new direct paths for endpoint"); - } - - /// Handle a received Disco Ping. - /// - /// - Ensures the paths the ping was received on is a known path for this endpoint. - /// - /// - If there is no best_addr for this endpoint yet, sends a ping itself to try and - /// establish one. - /// - /// This is called once we've already verified that we got a valid discovery message - /// from `self` via ep. 
- pub(super) fn handle_ping(&mut self, path: SendAddr, tx_id: TransactionId) -> PingHandled { - let now = Instant::now(); - - let role = match path { - SendAddr::Udp(addr) => { - match self.udp_paths.access_mut(now).paths().entry(addr.into()) { - Entry::Occupied(mut occupied) => occupied.get_mut().handle_ping(tx_id, now), - Entry::Vacant(vacant) => { - info!(%addr, "new direct addr for endpoint"); - vacant.insert(PathState::with_ping( - self.endpoint_id, - path.clone(), - tx_id, - Source::Udp, - now, - )); - PingRole::NewPath - } - } - } - SendAddr::Relay(ref url) => { - match self.relay_url.as_mut() { - Some((home_url, _state)) if home_url != url => { - // either the endpoint changed relays or we didn't have a relay address for the - // endpoint. In both cases, trust the new confirmed url - info!(%url, "new relay addr for endpoint"); - self.relay_url = Some(( - url.clone(), - PathState::with_ping( - self.endpoint_id, - path.clone(), - tx_id, - Source::Relay, - now, - ), - )); - PingRole::NewPath - } - Some((_home_url, state)) => state.handle_ping(tx_id, now), - None => { - info!(%url, "new relay addr for endpoint"); - self.relay_url = Some(( - url.clone(), - PathState::with_ping( - self.endpoint_id, - path.clone(), - tx_id, - Source::Relay, - now, - ), - )); - PingRole::NewPath - } - } - } - }; - event!( - target: "iroh::_events::ping::recv", - Level::DEBUG, - remote_endpoint = %self.endpoint_id.fmt_short(), - src = ?path, - txn = ?tx_id, - ?role, - ); - - if matches!(path, SendAddr::Udp(_)) && matches!(role, PingRole::NewPath) { - self.prune_direct_addresses(now); - } - - // if the endpoint does not yet have a best_addr - let needs_ping_back = if matches!(path, SendAddr::Udp(_)) - && matches!( - self.udp_paths.send_addr(true), - UdpSendAddr::None | UdpSendAddr::Unconfirmed(_) | UdpSendAddr::Outdated(_) - ) { - // We also need to send a ping to make this path available to us as well. This - // is always sent together with a pong. So in the worst case the pong gets lost - // and this ping does not. In that case we ping-pong until both sides have - // received at least one pong. Once both sides have received one pong they both - // have a best_addr and this ping will stop being sent. - self.start_ping(path, DiscoPingPurpose::PingBack) - } else { - None - }; - - debug!( - ?role, - needs_ping_back = ?needs_ping_back.is_some(), - paths = %summarize_endpoint_paths(self.udp_paths.paths()), - "endpoint handled ping", - ); - PingHandled { - role, - needs_ping_back, - } - } - - /// Prune inactive paths. - /// - /// This trims the list of inactive paths for an endpoint. At most - /// [`MAX_INACTIVE_DIRECT_ADDRESSES`] are kept. 
- pub(super) fn prune_direct_addresses(&mut self, now: Instant) { - // prune candidates are addresses that are not active - let mut prune_candidates: Vec<_> = self - .udp_paths - .paths() - .iter() - .filter(|(_ip_port, state)| !state.is_active()) - .map(|(ip_port, state)| (*ip_port, state.last_alive())) - .filter(|(_ipp, last_alive)| match last_alive { - Some(last_seen) => last_seen.elapsed() > LAST_ALIVE_PRUNE_DURATION, - None => true, - }) - .collect(); - let prune_count = prune_candidates - .len() - .saturating_sub(MAX_INACTIVE_DIRECT_ADDRESSES); - if prune_count == 0 { - // nothing to do, within limits - debug!( - paths = %summarize_endpoint_paths(self.udp_paths.paths()), - "prune addresses: {prune_count} pruned", - ); - return; - } - - // sort leaving the worst addresses first (never contacted) and better ones (most recently - // used ones) last - prune_candidates.sort_unstable_by_key(|(_ip_port, last_alive)| *last_alive); - prune_candidates.truncate(prune_count); - for (ip_port, _last_alive) in prune_candidates.into_iter() { - self.remove_direct_addr(&ip_port, now, "inactive"); - } - debug!( - paths = %summarize_endpoint_paths(self.udp_paths.paths()), - "prune addresses: {prune_count} pruned", - ); - } - - /// Called when connectivity changes enough that we should question our earlier - /// assumptions about which paths work. - #[instrument("disco", skip_all, fields(endpoint = %self.endpoint_id.fmt_short()))] - pub(super) fn note_connectivity_change(&mut self, now: Instant, metrics: &MagicsockMetrics) { - let mut guard = self.udp_paths.access_mut(now); - for es in guard.paths().values_mut() { - es.validity.record_metrics(metrics); - es.clear(); - } - } - - /// Handles a Pong message (a reply to an earlier ping). - /// - /// It reports the address and key that should be inserted for the endpoint if any. - #[instrument(skip(self, metrics))] - pub(super) fn handle_pong( - &mut self, - m: &disco::Pong, - src: SendAddr, - metrics: &MagicsockMetrics, - ) -> Option<(SocketAddr, PublicKey)> { - event!( - target: "iroh::_events::pong::recv", - Level::DEBUG, - remote_endpoint = %self.endpoint_id.fmt_short(), - ?src, - txn = ?m.tx_id, - ); - let is_relay = src.is_relay(); - match self.sent_pings.remove(&m.tx_id) { - None => { - // This is not a pong for a ping we sent. In reality however we probably - // did send this ping but it has timed-out by the time we receive this pong - // so we removed the state already. - debug!(tx = %HEXLOWER.encode(&m.tx_id), "received unknown pong (did it timeout?)"); - None - } - Some(sp) => { - let mut endpoint_map_insert = None; - - let now = Instant::now(); - let latency = now - sp.at; - - debug!( - tx = %HEXLOWER.encode(&m.tx_id), - src = %src, - reported_ping_src = %m.ping_observed_addr, - ping_dst = %sp.to, - is_relay = %src.is_relay(), - latency = %latency.as_millis(), - "received pong", - ); - - match src { - SendAddr::Udp(addr) => { - match self.udp_paths.access_mut(now).paths().get_mut(&addr.into()) { - None => { - warn!("ignoring pong: no state for src addr"); - // This is no longer an endpoint we care about. 
- return endpoint_map_insert; - } - Some(st) => { - endpoint_map_insert = Some((addr, self.endpoint_id)); - st.add_pong_reply( - PongReply { - latency, - pong_at: now, - from: src, - pong_src: m.ping_observed_addr.clone(), - }, - metrics, - ); - } - } - debug!( - paths = %summarize_endpoint_paths(self.udp_paths.paths()), - "handled pong", - ); - } - SendAddr::Relay(ref url) => match self.relay_url.as_mut() { - Some((home_url, state)) if home_url == url => { - state.add_pong_reply( - PongReply { - latency, - pong_at: now, - from: src, - pong_src: m.ping_observed_addr.clone(), - }, - metrics, - ); - } - other => { - // if we are here then we sent this ping, but the url changed - // waiting for the response. It was either set to None or changed to - // another relay. This should either never happen or be extremely - // unlikely. Log and ignore for now - warn!( - stored=?other, - received=?url, - "ignoring pong via relay for different relay from last one", - ); - } - }, - } - - // Promote this pong response to our current best address if it's lower latency. - // TODO(bradfitz): decide how latency vs. preference order affects decision - if let SendAddr::Udp(_to) = sp.to { - debug_assert!(!is_relay, "mismatching relay & udp"); - } - - endpoint_map_insert - } - } - } - - /// Handles a DISCO CallMeMaybe discovery message. - /// - /// The contract for use of this message is that the endpoint has already pinged to us via - /// UDP, so their stateful firewall should be open. Now we can Ping back and make it - /// through. - /// - /// However if the remote side has no direct path information to us, they would not have - /// had any [`IpPort`]s to send pings to and our pings might end up blocked. But at - /// least open the firewalls on our side, giving the other side another change of making - /// it through when it pings in response. - pub(super) fn handle_call_me_maybe( - &mut self, - m: disco::CallMeMaybe, - metrics: &MagicsockMetrics, - ) -> Vec { - let now = Instant::now(); - let mut call_me_maybe_ipps = BTreeSet::new(); - - let mut guard = self.udp_paths.access_mut(now); - - for peer_sockaddr in &m.my_numbers { - if let IpAddr::V6(ip) = peer_sockaddr.ip() { - if netwatch::ip::is_unicast_link_local(ip) { - // We send these out, but ignore them for now. - // TODO: teach the ping code to ping on all interfaces for these. - continue; - } - } - let ipp = IpPort::from(*peer_sockaddr); - call_me_maybe_ipps.insert(ipp); - guard - .paths() - .entry(ipp) - .or_insert_with(|| { - PathState::new( - self.endpoint_id, - SendAddr::from(*peer_sockaddr), - Source::Relay, - now, - ) - }) - .call_me_maybe_time - .replace(now); - } - - // Zero out all the last_ping times to force send_pings to send new ones, even if - // it's been less than 5 seconds ago. Also clear pongs for direct addresses not - // included in the updated set. - for (ipp, st) in guard.paths().iter_mut() { - st.last_ping = None; - if !call_me_maybe_ipps.contains(ipp) { - // TODO: This seems like a weird way to signal that the endpoint no longer - // thinks it has this IpPort as an available path. - if !st.validity.is_empty() { - debug!(path=?ipp ,"clearing recent pong"); - st.validity.record_metrics(metrics); - st.validity = PathValidity::empty(); - } - } - } - if guard.has_best_addr_changed() { - // Clear the last call-me-maybe send time so we will send one again. 
- self.last_call_me_maybe = None; - } - debug!( - paths = %summarize_endpoint_paths(self.udp_paths.paths()), - "updated endpoint paths from call-me-maybe", - ); - self.send_pings(now) - } - - /// Marks this endpoint as having received a UDP payload message. - #[cfg(not(wasm_browser))] - pub(super) fn receive_udp(&mut self, addr: IpPort, now: Instant) { - let mut guard = self.udp_paths.access_mut(now); - let Some(state) = guard.paths().get_mut(&addr) else { - debug_assert!( - false, - "endpoint map inconsistency by_ip_port <-> direct addr" - ); - return; - }; - state.receive_payload(now); - self.last_used = Some(now); - } - - pub(super) fn receive_relay(&mut self, url: &RelayUrl, src: EndpointId, now: Instant) { - match self.relay_url.as_mut() { - Some((current_home, state)) if current_home == url => { - // We received on the expected url. update state. - state.receive_payload(now); - } - Some((_current_home, _state)) => { - // we have a different url. we only update on ping, not on receive_relay. - } - None => { - self.relay_url = Some(( - url.clone(), - PathState::with_last_payload( - src, - SendAddr::from(url.clone()), - Source::Relay, - now, - ), - )); - } - } - self.last_used = Some(now); - } - - pub(super) fn last_ping(&self, addr: &SendAddr) -> Option { - match addr { - SendAddr::Udp(addr) => self - .udp_paths - .paths() - .get(&(*addr).into()) - .and_then(|ep| ep.last_ping), - SendAddr::Relay(url) => self - .relay_url - .as_ref() - .filter(|(home_url, _state)| home_url == url) - .and_then(|(_home_url, state)| state.last_ping), - } - } - - /// Checks if this `Endpoint` is currently actively being used. - pub(super) fn is_active(&self, now: &Instant) -> bool { - match self.last_used { - Some(last_active) => now.duration_since(last_active) <= SESSION_ACTIVE_TIMEOUT, - None => false, - } - } - - /// Send a heartbeat to the endpoint to keep the connection alive, or trigger a full ping - /// if necessary. - #[instrument("stayin_alive", skip_all, fields(endpoint = %self.endpoint_id.fmt_short()))] - pub(super) fn stayin_alive(&mut self, have_ipv6: bool) -> Vec { - trace!("stayin_alive"); - let now = Instant::now(); - if !self.is_active(&now) { - trace!("skipping stayin alive: session is inactive"); - return Vec::new(); - } - - // If we do not have an optimal addr, send pings to all known places. - if self.want_call_me_maybe(&now, have_ipv6) { - debug!("sending a call-me-maybe"); - return self.send_call_me_maybe(now, SendCallMeMaybe::Always); - } - - // Send heartbeat ping to keep the current addr going as long as we need it. - if let Some(udp_addr) = self.udp_paths.send_addr(have_ipv6).get_addr() { - let elapsed = self.last_ping(&SendAddr::Udp(udp_addr)).map(|l| now - l); - // Send a ping if the last ping is older than 2 seconds. - let needs_ping = match elapsed { - Some(e) => e >= STAYIN_ALIVE_MIN_ELAPSED, - None => false, - }; - - if needs_ping { - debug!( - dst = %udp_addr, - since_last_ping=?elapsed, - "send stayin alive ping", - ); - if let Some(msg) = - self.start_ping(SendAddr::Udp(udp_addr), DiscoPingPurpose::StayinAlive) - { - return vec![PingAction::SendPing(msg)]; - } - } - } - - Vec::new() - } - - /// Returns the addresses on which a payload should be sent right now. - /// - /// This is in the hot path of `.poll_send()`. - // TODO(matheus23): Make this take &self. 
That's not quite possible yet due to `send_call_me_maybe` - // eventually calling `prune_direct_addresses` (which needs &mut self) - #[instrument("get_send_addrs", skip_all, fields(endpoint = %self.endpoint_id.fmt_short()))] - pub(crate) fn get_send_addrs( - &mut self, - have_ipv6: bool, - metrics: &MagicsockMetrics, - ) -> (Option, Option, Vec) { - let now = Instant::now(); - let prev = self.last_used.replace(now); - if prev.is_none() { - // this is the first time we are trying to connect to this endpoint - metrics.endpoints_contacted.inc(); - } - let (udp_addr, relay_url) = self.addr_for_send(have_ipv6, metrics); - - let ping_msgs = if self.want_call_me_maybe(&now, have_ipv6) { - self.send_call_me_maybe(now, SendCallMeMaybe::IfNoRecent) - } else { - Vec::new() - }; - trace!( - ?udp_addr, - ?relay_url, - pings = %ping_msgs.len(), - "found send address", - ); - (udp_addr, relay_url, ping_msgs) - } - - /// Get the IP addresses for this endpoint. - pub(super) fn ip_addrs(&self) -> impl Iterator + '_ { - self.udp_paths.paths().keys().copied() - } - - #[cfg(test)] - pub(super) fn ip_addr_states(&self) -> impl Iterator + '_ { - self.udp_paths.paths().iter() - } - - pub(super) fn last_used(&self) -> Option { - self.last_used - } -} - -impl From for EndpointAddr { - fn from(info: RemoteInfo) -> Self { - let mut addrs = info - .addrs - .into_iter() - .map(|info| TransportAddr::Ip(info.addr)) - .collect::>(); - - if let Some(url) = info.relay_url { - addrs.insert(TransportAddr::Relay(url.into())); - } - - EndpointAddr::from_parts(info.endpoint_id, addrs) - } -} - -/// Whether to send a call-me-maybe message after sending pings to all known paths. -/// -/// `IfNoRecent` will only send a call-me-maybe if no previous one was sent in the last -/// [`HEARTBEAT_INTERVAL`]. -#[derive(Debug)] -enum SendCallMeMaybe { - Always, - IfNoRecent, -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(super) struct PongReply { - pub(super) latency: Duration, - /// When we received the pong. - pub(super) pong_at: Instant, - /// The pong's src (usually same as endpoint map key). - pub(super) from: SendAddr, - /// What they reported they heard. - pub(super) pong_src: SendAddr, -} - -#[derive(Debug)] -pub(super) struct SentPing { - pub(super) to: SendAddr, - pub(super) at: Instant, - #[allow(dead_code)] - pub(super) purpose: DiscoPingPurpose, - pub(super) _expiry_task: AbortOnDropHandle<()>, -} - -/// The reason why a discovery ping message was sent. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum DiscoPingPurpose { - /// The purpose of a ping was to see if a path was valid. - Discovery, - /// Ping to ensure the current route is still valid. - StayinAlive, - /// When a ping was received and no direct connection exists yet. - /// - /// When a ping was received we suspect a direct connection is possible. If we do not - /// yet have one that triggers a ping, indicated with this reason. - PingBack, -} - -/// The type of control message we have received. -#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize, derive_more::Display)] -pub enum ControlMsg { - /// We received a Ping from the endpoint. - #[display("ping←")] - Ping, - /// We received a Pong from the endpoint. - #[display("pong←")] - Pong, - /// We received a CallMeMaybe. - #[display("call me")] - CallMeMaybe, -} - -/// Information about a *direct address*. -/// -/// The *direct addresses* of an iroh endpoint are those that could be used by other endpoints to -/// establish direct connectivity, depending on the network situation. 
Due to NAT configurations, -/// for example, not all direct addresses of an endpoint are usable by all peers. -#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] -pub struct DirectAddrInfo { - /// The UDP address reported by the remote endpoint. - pub addr: SocketAddr, - /// The latency to the remote endpoint over this network path. - /// - /// If there has never been any connectivity via this address no latency will be known. - pub latency: Option, - /// Last control message received by this endpoint about this address. - /// - /// This contains the elapsed duration since the control message was received and the - /// kind of control message received at that time. Only the most recent control message - /// is returned. - /// - /// Note that [`ControlMsg::CallMeMaybe`] is received via a relay path, while - /// [`ControlMsg::Ping`] and [`ControlMsg::Pong`] are received on the path to - /// [`DirectAddrInfo::addr`] itself and thus convey very different information. - pub last_control: Option<(Duration, ControlMsg)>, - /// Elapsed time since the last payload message was received on this network path. - /// - /// This indicates how long ago a QUIC datagram was received from the remote endpoint sent - /// from this [`DirectAddrInfo::addr`]. It indicates the network path was in use to - /// transport payload data. - pub last_payload: Option, - /// Elapsed time since this network path was known to exist. - /// - /// A network path is considered to exist only because the remote endpoint advertised it. - /// It may not mean the path is usable. However, if there was any communication with - /// the remote endpoint over this network path it also means the path exists. - /// - /// The elapsed time since *any* confirmation of the path's existence was received is - /// returned. If the remote endpoint moved networks and no longer has this path, this could - /// be a long duration. - pub last_alive: Option, - /// A [`HashMap`] of [`Source`]s to [`Duration`]s. - /// - /// The [`Duration`] indicates the elapsed time since this source last - /// recorded this address. - /// - /// The [`Duration`] will always indicate the most recent time the source - /// recorded this address. - pub sources: HashMap, -} - -/// Information about the network path to a remote endpoint via a relay server. -#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] -pub struct RelayUrlInfo { - /// The relay URL. - pub relay_url: RelayUrl, - /// Elapsed time since this relay path last received payload or control data. - pub last_alive: Option, - /// Latency to the remote endpoint over this relayed network path. - pub latency: Option, -} - -impl From<(RelayUrl, PathState)> for RelayUrlInfo { - fn from(value: (RelayUrl, PathState)) -> Self { - RelayUrlInfo { - relay_url: value.0, - last_alive: value.1.last_alive().map(|i| i.elapsed()), - latency: value.1.latency(), - } - } -} - -impl From for RelayUrl { - fn from(value: RelayUrlInfo) -> Self { - value.relay_url - } -} - -/// Details about a remote iroh endpoint which is known to this endpoint. -/// -/// Having details of an endpoint does not mean it can be connected to, nor that it has ever been -/// connected to in the past. There are various reasons an endpoint might be known: it could have -/// been manually added via [`Endpoint::add_endpoint_addr`], it could have been added by some -/// discovery mechanism, the endpoint could have contacted this endpoint, etc. 
-///
-/// [`Endpoint::add_endpoint_addr`]: crate::endpoint::Endpoint::add_endpoint_addr
-#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
-pub(crate) struct RemoteInfo {
-    /// The globally unique identifier for this endpoint.
-    pub endpoint_id: EndpointId,
-    /// Relay server information, if available.
-    pub relay_url: Option<RelayUrlInfo>,
-    /// The addresses at which this endpoint might be reachable.
-    ///
-    /// Some of these addresses might only be valid on networks we are not part of, but which
-    /// the remote endpoint is a part of.
-    pub addrs: Vec<DirectAddrInfo>,
-    /// The type of connection we have to the endpoint, either direct or over relay.
-    pub conn_type: ConnectionType,
-    /// The latency of the current network path to the remote endpoint.
-    pub latency: Option<Duration>,
-    /// Elapsed time since we last sent to or received from the endpoint.
-    ///
-    /// This is the duration since *any* data (payload or control messages) was sent to or received
-    /// from the remote endpoint. Note that sending to the remote endpoint does not imply
-    /// the remote endpoint received anything.
-    pub last_used: Option<Duration>,
-}
-
-impl RemoteInfo {
-    /// Get the duration since the last activity we received from this endpoint
-    /// on any of its direct addresses.
-    pub(crate) fn last_received(&self) -> Option<Duration> {
-        self.addrs
-            .iter()
-            .filter_map(|addr| addr.last_control.map(|x| x.0).min(addr.last_payload))
-            .min()
-    }
-
-    /// Whether there is a possible known network path to the remote endpoint.
-    ///
-    /// Note that this does not provide any guarantees of whether any network path is
-    /// usable.
-    pub(crate) fn has_send_address(&self) -> bool {
-        self.relay_url.is_some() || !self.addrs.is_empty()
-    }
-}
-
-/// The type of connection we have to the endpoint.
-#[derive(derive_more::Display, Default, Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
-pub enum ConnectionType {
-    /// Direct UDP connection
-    #[display("direct({_0})")]
-    Direct(SocketAddr),
-    /// Connection over a relay server
-    #[display("relay({_0})")]
-    Relay(RelayUrl),
-    /// Both a UDP and a relay connection are used.
-    ///
-    /// This is the case if we do have a UDP address, but are missing a recent confirmation that
-    /// the address works.
- #[display("mixed(udp: {_0}, relay: {_1})")] - Mixed(SocketAddr, RelayUrl), - /// We have no verified connection to this PublicKey - #[default] - #[display("none")] - None, -} - -#[cfg(test)] -mod tests { - use std::{collections::BTreeMap, net::Ipv4Addr}; - - use iroh_base::SecretKey; - use rand::SeedableRng; - - use super::*; - use crate::magicsock::endpoint_map::{EndpointMap, EndpointMapInner}; - - #[test] - fn test_remote_infos() { - let now = Instant::now(); - let elapsed = Duration::from_secs(3); - let later = now + elapsed; - let send_addr: RelayUrl = "https://my-relay.com".parse().unwrap(); - let pong_src = SendAddr::Udp("0.0.0.0:1".parse().unwrap()); - let latency = Duration::from_millis(50); - - let relay_and_state = |endpoint_id: EndpointId, url: RelayUrl| { - let relay_state = PathState::with_pong_reply( - endpoint_id, - PongReply { - latency, - pong_at: now, - from: SendAddr::Relay(send_addr.clone()), - pong_src: pong_src.clone(), - }, - ); - Some((url, relay_state)) - }; - - let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0u64); - - // endpoint with a `best_addr` that has a latency but no relay - let (a_endpoint, a_socket_addr) = { - let key = SecretKey::generate(&mut rng); - let endpoint_id = key.public(); - let ip_port = IpPort { - ip: Ipv4Addr::UNSPECIFIED.into(), - port: 10, - }; - let endpoint_state = BTreeMap::from([( - ip_port, - PathState::with_pong_reply( - endpoint_id, - PongReply { - latency, - pong_at: now, - from: SendAddr::Udp(ip_port.into()), - pong_src: pong_src.clone(), - }, - ), - )]); - ( - EndpointState { - id: 0, - quic_mapped_addr: EndpointIdMappedAddr::generate(), - endpoint_id: key.public(), - last_full_ping: None, - relay_url: None, - udp_paths: EndpointUdpPaths::from_parts( - endpoint_state, - UdpSendAddr::Valid(ip_port.into()), - ), - sent_pings: HashMap::new(), - last_used: Some(now), - last_call_me_maybe: None, - conn_type: Watchable::new(ConnectionType::Direct(ip_port.into())), - has_been_direct: AtomicBool::new(true), - #[cfg(any(test, feature = "test-utils"))] - path_selection: PathSelection::default(), - }, - ip_port.into(), - ) - }; - // endpoint w/ no best addr but a relay w/ latency - let b_endpoint = { - // let socket_addr = "0.0.0.0:9".parse().unwrap(); - let key = SecretKey::generate(&mut rng); - EndpointState { - id: 1, - quic_mapped_addr: EndpointIdMappedAddr::generate(), - endpoint_id: key.public(), - last_full_ping: None, - relay_url: relay_and_state(key.public(), send_addr.clone()), - udp_paths: EndpointUdpPaths::new(), - sent_pings: HashMap::new(), - last_used: Some(now), - last_call_me_maybe: None, - conn_type: Watchable::new(ConnectionType::Relay(send_addr.clone())), - has_been_direct: AtomicBool::new(false), - #[cfg(any(test, feature = "test-utils"))] - path_selection: PathSelection::default(), - } - }; - - // endpoint w/ no best addr but a relay w/ no latency - let c_endpoint = { - // let socket_addr = "0.0.0.0:8".parse().unwrap(); - let key = SecretKey::generate(&mut rng); - EndpointState { - id: 2, - quic_mapped_addr: EndpointIdMappedAddr::generate(), - endpoint_id: key.public(), - last_full_ping: None, - relay_url: Some(( - send_addr.clone(), - PathState::new( - key.public(), - SendAddr::from(send_addr.clone()), - Source::App, - now, - ), - )), - udp_paths: EndpointUdpPaths::new(), - sent_pings: HashMap::new(), - last_used: Some(now), - last_call_me_maybe: None, - conn_type: Watchable::new(ConnectionType::Relay(send_addr.clone())), - has_been_direct: AtomicBool::new(false), - #[cfg(any(test, feature = "test-utils"))] - 
path_selection: PathSelection::default(), - } - }; - - // endpoint w/ expired best addr and relay w/ latency - let (d_endpoint, d_socket_addr) = { - let socket_addr: SocketAddr = "0.0.0.0:7".parse().unwrap(); - let key = SecretKey::generate(&mut rng); - let endpoint_id = key.public(); - let endpoint_state = BTreeMap::from([( - IpPort::from(socket_addr), - PathState::with_pong_reply( - endpoint_id, - PongReply { - latency, - pong_at: now, - from: SendAddr::Udp(socket_addr), - pong_src: pong_src.clone(), - }, - ), - )]); - ( - EndpointState { - id: 3, - quic_mapped_addr: EndpointIdMappedAddr::generate(), - endpoint_id: key.public(), - last_full_ping: None, - relay_url: relay_and_state(key.public(), send_addr.clone()), - udp_paths: EndpointUdpPaths::from_parts( - endpoint_state, - UdpSendAddr::Outdated(socket_addr), - ), - sent_pings: HashMap::new(), - last_used: Some(now), - last_call_me_maybe: None, - conn_type: Watchable::new(ConnectionType::Mixed( - socket_addr, - send_addr.clone(), - )), - has_been_direct: AtomicBool::new(false), - #[cfg(any(test, feature = "test-utils"))] - path_selection: PathSelection::default(), - }, - socket_addr, - ) - }; - - let mut expect = Vec::from([ - RemoteInfo { - endpoint_id: a_endpoint.endpoint_id, - relay_url: None, - addrs: Vec::from([DirectAddrInfo { - addr: a_socket_addr, - latency: Some(latency), - last_control: Some((elapsed, ControlMsg::Pong)), - last_payload: None, - last_alive: Some(elapsed), - sources: HashMap::new(), - }]), - conn_type: ConnectionType::Direct(a_socket_addr), - latency: Some(latency), - last_used: Some(elapsed), - }, - RemoteInfo { - endpoint_id: b_endpoint.endpoint_id, - relay_url: Some(RelayUrlInfo { - relay_url: b_endpoint.relay_url.as_ref().unwrap().0.clone(), - last_alive: None, - latency: Some(latency), - }), - addrs: Vec::new(), - conn_type: ConnectionType::Relay(send_addr.clone()), - latency: Some(latency), - last_used: Some(elapsed), - }, - RemoteInfo { - endpoint_id: c_endpoint.endpoint_id, - relay_url: Some(RelayUrlInfo { - relay_url: c_endpoint.relay_url.as_ref().unwrap().0.clone(), - last_alive: None, - latency: None, - }), - addrs: Vec::new(), - conn_type: ConnectionType::Relay(send_addr.clone()), - latency: None, - last_used: Some(elapsed), - }, - RemoteInfo { - endpoint_id: d_endpoint.endpoint_id, - relay_url: Some(RelayUrlInfo { - relay_url: d_endpoint.relay_url.as_ref().unwrap().0.clone(), - last_alive: None, - latency: Some(latency), - }), - addrs: Vec::from([DirectAddrInfo { - addr: d_socket_addr, - latency: Some(latency), - last_control: Some((elapsed, ControlMsg::Pong)), - last_payload: None, - last_alive: Some(elapsed), - sources: HashMap::new(), - }]), - conn_type: ConnectionType::Mixed(d_socket_addr, send_addr.clone()), - latency: Some(Duration::from_millis(50)), - last_used: Some(elapsed), - }, - ]); - - let endpoint_map = EndpointMap::from_inner(EndpointMapInner { - by_endpoint_key: HashMap::from([ - (a_endpoint.endpoint_id, a_endpoint.id), - (b_endpoint.endpoint_id, b_endpoint.id), - (c_endpoint.endpoint_id, c_endpoint.id), - (d_endpoint.endpoint_id, d_endpoint.id), - ]), - by_ip_port: HashMap::from([ - (a_socket_addr.into(), a_endpoint.id), - (d_socket_addr.into(), d_endpoint.id), - ]), - by_quic_mapped_addr: HashMap::from([ - (a_endpoint.quic_mapped_addr, a_endpoint.id), - (b_endpoint.quic_mapped_addr, b_endpoint.id), - (c_endpoint.quic_mapped_addr, c_endpoint.id), - (d_endpoint.quic_mapped_addr, d_endpoint.id), - ]), - by_id: HashMap::from([ - (a_endpoint.id, a_endpoint), - (b_endpoint.id, 
b_endpoint), - (c_endpoint.id, c_endpoint), - (d_endpoint.id, d_endpoint), - ]), - next_id: 5, - path_selection: PathSelection::default(), - }); - let mut got = endpoint_map.list_remote_infos(later); - got.sort_by_key(|p| p.endpoint_id); - expect.sort_by_key(|p| p.endpoint_id); - remove_non_deterministic_fields(&mut got); - assert_eq!(expect, got); - } - - fn remove_non_deterministic_fields(infos: &mut [RemoteInfo]) { - for info in infos.iter_mut() { - if info.relay_url.is_some() { - info.relay_url.as_mut().unwrap().last_alive = None; - } - } - } - - #[test] - fn test_prune_direct_addresses() { - // When we handle a call-me-maybe with more than MAX_INACTIVE_DIRECT_ADDRESSES we do - // not want to prune them right away but send pings to all of them. - let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0u64); - - let key = SecretKey::generate(&mut rng); - let opts = Options { - endpoint_id: key.public(), - relay_url: None, - active: true, - source: crate::magicsock::Source::NamedApp { - name: "test".into(), - }, - path_selection: PathSelection::default(), - }; - let mut ep = EndpointState::new(0, opts); - - let my_numbers_count: u16 = (MAX_INACTIVE_DIRECT_ADDRESSES + 5).try_into().unwrap(); - let my_numbers = (0u16..my_numbers_count) - .map(|i| SocketAddr::new(Ipv4Addr::LOCALHOST.into(), 1000 + i)) - .collect(); - let call_me_maybe = disco::CallMeMaybe { my_numbers }; - - let metrics = MagicsockMetrics::default(); - let ping_messages = ep.handle_call_me_maybe(call_me_maybe, &metrics); - - // We have no relay server and no previous direct addresses, so we should get the same - // number of pings as direct addresses in the call-me-maybe. - assert_eq!(ping_messages.len(), my_numbers_count as usize); - } -} diff --git a/iroh/src/magicsock/endpoint_map/path_state.rs b/iroh/src/magicsock/endpoint_map/path_state.rs deleted file mode 100644 index d78b67179f3..00000000000 --- a/iroh/src/magicsock/endpoint_map/path_state.rs +++ /dev/null @@ -1,335 +0,0 @@ -//! The state kept for each network path to a remote endpoint. - -use std::collections::{BTreeMap, HashMap}; - -use iroh_base::EndpointId; -use n0_future::time::{Duration, Instant}; -use tracing::{Level, debug, event}; - -use super::{ - IpPort, PingRole, Source, - endpoint_state::{ControlMsg, PongReply, SESSION_ACTIVE_TIMEOUT}, -}; -use crate::{ - disco::{SendAddr, TransactionId}, - magicsock::{ - HEARTBEAT_INTERVAL, Metrics as MagicsockMetrics, - endpoint_map::path_validity::{self, PathValidity}, - }, -}; - -/// The minimum time between pings to an endpoint. -/// -/// Except in the case of CallMeMaybe frames resetting the counter, as the first pings -/// likely didn't through the firewall. -const DISCO_PING_INTERVAL: Duration = Duration::from_secs(5); - -/// State about a particular path to another [`EndpointState`]. -/// -/// This state is used for both the relay path and any direct UDP paths. -/// -/// [`EndpointState`]: super::endpoint_state::EndpointState -#[derive(Debug, Clone)] -pub(super) struct PathState { - /// The endpoint for which this path exists. - endpoint_id: EndpointId, - /// The path this applies for. - path: SendAddr, - /// The last (outgoing) ping time. - pub(super) last_ping: Option, - - /// If non-zero, means that this was an endpoint that we learned about at runtime (from an - /// incoming ping). If so, we keep the time updated and use it to discard old candidates. - // NOTE: tx_id Originally added in tailscale due to . 
- last_got_ping: Option<(Instant, TransactionId)>, - - /// The time this endpoint was last advertised via a call-me-maybe DISCO message. - pub(super) call_me_maybe_time: Option, - - /// Tracks whether this path is valid. - /// - /// Also stores the latest [`PongReply`], if there is one. - /// - /// See [`PathValidity`] docs. - pub(super) validity: PathValidity, - /// When the last payload data was **received** via this path. - /// - /// This excludes DISCO messages. - pub(super) last_payload_msg: Option, - /// Sources is a map of [`Source`]s to [`Instant`]s, keeping track of all the ways we have - /// learned about this path - /// - /// We keep track of only the latest [`Instant`] for each [`Source`], keeping the size of - /// the map of sources down to one entry per type of source. - pub(super) sources: HashMap, -} - -impl PathState { - pub(super) fn new( - endpoint_id: EndpointId, - path: SendAddr, - source: Source, - now: Instant, - ) -> Self { - let mut sources = HashMap::new(); - sources.insert(source, now); - Self { - endpoint_id, - path, - last_ping: None, - last_got_ping: None, - call_me_maybe_time: None, - validity: PathValidity::empty(), - last_payload_msg: None, - sources, - } - } - - pub(super) fn with_last_payload( - endpoint_id: EndpointId, - path: SendAddr, - source: Source, - now: Instant, - ) -> Self { - let mut sources = HashMap::new(); - sources.insert(source, now); - PathState { - endpoint_id, - path, - last_ping: None, - last_got_ping: None, - call_me_maybe_time: None, - validity: PathValidity::empty(), - last_payload_msg: Some(now), - sources, - } - } - - pub(super) fn with_ping( - endpoint_id: EndpointId, - path: SendAddr, - tx_id: TransactionId, - source: Source, - now: Instant, - ) -> Self { - let mut new = PathState::new(endpoint_id, path, source, now); - new.handle_ping(tx_id, now); - new - } - - pub(super) fn add_pong_reply(&mut self, r: PongReply, metrics: &MagicsockMetrics) { - if let SendAddr::Udp(ref path) = self.path { - if self.validity.is_empty() { - event!( - target: "iroh::_events::holepunched", - Level::DEBUG, - remote_endpoint = %self.endpoint_id.fmt_short(), - path = ?path, - direction = "outgoing", - ); - } - } - - self.validity.update_pong(r.pong_at, r.latency); - - self.validity.record_metrics(metrics); - } - - pub(super) fn receive_payload(&mut self, now: Instant) { - self.last_payload_msg = Some(now); - self.validity - .receive_payload(now, path_validity::Source::QuicPayload); - } - - #[cfg(test)] - pub(super) fn with_pong_reply(endpoint_id: EndpointId, r: PongReply) -> Self { - PathState { - endpoint_id, - path: r.from.clone(), - last_ping: None, - last_got_ping: None, - call_me_maybe_time: None, - validity: PathValidity::new(r.pong_at, r.latency), - last_payload_msg: None, - sources: HashMap::new(), - } - } - - /// Check whether this path is considered active. - /// - /// Active means the path has received payload messages within the last - /// [`SESSION_ACTIVE_TIMEOUT`]. - /// - /// Note that a path might be alive but not active if it's contactable but not in - /// use. - pub(super) fn is_active(&self) -> bool { - self.last_payload_msg - .as_ref() - .map(|instant| instant.elapsed() <= SESSION_ACTIVE_TIMEOUT) - .unwrap_or(false) - } - - /// Returns the instant the last incoming ping was received. - pub(super) fn last_incoming_ping(&self) -> Option<&Instant> { - self.last_got_ping.as_ref().map(|(time, _tx_id)| time) - } - - /// Reports the last instant this path was considered alive. 
- /// - /// Alive means the path is considered in use by the remote endpoint. Either because we - /// received a payload message, a DISCO message (ping, pong) or it was advertised in a - /// call-me-maybe message. - /// - /// This is the most recent instant between: - /// - when last pong was received. - /// - when this path was last advertised in a received CallMeMaybe message. - /// - When the last payload transmission occurred. - /// - when the last ping from them was received. - pub(super) fn last_alive(&self) -> Option { - self.validity - .latest_pong() - .into_iter() - .chain(self.last_payload_msg) - .chain(self.call_me_maybe_time) - .chain(self.last_incoming_ping().cloned()) - .max() - } - - /// The last control or DISCO message **about** this path. - /// - /// This is the most recent instant among: - /// - when last pong was received. - /// - when this path was last advertised in a received CallMeMaybe message. - /// - when the last ping from them was received. - /// - /// Returns the time elapsed since the last control message, and the type of control message. - pub(super) fn last_control_msg(&self, now: Instant) -> Option<(Duration, ControlMsg)> { - // get every control message and assign it its kind - let last_pong = self - .validity - .latest_pong() - .map(|pong_at| (pong_at, ControlMsg::Pong)); - let last_call_me_maybe = self - .call_me_maybe_time - .as_ref() - .map(|call_me| (*call_me, ControlMsg::CallMeMaybe)); - let last_ping = self - .last_incoming_ping() - .map(|ping| (*ping, ControlMsg::Ping)); - - last_pong - .into_iter() - .chain(last_call_me_maybe) - .chain(last_ping) - .max_by_key(|(instant, _kind)| *instant) - .map(|(instant, kind)| (now.duration_since(instant), kind)) - } - - /// Returns the latency from the most recent pong, if available. - pub(super) fn latency(&self) -> Option { - self.validity.latency() - } - - pub(super) fn needs_ping(&self, now: &Instant) -> bool { - match self.last_ping { - None => true, - Some(last_ping) => { - let elapsed = now.duration_since(last_ping); - - // TODO: remove! - // This logs "ping is too new" for each send whenever the endpoint does *not* need - // a ping. Pretty sure this is not a useful log, but maybe there was a reason? 
- // if !needs_ping { - // debug!("ping is too new: {}ms", elapsed.as_millis()); - // } - elapsed > DISCO_PING_INTERVAL - } - } - } - - pub(super) fn handle_ping(&mut self, tx_id: TransactionId, now: Instant) -> PingRole { - if Some(&tx_id) == self.last_got_ping.as_ref().map(|(_t, tx_id)| tx_id) { - PingRole::Duplicate - } else { - let prev = self.last_got_ping.replace((now, tx_id)); - let heartbeat_deadline = HEARTBEAT_INTERVAL + (HEARTBEAT_INTERVAL / 2); - match prev { - Some((prev_time, _tx)) if now.duration_since(prev_time) <= heartbeat_deadline => { - PingRole::LikelyHeartbeat - } - Some((prev_time, _tx)) => { - debug!( - elapsed = ?now.duration_since(prev_time), - "heartbeat missed, reactivating", - ); - PingRole::Activate - } - None => { - if let SendAddr::Udp(ref addr) = self.path { - event!( - target: "iroh::_events::holepunched", - Level::DEBUG, - remote_endpoint = %self.endpoint_id.fmt_short(), - path = ?addr, - direction = "incoming", - ); - } - PingRole::Activate - } - } - } - } - - pub(super) fn add_source(&mut self, source: Source, now: Instant) { - self.sources.insert(source, now); - } - - pub(super) fn clear(&mut self) { - self.last_ping = None; - self.last_got_ping = None; - self.call_me_maybe_time = None; - self.validity = PathValidity::empty(); - } - - fn summary(&self, mut w: impl std::fmt::Write) -> std::fmt::Result { - write!(w, "{{ ")?; - if self.is_active() { - write!(w, "active ")?; - } - if let Some(pong_at) = self.validity.latest_pong() { - write!(w, "pong-received({:?} ago) ", pong_at.elapsed())?; - } - if let Some(when) = self.last_incoming_ping() { - write!(w, "ping-received({:?} ago) ", when.elapsed())?; - } - if let Some(ref when) = self.last_ping { - write!(w, "ping-sent({:?} ago) ", when.elapsed())?; - } - if let Some(last_source) = self.sources.iter().max_by_key(|&(_, instant)| instant) { - write!( - w, - "last-source: {}({:?} ago)", - last_source.0, - last_source.1.elapsed() - )?; - } - write!(w, "}}") - } -} - -// TODO: Make an `EndpointPaths` struct and do things nicely. -pub(super) fn summarize_endpoint_paths(paths: &BTreeMap) -> String { - use std::fmt::Write; - - let mut w = String::new(); - write!(&mut w, "[").ok(); - for (i, (ipp, state)) in paths.iter().enumerate() { - if i > 0 { - write!(&mut w, ", ").ok(); - } - write!(&mut w, "{ipp}").ok(); - state.summary(&mut w).ok(); - } - write!(&mut w, "]").ok(); - w -} diff --git a/iroh/src/magicsock/endpoint_map/path_validity.rs b/iroh/src/magicsock/endpoint_map/path_validity.rs deleted file mode 100644 index 569b994377f..00000000000 --- a/iroh/src/magicsock/endpoint_map/path_validity.rs +++ /dev/null @@ -1,433 +0,0 @@ -use n0_future::time::{Duration, Instant}; - -use crate::magicsock::Metrics as MagicsockMetrics; - -/// How long we trust a UDP address as the exclusive path (i.e. without also sending via the relay). -/// -/// Trust for a UDP address begins when we receive a DISCO UDP pong on that address. -/// It is then further extended by this duration every time we receive QUIC payload data while it's -/// currently trusted. -/// -/// If trust goes away, it can be brought back with another valid DISCO UDP pong. -const TRUST_UDP_ADDR_DURATION: Duration = Duration::from_millis(6500); - -/// Tracks a path's validity. -/// -/// A path is valid: -/// - For [`Source::trust_duration`] after a successful [`PongReply`]. -/// - For [`Source::trust_duration`] longer starting at the most recent -/// received application payload *while the path was valid*. 
-/// -/// [`PongReply`]: super::endpoint_state::PongReply -#[derive(Debug, Clone, Default)] -pub(super) struct PathValidity(Option); - -#[derive(Debug, Clone)] -struct Inner { - latest_pong: Instant, - latency: Duration, - trust_until: Instant, - congestion_metrics: CongestionMetrics, -} - -/// Congestion tracking for a UDP path. -#[derive(Debug, Default, Clone)] -struct CongestionMetrics { - /// Rolling window of recent latency measurements (stores up to 8 samples). - latency_samples: [Option; 8], - /// Index for next sample insertion (circular buffer). - sample_index: usize, - /// Total pings sent on this path. - pings_sent: u32, - /// Total pongs received on this path. - pongs_received: u32, -} - -impl CongestionMetrics { - fn add_latency_sample(&mut self, latency: Duration) { - self.latency_samples[self.sample_index] = Some(latency); - self.sample_index = (self.sample_index + 1) % self.latency_samples.len(); - self.pongs_received = self.pongs_received.saturating_add(1); - } - - fn record_ping_sent(&mut self) { - self.pings_sent = self.pings_sent.saturating_add(1); - } - - /// Calculate packet loss rate (0.0 to 1.0). - fn packet_loss_rate(&self) -> f64 { - if self.pings_sent == 0 { - return 0.0; - } - let lost = self.pings_sent.saturating_sub(self.pongs_received); - lost as f64 / self.pings_sent as f64 - } - - /// Calculate RTT variance as a congestion indicator. - /// Higher variance suggests congestion or unstable path. - fn rtt_variance(&self) -> Option { - let samples: Vec = self.latency_samples.iter().filter_map(|&s| s).collect(); - - if samples.len() < 2 { - return None; - } - - let mean = samples.iter().sum::() / samples.len() as u32; - let variance: f64 = samples - .iter() - .map(|&s| { - let diff = s.as_secs_f64() - mean.as_secs_f64(); - diff * diff - }) - .sum::() - / samples.len() as f64; - - Some(Duration::from_secs_f64(variance.sqrt())) - } - - /// Calculate average latency from recent samples. - #[cfg(test)] - fn avg_latency(&self) -> Option { - let samples: Vec = self.latency_samples.iter().filter_map(|&s| s).collect(); - - if samples.is_empty() { - return None; - } - - Some(samples.iter().sum::() / samples.len() as u32) - } - - /// Path quality score (0.0 = worst, 1.0 = best). - /// Factors in packet loss and RTT variance. 
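// Editor's note, not part of the original file: a worked example of the score
// computed by `quality_score` below, using the thresholds shown in its body.
//
//   pings_sent = 4, pongs_received = 3  =>  packet_loss_rate = 0.25
//   loss_penalty     = 1.0 - 0.25       =>  0.75
//   rtt_variance     = 60ms (> 50ms)    =>  variance_penalty = 0.7
//   quality_score    = 0.75 * 0.7       =>  0.525
//
// A lossless, low-jitter path keeps both penalties at 1.0 and scores 1.0.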
- fn quality_score(&self) -> f64 { - let packet_loss = self.packet_loss_rate(); - - // Defensive: packet_loss should never exceed 1.0, but clamp just in case - if packet_loss > 1.0 { - tracing::warn!( - packet_loss, - pings_sent = self.pings_sent, - pongs_received = self.pongs_received, - "packet loss rate exceeded 1.0 - possible bug in tracking" - ); - } - let loss_penalty = (1.0 - packet_loss).clamp(0.0, 1.0); - - // Penalize high RTT variance - let variance_penalty = match self.rtt_variance() { - Some(var) if var.as_millis() > 50 => 0.7, - Some(var) if var.as_millis() > 20 => 0.85, - Some(_) => 1.0, - None => 1.0, - }; - - loss_penalty * variance_penalty - } -} - -#[derive(Debug)] -pub(super) enum Source { - ReceivedPong, - QuicPayload, -} - -impl Source { - fn trust_duration(&self) -> Duration { - match self { - Source::ReceivedPong => TRUST_UDP_ADDR_DURATION, - Source::QuicPayload => TRUST_UDP_ADDR_DURATION, - } - } -} - -impl PathValidity { - pub(super) fn new(pong_at: Instant, latency: Duration) -> Self { - let mut metrics = CongestionMetrics::default(); - // Account for the ping that must have been sent to receive this pong - metrics.record_ping_sent(); - metrics.add_latency_sample(latency); - Self(Some(Inner { - trust_until: pong_at + Source::ReceivedPong.trust_duration(), - latest_pong: pong_at, - latency, - congestion_metrics: metrics, - })) - } - - /// Update with a new pong, preserving congestion history. - pub(super) fn update_pong(&mut self, pong_at: Instant, latency: Duration) { - match &mut self.0 { - Some(inner) => { - inner.trust_until = pong_at + Source::ReceivedPong.trust_duration(); - inner.latest_pong = pong_at; - inner.latency = latency; - inner.congestion_metrics.add_latency_sample(latency); - } - None => { - *self = Self::new(pong_at, latency); - } - } - } - - pub(super) fn empty() -> Self { - Self(None) - } - - pub(super) fn is_empty(&self) -> bool { - self.0.is_none() - } - - pub(super) fn is_valid(&self, now: Instant) -> bool { - let Some(state) = self.0.as_ref() else { - return false; - }; - - state.is_valid(now) - } - - pub(super) fn latency_if_valid(&self, now: Instant) -> Option { - let state = self.0.as_ref()?; - state.is_valid(now).then_some(state.latency) - } - - pub(super) fn is_outdated(&self, now: Instant) -> bool { - let Some(state) = self.0.as_ref() else { - return false; - }; - - // We *used* to be valid, but are now outdated. - // This happens when we had a DISCO pong but didn't receive - // any payload data or further pongs for at least TRUST_UDP_ADDR_DURATION - state.is_outdated(now) - } - - pub(super) fn latency_if_outdated(&self, now: Instant) -> Option { - let state = self.0.as_ref()?; - state.is_outdated(now).then_some(state.latency) - } - - /// Reconfirms path validity, if a payload was received while the - /// path was valid. - pub(super) fn receive_payload(&mut self, now: Instant, source: Source) { - let Some(state) = self.0.as_mut() else { - return; - }; - - if state.is_valid(now) { - state.trust_until = now + source.trust_duration(); - } - } - - pub(super) fn latency(&self) -> Option { - Some(self.0.as_ref()?.latency) - } - - pub(super) fn latest_pong(&self) -> Option { - Some(self.0.as_ref()?.latest_pong) - } - - /// Record that a ping was sent on this path. - pub(super) fn record_ping_sent(&mut self) { - if let Some(state) = self.0.as_mut() { - state.congestion_metrics.record_ping_sent(); - } - } - - /// Get the path quality score (0.0 = worst, 1.0 = best). 
- #[cfg(test)] - pub(super) fn quality_score(&self) -> f64 { - self.0 - .as_ref() - .map(|state| state.congestion_metrics.quality_score()) - .unwrap_or(0.0) - } - - /// Get packet loss rate for this path. - #[cfg(test)] - pub(super) fn packet_loss_rate(&self) -> f64 { - self.0 - .as_ref() - .map(|state| state.congestion_metrics.packet_loss_rate()) - .unwrap_or(0.0) - } - - /// Get RTT variance as congestion indicator. - #[cfg(test)] - pub(super) fn rtt_variance(&self) -> Option { - self.0 - .as_ref() - .and_then(|state| state.congestion_metrics.rtt_variance()) - } - - /// Get average latency from recent samples. - #[cfg(test)] - pub(super) fn avg_latency(&self) -> Option { - self.0 - .as_ref() - .and_then(|state| state.congestion_metrics.avg_latency()) - } - - /// Record congestion metrics to the metrics system. - /// Should be called periodically or on significant events. - pub(super) fn record_metrics(&self, metrics: &MagicsockMetrics) { - let Some(state) = self.0.as_ref() else { - return; - }; - - let loss_rate = state.congestion_metrics.packet_loss_rate(); - metrics.path_packet_loss_rate.observe(loss_rate); - - if let Some(variance) = state.congestion_metrics.rtt_variance() { - metrics - .path_rtt_variance_ms - .observe(variance.as_millis() as f64); - } - - let quality = state.congestion_metrics.quality_score(); - metrics.path_quality_score.observe(quality); - - let latency_ms = state.latency.as_secs_f64() * 1000.0; - metrics.connection_latency_ms.observe(latency_ms); - } -} - -impl Inner { - fn is_valid(&self, now: Instant) -> bool { - self.latest_pong <= now && now < self.trust_until - } - - fn is_outdated(&self, now: Instant) -> bool { - self.latest_pong <= now && self.trust_until <= now - } -} - -#[cfg(test)] -mod tests { - use n0_future::time::{Duration, Instant}; - - use super::{PathValidity, Source, TRUST_UDP_ADDR_DURATION}; - - #[tokio::test(start_paused = true)] - async fn test_basic_path_validity_lifetime() { - let mut validity = PathValidity(None); - assert!(!validity.is_valid(Instant::now())); - assert!(!validity.is_outdated(Instant::now())); - - validity = PathValidity::new(Instant::now(), Duration::from_millis(20)); - assert!(validity.is_valid(Instant::now())); - assert!(!validity.is_outdated(Instant::now())); - - tokio::time::advance(TRUST_UDP_ADDR_DURATION / 2).await; - assert!(validity.is_valid(Instant::now())); - assert!(!validity.is_outdated(Instant::now())); - - validity.receive_payload(Instant::now(), Source::QuicPayload); - assert!(validity.is_valid(Instant::now())); - assert!(!validity.is_outdated(Instant::now())); - - tokio::time::advance(TRUST_UDP_ADDR_DURATION / 2).await; - assert!(validity.is_valid(Instant::now())); - assert!(!validity.is_outdated(Instant::now())); - - tokio::time::advance(TRUST_UDP_ADDR_DURATION / 2).await; - assert!(!validity.is_valid(Instant::now())); - assert!(validity.is_outdated(Instant::now())); - } - #[tokio::test] - async fn test_congestion_metrics() { - let mut validity = PathValidity::new(Instant::now(), Duration::from_millis(10)); - // new() initializes with pings_sent=1, pongs_received=1 - - // Record some additional ping sends - validity.record_ping_sent(); - validity.record_ping_sent(); - validity.record_ping_sent(); - // Now: pings_sent=4, pongs_received=1 - - validity.update_pong(Instant::now(), Duration::from_millis(15)); - // Now: pings_sent=4, pongs_received=2 - - // Packet loss should be (4-2)/4 = 0.5 - let loss_rate = validity.packet_loss_rate(); - assert!((loss_rate - 0.5).abs() < 0.01); - - // Quality score should be 
reduced due to packet loss - let quality = validity.quality_score(); - assert!(quality < 1.0); - assert!(quality > 0.45); // Should still be relatively good (1.0 - 0.5 = 0.5) - } - - #[tokio::test] - async fn test_congestion_rtt_variance() { - let mut validity = PathValidity::new(Instant::now(), Duration::from_millis(10)); - - // Add varying latencies - validity.update_pong(Instant::now(), Duration::from_millis(10)); - validity.update_pong(Instant::now(), Duration::from_millis(50)); - validity.update_pong(Instant::now(), Duration::from_millis(20)); - validity.update_pong(Instant::now(), Duration::from_millis(40)); - - // Should have variance - let variance = validity.rtt_variance(); - assert!(variance.is_some()); - assert!(variance.unwrap().as_millis() > 0); - - // Average latency should be around 30ms - let avg = validity.avg_latency(); - assert!(avg.is_some()); - let avg_ms = avg.unwrap().as_millis(); - assert!((25..=35).contains(&avg_ms)); - } - - #[tokio::test] - async fn test_quality_score_with_high_variance() { - let mut validity = PathValidity::new(Instant::now(), Duration::from_millis(10)); - - // Add highly varying latencies (simulating congestion) - for i in 0..8 { - let latency = if i % 2 == 0 { - Duration::from_millis(10) - } else { - Duration::from_millis(100) - }; - validity.update_pong(Instant::now(), latency); - validity.record_ping_sent(); - } - - // Quality should be penalized due to high variance - let quality = validity.quality_score(); - assert!(quality < 0.9); // Should be penalized - } - - #[tokio::test] - async fn test_connection_latency_histogram() { - use crate::magicsock::Metrics as MagicsockMetrics; - - let metrics = MagicsockMetrics::default(); - let mut validity = PathValidity::new(Instant::now(), Duration::from_millis(10)); - - validity.record_metrics(&metrics); - assert_eq!(metrics.connection_latency_ms.count(), 1); - - validity.update_pong(Instant::now(), Duration::from_millis(25)); - validity.record_metrics(&metrics); - assert_eq!(metrics.connection_latency_ms.count(), 2); - - validity.update_pong(Instant::now(), Duration::from_millis(50)); - validity.record_metrics(&metrics); - assert_eq!(metrics.connection_latency_ms.count(), 3); - - validity.update_pong(Instant::now(), Duration::from_millis(100)); - validity.record_metrics(&metrics); - assert_eq!(metrics.connection_latency_ms.count(), 4); - - let buckets = metrics.connection_latency_ms.buckets(); - assert!(!buckets.is_empty()); - - let p50 = metrics.connection_latency_ms.percentile(0.5); - assert!(p50 > 10.0 && p50 < 100.0); - - let p95 = metrics.connection_latency_ms.percentile(0.95); - assert!(p95 >= p50); - } -} diff --git a/iroh/src/magicsock/endpoint_map/udp_paths.rs b/iroh/src/magicsock/endpoint_map/udp_paths.rs deleted file mode 100644 index a378ed0bdf2..00000000000 --- a/iroh/src/magicsock/endpoint_map/udp_paths.rs +++ /dev/null @@ -1,249 +0,0 @@ -//! Path state for UDP addresses of a single peer endpoint. -//! -//! This started as simply moving the [`EndpointState`]'s `direct_addresses` and `best_addr` -//! into one place together. The aim is for external places to not directly interact with -//! the inside and instead only notifies this struct of state changes to each path. -//! -//! [`EndpointState`]: super::endpoint_state::EndpointState -use std::{collections::BTreeMap, net::SocketAddr}; - -use n0_future::time::Instant; -use tracing::{Level, event}; - -use super::{IpPort, path_state::PathState}; - -/// The address on which to send datagrams over UDP. 
-/// -/// The [`MagicSock`] sends packets to zero or one UDP address, depending on the known paths -/// to the remote endpoint. This conveys the UDP address to send on from the [`EndpointUdpPaths`] -/// to the [`EndpointState`]. -/// -/// [`EndpointUdpPaths`] contains all the UDP path states, while [`EndpointState`] has to decide the -/// bigger picture including the relay server. -/// -/// See [`EndpointUdpPaths::send_addr`]. -/// -/// [`MagicSock`]: crate::magicsock::MagicSock -/// [`EndpointState`]: super::endpoint_state::EndpointState -#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] -pub(super) enum UdpSendAddr { - /// The UDP address can be relied on to deliver data to the remote endpoint. - /// - /// This means this path is usable with a reasonable latency and can be fully trusted to - /// transport payload data to the remote endpoint. - Valid(SocketAddr), - /// The UDP address is highly likely to work, but has not been used for a while. - /// - /// The path should be usable but has not carried DISCO or payload data for a little too - /// long. It is best to also use a backup, i.e. relay, path if possible. - Outdated(SocketAddr), - /// The UDP address is not known to work, but it might. - /// - /// We know this UDP address belongs to the remote endpoint, but we do not know if the path - /// already works or may need holepunching before it will start to work. It might even - /// never work. It is still useful to send to this together with backup path, - /// i.e. relay, in case the path works: if the path does not need holepunching it might - /// be much faster. And if there is no relay path at all it might be the only way to - /// establish a connection. - Unconfirmed(SocketAddr), - /// No known UDP path exists to the remote endpoint. - #[default] - None, -} - -impl UdpSendAddr { - pub fn get_addr(&self) -> Option { - match self { - UdpSendAddr::Valid(addr) - | UdpSendAddr::Outdated(addr) - | UdpSendAddr::Unconfirmed(addr) => Some(*addr), - UdpSendAddr::None => None, - } - } -} - -/// The UDP paths for a single endpoint. -/// -/// Paths are identified by the [`IpPort`] of their UDP address. -/// -/// Initially this collects two structs directly from the [`EndpointState`] into one place, -/// leaving the APIs and astractions the same. The goal is that this slowly migrates -/// directly interacting with this data into only receiving [`PathState`] updates. This -/// will consolidate the logic of direct path selection and make this simpler to reason -/// about. However doing that all at once is too large a refactor. -/// -/// [`EndpointState`]: super::endpoint_state::EndpointState -#[derive(Debug, Default)] -pub(super) struct EndpointUdpPaths { - /// The state for each of this endpoint's direct paths. - paths: BTreeMap, - /// The current address we use to send on. - /// - /// This is *almost* the same as going through `paths` and finding - /// the best one, except that this is - /// 1. Not updated in `send_addr`, but instead when there's changes to `paths`, so that `send_addr` can take `&self`. - /// 2. Slightly sticky: It only changes when - /// - the current send addr is not a validated path anymore or - /// - we received a pong with lower latency. - best: UdpSendAddr, - /// The current best address to send on from all IPv4 addresses we have available. - /// - /// Follows the same logic as `best` above, but doesn't include any IPv6 addresses. 
- best_ipv4: UdpSendAddr, -} - -pub(super) struct MutAccess<'a> { - now: Instant, - inner: &'a mut EndpointUdpPaths, -} - -impl<'a> MutAccess<'a> { - pub fn paths(&mut self) -> &mut BTreeMap { - &mut self.inner.paths - } - - pub fn has_best_addr_changed(self) -> bool { - let changed = self.inner.update_to_best_addr(self.now); - std::mem::forget(self); // don't run drop - changed - } -} - -impl Drop for MutAccess<'_> { - fn drop(&mut self) { - self.inner.update_to_best_addr(self.now); - } -} - -impl EndpointUdpPaths { - pub(super) fn new() -> Self { - Default::default() - } - - #[cfg(test)] - pub(super) fn from_parts(paths: BTreeMap, best: UdpSendAddr) -> Self { - Self { - paths, - best_ipv4: best, // we only use ipv4 addrs in tests - best, - } - } - - /// Returns the current UDP address to send on. - pub(super) fn send_addr(&self, have_ipv6: bool) -> &UdpSendAddr { - if !have_ipv6 { - // If it's a valid address, it doesn't matter if our interface scan determined that we - // "probably" don't have IPv6, because we clearly were able to send and receive a ping/pong over IPv6. - if matches!(&self.best, UdpSendAddr::Valid(_)) { - return &self.best; - } - return &self.best_ipv4; - } - &self.best - } - - /// Returns a guard for accessing the inner paths mutably. - /// - /// This guard ensures that [`Self::send_addr`] will be updated on drop. - pub(super) fn access_mut(&mut self, now: Instant) -> MutAccess<'_> { - MutAccess { now, inner: self } - } - - /// Returns immutable access to the inner paths. - pub(super) fn paths(&self) -> &BTreeMap { - &self.paths - } - - /// Changes the current best address(es) to ones chosen as described in [`Self::best_addr`] docs. - /// - /// Returns whether one of the best addresses had to change. - /// - /// This should be called any time that `paths` is modified. - fn update_to_best_addr(&mut self, now: Instant) -> bool { - let best_ipv4 = self.best_addr(false, now); - let best = self.best_addr(true, now); - let mut changed = false; - if best_ipv4 != self.best_ipv4 { - event!( - target: "iroh::_events::udp::best_ipv4", - Level::DEBUG, - ?best_ipv4, - ); - changed = true; - } - if best != self.best { - event!( - target: "iroh::_events::udp::best", - Level::DEBUG, - ?best, - ); - changed = true; - } - self.best_ipv4 = best_ipv4; - self.best = best; - changed - } - - /// Returns the current best address of all available paths, ignoring - /// the currently chosen best address. - /// - /// We try to find the lowest latency [`UdpSendAddr::Valid`], if one exists, otherwise - /// we try to find the lowest latency [`UdpSendAddr::Outdated`], if one exists, otherwise - /// we return essentially an arbitrary [`UdpSendAddr::Unconfirmed`]. - /// - /// If we don't have any addresses, returns [`UdpSendAddr::None`]. - /// - /// If `have_ipv6` is false, we only search among ipv4 candidates. - fn best_addr(&self, have_ipv6: bool, now: Instant) -> UdpSendAddr { - let Some((ipp, path)) = self - .paths - .iter() - .filter(|(ipp, _)| have_ipv6 || ipp.ip.is_ipv4()) - .max_by_key(|(ipp, path)| { - // We find the best by sorting on a key of type (Option>, Option>, bool) - // where the first is set to Some(ReverseOrd(latency)) iff path.is_valid(now) and - // the second is set to Some(ReverseOrd(latency)) if path.is_outdated(now) and - // the third is set to whether the ipp is ipv6. - // This makes max_by_key sort for the lowest valid latency first, then sort for - // the lowest outdated latency second, and if latencies are equal, it'll sort IPv6 paths first. 
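// Editor's note, not part of the original file: an illustration of how the
// sort key described above ranks candidate paths (Some > None, and ReverseOrd
// makes smaller latencies compare greater under max_by_key):
//
//   valid,    20ms  -> (Some(ReverseOrd(20ms)), None, _)  // highest key, wins
//   valid,    50ms  -> (Some(ReverseOrd(50ms)), None, _)
//   outdated, 10ms  -> (None, Some(ReverseOrd(10ms)), _)
//   unconfirmed     -> (None, None, _)
//
// Any valid path therefore outranks any outdated path, which in turn outranks
// unconfirmed ones, regardless of latency.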
- let is_ipv6 = ipp.ip.is_ipv6(); - if let Some(latency) = path.validity.latency_if_valid(now) { - (Some(ReverseOrd(latency)), None, is_ipv6) - } else if let Some(latency) = path.validity.latency_if_outdated(now) { - (None, Some(ReverseOrd(latency)), is_ipv6) - } else { - (None, None, is_ipv6) - } - }) - else { - return UdpSendAddr::None; - }; - - if path.validity.is_valid(now) { - UdpSendAddr::Valid((*ipp).into()) - } else if path.validity.is_outdated(now) { - UdpSendAddr::Outdated((*ipp).into()) - } else { - UdpSendAddr::Unconfirmed((*ipp).into()) - } - } -} - -/// Implements the reverse [`Ord`] implementation for the wrapped type. -/// -/// Literally calls [`std::cmp::Ordering::reverse`] on the inner value's -/// ordering. -#[derive(PartialEq, Eq)] -struct ReverseOrd(N); - -impl Ord for ReverseOrd { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.0.cmp(&other.0).reverse() - } -} - -impl PartialOrd for ReverseOrd { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} diff --git a/iroh/src/magicsock/mapped_addrs.rs b/iroh/src/magicsock/mapped_addrs.rs new file mode 100644 index 00000000000..f12482f23f5 --- /dev/null +++ b/iroh/src/magicsock/mapped_addrs.rs @@ -0,0 +1,344 @@ +//! The various mapped addresses we use. + +//! We use non-IP transports to carry datagrams. Yet Quinn needs to address those +//! transports using IPv6 addresses. These defines mappings of several IPv6 Unique Local +//! Address ranges we use to keep track of the various "fake" address types we use. + +use std::{ + fmt, + hash::Hash, + net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}, + sync::{ + Arc, + atomic::{AtomicU64, Ordering}, + }, +}; + +use iroh_base::{EndpointId, RelayUrl}; +use n0_error::{e, stack_error}; +use rustc_hash::FxHashMap; +use tracing::{error, trace}; + +use super::transports; + +/// The Prefix/L of all Unique Local Addresses. +const ADDR_PREFIXL: u8 = 0xfd; + +/// The Global ID used in n0's Unique Local Addresses. +const ADDR_GLOBAL_ID: [u8; 5] = [21, 7, 10, 81, 11]; + +/// The Subnet ID for [`RelayMappedAddr]. +const RELAY_MAPPED_SUBNET: [u8; 2] = [0, 1]; + +/// The Subnet ID for [`EndpointIdMappedAddr`]. +const ENDPOINT_ID_SUBNET: [u8; 2] = [0; 2]; + +/// A default fake addr, using the maximum addr that the internal fake addrs could be using. +pub const DEFAULT_FAKE_ADDR: SocketAddrV6 = SocketAddrV6::new( + Ipv6Addr::new( + u16::from_be_bytes([ADDR_PREFIXL, 21]), + u16::from_be_bytes([7, 10]), + u16::from_be_bytes([81, 11]), + u16::from_be_bytes([0, 0]), + u16::MAX, + u16::MAX, + u16::MAX, + u16::MAX, + ), + MAPPED_PORT, + 0, + 0, +); + +/// The dummy port used for all mapped addresses. +/// +/// We map each entity, usually an [`EndpointId`], to an IPv6 address. But socket addresses +/// involve ports, so we use a dummy fixed port when creating socket addresses. +const MAPPED_PORT: u16 = 12345; + +/// Counter to always generate unique addresses for [`RelayMappedAddr`]. +static RELAY_ADDR_COUNTER: AtomicU64 = AtomicU64::new(1); + +/// Counter to always generate unique addresses for [`EndpointIdMappedAddr`]. +static ENDPOINT_ID_ADDR_COUNTER: AtomicU64 = AtomicU64::new(1); + +/// Generic mapped address. +/// +/// Allows implementing [`AddrMap`]. +pub(crate) trait MappedAddr { + /// Generates a new mapped address in the IPv6 Unique Local Address space. + fn generate() -> Self; + + /// Returns a consistent [`SocketAddr`] for the mapped addr. + /// + /// This socket address does not have a routable IP address. 
It uses a fake but + /// consistent port number, since the port does not play a role in the addressing. This + /// socket address is only to be used to pass into Quinn. + fn private_socket_addr(&self) -> SocketAddr; +} + +/// An enum encompassing all the mapped and unmapped addresses. +/// +/// This is essentially a slightly-stronger typed version of the IPv6 mapped addresses that +/// we use on the Quinn side. It categorises the addressed in what kind of mapped or +/// unmapped addresses they are. +/// +/// It does not guarantee that a mapped address exists in the mapping. Or that a particular +/// address is even supported on this platform. Hence no wasm exceptions here. +#[derive(Clone, Debug)] +pub(crate) enum MultipathMappedAddr { + /// An address for a [`EndpointId`], via one or more paths. + Mixed(EndpointIdMappedAddr), + /// An address for a particular [`EndpointId`] via a particular relay. + Relay(RelayMappedAddr), + /// An IP based transport address. + Ip(SocketAddr), +} + +impl From for MultipathMappedAddr { + fn from(value: SocketAddr) -> Self { + match value.ip() { + IpAddr::V4(_) => Self::Ip(value), + IpAddr::V6(addr) => { + if let Ok(addr) = EndpointIdMappedAddr::try_from(addr) { + return Self::Mixed(addr); + } + if let Ok(addr) = RelayMappedAddr::try_from(addr) { + return Self::Relay(addr); + } + Self::Ip(value) + } + } + } +} + +/// An address used to address a endpoint on any or all paths. +/// +/// This is only used for initially connecting to a remote endpoint. We instruct Quinn to +/// send to this address, and duplicate all packets for this address to send on all paths we +/// might want to send the initial on: +/// +/// - If this the first connection to the remote endpoint we don't know which path will work +/// and send to all of them. +/// +/// - If there already is an active connection to this endpoint we now which path to use. +/// +/// It is but a newtype around an IPv6 Unique Local Addr. And in our QUIC-facing socket +/// APIs like [`quinn::AsyncUdpSocket`] it comes in as the inner [`Ipv6Addr`], in those +/// interfaces we have to be careful to do the conversion to this type. +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub(crate) struct EndpointIdMappedAddr(Ipv6Addr); + +impl MappedAddr for EndpointIdMappedAddr { + /// Generates a globally unique fake UDP address. + /// + /// This generates and IPv6 Unique Local Address according to RFC 4193. + fn generate() -> Self { + let mut addr = [0u8; 16]; + addr[0] = ADDR_PREFIXL; + addr[1..6].copy_from_slice(&ADDR_GLOBAL_ID); + addr[6..8].copy_from_slice(&ENDPOINT_ID_SUBNET); + + let counter = ENDPOINT_ID_ADDR_COUNTER.fetch_add(1, Ordering::Relaxed); + addr[8..16].copy_from_slice(&counter.to_be_bytes()); + + Self(Ipv6Addr::from(addr)) + } + + /// Returns a consistent [`SocketAddr`] for the [`EndpointIdMappedAddr`]. + /// + /// This socket address does not have a routable IP address and port. + /// + /// This uses a made-up port number, since the port does not play a role in the + /// addressing. This socket address is only to be used to pass into Quinn. 
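+    // Illustrative sketch of the layout produced by `generate` above (values
+    // assume the counter is at 1): the resulting bytes are
+    //
+    //   fd 15 07 0a 51 0b 00 00 | 00 00 00 00 00 00 00 01
+    //
+    // i.e. the ULA address fd15:70a:510b::1. Relay-mapped addresses use the
+    // subnet [0, 1] instead and come out as fd15:70a:510b:1::<counter>.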
+ fn private_socket_addr(&self) -> SocketAddr { + SocketAddr::new(IpAddr::from(self.0), MAPPED_PORT) + } +} + +impl std::fmt::Display for EndpointIdMappedAddr { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "EndpointIdMappedAddr({})", self.0) + } +} + +impl TryFrom for EndpointIdMappedAddr { + type Error = EndpointIdMappedAddrError; + + fn try_from(value: Ipv6Addr) -> Result { + let octets = value.octets(); + if octets[0] == ADDR_PREFIXL + && octets[1..6] == ADDR_GLOBAL_ID + && octets[6..8] == ENDPOINT_ID_SUBNET + { + return Ok(Self(value)); + } + Err(e!(EndpointIdMappedAddrError)) + } +} + +/// Can occur when converting a [`SocketAddr`] to an [`EndpointIdMappedAddr`] +#[stack_error(derive, add_meta)] +#[error("Failed to convert")] +pub(crate) struct EndpointIdMappedAddrError; + +/// An Ipv6 ULA address, identifying a relay path for a [`EndpointId`]. +/// +/// Since iroh endpoint are reachable via a relay server we have a network path indicated by +/// the `(EndpointId, RelayUrl)`. However Quinn can only handle socket addresses, so we use +/// IPv6 addresses in a private IPv6 Unique Local Address range, which map to a unique +/// `(EndointId, RelayUrl)` pair. +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)] +pub(crate) struct RelayMappedAddr(Ipv6Addr); + +impl MappedAddr for RelayMappedAddr { + /// Generates a globally unique fake UDP address. + /// + /// This generates a new IPv6 address in the Unique Local Address range (RFC 4193) + /// which is recognised by iroh as an IP mapped address. + fn generate() -> Self { + let mut addr = [0u8; 16]; + addr[0] = ADDR_PREFIXL; + addr[1..6].copy_from_slice(&ADDR_GLOBAL_ID); + addr[6..8].copy_from_slice(&RELAY_MAPPED_SUBNET); + + let counter = RELAY_ADDR_COUNTER.fetch_add(1, Ordering::Relaxed); + addr[8..16].copy_from_slice(&counter.to_be_bytes()); + + Self(Ipv6Addr::from(addr)) + } + + /// Returns a consistent [`SocketAddr`] for the [`RelayMappedAddr`]. + /// + /// This socket address does not have a routable IP address and port. + /// + /// This uses a made-up port number, since the port does not play a role in the + /// addressing. This socket address is only to be used to pass into Quinn. + fn private_socket_addr(&self) -> SocketAddr { + SocketAddr::new(IpAddr::from(self.0), MAPPED_PORT) + } +} + +impl TryFrom for RelayMappedAddr { + type Error = RelayMappedAddrError; + + fn try_from(value: Ipv6Addr) -> std::result::Result { + let octets = value.octets(); + if octets[0] == ADDR_PREFIXL + && octets[1..6] == ADDR_GLOBAL_ID + && octets[6..8] == RELAY_MAPPED_SUBNET + { + return Ok(Self(value)); + } + Err(e!(RelayMappedAddrError)) + } +} + +/// Can occur when converting a [`SocketAddr`] to an [`RelayMappedAddr`] +#[stack_error(derive, add_meta)] +#[error("Failed to convert")] +pub(crate) struct RelayMappedAddrError; + +impl std::fmt::Display for RelayMappedAddr { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "RelayMappedAddr({})", self.0) + } +} + +/// A bi-directional map between a key and a [`MappedAddr`]. +#[derive(Debug, Clone)] +pub(super) struct AddrMap { + inner: Arc>>, +} + +// Manual impl because derive ends up requiring T: Default. +impl Default for AddrMap { + fn default() -> Self { + Self { + inner: Default::default(), + } + } +} + +impl AddrMap +where + K: Eq + Hash + Clone + fmt::Debug, + V: MappedAddr + Eq + Hash + Copy + fmt::Debug, +{ + /// Returns the [`MappedAddr`], generating one if needed. 
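+    // Hypothetical usage sketch: `map`, `endpoint_id` and the concrete types are
+    // illustrative only. The map hands out one stable mapped address per key and
+    // supports the reverse lookup:
+    //
+    //   let map: AddrMap<EndpointId, EndpointIdMappedAddr> = AddrMap::default();
+    //   let mapped = map.get(&endpoint_id);                 // generated on first use
+    //   assert_eq!(map.get(&endpoint_id), mapped);          // stable on repeat calls
+    //   assert_eq!(map.lookup(&mapped), Some(endpoint_id)); // reverse direction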
+ pub(super) fn get(&self, key: &K) -> V { + let mut inner = self.inner.lock().expect("poisoned"); + match inner.addrs.get(key) { + Some(addr) => *addr, + None => { + let addr = V::generate(); + inner.addrs.insert(key.clone(), addr); + inner.lookup.insert(addr, key.clone()); + trace!(?addr, ?key, "generated new addr"); + addr + } + } + } + + /// Performs the reverse lookup. + pub(super) fn lookup(&self, addr: &V) -> Option { + let inner = self.inner.lock().expect("poisoned"); + inner.lookup.get(addr).cloned() + } +} + +#[derive(Debug)] +struct AddrMapInner { + addrs: FxHashMap, + lookup: FxHashMap, +} + +// Manual impl because derive ends up requiring T: Default. +impl Default for AddrMapInner { + fn default() -> Self { + Self { + addrs: Default::default(), + lookup: Default::default(), + } + } +} + +/// Functions for the relay mapped address map. +impl AddrMap<(RelayUrl, EndpointId), RelayMappedAddr> { + /// Converts a mapped socket address to a transport address. + /// + /// This takes a socket address, converts it into a [`MultipathMappedAddr`] and then tries + /// to convert the mapped address into a [`transports::Addr`]. + /// + /// Returns `Some` with the transport address for IP mapped addresses and for relay mapped + /// addresses if an entry for the mapped address exists in `self`. + /// + /// Returns `None` and emits an error log if the mapped address is a [`MultipathMappedAddr::Mixed`], + /// or if the mapped address is a [`MultipathMappedAddr::Relay`] and `self` does not contain the + /// mapped address. + pub(crate) fn to_transport_addr( + &self, + addr: impl Into, + ) -> Option { + match addr.into() { + MultipathMappedAddr::Mixed(_) => { + error!( + "Failed to convert addr to transport addr: Mixed mapped addr has no transport address" + ); + None + } + MultipathMappedAddr::Relay(relay_mapped_addr) => { + match self.lookup(&relay_mapped_addr) { + Some(parts) => Some(transports::Addr::from(parts)), + None => { + error!( + "Failed to convert addr to transport addr: Unknown relay mapped addr" + ); + None + } + } + } + MultipathMappedAddr::Ip(addr) => Some(transports::Addr::from(addr)), + } + } +} diff --git a/iroh/src/magicsock/metrics.rs b/iroh/src/magicsock/metrics.rs index e6f53d95c7c..1bc9b9eb0b3 100644 --- a/iroh/src/magicsock/metrics.rs +++ b/iroh/src/magicsock/metrics.rs @@ -27,27 +27,15 @@ pub struct Metrics { /// Number of datagrams received using GRO pub recv_gro_datagrams: Counter, - // Disco packets - pub send_disco_udp: Counter, - pub send_disco_relay: Counter, - pub sent_disco_udp: Counter, - pub sent_disco_relay: Counter, - pub sent_disco_ping: Counter, - pub sent_disco_pong: Counter, - pub sent_disco_call_me_maybe: Counter, - pub recv_disco_bad_key: Counter, - pub recv_disco_bad_parse: Counter, - - pub recv_disco_udp: Counter, - pub recv_disco_relay: Counter, - pub recv_disco_ping: Counter, - pub recv_disco_pong: Counter, - pub recv_disco_call_me_maybe: Counter, - pub recv_disco_call_me_maybe_bad_disco: Counter, - // How many times our relay home endpoint DI has changed from non-zero to a different non-zero. pub relay_home_change: Counter, + /* + * Holepunching metrics + */ + /// The number of NAT traversal attempts initiated. 
+ pub nat_traversal: Counter, + /* * Connection Metrics */ diff --git a/iroh/src/magicsock/remote_map.rs b/iroh/src/magicsock/remote_map.rs new file mode 100644 index 00000000000..ec1ef280b27 --- /dev/null +++ b/iroh/src/magicsock/remote_map.rs @@ -0,0 +1,234 @@ +use std::{ + collections::{BTreeSet, hash_map}, + hash::Hash, + net::{IpAddr, SocketAddr}, + sync::{Arc, Mutex}, + time::Duration, +}; + +use iroh_base::{EndpointId, RelayUrl}; +use rustc_hash::FxHashMap; +use serde::{Deserialize, Serialize}; +use tokio::sync::mpsc; + +pub(crate) use self::remote_state::PathsWatcher; +pub(super) use self::remote_state::RemoteStateMessage; +pub use self::remote_state::{PathInfo, PathInfoList}; +use self::remote_state::{RemoteStateActor, RemoteStateHandle}; +use super::{ + DirectAddr, MagicsockMetrics, + mapped_addrs::{AddrMap, EndpointIdMappedAddr, RelayMappedAddr}, +}; +use crate::discovery::ConcurrentDiscovery; + +mod remote_state; + +/// Interval in which handles to closed [`RemoteStateActor`]s should be removed. +pub(super) const REMOTE_MAP_GC_INTERVAL: Duration = Duration::from_secs(60); + +// TODO: use this +// /// Number of endpoints that are inactive for which we keep info about. This limit is enforced +// /// periodically via [`NodeMap::prune_inactive`]. +// const MAX_INACTIVE_NODES: usize = 30; + +/// Map containing all the state for endpoints. +/// +/// - Has actors which each manage all the connection state for a remote endpoint. +/// +/// - Has the mapped addresses we use to refer to non-IP transports destinations into IPv6 +/// addressing space that is used by Quinn. +#[derive(Debug)] +pub(crate) struct RemoteMap { + // + // State we keep about remote endpoints. + // + /// The actors tracking each remote endpoint. + actor_handles: Mutex>, + /// The mapping between [`EndpointId`]s and [`EndpointIdMappedAddr`]s. + pub(super) endpoint_mapped_addrs: AddrMap, + /// The mapping between endpoints via a relay and their [`RelayMappedAddr`]s. + pub(super) relay_mapped_addrs: AddrMap<(RelayUrl, EndpointId), RelayMappedAddr>, + + // + // State needed to start a new RemoteStateHandle. + // + /// The endpoint ID of the local endpoint. + local_endpoint_id: EndpointId, + metrics: Arc, + /// The "direct" addresses known for our local endpoint + local_direct_addrs: n0_watcher::Direct>, + discovery: ConcurrentDiscovery, +} + +impl RemoteMap { + /// Creates a new [`RemoteMap`]. + pub(super) fn new( + local_endpoint_id: EndpointId, + metrics: Arc, + local_direct_addrs: n0_watcher::Direct>, + discovery: ConcurrentDiscovery, + ) -> Self { + Self { + actor_handles: Mutex::new(FxHashMap::default()), + endpoint_mapped_addrs: Default::default(), + relay_mapped_addrs: Default::default(), + local_endpoint_id, + metrics, + local_direct_addrs, + discovery, + } + } + + pub(super) fn endpoint_mapped_addr(&self, eid: EndpointId) -> EndpointIdMappedAddr { + self.endpoint_mapped_addrs.get(&eid) + } + + /// Removes the handles for terminated [`RemoteStateActor`]s from the endpoint map. + /// + /// This should be called periodically to remove handles to endpoint state actors + /// that have shutdown after their idle timeout expired. + pub(super) fn remove_closed_remote_state_actors(&self) { + let mut handles = self.actor_handles.lock().expect("poisoned"); + handles.retain(|_eid, handle| !handle.sender.is_closed()) + } + + /// Returns the sender for the [`RemoteStateActor`]. + /// + /// If needed a new actor is started on demand. 
+ /// + /// [`RemoteStateActor`]: remote_state::RemoteStateActor + pub(super) fn remote_state_actor(&self, eid: EndpointId) -> mpsc::Sender { + let mut handles = self.actor_handles.lock().expect("poisoned"); + match handles.entry(eid) { + hash_map::Entry::Occupied(mut entry) => { + if let Some(sender) = entry.get().sender.get() { + sender + } else { + // The actor is dead: Start a new actor. + let (handle, sender) = self.start_remote_state_actor(eid); + entry.insert(handle); + sender + } + } + hash_map::Entry::Vacant(entry) => { + let (handle, sender) = self.start_remote_state_actor(eid); + entry.insert(handle); + sender + } + } + } + + /// Starts a new remote state actor and returns a handle and a sender. + /// + /// The handle is not inserted into the endpoint map, this must be done by the caller of this function. + fn start_remote_state_actor( + &self, + eid: EndpointId, + ) -> (RemoteStateHandle, mpsc::Sender) { + // Ensure there is a RemoteMappedAddr for this EndpointId. + self.endpoint_mapped_addrs.get(&eid); + let handle = RemoteStateActor::new( + eid, + self.local_endpoint_id, + self.local_direct_addrs.clone(), + self.relay_mapped_addrs.clone(), + self.metrics.clone(), + self.discovery.clone(), + ) + .start(); + let sender = handle.sender.get().expect("just created"); + (handle, sender) + } +} + +/// The origin or *source* through which an address associated with a remote endpoint +/// was discovered. +/// +/// An aggregate of the [`Source`]s of all the addresses of an endpoint describe the +/// [`Source`]s of the endpoint itself. +/// +/// A [`Source`] helps track how and where an address was learned. Multiple +/// sources can be associated with a single address, if we have discovered this +/// address through multiple means. +#[derive(Serialize, Deserialize, strum::Display, Debug, Clone, Eq, PartialEq, Hash)] +#[strum(serialize_all = "kebab-case")] +#[allow(private_interfaces)] +pub enum Source { + /// An endpoint communicated with us first via UDP. + Udp, + /// An endpoint communicated with us first via relay. + Relay, + /// Application layer added the address directly. + App, + /// The address was discovered by a discovery service. + #[strum(serialize = "{name}")] + Discovery { + /// The name of the discovery service that discovered the address. + name: String, + }, + /// Application layer with a specific name added the endpoint directly. + #[strum(serialize = "{name}")] + NamedApp { + /// The name of the application that added the endpoint + name: String, + }, + /// The address was advertised by a call-me-maybe DISCO message. + #[strum(serialize = "CallMeMaybe")] + CallMeMaybe { + /// private marker + _0: Private, + }, + /// We received a ping on the path. + #[strum(serialize = "Ping")] + Ping { + /// private marker + _0: Private, + }, + /// We established a connection on this address. + #[strum(serialize = "Connection")] + Connection { + /// private marker + _0: Private, + }, +} + +/// Helper to ensure certain `Source` variants can not be constructed externally. +#[derive(Serialize, Deserialize, Debug, Clone, Copy, Eq, PartialEq, Hash)] +struct Private; + +/// An (Ip, Port) pair. +/// +/// NOTE: storing an [`IpPort`] is safer than storing a [`SocketAddr`] because for IPv6 socket +/// addresses include fields that can't be assumed consistent even within a single connection. 
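+// Illustrative example: the From<SocketAddr> conversion below drops the
+// IPv6-only fields, so SocketAddrV6::new(ip, 443, 0, 0) and
+// SocketAddrV6::new(ip, 443, 0, 3) (differing only in scope_id) collapse to
+// the same IpPort { ip, port: 443 }.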
+#[derive(Debug, derive_more::Display, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] +#[display("{}", SocketAddr::from(*self))] +pub struct IpPort { + ip: IpAddr, + port: u16, +} + +impl From for IpPort { + fn from(socket_addr: SocketAddr) -> Self { + Self { + ip: socket_addr.ip(), + port: socket_addr.port(), + } + } +} + +impl From for SocketAddr { + fn from(ip_port: IpPort) -> Self { + let IpPort { ip, port } = ip_port; + (ip, port).into() + } +} + +impl IpPort { + pub fn ip(&self) -> &IpAddr { + &self.ip + } + + pub fn port(&self) -> u16 { + self.port + } +} diff --git a/iroh/src/magicsock/remote_map/remote_state.rs b/iroh/src/magicsock/remote_map/remote_state.rs new file mode 100644 index 00000000000..e7878e8d9c5 --- /dev/null +++ b/iroh/src/magicsock/remote_map/remote_state.rs @@ -0,0 +1,1323 @@ +use std::{ + collections::{BTreeSet, VecDeque}, + net::SocketAddr, + pin::Pin, + sync::Arc, + task::Poll, +}; + +use iroh_base::{EndpointId, RelayUrl, TransportAddr}; +use n0_error::StackResultExt; +use n0_future::{ + Either, FuturesUnordered, MergeUnbounded, Stream, StreamExt, + boxed::BoxStream, + task::{self, AbortOnDropHandle}, + time::{self, Duration, Instant}, +}; +use n0_watcher::{Watchable, Watcher}; +use quinn::{PathStats, WeakConnectionHandle}; +use quinn_proto::{PathError, PathEvent, PathId, PathStatus, iroh_hp}; +use rustc_hash::FxHashMap; +use smallvec::SmallVec; +use sync_wrapper::SyncStream; +use tokio::sync::oneshot; +use tokio_stream::wrappers::{BroadcastStream, errors::BroadcastStreamRecvError}; +use tracing::{Instrument, Level, debug, error, event, info_span, instrument, trace, warn}; + +use self::{ + guarded_channel::{GuardedReceiver, GuardedSender, guarded_channel}, + path_state::RemotePathState, +}; +use super::Source; +use crate::{ + discovery::{ConcurrentDiscovery, Discovery, DiscoveryError, DiscoveryItem}, + endpoint::DirectAddr, + magicsock::{ + HEARTBEAT_INTERVAL, MagicsockMetrics, PATH_MAX_IDLE_TIMEOUT, + mapped_addrs::{AddrMap, MappedAddr, RelayMappedAddr}, + remote_map::Private, + transports::{self, OwnedTransmit, TransportsSender}, + }, + util::MaybeFuture, +}; + +/// How often to attempt holepunching. +/// +/// If there have been no changes to the NAT address candidates, holepunching will not be +/// attempted more frequently than at this interval. +const HOLEPUNCH_ATTEMPTS_INTERVAL: Duration = Duration::from_secs(5); + +mod guarded_channel; +mod path_state; + +// TODO: use this +// /// The latency at or under which we don't try to upgrade to a better path. +// const GOOD_ENOUGH_LATENCY: Duration = Duration::from_millis(5); + +// TODO: use this +// /// How long since the last activity we try to keep an established endpoint peering alive. +// /// +// /// It's also the idle time at which we stop doing QAD queries to keep NAT mappings alive. +// pub(super) const SESSION_ACTIVE_TIMEOUT: Duration = Duration::from_secs(45); + +// TODO: use this +// /// How often we try to upgrade to a better path. +// /// +// /// Even if we have some non-relay route that works. +// const UPGRADE_INTERVAL: Duration = Duration::from_secs(60); + +/// The value which we close paths. +// TODO: Quinn should just do this. Also, I made this value up. +const APPLICATION_ABANDON_PATH: u8 = 30; + +/// The time after which an idle [`RemoteStateActor`] stops. +/// +/// The actor only enters the idle state if no connections are active and no inbox senders exist +/// apart from the one stored in the endpoint map. 
Stopping and restarting the actor in this state +/// is not an issue; a timeout here serves the purpose of not stopping-and-recreating actors +/// in a high frequency, and to keep data about previous path around for subsequent connections. +const ACTOR_MAX_IDLE_TIMEOUT: Duration = Duration::from_secs(60); + +/// A stream of events from all paths for all connections. +/// +/// The connection is identified using [`ConnId`]. The event `Err` variant happens when the +/// actor has lagged processing the events, which is rather critical for us. +type PathEvents = MergeUnbounded< + Pin< + Box)> + Send + Sync>, + >, +>; + +/// A stream of events of announced NAT traversal candidate addresses for all connections. +/// +/// The connection is identified using [`ConnId`]. +type AddrEvents = MergeUnbounded< + Pin< + Box< + dyn Stream)> + + Send + + Sync, + >, + >, +>; + +/// Either a stream of incoming results from [`ConcurrentDiscovery::resolve`] or infinitely pending. +/// +/// Set to [`Either::Left`] with an always-pending stream while discovery is not running, and to +/// [`Either::Right`] while discovery is running. +/// +/// The stream returned from [`ConcurrentDiscovery::resolve`] is `!Sync`. We use the (safe) [`SyncStream`] +/// wrapper to make it `Sync` so that the [`RemoteStateActor::run`] future stays `Send`. +type DiscoveryStream = Either< + n0_future::stream::Pending>, + SyncStream>>, +>; + +/// List of addrs and path ids for open paths in a connection. +pub(crate) type PathAddrList = SmallVec<[(TransportAddr, PathId); 4]>; + +/// The state we need to know about a single remote endpoint. +/// +/// This actor manages all connections to the remote endpoint. It will trigger holepunching +/// and select the best path etc. +pub(super) struct RemoteStateActor { + /// The endpoint ID of the remote endpoint. + endpoint_id: EndpointId, + /// The endpoint ID of the local endpoint. + local_endpoint_id: EndpointId, + + // Hooks into the rest of the MagicSocket. + // + /// Metrics. + metrics: Arc, + /// Our local addresses. + /// + /// These are our local addresses and any reflexive transport addresses. + local_direct_addrs: n0_watcher::Direct>, + /// The mapping between endpoints via a relay and their [`RelayMappedAddr`]s. + relay_mapped_addrs: AddrMap<(RelayUrl, EndpointId), RelayMappedAddr>, + /// Discovery service, cloned from the magicsock. + discovery: ConcurrentDiscovery, + + // Internal state - Quinn Connections we are managing. + // + /// All connections we have to this remote endpoint. + connections: FxHashMap, + /// Notifications when connections are closed. + connections_close: FuturesUnordered, + /// Events emitted by Quinn about path changes, for all paths, all connections. + path_events: PathEvents, + /// A stream of events of announced NAT traversal candidate addresses for all connections. + addr_events: AddrEvents, + + // Internal state - Holepunching and path state. + // + /// All possible paths we are aware of. + /// + /// These paths might be entirely impossible to use, since they are added by discovery + /// mechanisms. The are only potentially usable. + paths: RemotePathState, + /// Information about the last holepunching attempt. + last_holepunch: Option, + /// The path we currently consider the preferred path to the remote endpoint. + /// + /// **We expect this path to work.** If we become aware this path is broken then it is + /// set back to `None`. Having a selected path does not mean we may not be able to get + /// a better path: e.g. 
when the selected path is a relay path we still need to trigger + /// holepunching regularly. + /// + /// We only select a path once the path is functional in Quinn. + selected_path: Watchable>, + /// Time at which we should schedule the next holepunch attempt. + scheduled_holepunch: Option, + /// When to next attempt opening paths in [`Self::pending_open_paths`]. + scheduled_open_path: Option, + /// Paths which we still need to open. + /// + /// They failed to open because we did not have enough CIDs issued by the remote. + pending_open_paths: VecDeque, + + // Internal state - Discovery + // + /// Stream of discovery results, or always pending if discovery is not running. + discovery_stream: DiscoveryStream, +} + +impl RemoteStateActor { + #[allow(clippy::too_many_arguments)] + pub(super) fn new( + endpoint_id: EndpointId, + local_endpoint_id: EndpointId, + local_direct_addrs: n0_watcher::Direct>, + relay_mapped_addrs: AddrMap<(RelayUrl, EndpointId), RelayMappedAddr>, + metrics: Arc, + discovery: ConcurrentDiscovery, + ) -> Self { + Self { + endpoint_id, + local_endpoint_id, + metrics, + local_direct_addrs, + relay_mapped_addrs, + discovery, + connections: FxHashMap::default(), + connections_close: Default::default(), + path_events: Default::default(), + addr_events: Default::default(), + paths: Default::default(), + last_holepunch: None, + selected_path: Default::default(), + scheduled_holepunch: None, + scheduled_open_path: None, + pending_open_paths: VecDeque::new(), + discovery_stream: Either::Left(n0_future::stream::pending()), + } + } + + pub(super) fn start(self) -> RemoteStateHandle { + let (tx, rx) = guarded_channel(16); + let me = self.local_endpoint_id; + let endpoint_id = self.endpoint_id; + + // Ideally we'd use the endpoint span as parent. We'd have to plug that span into + // here somehow. Instead we have no parent and explicitly set the me attribute. If + // we don't explicitly set a span we get the spans from whatever call happens to + // first create the actor, which is often very confusing as it then keeps those + // spans for all logging of the actor. + let task = task::spawn(self.run(rx).instrument(info_span!( + parent: None, + "RemoteStateActor", + me = %me.fmt_short(), + remote = %endpoint_id.fmt_short(), + ))); + RemoteStateHandle { + sender: tx, + _task: AbortOnDropHandle::new(task), + } + } + + /// Runs the main loop of the actor. + /// + /// Note that the actor uses async handlers for tasks from the main loop. The actor is + /// not processing items from the inbox while waiting on any async calls. So some + /// discipline is needed to not turn pending for a long time. + async fn run(mut self, mut inbox: GuardedReceiver) { + trace!("actor started"); + let idle_timeout = time::sleep(ACTOR_MAX_IDLE_TIMEOUT); + n0_future::pin!(idle_timeout); + loop { + let scheduled_path_open = match self.scheduled_open_path { + Some(when) => MaybeFuture::Some(time::sleep_until(when)), + None => MaybeFuture::None, + }; + n0_future::pin!(scheduled_path_open); + let scheduled_hp = match self.scheduled_holepunch { + Some(when) => MaybeFuture::Some(time::sleep_until(when)), + None => MaybeFuture::None, + }; + n0_future::pin!(scheduled_hp); + if !inbox.is_idle() || !self.connections.is_empty() { + idle_timeout + .as_mut() + .reset(Instant::now() + ACTOR_MAX_IDLE_TIMEOUT); + } + tokio::select! 
{ + biased; + msg = inbox.recv() => { + match msg { + Some(msg) => self.handle_message(msg).await, + None => break, + } + } + Some((id, evt)) = self.path_events.next() => { + self.handle_path_event(id, evt); + } + Some((id, evt)) = self.addr_events.next() => { + trace!(?id, ?evt, "remote addrs updated, triggering holepunching"); + self.trigger_holepunching().await; + } + Some(conn_id) = self.connections_close.next(), if !self.connections_close.is_empty() => { + self.connections.remove(&conn_id); + if self.connections.is_empty() { + trace!("last connection closed - clearing selected_path"); + self.selected_path.set(None).ok(); + } + } + res = self.local_direct_addrs.updated() => { + if let Err(n0_watcher::Disconnected) = res { + trace!("direct address watcher disconnected, shutting down"); + break; + } + self.local_addrs_updated(); + trace!("local addrs updated, triggering holepunching"); + self.trigger_holepunching().await; + } + _ = &mut scheduled_path_open => { + trace!("triggering scheduled path_open"); + self.scheduled_open_path = None; + let mut addrs = std::mem::take(&mut self.pending_open_paths); + while let Some(addr) = addrs.pop_front() { + self.open_path(&addr); + } + } + _ = &mut scheduled_hp => { + trace!("triggering scheduled holepunching"); + self.scheduled_holepunch = None; + self.trigger_holepunching().await; + } + item = self.discovery_stream.next() => { + self.handle_discovery_item(item); + } + _ = &mut idle_timeout => { + if self.connections.is_empty() && inbox.close_if_idle() { + trace!("idle timeout expired and still idle: terminate actor"); + break; + } else { + // Seems like we weren't really idle, so we reset + idle_timeout.as_mut().reset(Instant::now() + ACTOR_MAX_IDLE_TIMEOUT); + } + } + } + } + trace!("actor terminating"); + } + + /// Handles an actor message. + /// + /// Error returns are fatal and kill the actor. + #[instrument(skip(self))] + async fn handle_message(&mut self, msg: RemoteStateMessage) { + // trace!("handling message"); + match msg { + RemoteStateMessage::SendDatagram(sender, transmit) => { + self.handle_msg_send_datagram(sender, transmit).await; + } + RemoteStateMessage::AddConnection(handle, tx) => { + self.handle_msg_add_connection(handle, tx).await; + } + RemoteStateMessage::ResolveRemote(addrs, tx) => { + self.handle_msg_resolve_remote(addrs, tx); + } + } + } + + /// Handles [`RemoteStateMessage::SendDatagram`]. + async fn handle_msg_send_datagram( + &mut self, + mut sender: TransportsSender, + transmit: OwnedTransmit, + ) { + // Sending datagrams might fail, e.g. because we don't have the right transports set + // up to handle sending this owned transmit to. + // After all, we try every single path that we know (relay URL, IP address), even + // though we might not have a relay transport or ip-capable transport set up. + // So these errors must not be fatal for this actor (or even this operation). + + if let Some(addr) = self.selected_path.get() { + trace!(?addr, "sending datagram to selected path"); + + if let Err(err) = send_datagram(&mut sender, addr.clone(), transmit).await { + debug!(?addr, "failed to send datagram on selected_path: {err:#}"); + } + } else { + trace!( + paths = ?self.paths.addrs().collect::>(), + "sending datagram to all known paths", + ); + if self.paths.is_empty() { + warn!("Cannot send datagrams: No paths to remote endpoint known"); + } + + for addr in self.paths.addrs() { + // We never want to send to our local addresses. + // The local address set is updated in the main loop so we can use `peek` here. 
+ if let transports::Addr::Ip(sockaddr) = addr + && self + .local_direct_addrs + .peek() + .iter() + .any(|a| a.addr == *sockaddr) + { + trace!(%sockaddr, "not sending datagram to our own address"); + } else if let Err(err) = + send_datagram(&mut sender, addr.clone(), transmit.clone()).await + { + debug!(?addr, "failed to send datagram: {err:#}"); + } + } + // This message is received *before* a connection is added. So we do + // not yet have a connection to holepunch. Instead we trigger + // holepunching when AddConnection is received. + } + } + + /// Handles [`RemoteStateMessage::AddConnection`]. + /// + /// Error returns are fatal and kill the actor. + async fn handle_msg_add_connection( + &mut self, + handle: WeakConnectionHandle, + tx: oneshot::Sender, + ) { + let pub_open_paths = Watchable::default(); + if let Some(conn) = handle.upgrade() { + // Remove any conflicting stable_ids from the local state. + let conn_id = ConnId(conn.stable_id()); + self.connections.remove(&conn_id); + + // Hook up paths, NAT addresses and connection closed event streams. + self.path_events.push(Box::pin( + BroadcastStream::new(conn.path_events()).map(move |evt| (conn_id, evt)), + )); + self.addr_events.push(Box::pin( + BroadcastStream::new(conn.nat_traversal_updates()).map(move |evt| (conn_id, evt)), + )); + self.connections_close.push(OnClosed::new(&conn)); + + // Add local addrs to the connection + let local_addrs = self + .local_direct_addrs + .get() + .iter() + .map(|d| d.addr) + .collect::>(); + Self::set_local_addrs(&conn, &local_addrs); + + // Store the connection + let conn_state = self + .connections + .entry(conn_id) + .insert_entry(ConnectionState { + handle: handle.clone(), + pub_open_paths: pub_open_paths.clone(), + paths: Default::default(), + open_paths: Default::default(), + path_ids: Default::default(), + }) + .into_mut(); + + // Store PathId(0), set path_status and select best path, check if holepunching + // is needed. + if let Some(path) = conn.path(PathId::ZERO) + && let Ok(socketaddr) = path.remote_address() + && let Some(path_remote) = self.relay_mapped_addrs.to_transport_addr(socketaddr) + { + trace!(?path_remote, "added new connection"); + let path_remote_is_ip = path_remote.is_ip(); + let status = match path_remote { + transports::Addr::Ip(_) => PathStatus::Available, + transports::Addr::Relay(_, _) => PathStatus::Backup, + }; + path.set_status(status).ok(); + conn_state.add_open_path(path_remote.clone(), PathId::ZERO); + self.paths + .insert_open_path(path_remote.clone(), Source::Connection { _0: Private }); + self.select_path(); + + if path_remote_is_ip { + // We may have raced this with a relay address. Try and add any + // relay addresses we have back. + let relays = self + .paths + .addrs() + .filter(|a| a.is_relay()) + .cloned() + .collect::>(); + for remote in relays { + self.open_path(&remote); + } + } + } + self.trigger_holepunching().await; + } + tx.send(PathsWatcher::new( + pub_open_paths.watch(), + self.selected_path.watch(), + handle, + )) + .ok(); + } + + /// Handles [`RemoteStateMessage::ResolveRemote`]. + fn handle_msg_resolve_remote( + &mut self, + addrs: BTreeSet, + tx: oneshot::Sender>, + ) { + let addrs = to_transports_addr(self.endpoint_id, addrs); + self.paths.insert_multiple(addrs, Source::App); + self.paths.resolve_remote(tx); + // Start discovery if we have no selected path. 
+ self.trigger_discovery(); + } + + fn handle_discovery_item(&mut self, item: Option>) { + match item { + None => { + self.discovery_stream = Either::Left(n0_future::stream::pending()); + self.paths.discovery_finished(Ok(())); + } + Some(Err(err)) => { + warn!("Discovery failed: {err:#}"); + self.discovery_stream = Either::Left(n0_future::stream::pending()); + self.paths.discovery_finished(Err(err)); + } + Some(Ok(item)) => { + if item.endpoint_id() != self.endpoint_id { + warn!(?item, "Discovery emitted item for wrong remote endpoint"); + } else { + let source = Source::Discovery { + name: item.provenance().to_string(), + }; + let addrs = + to_transports_addr(self.endpoint_id, item.into_endpoint_addr().addrs); + self.paths.insert_multiple(addrs, source); + } + } + } + } + + /// Triggers discovery for the remote endpoint, if needed. + /// + /// Does not start discovery if we have a selected path or if discovery is currently running. + fn trigger_discovery(&mut self) { + if self.selected_path.get().is_some() || matches!(self.discovery_stream, Either::Right(_)) { + return; + } + match self.discovery.resolve(self.endpoint_id) { + Some(stream) => self.discovery_stream = Either::Right(SyncStream::new(stream)), + None => self.paths.discovery_finished(Ok(())), + } + } + + /// Sets the current local addresses to QNT's state to all connections + fn local_addrs_updated(&mut self) { + let local_addrs = self + .local_direct_addrs + .get() + .iter() + .map(|d| d.addr) + .collect::>(); + + for conn in self.connections.values().filter_map(|s| s.handle.upgrade()) { + Self::set_local_addrs(&conn, &local_addrs); + } + // todo: trace + } + + /// Sets the current local addresses to QNT's state + fn set_local_addrs(conn: &quinn::Connection, local_addrs: &BTreeSet) { + let quinn_local_addrs = match conn.get_local_nat_traversal_addresses() { + Ok(addrs) => BTreeSet::from_iter(addrs), + Err(err) => { + warn!("failed to get local nat candidates: {err:#}"); + return; + } + }; + for addr in local_addrs.difference(&quinn_local_addrs) { + if let Err(err) = conn.add_nat_traversal_address(*addr) { + warn!("failed adding local addr: {err:#}",); + } + } + for addr in quinn_local_addrs.difference(local_addrs) { + if let Err(err) = conn.remove_nat_traversal_address(*addr) { + warn!("failed removing local addr: {err:#}"); + } + } + trace!(?local_addrs, "updated local QNT addresses"); + } + + /// Triggers holepunching to the remote endpoint. + /// + /// This will manage the entire process of holepunching with the remote endpoint. + /// + /// - Holepunching happens on the Connection with the lowest [`ConnId`] which is a + /// client. + /// - Both endpoints may initiate holepunching if both have a client connection. + /// - Any opened paths are opened on all other connections without holepunching. + /// - If there are no changes in local or remote candidate addresses since the + /// last attempt **and** there was a recent attempt, a trigger_holepunching call + /// will be scheduled instead. 
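The last bullet above (skip a round when neither side has new candidate addresses) reduces to a subset check over the candidate address sets, mirroring the comparison done in the method below. A minimal sketch with hypothetical names, not part of the patch:

```rust
use std::collections::BTreeSet;
use std::net::SocketAddr;

/// True if either side has candidate addresses that were not part of the last
/// holepunch attempt. Addresses may disappear without forcing a new round;
/// only additions do.
fn has_new_candidates(
    last_local: &BTreeSet<SocketAddr>,
    last_remote: &BTreeSet<SocketAddr>,
    local: &BTreeSet<SocketAddr>,
    remote: &BTreeSet<SocketAddr>,
) -> bool {
    !local.is_subset(last_local) || !remote.is_subset(last_remote)
}
```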
+ async fn trigger_holepunching(&mut self) { + if self.connections.is_empty() { + trace!("not holepunching: no connections"); + return; + } + + let Some(conn) = self + .connections + .iter() + .filter_map(|(id, state)| state.handle.upgrade().map(|conn| (*id, conn))) + .filter(|(_, conn)| conn.side().is_client()) + .min_by_key(|(id, _)| *id) + .map(|(_, conn)| conn) + else { + trace!("not holepunching: no client connection"); + return; + }; + let remote_candidates = match conn.get_remote_nat_traversal_addresses() { + Ok(addrs) => BTreeSet::from_iter(addrs), + Err(err) => { + warn!("failed to get nat candidate addresses: {err:#}"); + return; + } + }; + let local_candidates: BTreeSet = self + .local_direct_addrs + .get() + .iter() + .map(|daddr| daddr.addr) + .collect(); + let new_candidates = self + .last_holepunch + .as_ref() + .map(|last_hp| { + // Addrs are allowed to disappear, but if there are new ones we need to + // holepunch again. + trace!( + ?last_hp, + ?local_candidates, + ?remote_candidates, + "candidates to holepunch?" + ); + !remote_candidates.is_subset(&last_hp.remote_candidates) + || !local_candidates.is_subset(&last_hp.local_candidates) + }) + .unwrap_or(true); + if !new_candidates { + if let Some(ref last_hp) = self.last_holepunch { + let next_hp = last_hp.when + HOLEPUNCH_ATTEMPTS_INTERVAL; + let now = Instant::now(); + if next_hp > now { + trace!(scheduled_in = ?(next_hp - now), "not holepunching: no new addresses"); + self.scheduled_holepunch = Some(next_hp); + return; + } + } + } + + self.do_holepunching(conn).await; + } + + /// Unconditionally perform holepunching. + #[instrument(skip_all)] + async fn do_holepunching(&mut self, conn: quinn::Connection) { + self.metrics.nat_traversal.inc(); + let local_candidates = self + .local_direct_addrs + .get() + .iter() + .map(|daddr| daddr.addr) + .collect::>(); + match conn.initiate_nat_traversal_round() { + Ok(remote_candidates) => { + let remote_candidates = remote_candidates + .iter() + .map(|addr| SocketAddr::new(addr.ip().to_canonical(), addr.port())) + .collect(); + event!( + target: "iroh::_events::qnt::init", + Level::DEBUG, + remote = %self.endpoint_id.fmt_short(), + ?local_candidates, + ?remote_candidates, + ); + self.last_holepunch = Some(HolepunchAttempt { + when: Instant::now(), + local_candidates, + remote_candidates, + }); + } + Err(err) => { + warn!("failed to initiate NAT traversal: {err:#}"); + } + } + } + + /// Open the path on all connections. + /// + /// This goes through all the connections for which we are the client, and makes sure + /// the path exists, or opens it. 
+ #[instrument(level = "warn", skip(self))] + fn open_path(&mut self, open_addr: &transports::Addr) { + let path_status = match open_addr { + transports::Addr::Ip(_) => PathStatus::Available, + transports::Addr::Relay(_, _) => PathStatus::Backup, + }; + let quic_addr = match &open_addr { + transports::Addr::Ip(socket_addr) => *socket_addr, + transports::Addr::Relay(relay_url, eid) => self + .relay_mapped_addrs + .get(&(relay_url.clone(), *eid)) + .private_socket_addr(), + }; + + for (conn_id, conn_state) in self.connections.iter_mut() { + if conn_state.path_ids.contains_key(open_addr) { + continue; + } + let Some(conn) = conn_state.handle.upgrade() else { + continue; + }; + if conn.side().is_server() { + continue; + } + let fut = conn.open_path_ensure(quic_addr, path_status); + match fut.path_id() { + Some(path_id) => { + trace!(?conn_id, ?path_id, "opening new path"); + conn_state.add_path(open_addr.clone(), path_id); + } + None => { + let ret = now_or_never(fut); + match ret { + Some(Err(PathError::RemoteCidsExhausted)) => { + self.scheduled_open_path = + Some(Instant::now() + Duration::from_millis(333)); + self.pending_open_paths.push_back(open_addr.clone()); + trace!(?open_addr, "scheduling open_path"); + } + _ => warn!(?ret, "Opening path failed"), + } + } + } + } + } + + #[instrument(skip(self))] + fn handle_path_event( + &mut self, + conn_id: ConnId, + event: Result, + ) { + let Ok(event) = event else { + warn!("missed a PathEvent, RemoteStateActor lagging"); + // TODO: Is it possible to recover using the sync APIs to figure out what the + // state of the connection and it's paths are? + return; + }; + let Some(conn_state) = self.connections.get_mut(&conn_id) else { + trace!("event for removed connection"); + return; + }; + let Some(conn) = conn_state.handle.upgrade() else { + trace!("event for closed connection"); + return; + }; + trace!("path event"); + match event { + PathEvent::Opened { id: path_id } => { + let Some(path) = conn.path(path_id) else { + trace!("path open event for unknown path"); + return; + }; + // TODO: We configure this as defaults when we setup the endpoint, do we + // really need to duplicate this? + path.set_keep_alive_interval(Some(HEARTBEAT_INTERVAL)).ok(); + path.set_max_idle_timeout(Some(PATH_MAX_IDLE_TIMEOUT)).ok(); + + if let Ok(socketaddr) = path.remote_address() + && let Some(path_remote) = self.relay_mapped_addrs.to_transport_addr(socketaddr) + { + event!( + target: "iroh::_events::path::open", + Level::DEBUG, + remote = %self.endpoint_id.fmt_short(), + ?path_remote, + ?conn_id, + ?path_id, + ); + conn_state.add_open_path(path_remote.clone(), path_id); + self.paths + .insert_open_path(path_remote.clone(), Source::Connection { _0: Private }); + } + + self.select_path(); + } + PathEvent::Abandoned { id, path_stats } => { + trace!(?path_stats, "path abandoned"); + // This is the last event for this path. + if let Some(addr) = conn_state.remove_path(&id) { + self.paths.abandoned_path(&addr); + } + } + PathEvent::Closed { id, .. } | PathEvent::LocallyClosed { id, .. } => { + let Some(path_remote) = conn_state.paths.get(&id).cloned() else { + debug!("path not in path_id_map"); + return; + }; + event!( + target: "iroh::_events::path::closed", + Level::DEBUG, + remote = %self.endpoint_id.fmt_short(), + ?path_remote, + ?conn_id, + path_id = ?id, + ); + conn_state.remove_open_path(&id); + + // If one connection closes this path, close it on all connections. 
+ for (conn_id, conn_state) in self.connections.iter_mut() { + let Some(path_id) = conn_state.path_ids.get(&path_remote) else { + continue; + }; + let Some(conn) = conn_state.handle.upgrade() else { + continue; + }; + if let Some(path) = conn.path(*path_id) { + trace!(?path_remote, ?conn_id, ?path_id, "closing path"); + if let Err(err) = path.close(APPLICATION_ABANDON_PATH.into()) { + trace!( + ?path_remote, + ?conn_id, + ?path_id, + "path close failed: {err:#}" + ); + } + } + } + + // If the remote closed our selected path, select a new one. + self.select_path(); + } + PathEvent::RemoteStatus { .. } | PathEvent::ObservedAddr { .. } => { + // Nothing to do for these events. + } + } + } + + /// Selects the path with the lowest RTT, preferring direct paths. + /// + /// If there are direct paths, this selects the direct path with the lowest RTT. If + /// there are only relay paths, the relay path with the lowest RTT is chosen. + /// + /// The selected path is added to any connections which do not yet have it. Any unused + /// direct paths are closed for all connections. + #[instrument(skip_all)] + fn select_path(&mut self) { + // Find the lowest RTT across all connections for each open path. The long way, so + // we get to log *all* RTTs. + let mut all_path_rtts: FxHashMap> = FxHashMap::default(); + for conn_state in self.connections.values() { + let Some(conn) = conn_state.handle.upgrade() else { + continue; + }; + for (path_id, addr) in conn_state.open_paths.iter() { + if let Some(stats) = conn.path_stats(*path_id) { + all_path_rtts + .entry(addr.clone()) + .or_default() + .push(stats.rtt); + } + } + } + trace!(?all_path_rtts, "dumping all path RTTs"); + let path_rtts: FxHashMap = all_path_rtts + .into_iter() + .filter_map(|(addr, rtts)| rtts.into_iter().min().map(|rtt| (addr, rtt))) + .collect(); + + // Find the fastest direct or relay path. + const IPV6_RTT_ADVANTAGE: Duration = Duration::from_millis(3); + let direct_path = path_rtts + .iter() + .filter(|(addr, _rtt)| addr.is_ip()) + .map(|(addr, rtt)| { + if addr.is_ipv4() { + (*rtt + IPV6_RTT_ADVANTAGE, addr) + } else { + (*rtt, addr) + } + }) + .min(); + let selected_path = direct_path.or_else(|| { + // Find the fastest relay path. + path_rtts + .iter() + .filter(|(addr, _rtt)| addr.is_relay()) + .map(|(addr, rtt)| (*rtt, addr)) + .min() + }); + if let Some((rtt, addr)) = selected_path { + let prev = self.selected_path.set(Some(addr.clone())); + if prev.is_ok() { + debug!(?addr, ?rtt, ?prev, "selected new path"); + } + self.open_path(addr); + self.close_redundant_paths(addr); + } + } + + /// Closes any direct paths not selected if we are the client. + /// + /// Makes sure not to close the last direct path. Relay paths are never closed + /// currently, because we only have one relay path at this time. + /// + /// Only the client closes paths, just like only the client opens paths. This is to + /// avoid the client and server selecting different paths and accidentally closing all + /// paths. + fn close_redundant_paths(&mut self, selected_path: &transports::Addr) { + debug_assert_eq!(self.selected_path.get().as_ref(), Some(selected_path),); + + for (conn_id, conn_state) in self.connections.iter() { + for (path_id, path_remote) in conn_state + .open_paths + .iter() + .filter(|(_, addr)| addr.is_ip()) + .filter(|(_, addr)| *addr != selected_path) + { + if conn_state.open_paths.values().filter(|a| a.is_ip()).count() <= 1 { + continue; // Do not close the last direct path.
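To see the selection rule of `select_path` in isolation (lowest RTT wins, direct paths beat relay paths, IPv6 gets a small head start over IPv4), here is a self-contained sketch. The `Path` enum and the helper are hypothetical stand-ins for the real types, not part of the patch:

```rust
use std::collections::HashMap;
use std::net::SocketAddr;
use std::time::Duration;

#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
enum Path {
    Direct(SocketAddr),
    Relay(String),
}

/// Picks the path with the lowest RTT, preferring direct over relay paths and
/// penalizing IPv4 slightly so IPv6 wins near-ties.
fn pick_path(rtts: &HashMap<Path, Duration>) -> Option<Path> {
    const IPV6_RTT_ADVANTAGE: Duration = Duration::from_millis(3);
    let direct = rtts
        .iter()
        .filter_map(|(p, rtt)| match p {
            Path::Direct(addr) if addr.is_ipv4() => Some((*rtt + IPV6_RTT_ADVANTAGE, p)),
            Path::Direct(_) => Some((*rtt, p)),
            Path::Relay(_) => None,
        })
        .min();
    let relay = rtts
        .iter()
        .filter_map(|(p, rtt)| matches!(p, Path::Relay(_)).then_some((*rtt, p)))
        .min();
    direct.or(relay).map(|(_, p)| p.clone())
}
```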
+ } + if let Some(path) = conn_state + .handle + .upgrade() + .filter(|conn| conn.side().is_client()) + .and_then(|conn| conn.path(*path_id)) + { + trace!(?path_remote, ?conn_id, ?path_id, "closing direct path"); + match path.close(APPLICATION_ABANDON_PATH.into()) { + Err(quinn_proto::ClosePathError::LastOpenPath) => { + error!("could not close last open path"); + } + Err(quinn_proto::ClosePathError::ClosedPath) => { + // We already closed this. + } + Ok(_fut) => { + // We will handle the event in Self::handle_path_events. + } + } + } + } + } + } +} + +fn send_datagram<'a>( + sender: &'a mut TransportsSender, + dst: transports::Addr, + owned_transmit: OwnedTransmit, +) -> impl Future> + 'a { + std::future::poll_fn(move |cx| { + let transmit = transports::Transmit { + ecn: owned_transmit.ecn, + contents: owned_transmit.contents.as_ref(), + segment_size: owned_transmit.segment_size, + }; + + Pin::new(&mut *sender) + .poll_send(cx, &dst, None, &transmit) + .map(|res| res.with_context(|_| format!("failed to send datagram to {dst:?}"))) + }) +} + +/// Messages to send to the [`RemoteStateActor`]. +#[derive(derive_more::Debug)] +pub(crate) enum RemoteStateMessage { + /// Sends a datagram to all known paths. + /// + /// Used to send QUIC Initial packets. If there is no working direct path this will + /// trigger holepunching. + /// + /// This is not acceptable to use on the normal send path, as it is an async send + /// operation with a bunch more copying. So it should only be used for sending QUIC + /// Initial packets. + #[debug("SendDatagram(..)")] + SendDatagram(TransportsSender, OwnedTransmit), + /// Adds an active connection to this remote endpoint. + /// + /// The connection will now be managed by this actor. Holepunching will happen when + /// needed, any new paths discovered via holepunching will be added, and closed paths + /// will be removed. + #[debug("AddConnection(..)")] + AddConnection(WeakConnectionHandle, oneshot::Sender), + /// Asks if there is any possible path that could be used. + /// + /// This adds the provided transport addresses to the list of potential paths for this remote + /// and starts discovery if needed. + /// + /// Returns `Ok` immediately if the provided address list is non-empty or we already have other known paths. + /// Otherwise returns `Ok` once discovery produces a result, or the discovery error if discovery fails + /// or produces no results. + #[debug("ResolveRemote(..)")] + ResolveRemote( + BTreeSet, + oneshot::Sender>, + ), +} + +/// A handle to a [`RemoteStateActor`]. +/// +/// Dropping this will stop the actor. The actor will also stop after an idle timeout +/// if it has no connections, an empty inbox, and no other senders than the one stored +/// in the endpoint map exist. +#[derive(Debug)] +pub(super) struct RemoteStateHandle { + /// Sender for the channel into the [`RemoteStateActor`]. + /// + /// This is a [`GuardedSender`], from which we can get a sender but only if the receiver + /// hasn't been closed. + pub(super) sender: GuardedSender, + _task: AbortOnDropHandle<()>, +} + +/// Information about a holepunch attempt. +/// +/// Addresses are always stored in canonical form. +#[derive(Debug)] +struct HolepunchAttempt { + when: Instant, + /// The set of local addresses which could take part in holepunching. + /// + /// This does not mean every address here participated in the holepunching. E.g. we + /// could have tried only a subset of the addresses because a previous attempt already + /// covered part of the range.
+ /// + /// We do not store this as a [`DirectAddr`] because this is checked for equality and we + /// do not want to compare the sources of these addresses. + local_candidates: BTreeSet, + /// The set of remote addresses which could take part in holepunching. + /// + /// Like [`Self::local_candidates`] we may not have used them. + remote_candidates: BTreeSet, +} + +/// Newtype to track Connections. +/// +/// The wrapped value is the [`quinn::Connection::stable_id`] value, and is thus only valid +/// for active connections. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +struct ConnId(usize); + +/// State about one connection. +#[derive(Debug)] +struct ConnectionState { + /// Weak handle to the connection. + handle: WeakConnectionHandle, + /// The information we publish to users about the paths used in this connection. + pub_open_paths: Watchable, + /// The paths that exist on this connection. + /// + /// This could be in any state, e.g. while still validating the path or already closed + /// but not yet fully removed from the connection. This exists as long as Quinn knows + /// about the [`PathId`]. + paths: FxHashMap, + /// The open paths on this connection, a subset of [`Self::paths`]. + open_paths: FxHashMap, + /// Reverse map of [`Self::paths]. + path_ids: FxHashMap, +} + +impl ConnectionState { + /// Tracks a path for the connection. + fn add_path(&mut self, remote: transports::Addr, path_id: PathId) { + self.paths.insert(path_id, remote.clone()); + self.path_ids.insert(remote, path_id); + } + + /// Tracks an open path for the connection. + fn add_open_path(&mut self, remote: transports::Addr, path_id: PathId) { + self.paths.insert(path_id, remote.clone()); + self.open_paths.insert(path_id, remote.clone()); + self.path_ids.insert(remote, path_id); + + self.update_pub_path_info(); + } + + /// Completely removes a path from this connection. + fn remove_path(&mut self, path_id: &PathId) -> Option { + let addr = self.paths.remove(path_id); + if let Some(ref addr) = addr { + self.path_ids.remove(addr); + } + self.open_paths.remove(path_id); + addr + } + + /// Removes the path from the open paths. + fn remove_open_path(&mut self, path_id: &PathId) { + self.open_paths.remove(path_id); + + self.update_pub_path_info(); + } + + /// Sets the new [`PathInfo`] structs for the public [`Connection`]. + /// + /// [`Connection`]: crate::endpoint::Connection + fn update_pub_path_info(&self) { + let new = self + .open_paths + .iter() + .map(|(path_id, remote)| { + let remote = TransportAddr::from(remote.clone()); + (remote, *path_id) + }) + .collect::(); + + self.pub_open_paths.set(new).ok(); + } +} + +/// Watcher for the open paths and selected transmission path in a connection. +/// +/// This is stored in the [`Connection`], and the watchables are set from within the endpoint state actor. +/// +/// Internally, this contains a boxed-mapped-joined watcher over the open paths in the connection and the +/// selected path to the remote endpoint. The watcher is boxed because the mapped-joined watcher with +/// `SmallVec` has a size of over 800 bytes, which we don't want to put upon the [`Connection`]. 
+/// +/// [`Connection`]: crate::endpoint::Connection +#[derive(Clone, derive_more::Debug)] +#[debug("PathsWatcher")] +#[allow(clippy::type_complexity)] +pub(crate) struct PathsWatcher( + Box< + n0_watcher::Map< + n0_watcher::Tuple< + n0_watcher::Direct, + n0_watcher::Direct>, + >, + PathInfoList, + >, + >, +); + +impl n0_watcher::Watcher for PathsWatcher { + type Value = PathInfoList; + + fn update(&mut self) -> bool { + self.0.update() + } + + fn peek(&self) -> &Self::Value { + self.0.peek() + } + + fn is_connected(&self) -> bool { + self.0.is_connected() + } + + fn poll_updated( + &mut self, + cx: &mut std::task::Context<'_>, + ) -> Poll> { + self.0.poll_updated(cx) + } +} + +impl PathsWatcher { + fn new( + open_paths: n0_watcher::Direct, + selected_path: n0_watcher::Direct>, + conn_handle: WeakConnectionHandle, + ) -> Self { + Self(Box::new(open_paths.or(selected_path).map( + move |(open_paths, selected_path)| { + let selected_path: Option = selected_path.map(Into::into); + let Some(conn) = conn_handle.upgrade() else { + return PathInfoList(Default::default()); + }; + let list = open_paths + .into_iter() + .flat_map(move |(remote, path_id)| { + PathInfo::new(path_id, &conn, remote, selected_path.as_ref()) + }) + .collect(); + PathInfoList(list) + }, + ))) + } +} + +/// List of [`PathInfo`] for the network paths of a [`Connection`]. +/// +/// This struct implements [`IntoIterator`]. +/// +/// [`Connection`]: crate::endpoint::Connection +#[derive(derive_more::Debug, derive_more::IntoIterator, Eq, PartialEq, Clone)] +#[debug("{_0:?}")] +pub struct PathInfoList(SmallVec<[PathInfo; 4]>); + +impl PathInfoList { + /// Returns an iterator over the path infos. + pub fn iter(&self) -> impl Iterator { + self.0.iter() + } + + /// Returns `true` if the list is empty. + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Returns the number of paths. + pub fn len(&self) -> usize { + self.0.len() + } +} + +/// Information about a network path used by a [`Connection`]. +/// +/// [`Connection`]: crate::endpoint::Connection +#[derive(derive_more::Debug, Clone)] +pub struct PathInfo { + path_id: PathId, + #[debug(skip)] + handle: WeakConnectionHandle, + stats: PathStats, + remote: TransportAddr, + is_selected: bool, +} + +impl PartialEq for PathInfo { + fn eq(&self, other: &Self) -> bool { + self.path_id == other.path_id + && self.remote == other.remote + && self.is_selected == other.is_selected + } +} + +impl Eq for PathInfo {} + +impl PathInfo { + fn new( + path_id: PathId, + conn: &quinn::Connection, + remote: TransportAddr, + selected_path: Option<&TransportAddr>, + ) -> Option { + let stats = conn.path_stats(path_id)?; + Some(Self { + path_id, + handle: conn.weak_handle(), + is_selected: Some(&remote) == selected_path, + remote, + stats, + }) + } + + /// The remote transport address used by this network path. + pub fn remote_addr(&self) -> &TransportAddr { + &self.remote + } + + /// Returns `true` if this path is currently the main transmission path for this [`Connection`]. + /// + /// [`Connection`]: crate::endpoint::Connection + pub fn is_selected(&self) -> bool { + self.is_selected + } + + /// Whether this is an IP transport address. + pub fn is_ip(&self) -> bool { + self.remote.is_ip() + } + + /// Whether this is a transport address via a relay server. + pub fn is_relay(&self) -> bool { + self.remote.is_relay() + } + + /// Returns stats for this transmission path. 
+ pub fn stats(&self) -> PathStats { + self.handle + .upgrade() + .and_then(|conn| conn.path_stats(self.path_id)) + .unwrap_or(self.stats) + } + + /// Current best estimate of this paths's latency (round-trip-time) + pub fn rtt(&self) -> Duration { + self.stats().rtt + } +} + +/// Poll a future once, like n0_future::future::poll_once but sync. +fn now_or_never>(fut: F) -> Option { + let fut = std::pin::pin!(fut); + match fut.poll(&mut std::task::Context::from_waker(std::task::Waker::noop())) { + Poll::Ready(res) => Some(res), + Poll::Pending => None, + } +} + +/// Future that resolves to the `conn_id` once a connection is closed. +/// +/// This uses [`quinn::Connection::on_closed`], which does not keep the connection alive +/// while awaiting the future. +struct OnClosed { + conn_id: ConnId, + inner: quinn::OnClosed, +} + +impl OnClosed { + fn new(conn: &quinn::Connection) -> Self { + Self { + conn_id: ConnId(conn.stable_id()), + inner: conn.on_closed(), + } + } +} + +impl Future for OnClosed { + type Output = ConnId; + + fn poll(mut self: Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> Poll { + let (_close_reason, _stats) = std::task::ready!(Pin::new(&mut self.inner).poll(cx)); + Poll::Ready(self.conn_id) + } +} + +/// Converts an iterator of [`TransportAddr'] into an iterator of [`transports::Addr`]. +fn to_transports_addr( + endpoint_id: EndpointId, + addrs: impl IntoIterator, +) -> impl Iterator { + addrs.into_iter().filter_map(move |addr| match addr { + TransportAddr::Relay(relay_url) => Some(transports::Addr::from((relay_url, endpoint_id))), + TransportAddr::Ip(sockaddr) => Some(transports::Addr::from(sockaddr)), + _ => { + warn!(?addr, "Unsupported TransportAddr"); + None + } + }) +} diff --git a/iroh/src/magicsock/remote_map/remote_state/guarded_channel.rs b/iroh/src/magicsock/remote_map/remote_state/guarded_channel.rs new file mode 100644 index 00000000000..2b3b3b76441 --- /dev/null +++ b/iroh/src/magicsock/remote_map/remote_state/guarded_channel.rs @@ -0,0 +1,78 @@ +use std::sync::{Arc, Mutex}; + +use tokio::sync::mpsc; + +/// Creates a new [`mpsc`] channel where the receiver can only close if there are no active senders. +pub(super) fn guarded_channel(cap: usize) -> (GuardedSender, GuardedReceiver) { + let (tx, rx) = mpsc::channel(cap); + let tx = Arc::new(Mutex::new(Some(tx))); + (GuardedSender { tx: tx.clone() }, GuardedReceiver { tx, rx }) +} + +#[derive(Debug)] +pub(crate) struct GuardedSender { + tx: Arc>>>, +} + +impl GuardedSender { + /// Returns a sender to the channel. + /// + /// Returns a new sender if the channel is not closed. It is guaranteed that + /// [`GuardedReceiver::close_if_idle`] will not return `true` until the sender is dropped. + /// Returns `None` if the channel has been closed. + pub(crate) fn get(&self) -> Option> { + self.tx.lock().expect("poisoned").clone() + } + + /// Returns `true` if the channel has been closed. + pub(crate) fn is_closed(&self) -> bool { + self.tx.lock().expect("poisoned").is_none() + } +} + +#[derive(Debug)] +pub(super) struct GuardedReceiver { + rx: mpsc::Receiver, + tx: Arc>>>, +} + +impl GuardedReceiver { + /// Receives the next value for this receiver. + /// + /// See [`mpsc::Receiver::recv`]. + pub(super) async fn recv(&mut self) -> Option { + self.rx.recv().await + } + + /// Returns `true` if the inbox is empty and no senders to the inbox exist. + pub(super) fn is_idle(&self) -> bool { + self.rx.is_empty() && self.rx.sender_strong_count() <= 1 + } + + /// Closes the channel if the channel is idle. 
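A hypothetical usage of the guarded channel defined above, assuming it stays generic over the message type as in the surrounding code. The point of the guard is that a sender handed out by `get()` cannot race with `close_if_idle()`; this sketch is illustrative only and not part of the patch:

```rust
async fn example() {
    // Caller side: acquire a concrete sender, or learn that the actor is gone.
    let (tx, mut rx) = guarded_channel::<u32>(8);
    if let Some(sender) = tx.get() {
        sender.send(1).await.ok();
    }

    // Actor side: drain the inbox and shut down only once truly idle.
    while let Some(msg) = rx.recv().await {
        let _ = msg;
        if rx.close_if_idle() {
            break;
        }
    }
}
```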
+ /// + /// Returns `true` if the channel is idle and has now been closed, and `false` if the channel + /// is not idle and therefore has not been closed. + /// + /// Uses a lock internally to make sure that there cannot be a race condition between + /// calling this and a new sender being created. + pub(super) fn close_if_idle(&mut self) -> bool { + let mut guard = self.tx.lock().expect("poisoned"); + if self.is_idle() { + *guard = None; + self.rx.close(); + true + } else { + false + } + } +} + +impl Drop for GuardedReceiver { + fn drop(&mut self) { + let mut guard = self.tx.lock().expect("poisoned"); + *guard = None; + self.rx.close(); + drop(guard) + } +} diff --git a/iroh/src/magicsock/remote_map/remote_state/path_state.rs b/iroh/src/magicsock/remote_map/remote_state/path_state.rs new file mode 100644 index 00000000000..a9b07729e89 --- /dev/null +++ b/iroh/src/magicsock/remote_map/remote_state/path_state.rs @@ -0,0 +1,553 @@ +//! The state kept for each network path to a remote endpoint. + +use std::collections::{HashMap, HashSet, VecDeque}; + +use n0_error::e; +use n0_future::time::Instant; +use rustc_hash::FxHashMap; +use tokio::sync::oneshot; +use tracing::trace; + +use super::Source; +use crate::{discovery::DiscoveryError, magicsock::transports}; + +/// Maximum number of IP paths we keep around per endpoint. +pub(super) const MAX_IP_PATHS: usize = 30; + +/// Maximum number of inactive IP paths we keep around per endpoint. +/// +/// These are paths that were at one point opened and are now closed. +pub(super) const MAX_INACTIVE_IP_PATHS: usize = 10; + +/// Map of all paths that we are aware of for a remote endpoint. +/// +/// Also stores a list of resolve requests which are triggered once at least one path is known, +/// or once this struct is notified of a failed discovery run. +#[derive(Debug, Default)] +pub(super) struct RemotePathState { + /// All possible paths we are aware of. + /// + /// These paths might be entirely impossible to use, since they are added by discovery + /// mechanisms. They are only potentially usable. + paths: FxHashMap, + /// Pending resolve requests from [`Self::resolve_remote`]. + pending_resolve_requests: VecDeque>>, +} + +/// Describes the usability of this path, i.e. whether it has ever been opened, +/// when it was closed, or if it has never been usable. +#[derive(Debug, Default)] +pub(super) enum PathStatus { + /// This path is open and active. + Open, + /// This path was once opened, but was abandoned at the given [`Instant`]. + Inactive(Instant), + /// This path was never usable (we attempted holepunching and it didn't work). + Unusable, + /// We have not yet attempted holepunching, or holepunching is currently in + /// progress, so we do not know the usability of this path. + #[default] + Unknown, +} + +impl RemotePathState { + /// Inserts a new address of an open path into our list of paths. + /// + /// This will emit pending resolve requests and trigger pruning paths. + pub(super) fn insert_open_path(&mut self, addr: transports::Addr, source: Source) { + let state = self.paths.entry(addr).or_default(); + state.status = PathStatus::Open; + state.sources.insert(source.clone(), Instant::now()); + self.emit_pending_resolve_requests(None); + self.prune_paths(); + } + + /// Marks a path as abandoned.
+ /// + /// If this path does not exist, it does nothing to the + /// `RemotePathState`. + pub(super) fn abandoned_path(&mut self, addr: &transports::Addr) { + if let Some(state) = self.paths.get_mut(addr) { + match state.status { + PathStatus::Open | PathStatus::Inactive(_) => { + state.status = PathStatus::Inactive(Instant::now()); + } + PathStatus::Unusable | PathStatus::Unknown => { + state.status = PathStatus::Unusable; + } + } + } + } + + /// Inserts multiple addresses of unknown status into our list of potential paths. + /// + /// This will emit pending resolve requests and trigger pruning paths. + pub(super) fn insert_multiple( + &mut self, + addrs: impl Iterator, + source: Source, + ) { + let now = Instant::now(); + for addr in addrs { + self.paths + .entry(addr) + .or_default() + .sources + .insert(source.clone(), now); + } + trace!("added addressing information"); + self.emit_pending_resolve_requests(None); + self.prune_paths(); + } + + /// Triggers `tx` immediately if there are any known paths, or stores it in the list of pending requests. + /// + /// The pending requests will be resolved once a path becomes known, or once discovery + /// concludes without results, whichever comes first. + /// + /// Sends `Ok(())` over `tx` if there are any known paths, or a [`DiscoveryError`] if there are + /// no known paths by the time a discovery run finishes with an error or without results. + pub(super) fn resolve_remote(&mut self, tx: oneshot::Sender>) { + if !self.paths.is_empty() { + tx.send(Ok(())).ok(); + } else { + self.pending_resolve_requests.push_back(tx); + } + } + + /// Notifies that a discovery run has finished. + /// + /// This will emit pending resolve requests. + pub(super) fn discovery_finished(&mut self, result: Result<(), DiscoveryError>) { + self.emit_pending_resolve_requests(result.err()); + } + + /// Returns an iterator over the addresses of all paths. + pub(super) fn addrs(&self) -> impl Iterator { + self.paths.keys() + } + + /// Returns whether this stores any addresses. + pub(super) fn is_empty(&self) -> bool { + self.paths.is_empty() + } + + /// Replies to all pending resolve requests. + /// + /// This is a no-op if no requests are queued. Replies `Ok` if we have any known paths, + /// otherwise with the provided `discovery_error` or with [`DiscoveryError::NoResults`]. + fn emit_pending_resolve_requests(&mut self, discovery_error: Option) { + if self.pending_resolve_requests.is_empty() { + return; + } + let result = match (self.paths.is_empty(), discovery_error) { + (false, _) => Ok(()), + (true, Some(err)) => Err(err), + (true, None) => Err(e!(DiscoveryError::NoResults)), + }; + for tx in self.pending_resolve_requests.drain(..) { + tx.send(result.clone()).ok(); + } + } + + /// Prunes paths. + /// + /// Should be invoked any time we insert a new path. + /// + /// We currently only prune IP paths. For more information on the criteria + /// for when and which paths we prune, look at the [`prune_ip_paths`] function. + pub(super) fn prune_paths(&mut self) { + // right now we only prune IP paths + prune_ip_paths(&mut self.paths); + } +} + +/// The state of a single path to the remote endpoint. +/// +/// Each path is identified by the destination [`transports::Addr`] and they are stored in +/// the [`RemotePathState`] map at [`RemoteStateActor::paths`]. +/// +/// [`RemoteStateActor::paths`]: super::RemoteStateActor::paths +#[derive(Debug, Default)] +pub(super) struct PathState { + /// How we learned about this path, and when.
+ /// + /// We keep track of only the latest [`Instant`] for each [`Source`], keeping the size + /// of the map of sources down to one entry per type of source. + pub(super) sources: HashMap, + /// The usability status of this path. + pub(super) status: PathStatus, +} + +/// Prunes the IP paths in the paths HashMap. +/// +/// Only prunes if the number of IP paths is above [`MAX_IP_PATHS`]. +/// +/// Keeps paths that are open or of unknown status. +/// +/// Always prunes paths that have unsuccessfully holepunched. +/// +/// Keeps [`MAX_INACTIVE_IP_PATHS`] of the most recently closed paths +/// that are not currently being used but have successfully been +/// holepunched previously. +/// +/// This all ensures that: +/// +/// - We do not have unbounded growth of paths. +/// - If we have many paths for this remote, we prune the paths that cannot hole punch. +/// - We do not prune holepunched paths that are currently not in use too quickly. For example, if a large number of untested paths are added at once, we will not immediately prune all of the unused, but valid, paths at once. +fn prune_ip_paths(paths: &mut FxHashMap) { + // if the total number of paths is less than the max, bail early + if paths.len() < MAX_IP_PATHS { + return; + } + + let ip_paths: Vec<_> = paths.iter().filter(|(addr, _)| addr.is_ip()).collect(); + + // if the total number of ip paths is less than the max, bail early + if ip_paths.len() < MAX_IP_PATHS { + return; + } + + // paths that were opened at one point but have previously been closed + let mut inactive = Vec::with_capacity(ip_paths.len()); + // paths where we attempted hole punching but it was not successful + let mut failed = Vec::with_capacity(ip_paths.len()); + + for (addr, state) in ip_paths { + match state.status { + PathStatus::Inactive(t) => { + // paths where holepunching succeeded at one point, but the path was closed. + inactive.push((addr.clone(), t)); + } + PathStatus::Unusable => { + // paths where holepunching has been attempted and failed. + failed.push(addr.clone()); + } + _ => { + // ignore paths that are open or the status is unknown + } + } + } + + // All paths are bad, don't prune all of them. + // + // This implies that `inactive` is empty. + if failed.len() == paths.len() { + // leave the max number of IP paths + failed.truncate(paths.len().saturating_sub(MAX_IP_PATHS)); + } + + // sort the potentially prunable from most recently closed to least recently closed + inactive.sort_by(|a, b| b.1.cmp(&a.1)); + + // Prune the "oldest" closed paths.
+ let old_inactive = inactive.split_off(inactive.len().saturating_sub(MAX_INACTIVE_IP_PATHS)); + + // collect all the paths that should be pruned + let must_prune: HashSet<_> = failed + .into_iter() + .chain(old_inactive.into_iter().map(|(addr, _)| addr)) + .collect(); + + paths.retain(|addr, _| !must_prune.contains(addr)); +} + +#[cfg(test)] +mod tests { + use std::{ + net::{Ipv4Addr, SocketAddrV4}, + time::Duration, + }; + + use iroh_base::{RelayUrl, SecretKey}; + use rand::SeedableRng; + + use super::*; + + fn ip_addr(port: u16) -> transports::Addr { + transports::Addr::Ip(SocketAddrV4::new(Ipv4Addr::LOCALHOST, port).into()) + } + + fn path_state_inactive(closed: Instant) -> PathState { + PathState { + sources: HashMap::new(), + status: PathStatus::Inactive(closed), + } + } + + fn path_state_unusable() -> PathState { + PathState { + sources: HashMap::new(), + status: PathStatus::Unusable, + } + } + + #[test] + fn test_prune_under_max_paths() { + let mut paths = FxHashMap::default(); + for i in 0..20 { + paths.insert(ip_addr(i), PathState::default()); + } + + prune_ip_paths(&mut paths); + assert_eq!(20, paths.len(), "should not prune when under MAX_IP_PATHS"); + } + + #[test] + fn test_prune_at_max_paths_no_prunable() { + let mut paths = FxHashMap::default(); + // All paths are active (never abandoned), so none should be pruned + for i in 0..MAX_IP_PATHS { + paths.insert(ip_addr(i as u16), PathState::default()); + } + + prune_ip_paths(&mut paths); + assert_eq!(MAX_IP_PATHS, paths.len(), "should not prune active paths"); + } + + #[test] + fn test_prune_failed_holepunch() { + let mut paths = FxHashMap::default(); + + // Add 20 active paths + for i in 0..20 { + paths.insert(ip_addr(i), PathState::default()); + } + + // Add 15 failed holepunch paths (must_prune) + for i in 20..35 { + paths.insert(ip_addr(i), path_state_unusable()); + } + + prune_ip_paths(&mut paths); + + // All failed holepunch paths should be pruned + assert_eq!(20, paths.len()); + for i in 0..20 { + assert!(paths.contains_key(&ip_addr(i))); + } + for i in 20..35 { + assert!(!paths.contains_key(&ip_addr(i))); + } + } + + #[test] + fn test_prune_keeps_most_recent_inactive() { + let mut paths = FxHashMap::default(); + let now = Instant::now(); + + // Add 15 active paths + for i in 0..15 { + paths.insert(ip_addr(i), PathState::default()); + } + + // Add 20 inactive paths with different abandon times + // Ports 15-34, with port 34 being most recently abandoned + for i in 0..20 { + let abandoned_time = now - Duration::from_secs((20 - i) as u64); + paths.insert(ip_addr(15 + i as u16), path_state_inactive(abandoned_time)); + } + + assert_eq!(35, paths.len()); + prune_ip_paths(&mut paths); + + // Should keep 15 active + 10 most recently abandoned + assert_eq!(25, paths.len()); + + // Active paths should remain + for i in 0..15 { + assert!(paths.contains_key(&ip_addr(i))); + } + + // Most recently abandoned (ports 25-34) should remain + for i in 25..35 { + assert!(paths.contains_key(&ip_addr(i)), "port {} should be kept", i); + } + + // Oldest abandoned (ports 15-24) should be pruned + for i in 15..25 { + assert!( + !paths.contains_key(&ip_addr(i)), + "port {} should be pruned", + i + ); + } + } + + #[test] + fn test_prune_mixed_must_and_can_prune() { + let mut paths = FxHashMap::default(); + let now = Instant::now(); + + // Add 15 active paths + for i in 0..15 { + paths.insert(ip_addr(i), PathState::default()); + } + + // Add 5 failed holepunch paths + for i in 15..20 { + paths.insert(ip_addr(i), path_state_unusable()); + } + + // 
Add 15 usable but abandoned paths + for i in 0..15 { + let abandoned_time = now - Duration::from_secs((15 - i) as u64); + paths.insert(ip_addr(20 + i as u16), path_state_inactive(abandoned_time)); + } + + assert_eq!(35, paths.len()); + prune_ip_paths(&mut paths); + + // Remove all failed paths -> down to 30 + // Keep MAX_INACTIVE_IP_PATHS, eg remove 5 usable but abandoned paths -> down to 20 + assert_eq!(20, paths.len()); + + // Active paths should remain + for i in 0..15 { + assert!(paths.contains_key(&ip_addr(i))); + } + + // Failed holepunch should be pruned + for i in 15..20 { + assert!(!paths.contains_key(&ip_addr(i))); + } + + // Most recently abandoned (ports 30-34) should remain + for i in 30..35 { + assert!(paths.contains_key(&ip_addr(i)), "port {} should be kept", i); + } + } + + #[test] + fn test_prune_non_ip_paths_not_counted() { + let mut paths = FxHashMap::default(); + + // Add 25 IP paths (under MAX_IP_PATHS) + for i in 0..25 { + paths.insert(ip_addr(i), path_state_unusable()); + } + + let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0u64); + let relay_url: RelayUrl = url::Url::parse("https://localhost") + .expect("should be valid url") + .into(); + // Add 10 relay addresses + for _ in 0..10 { + let id = SecretKey::generate(&mut rng).public(); + let relay_addr = transports::Addr::Relay(relay_url.clone(), id); + paths.insert(relay_addr, PathState::default()); + } + + assert_eq!(35, paths.len()); // 25 IP + 10 relay + prune_ip_paths(&mut paths); + + // Should not prune since IP paths < MAX_IP_PATHS + assert_eq!(35, paths.len()); + } + + #[test] + fn test_prune_preserves_never_dialed() { + let mut paths = FxHashMap::default(); + + // Add 20 never-dialed paths (PathStatus::Unknown) + for i in 0..20 { + paths.insert(ip_addr(i), PathState::default()); + } + + // Add 15 failed paths to trigger pruning + for i in 20..35 { + paths.insert(ip_addr(i), path_state_unusable()); + } + + prune_ip_paths(&mut paths); + + // Never-dialed paths should be preserved + for i in 0..20 { + assert!(paths.contains_key(&ip_addr(i))); + } + } + + #[test] + fn test_prune_all_paths_failed() { + let mut paths = FxHashMap::default(); + + // Add 40 failed holepunch paths (all paths have failed) + for i in 0..40 { + paths.insert(ip_addr(i), path_state_unusable()); + } + + assert_eq!(40, paths.len()); + prune_ip_paths(&mut paths); + + // Should keep MAX_IP_PATHS instead of pruning everything + // This prevents catastrophic loss of all path information + assert_eq!( + MAX_IP_PATHS, + paths.len(), + "should keep MAX_IP_PATHS when all paths failed" + ); + } + + #[test] + fn test_insert_open_path() { + let mut state = RemotePathState::default(); + let addr = ip_addr(1000); + let source = Source::Udp; + + assert!(state.is_empty()); + + state.insert_open_path(addr.clone(), source.clone()); + + assert!(!state.is_empty()); + assert!(state.paths.contains_key(&addr)); + let path = &state.paths[&addr]; + assert!(matches!(path.status, PathStatus::Open)); + assert_eq!(path.sources.len(), 1); + assert!(path.sources.contains_key(&source)); + } + + #[test] + fn test_abandoned_path() { + let mut state = RemotePathState::default(); + + // Test: Open goes to Inactive + let addr_open = ip_addr(1000); + state.insert_open_path(addr_open.clone(), Source::Udp); + assert!(matches!(state.paths[&addr_open].status, PathStatus::Open)); + + state.abandoned_path(&addr_open); + assert!(matches!( + state.paths[&addr_open].status, + PathStatus::Inactive(_) + )); + + // Test: Inactive stays Inactive + state.abandoned_path(&addr_open); + 
assert!(matches!( + state.paths[&addr_open].status, + PathStatus::Inactive(_) + )); + + // Test: Unknown goes to Unusable + let addr_unknown = ip_addr(2000); + state.insert_multiple([addr_unknown.clone()].into_iter(), Source::Relay); + assert!(matches!( + state.paths[&addr_unknown].status, + PathStatus::Unknown + )); + + state.abandoned_path(&addr_unknown); + assert!(matches!( + state.paths[&addr_unknown].status, + PathStatus::Unusable + )); + + // Test: Unusable stays Unusable + state.abandoned_path(&addr_unknown); + assert!(matches!( + state.paths[&addr_unknown].status, + PathStatus::Unusable + )); + } +} diff --git a/iroh/src/magicsock/transports.rs b/iroh/src/magicsock/transports.rs index b44eca505ae..a20d2d371b3 100644 --- a/iroh/src/magicsock/transports.rs +++ b/iroh/src/magicsock/transports.rs @@ -1,16 +1,22 @@ use std::{ + fmt, io::{self, IoSliceMut}, net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}, pin::Pin, - sync::{Arc, atomic::AtomicUsize}, + sync::Arc, task::{Context, Poll}, }; -use iroh_base::{EndpointId, RelayUrl}; +use bytes::Bytes; +use iroh_base::{EndpointId, RelayUrl, TransportAddr}; +use iroh_relay::RelayMap; use n0_watcher::Watcher; use relay::{RelayNetworkChangeSender, RelaySender}; -use smallvec::SmallVec; -use tracing::{error, trace, warn}; +use tokio_util::sync::CancellationToken; +use tracing::{debug, error, instrument, trace, warn}; + +use super::{MagicSock, mapped_addrs::MultipathMappedAddr, remote_map::RemoteStateMessage}; +use crate::{metrics::EndpointMetrics, net_report::Report}; #[cfg(not(wasm_browser))] mod ip; @@ -21,8 +27,6 @@ pub(crate) use self::ip::IpTransport; #[cfg(not(wasm_browser))] use self::ip::{IpNetworkChangeSender, IpSender}; pub(crate) use self::relay::{RelayActorConfig, RelayTransport}; -use super::MagicSock; -use crate::net_report::Report; /// Manages the different underlying data transports that the magicsock /// can support. @@ -32,18 +36,20 @@ pub(crate) struct Transports { ip: Vec, relay: Vec, - poll_recv_counter: AtomicUsize, + poll_recv_counter: usize, + /// Cache for source addrs, to speed up access + source_addrs: [Addr; quinn_udp::BATCH_SIZE], } #[cfg(not(wasm_browser))] pub(crate) type LocalAddrsWatch = n0_watcher::Map< - ( + n0_watcher::Tuple< n0_watcher::Join>, n0_watcher::Join< Option<(RelayUrl, EndpointId)>, n0_watcher::Map>, Option<(RelayUrl, EndpointId)>>, >, - ), + >, Vec, >; @@ -56,18 +62,89 @@ pub(crate) type LocalAddrsWatch = n0_watcher::Map< Vec, >; +/// Available transport configurations. +#[derive(Debug, Clone)] +#[non_exhaustive] +pub enum TransportConfig { + /// IP based transport + #[cfg(not(wasm_browser))] + Ip { + /// The address this transport will bind on. + bind_addr: SocketAddr, + }, + /// Relay transport + Relay { + /// The [`RelayMap`] used for this relay. + relay_map: RelayMap, + }, +} +impl TransportConfig { + /// Configures a default IPv4 transport, listening on `0.0.0.0:0`. + #[cfg(not(wasm_browser))] + pub fn default_ipv4() -> Self { + use std::net::{Ipv4Addr, SocketAddrV4}; + + Self::Ip { + bind_addr: SocketAddr::V4(SocketAddrV4::new(Ipv4Addr::UNSPECIFIED, 0)), + } + } + + /// Configures a default IPv6 transport, listening on `[::]:0`. 
+ #[cfg(not(wasm_browser))] + pub fn default_ipv6() -> Self { + Self::Ip { + bind_addr: SocketAddr::V6(SocketAddrV6::new(Ipv6Addr::UNSPECIFIED, 0, 0, 0)), + } + } +} + +#[cfg(not(wasm_browser))] +fn bind_ip(configs: &[TransportConfig], metrics: &EndpointMetrics) -> io::Result> { + let mut transports = Vec::new(); + for config in configs { + if let TransportConfig::Ip { bind_addr } = config { + match IpTransport::bind(*bind_addr, metrics.magicsock.clone()) { + Ok(transport) => { + transports.push(transport); + } + Err(err) => { + if bind_addr.is_ipv6() { + tracing::info!("bind ignoring IPv6 bind failure: {:?}", err); + } else { + return Err(err); + } + } + } + } + } + + Ok(transports) +} + impl Transports { - /// Creates a new transports structure. - pub(crate) fn new( - #[cfg(not(wasm_browser))] ip: Vec, - relay: Vec, - ) -> Self { - Self { + /// Binds the transports. + pub(crate) fn bind( + configs: &[TransportConfig], + relay_actor_config: RelayActorConfig, + metrics: &EndpointMetrics, + shutdown_token: CancellationToken, + ) -> io::Result { + #[cfg(not(wasm_browser))] + let ip = bind_ip(configs, metrics)?; + + let relay = configs + .iter() + .filter(|t| matches!(t, TransportConfig::Relay { .. })) + .map(|_c| RelayTransport::new(relay_actor_config.clone(), shutdown_token.child_token())) + .collect(); + + Ok(Self { #[cfg(not(wasm_browser))] ip, relay, poll_recv_counter: Default::default(), - } + source_addrs: Default::default(), + }) } pub(crate) fn poll_recv( @@ -78,15 +155,15 @@ impl Transports { msock: &MagicSock, ) -> Poll> { debug_assert_eq!(bufs.len(), metas.len(), "non matching bufs & metas"); + debug_assert!(bufs.len() <= quinn_udp::BATCH_SIZE, "too many buffers"); if msock.is_closing() { return Poll::Pending; } - let mut source_addrs = vec![Addr::default(); metas.len()]; - match self.inner_poll_recv(cx, bufs, metas, &mut source_addrs)? { + match self.inner_poll_recv(cx, bufs, metas)? { Poll::Pending | Poll::Ready(0) => Poll::Pending, Poll::Ready(n) => { - msock.process_datagrams(&mut bufs[..n], &mut metas[..n], &source_addrs[..n]); + msock.process_datagrams(&mut bufs[..n], &mut metas[..n], &self.source_addrs[..n]); Poll::Ready(Ok(n)) } } @@ -98,13 +175,12 @@ impl Transports { cx: &mut Context, bufs: &mut [IoSliceMut<'_>], metas: &mut [quinn_udp::RecvMeta], - source_addrs: &mut [Addr], ) -> Poll> { debug_assert_eq!(bufs.len(), metas.len(), "non matching bufs & metas"); macro_rules! poll_transport { ($socket:expr) => { - match $socket.poll_recv(cx, bufs, metas, source_addrs)? { + match $socket.poll_recv(cx, bufs, metas, &mut self.source_addrs)? { Poll::Pending | Poll::Ready(0) => {} Poll::Ready(n) => { return Poll::Ready(Ok(n)); @@ -115,9 +191,7 @@ impl Transports { // To improve fairness, every other call reverses the ordering of polling. 
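The fairness trick mentioned in the comment above is simply an alternating iteration order between calls. A minimal standalone illustration (hypothetical helper, not part of the patch):

```rust
/// Visits `items` front-to-back on even calls and back-to-front on odd calls,
/// so no single transport is always polled first.
fn fair_order<T>(items: &[T], call_counter: usize) -> Vec<&T> {
    if call_counter % 2 == 0 {
        items.iter().collect()
    } else {
        items.iter().rev().collect()
    }
}
```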
- let counter = self - .poll_recv_counter - .fetch_add(1, std::sync::atomic::Ordering::Relaxed); + let counter = self.poll_recv_counter.wrapping_add(1); if counter % 2 == 0 { #[cfg(not(wasm_browser))] @@ -154,7 +228,7 @@ impl Transports { let ips = n0_watcher::Join::new(self.ip.iter().map(|t| t.local_addr_watch())); let relays = n0_watcher::Join::new(self.relay.iter().map(|t| t.local_addr_watch())); - (ips, relays).map(|(ips, relays)| { + ips.or(relays).map(|(ips, relays)| { ips.into_iter() .map(Addr::from) .chain( @@ -219,16 +293,15 @@ impl Transports { false } - pub(crate) fn create_sender(&self, msock: Arc) -> UdpSender { + pub(crate) fn create_sender(&self) -> TransportsSender { #[cfg(not(wasm_browser))] let ip = self.ip.iter().map(|t| t.create_sender()).collect(); let relay = self.relay.iter().map(|t| t.create_sender()).collect(); let max_transmit_segments = self.max_transmit_segments(); - UdpSender { + TransportsSender { #[cfg(not(wasm_browser))] ip, - msock, relay, max_transmit_segments, } @@ -301,12 +374,42 @@ pub(crate) struct Transmit<'a> { pub(crate) segment_size: Option, } -#[derive(Debug, Clone, PartialEq, Eq)] +/// An outgoing packet that can be sent across channels. +#[derive(Debug, Clone)] +pub(crate) struct OwnedTransmit { + pub(crate) ecn: Option, + pub(crate) contents: Bytes, + pub(crate) segment_size: Option, +} + +impl From<&quinn_udp::Transmit<'_>> for OwnedTransmit { + fn from(source: &quinn_udp::Transmit<'_>) -> Self { + Self { + ecn: source.ecn, + contents: Bytes::copy_from_slice(source.contents), + segment_size: source.segment_size, + } + } +} + +/// Transports address. +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub(crate) enum Addr { + /// An IP address, should always be stored in its canonical form. Ip(SocketAddr), + /// A relay address. Relay(RelayUrl, EndpointId), } +impl fmt::Debug for Addr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Addr::Ip(addr) => write!(f, "Ip({addr})"), + Addr::Relay(url, node_id) => write!(f, "Relay({url}, {})", node_id.fmt_short()), + } + } +} + impl Default for Addr { fn default() -> Self { Self::Ip(SocketAddr::V6(SocketAddrV6::new( @@ -320,7 +423,23 @@ impl Default for Addr { impl From for Addr { fn from(value: SocketAddr) -> Self { - Self::Ip(value) + match value { + SocketAddr::V4(_) => Self::Ip(value), + SocketAddr::V6(addr) => { + Self::Ip(SocketAddr::new(addr.ip().to_canonical(), addr.port())) + } + } + } +} + +impl From<&SocketAddr> for Addr { + fn from(value: &SocketAddr) -> Self { + match value { + SocketAddr::V4(_) => Self::Ip(*value), + SocketAddr::V6(addr) => { + Self::Ip(SocketAddr::new(addr.ip().to_canonical(), addr.port())) + } + } } } @@ -330,11 +449,31 @@ impl From<(RelayUrl, EndpointId)> for Addr { } } +impl From for TransportAddr { + fn from(value: Addr) -> Self { + match value { + Addr::Ip(addr) => TransportAddr::Ip(addr), + Addr::Relay(url, _) => TransportAddr::Relay(url), + } + } +} + impl Addr { pub(crate) fn is_relay(&self) -> bool { matches!(self, Self::Relay(..)) } + pub(crate) fn is_ip(&self) -> bool { + matches!(self, Self::Ip(_)) + } + + pub(crate) fn is_ipv4(&self) -> bool { + match self { + Addr::Ip(socket_addr) => socket_addr.is_ipv4(), + Addr::Relay(_, _) => false, + } + } + /// Returns `None` if not an `Ip`. pub(crate) fn into_socket_addr(self) -> Option { match self { @@ -344,77 +483,25 @@ impl Addr { } } -#[derive(Debug)] -pub(crate) struct UdpSender { - msock: Arc, // :( +/// A sender that sends to all our transports. 
+#[derive(Debug, Clone)] +pub(crate) struct TransportsSender { #[cfg(not(wasm_browser))] ip: Vec, relay: Vec, max_transmit_segments: usize, } -impl UdpSender { - pub(crate) async fn send( - &self, - destination: &Addr, - src: Option, - transmit: &Transmit<'_>, - ) -> io::Result<()> { - trace!(?destination, "sending"); - - let mut any_match = false; - match destination { - #[cfg(wasm_browser)] - Addr::Ip(..) => return Err(io::Error::other("IP is unsupported in browser")), - #[cfg(not(wasm_browser))] - Addr::Ip(addr) => { - for sender in &self.ip { - if sender.is_valid_send_addr(addr) { - any_match = true; - match sender.send(*addr, src, transmit).await { - Ok(()) => { - return Ok(()); - } - Err(err) => { - warn!("ip failed to send: {:?}", err); - } - } - } - } - } - Addr::Relay(url, endpoint_id) => { - for sender in &self.relay { - if sender.is_valid_send_addr(url, endpoint_id) { - any_match = true; - match sender.send(url.clone(), *endpoint_id, transmit).await { - Ok(()) => { - return Ok(()); - } - Err(err) => { - warn!("relay failed to send: {:?}", err); - } - } - } - } - } - } - if any_match { - Err(io::Error::other("all available transports failed")) - } else { - Err(io::Error::other("no transport available")) - } - } - - pub(crate) fn inner_poll_send( +impl TransportsSender { + #[instrument(name = "poll_send", skip(self, cx, transmit), fields(len = transmit.contents.len()))] + pub(crate) fn poll_send( mut self: Pin<&mut Self>, cx: &mut std::task::Context, - destination: &Addr, + dst: &Addr, src: Option, transmit: &Transmit<'_>, ) -> Poll> { - trace!(?destination, "sending"); - - match destination { + match dst { #[cfg(wasm_browser)] Addr::Ip(..) => { return Poll::Ready(Err(io::Error::other("IP is unsupported in browser"))); @@ -425,7 +512,13 @@ impl UdpSender { if sender.is_valid_send_addr(addr) { match Pin::new(sender).poll_send(cx, *addr, src, transmit) { Poll::Pending => {} - Poll::Ready(res) => return Poll::Ready(res), + Poll::Ready(res) => { + match &res { + Ok(()) => trace!("sent"), + Err(err) => trace!("send failed: {err:#}"), + } + return Poll::Ready(res); + } } } } @@ -435,7 +528,13 @@ impl UdpSender { if sender.is_valid_send_addr(url, endpoint_id) { match sender.poll_send(cx, url.clone(), *endpoint_id, transmit) { Poll::Pending => {} - Poll::Ready(res) => return Poll::Ready(res), + Poll::Ready(res) => { + match &res { + Ok(()) => trace!("sent"), + Err(err) => trace!("send failed: {err:#}"), + } + return Poll::Ready(res); + } } } } @@ -443,149 +542,221 @@ impl UdpSender { } Poll::Pending } +} - /// Best effort sending - pub(crate) fn inner_try_send( - &self, - destination: &Addr, - src: Option, - transmit: &Transmit<'_>, - ) -> io::Result<()> { - trace!(?destination, "sending, best effort"); +/// A [`Transports`] that works with [`MultipathMappedAddr`]s and their IPv6 representation. +/// +/// The [`MultipathMappedAddr`]s have an IPv6 representation that Quinn uses. This struct +/// knows about these and maps them back to the transport [`Addr`]s used by the wrapped +/// [`Transports`]. +#[derive(Debug)] +pub(crate) struct MagicTransport { + msock: Arc, + transports: Transports, +} - match destination { - #[cfg(wasm_browser)] - Addr::Ip(..) 
=> return Err(io::Error::other("IP is unsupported in browser")), - #[cfg(not(wasm_browser))] - Addr::Ip(addr) => { - for transport in &self.ip { - if transport.is_valid_send_addr(addr) { - match transport.try_send(*addr, src, transmit) { - Ok(()) => return Ok(()), - Err(_err) => { - continue; - } - } - } - } - } - Addr::Relay(url, endpoint_id) => { - for transport in &self.relay { - if transport.is_valid_send_addr(url, endpoint_id) { - match transport.try_send(url.clone(), *endpoint_id, transmit) { - Ok(()) => return Ok(()), - Err(_err) => { - continue; - } - } - } - } - } - } - Err(io::Error::new( - io::ErrorKind::WouldBlock, - "no transport ready", - )) +impl MagicTransport { + pub(crate) fn new(msock: Arc, transports: Transports) -> Self { + Self { msock, transports } } } -impl quinn::UdpSender for UdpSender { - fn poll_send( - mut self: Pin<&mut Self>, - transmit: &quinn_udp::Transmit, +impl quinn::AsyncUdpSocket for MagicTransport { + fn create_sender(&self) -> Pin> { + Box::pin(MagicSender { + msock: self.msock.clone(), + sender: self.transports.create_sender(), + }) + } + + fn poll_recv( + &mut self, cx: &mut Context, - ) -> Poll> { - let active_paths = self.msock.prepare_send(&self, transmit)?; - - if active_paths.is_empty() { - // Returning Ok here means we let QUIC timeout. - // Returning an error would immediately fail a connection. - // The philosophy of quinn-udp is that a UDP connection could - // come back at any time or missing should be transient so chooses to let - // these kind of errors time out. See test_try_send_no_send_addr to try - // this out. - error!("no paths available for endpoint, voiding transmit"); - return Poll::Ready(Ok(())); - } + bufs: &mut [IoSliceMut<'_>], + meta: &mut [quinn_udp::RecvMeta], + ) -> Poll> { + self.transports.poll_recv(cx, bufs, meta, &self.msock) + } - let mut results = SmallVec::<[_; 3]>::new(); + #[cfg(not(wasm_browser))] + fn local_addr(&self) -> io::Result { + let local_addrs = self.transports.local_addrs(); + let addrs: Vec<_> = local_addrs + .into_iter() + .map(|addr| { + use crate::magicsock::mapped_addrs::DEFAULT_FAKE_ADDR; + + match addr { + Addr::Ip(addr) => addr, + Addr::Relay(..) => DEFAULT_FAKE_ADDR.into(), + } + }) + .collect(); - trace!(?active_paths, "attempting to send"); + if let Some(addr) = addrs.iter().find(|addr| addr.is_ipv6()) { + return Ok(*addr); + } + if let Some(SocketAddr::V4(addr)) = addrs.first() { + // Pretend to be IPv6, because our `MappedAddr`s need to be IPv6. + let ip = addr.ip().to_ipv6_mapped().into(); + return Ok(SocketAddr::new(ip, addr.port())); + } - for destination in active_paths { - let src = transmit.src_ip; - let transmit = Transmit { - ecn: transmit.ecn, - contents: transmit.contents, - segment_size: transmit.segment_size, - }; + if !self.transports.relay.is_empty() { + // pretend we have an address to make sure things are not too sad during startup + use crate::magicsock::mapped_addrs::DEFAULT_FAKE_ADDR; - let res = self - .as_mut() - .inner_poll_send(cx, &destination, src, &transmit); - match res { - Poll::Ready(Ok(())) => { - trace!(dst = ?destination, "sent transmit"); - } - Poll::Ready(Err(ref err)) => { - warn!(dst = ?destination, "failed to send: {err:#}"); - } - Poll::Pending => {} - } - results.push(res); + return Ok(DEFAULT_FAKE_ADDR.into()); } + Err(io::Error::other("no valid address available")) + } - if results.iter().all(|p| matches!(p, Poll::Pending)) { - // Handle backpressure. 
- return Poll::Pending; - } - Poll::Ready(Ok(())) + #[cfg(wasm_browser)] + fn local_addr(&self) -> io::Result { + // Again, we need to pretend we're IPv6, because of our `MappedAddr`s. + Ok(SocketAddr::new(std::net::Ipv6Addr::LOCALHOST.into(), 0)) } - fn max_transmit_segments(&self) -> usize { - self.max_transmit_segments - } - - fn try_send(self: Pin<&mut Self>, transmit: &quinn_udp::Transmit) -> io::Result<()> { - let active_paths = self.msock.prepare_send(&self, transmit)?; - if active_paths.is_empty() { - // Returning Ok here means we let QUIC timeout. - // Returning an error would immediately fail a connection. - // The philosophy of quinn-udp is that a UDP connection could - // come back at any time or missing should be transient so chooses to let - // these kind of errors time out. See test_try_send_no_send_addr to try - // this out. - error!("no paths available for endpoint, voiding transmit"); - return Ok(()); - } + fn max_receive_segments(&self) -> usize { + self.transports.max_receive_segments() + } + + fn may_fragment(&self) -> bool { + self.transports.may_fragment() + } +} - let mut results = SmallVec::<[_; 3]>::new(); +/// A sender for [`MagicTransport`]. +/// +/// This is special in that it handles [`MultipathMappedAddr::Mixed`] by delegating to the +/// [`MagicSock`] which expands it back to one or more [`Addr`]s and sends it +/// using the underlying [`Transports`]. +#[derive(Debug)] +#[pin_project::pin_project] +pub(crate) struct MagicSender { + msock: Arc, + #[pin] + sender: TransportsSender, +} - trace!(?active_paths, "attempting to send"); +impl MagicSender { + /// Extracts the right [`Addr`] from the [`quinn_udp::Transmit`]. + /// + /// Because Quinn does only know about IP transports we map other transports to private + /// IPv6 Unique Local Address ranges. This extracts the transport addresses out of the + /// transmit's destination. + fn mapped_addr(&self, transmit: &quinn_udp::Transmit) -> io::Result { + if self.msock.is_closed() { + return Err(io::Error::new( + io::ErrorKind::NotConnected, + "connection closed", + )); + } - for destination in active_paths { - let src = transmit.src_ip; - let transmit = Transmit { - ecn: transmit.ecn, - contents: transmit.contents, - segment_size: transmit.segment_size, - }; + Ok(MultipathMappedAddr::from(transmit.destination)) + } +} - let res = self.inner_try_send(&destination, src, &transmit); - match res { - Ok(()) => { - trace!(dst = ?destination, "sent transmit"); +impl quinn::UdpSender for MagicSender { + fn poll_send( + self: Pin<&mut Self>, + quinn_transmit: &quinn_udp::Transmit, + cx: &mut Context, + ) -> Poll> { + // On errors this methods prefers returning Ok(()) to Quinn. Returning an error + // should only happen if the error is permanent and fatal and it will never be + // possible to send anything again. Doing so kills the Quinn EndpointDriver. Most + // send errors are intermittent errors, returning Ok(()) in those cases will mean + // Quinn eventually considers the packets that had send errors as lost and will try + // and re-send them. + let mapped_addr = self.mapped_addr(quinn_transmit)?; + + let transport_addr = match mapped_addr { + MultipathMappedAddr::Mixed(mapped_addr) => { + let Some(node_id) = self + .msock + .remote_map + .endpoint_mapped_addrs + .lookup(&mapped_addr) + else { + error!(dst = ?mapped_addr, "unknown NodeIdMappedAddr, dropped transmit"); + return Poll::Ready(Ok(())); + }; + + // Note we drop the src_ip set in the Quinn Transmit. 
This is only the + // Initial packet we are sending, so we do not yet have an src address we + // need to respond from. + if let Some(src_ip) = quinn_transmit.src_ip { + warn!(dst = ?mapped_addr, ?src_ip, dst_node = %node_id.fmt_short(), + "oops, flub didn't think this would happen"); } - Err(ref err) => { - warn!(dst = ?destination, "failed to send: {err:#}"); + + let sender = self.msock.remote_map.remote_state_actor(node_id); + let transmit = OwnedTransmit::from(quinn_transmit); + return match sender.try_send(RemoteStateMessage::SendDatagram( + self.sender.clone(), + transmit, + )) { + Ok(()) => { + trace!(dst = ?mapped_addr, dst_node = %node_id.fmt_short(), "sent transmit"); + Poll::Ready(Ok(())) + } + Err(err) => { + // We do not want to block the next send which might be on a + // different transport. Instead we let Quinn handle this as + // a lost datagram. + // TODO: Revisit this: we might want to do something better. + debug!(dst = ?mapped_addr, dst_node = %node_id.fmt_short(), + "RemoteStateActor inbox {err:#}, dropped transmit"); + Poll::Ready(Ok(())) + } + }; + } + MultipathMappedAddr::Relay(relay_mapped_addr) => { + match self + .msock + .remote_map + .relay_mapped_addrs + .lookup(&relay_mapped_addr) + { + Some((relay_url, endpoint_id)) => Addr::Relay(relay_url, endpoint_id), + None => { + error!("unknown RelayMappedAddr, dropped transmit"); + return Poll::Ready(Ok(())); + } } } - results.push(res); + MultipathMappedAddr::Ip(socket_addr) => Addr::Ip(socket_addr), + }; + + let transmit = Transmit { + ecn: quinn_transmit.ecn, + contents: quinn_transmit.contents, + segment_size: quinn_transmit.segment_size, + }; + let this = self.project(); + + match this + .sender + .poll_send(cx, &transport_addr, quinn_transmit.src_ip, &transmit) + { + Poll::Ready(Ok(())) => Poll::Ready(Ok(())), + Poll::Ready(Err(ref err)) => { + warn!("dropped transmit: {err:#}"); + Poll::Ready(Ok(())) + } + Poll::Pending => { + // We do not want to block the next send which might be on a + // different transport. Instead we let Quinn handle this as a lost + // datagram. + // TODO: Revisit this: we might want to do something better. 
+ trace!("transport pending, dropped transmit"); + Poll::Ready(Ok(())) + } } + } - if results.iter().all(|p| p.is_err()) { - return Err(io::Error::other("all failed")); - } - Ok(()) + fn max_transmit_segments(&self) -> usize { + self.sender.max_transmit_segments } } diff --git a/iroh/src/magicsock/transports/ip.rs b/iroh/src/magicsock/transports/ip.rs index 95e4093435c..dc4ab293da4 100644 --- a/iroh/src/magicsock/transports/ip.rs +++ b/iroh/src/magicsock/transports/ip.rs @@ -9,7 +9,7 @@ use std::{ use n0_watcher::Watchable; use netwatch::{UdpSender, UdpSocket}; use pin_project::pin_project; -use tracing::trace; +use tracing::{debug, trace}; use super::{Addr, Transmit}; use crate::metrics::MagicsockMetrics; @@ -22,7 +22,36 @@ pub(crate) struct IpTransport { metrics: Arc, } +fn bind_with_fallback(mut addr: SocketAddr) -> io::Result { + debug!(%addr, "binding"); + + // First try binding a preferred port, if specified + match netwatch::UdpSocket::bind_full(addr) { + Ok(socket) => { + let local_addr = socket.local_addr()?; + debug!(%addr, %local_addr, "successfully bound"); + return Ok(socket); + } + Err(err) => { + debug!(%addr, "failed to bind: {err:#}"); + // If that was already the fallback port, then error out + if addr.port() == 0 { + return Err(err); + } + } + } + + // Otherwise, try binding with port 0 + addr.set_port(0); + netwatch::UdpSocket::bind_full(addr) +} + impl IpTransport { + pub(crate) fn bind(bind_addr: SocketAddr, metrics: Arc) -> io::Result { + let socket = bind_with_fallback(bind_addr)?; + Ok(Self::new(bind_addr, Arc::new(socket), metrics.clone())) + } + pub(crate) fn new( bind_addr: SocketAddr, socket: Arc, @@ -51,8 +80,21 @@ impl IpTransport { match self.socket.poll_recv_quinn(cx, bufs, metas) { Poll::Pending => Poll::Pending, Poll::Ready(Ok(n)) => { - for (addr, el) in source_addrs.iter_mut().zip(metas.iter()).take(n) { - *addr = el.addr.into(); + for (source_addr, meta) in source_addrs.iter_mut().zip(metas.iter_mut()).take(n) { + if meta.addr.is_ipv4() { + // The AsyncUdpSocket is an AF_INET6 socket and needs to show this + // as coming from an IPv4-mapped IPv6 addresses, since Quinn will + // use those when sending on an INET6 socket. + let v6_ip = match meta.addr.ip() { + IpAddr::V4(ipv4_addr) => ipv4_addr.to_ipv6_mapped(), + IpAddr::V6(ipv6_addr) => ipv6_addr, + }; + meta.addr = SocketAddr::new(v6_ip.into(), meta.addr.port()); + } + // The transport addresses are internal to iroh and we always want those + // to remain the canonical address. 
+ *source_addr = + SocketAddr::new(meta.addr.ip().to_canonical(), meta.addr.port()).into(); } Poll::Ready(Ok(n)) } @@ -119,7 +161,7 @@ impl IpNetworkChangeSender { } } -#[derive(Debug)] +#[derive(Debug, Clone)] #[pin_project] pub(super) struct IpSender { bind_addr: SocketAddr, @@ -143,42 +185,6 @@ impl IpSender { } } - pub(super) async fn send( - &self, - destination: SocketAddr, - src: Option, - transmit: &Transmit<'_>, - ) -> io::Result<()> { - trace!("sending to {}", destination); - let total_bytes = transmit.contents.len() as u64; - let res = self - .sender - .send(&quinn_udp::Transmit { - destination: Self::canonical_addr(destination), - ecn: transmit.ecn, - contents: transmit.contents, - segment_size: transmit.segment_size, - src_ip: src, - }) - .await; - trace!("send res: {:?}", res); - - match res { - Ok(res) => { - match destination { - SocketAddr::V4(_) => { - self.metrics.send_ipv4.inc_by(total_bytes); - } - SocketAddr::V6(_) => { - self.metrics.send_ipv6.inc_by(total_bytes); - } - } - Ok(res) - } - Err(err) => Err(err), - } - } - /// Creates a canonical socket address. /// /// We may be asked to send IPv4-mapped IPv6 addresses. But our sockets are configured @@ -192,15 +198,14 @@ impl IpSender { pub(super) fn poll_send( mut self: Pin<&mut Self>, cx: &mut std::task::Context, - destination: SocketAddr, + dst: SocketAddr, src: Option, transmit: &Transmit<'_>, ) -> Poll> { - trace!("sending to {}", destination); let total_bytes = transmit.contents.len() as u64; let res = Pin::new(&mut self.sender).poll_send( &quinn_udp::Transmit { - destination: Self::canonical_addr(destination), + destination: Self::canonical_addr(dst), ecn: transmit.ecn, contents: transmit.contents, segment_size: transmit.segment_size, @@ -208,11 +213,10 @@ impl IpSender { }, cx, ); - trace!("send res: {:?}", res); match res { Poll::Ready(Ok(res)) => { - match destination { + match dst { SocketAddr::V4(_) => { self.metrics.send_ipv4.inc_by(total_bytes); } @@ -226,37 +230,4 @@ impl IpSender { Poll::Pending => Poll::Pending, } } - - pub(super) fn try_send( - &self, - destination: SocketAddr, - src: Option, - transmit: &Transmit<'_>, - ) -> io::Result<()> { - trace!("sending to {}", destination); - let total_bytes = transmit.contents.len() as u64; - let res = self.sender.try_send(&quinn_udp::Transmit { - destination, - ecn: transmit.ecn, - contents: transmit.contents, - segment_size: transmit.segment_size, - src_ip: src, - }); - trace!("send res: {:?}", res); - - match res { - Ok(res) => { - match destination { - SocketAddr::V4(_) => { - self.metrics.send_ipv4.inc_by(total_bytes); - } - SocketAddr::V6(_) => { - self.metrics.send_ipv6.inc_by(total_bytes); - } - } - Ok(res) - } - Err(err) => Err(err), - } - } } diff --git a/iroh/src/magicsock/transports/relay.rs b/iroh/src/magicsock/transports/relay.rs index 7b3c8cf46a1..fdaf503fd5f 100644 --- a/iroh/src/magicsock/transports/relay.rs +++ b/iroh/src/magicsock/transports/relay.rs @@ -14,7 +14,7 @@ use n0_future::{ use n0_watcher::{Watchable, Watcher as _}; use tokio::sync::mpsc; use tokio_util::sync::{CancellationToken, PollSender}; -use tracing::{Instrument, error, info_span, trace, warn}; +use tracing::{Instrument, error, info_span, warn}; use super::{Addr, Transmit}; @@ -147,7 +147,7 @@ impl RelayTransport { .segment_size .map_or(dm.datagrams.contents.len(), |s| u16::from(s) as usize); meta_out.ecn = None; - meta_out.dst_ip = None; // TODO: insert the relay url for this relay + meta_out.dst_ip = None; *addr = (dm.url, dm.src).into(); num_msgs += 1; @@ -244,42 +244,6 
@@ impl RelaySender { true } - pub(super) async fn send( - &self, - dest_url: RelayUrl, - dest_endpoint: EndpointId, - transmit: &Transmit<'_>, - ) -> io::Result<()> { - let contents = datagrams_from_transmit(transmit); - - let item = RelaySendItem { - remote_endpoint: dest_endpoint, - url: dest_url.clone(), - datagrams: contents, - }; - - let dest_endpoint = item.remote_endpoint; - let dest_url = item.url.clone(); - let Some(sender) = self.sender.get_ref() else { - return Err(io::Error::other("channel closed")); - }; - match sender.send(item).await { - Ok(_) => { - trace!(endpoint = %dest_endpoint.fmt_short(), relay_url = %dest_url, - "send relay: message queued"); - Ok(()) - } - Err(mpsc::error::SendError(_)) => { - error!(endpoint = %dest_endpoint.fmt_short(), relay_url = %dest_url, - "send relay: message dropped, channel to actor is closed"); - Err(io::Error::new( - io::ErrorKind::ConnectionReset, - "channel to actor is closed", - )) - } - } - } - pub(super) fn poll_send( &mut self, cx: &mut Context, @@ -289,81 +253,24 @@ impl RelaySender { ) -> Poll> { match ready!(self.sender.poll_reserve(cx)) { Ok(()) => { - trace!(endpoint = %dest_endpoint.fmt_short(), relay_url = %dest_url, - "send relay: message queued"); - let contents = datagrams_from_transmit(transmit); let item = RelaySendItem { remote_endpoint: dest_endpoint, url: dest_url.clone(), datagrams: contents, }; - let dest_endpoint = item.remote_endpoint; - let dest_url = item.url.clone(); - match self.sender.send_item(item) { Ok(()) => Poll::Ready(Ok(())), - Err(_err) => { - error!(endpoint = %dest_endpoint.fmt_short(), relay_url = %dest_url, - "send relay: message dropped, channel to actor is closed"); - Poll::Ready(Err(io::Error::new( - io::ErrorKind::ConnectionReset, - "channel to actor is closed", - ))) - } + Err(_err) => Poll::Ready(Err(io::Error::new( + io::ErrorKind::ConnectionReset, + "channel to actor is closed", + ))), } } - Err(_err) => { - error!(endpoint = %dest_endpoint.fmt_short(), relay_url = %dest_url, - "send relay: message dropped, channel to actor is closed"); - Poll::Ready(Err(io::Error::new( - io::ErrorKind::ConnectionReset, - "channel to actor is closed", - ))) - } - } - } - - pub(super) fn try_send( - &self, - dest_url: RelayUrl, - dest_endpoint: EndpointId, - transmit: &Transmit<'_>, - ) -> io::Result<()> { - let contents = datagrams_from_transmit(transmit); - - let item = RelaySendItem { - remote_endpoint: dest_endpoint, - url: dest_url.clone(), - datagrams: contents, - }; - - let dest_endpoint = item.remote_endpoint; - let dest_url = item.url.clone(); - - let Some(sender) = self.sender.get_ref() else { - return Err(io::Error::other("channel closed")); - }; - - match sender.try_send(item) { - Ok(_) => { - trace!(endpoint = %dest_endpoint.fmt_short(), relay_url = %dest_url, - "send relay: message queued"); - Ok(()) - } - Err(mpsc::error::TrySendError::Closed(_)) => { - error!(endpoint = %dest_endpoint.fmt_short(), relay_url = %dest_url, - "send relay: message dropped, channel to actor is closed"); - Err(io::Error::new( - io::ErrorKind::ConnectionReset, - "channel to actor is closed", - )) - } - Err(mpsc::error::TrySendError::Full(_)) => { - warn!(endpoint = %dest_endpoint.fmt_short(), relay_url = %dest_url, - "send relay: message dropped, channel to actor is full"); - Err(io::Error::new(io::ErrorKind::WouldBlock, "channel full")) - } + Err(_err) => Poll::Ready(Err(io::Error::new( + io::ErrorKind::ConnectionReset, + "channel to actor is closed", + ))), } } } diff --git 
a/iroh/src/magicsock/transports/relay/actor.rs b/iroh/src/magicsock/transports/relay/actor.rs index 7e73d9d17bc..ea463da3769 100644 --- a/iroh/src/magicsock/transports/relay/actor.rs +++ b/iroh/src/magicsock/transports/relay/actor.rs @@ -314,10 +314,6 @@ impl ActiveRelayActor { /// /// Primarily switches between the dialing and connected states. async fn run(mut self) { - // TODO(frando): decide what this metric means, it's either wrong here or in endpoint_state.rs. - // From the existing description, it is wrong here. - // self.metrics.num_relay_conns_added.inc(); - let mut backoff = Self::build_backoff(); while let Err(err) = self.run_once().await { @@ -341,9 +337,6 @@ impl ActiveRelayActor { } } debug!("exiting"); - // TODO(frando): decide what this metric means, it's either wrong here or in endpoint_state.rs. - // From the existing description, it is wrong here. - // self.metrics.num_relay_conns_removed.inc(); } fn build_backoff() -> impl Backoff { @@ -393,7 +386,7 @@ impl ActiveRelayActor { /// Returns `None` if the actor needs to shut down. Returns `Some(Ok(client))` when the /// connection is established, and `Some(Err(err))` if dialing the relay failed. async fn run_dialing(&mut self) -> Option> { - debug!("Actor loop: connecting to relay."); + trace!("Actor loop: connecting to relay."); // We regularly flush the relay_datagrams_send queue so it is not full of stale // packets while reconnecting. Those datagrams are dropped and the QUIC congestion @@ -836,7 +829,7 @@ pub(super) struct RelayActor { cancel_token: CancellationToken, } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct Config { pub my_relay: Watchable>, pub secret_key: SecretKey, @@ -1466,7 +1459,6 @@ mod tests { #[tokio::test] #[traced_test] - #[ignore = "flaky"] async fn test_active_relay_inactive() -> Result { let (_relay_map, relay_url, _server) = test_utils::run_relay_server().await?; @@ -1488,11 +1480,11 @@ mod tests { ); // Wait until the actor is connected to the relay server. - tokio::time::timeout(Duration::from_millis(200), async { + tokio::time::timeout(Duration::from_secs(5), async { loop { let (tx, rx) = oneshot::channel(); inbox_tx.send(ActiveRelayMessage::PingServer(tx)).await.ok(); - if tokio::time::timeout(Duration::from_millis(100), rx) + if tokio::time::timeout(Duration::from_millis(200), rx) .await .map(|resp| resp.is_ok()) .unwrap_or_default() @@ -1504,12 +1496,12 @@ mod tests { .await .std_context("timeout")?; - // From now on, we pause time - tokio::time::pause(); // We now have an idling ActiveRelayActor. If we advance time just a little it // should stay alive. info!("Stepping time forwards by RELAY_INACTIVE_CLEANUP_TIME / 2"); + tokio::time::pause(); tokio::time::advance(RELAY_INACTIVE_CLEANUP_TIME / 2).await; + tokio::time::resume(); assert!( tokio::time::timeout(Duration::from_millis(100), &mut task) @@ -1520,15 +1512,20 @@ mod tests { // If we advance time a lot it should finish. info!("Stepping time forwards by RELAY_INACTIVE_CLEANUP_TIME"); + tokio::time::pause(); tokio::time::advance(RELAY_INACTIVE_CLEANUP_TIME).await; + tokio::time::resume(); + + // We resume time for these timeouts, as there's actual I/O happening, + // for example closing the TCP stream, so we actually need the tokio + // runtime to idle a bit while the kernel is doing its thing. 
assert!( - tokio::time::timeout(Duration::from_millis(1000), task) + tokio::time::timeout(Duration::from_secs(1), task) .await .is_ok(), "actor task still running" ); - tokio::time::resume(); cancel_token.cancel(); Ok(()) } diff --git a/iroh/src/net_report.rs b/iroh/src/net_report.rs index 24484bcaeaf..0dde6c9e481 100644 --- a/iroh/src/net_report.rs +++ b/iroh/src/net_report.rs @@ -51,7 +51,6 @@ use self::reportgen::QadProbeReport; use self::reportgen::{ProbeFinished, ProbeReport}; mod defaults; -mod ip_mapped_addrs; mod metrics; mod probes; mod report; @@ -78,8 +77,6 @@ pub(crate) mod portmapper { } } -pub(crate) use ip_mapped_addrs::{IpMappedAddr, IpMappedAddresses}; - pub(crate) use self::reportgen::IfStateDetails; #[cfg(not(wasm_browser))] #[allow(missing_docs)] @@ -227,7 +224,6 @@ impl Client { /// Creates a new net_report client. pub(crate) fn new( #[cfg(not(wasm_browser))] dns_resolver: DnsResolver, - #[cfg(not(wasm_browser))] ip_mapped_addrs: Option, relay_map: RelayMap, opts: Options, metrics: Arc, @@ -245,7 +241,6 @@ impl Client { let socket_state = SocketState { quic_client, dns_resolver, - ip_mapped_addrs, }; Client { @@ -408,8 +403,6 @@ impl Client { ) -> Vec { use tracing::{Instrument, warn_span}; - debug!("spawning QAD probes"); - let Some(ref quic_client) = self.socket_state.quic_client else { return Vec::new(); }; @@ -452,6 +445,8 @@ impl Client { return reports; } + trace!("spawning QAD probes"); + // TODO: randomize choice? const MAX_RELAYS: usize = 5; @@ -464,7 +459,6 @@ impl Client { for relay in relays.into_iter().take(MAX_RELAYS) { if if_state.have_v4 && needs_v4_probe { debug!(?relay.url, "v4 QAD probe"); - let ip_mapped_addrs = self.socket_state.ip_mapped_addrs.clone(); let relay = relay.clone(); let dns_resolver = self.socket_state.dns_resolver.clone(); let quic_client = quic_client.clone(); @@ -474,15 +468,13 @@ impl Client { .child_token() .run_until_cancelled_owned(time::timeout( PROBES_TIMEOUT, - run_probe_v4(ip_mapped_addrs, relay, quic_client, dns_resolver), + run_probe_v4(relay, quic_client, dns_resolver), )) - .instrument(warn_span!("QAD-IPv4", %relay_url)), + .instrument(warn_span!("QADv4", %relay_url)), ); } - if if_state.have_v6 && needs_v6_probe { debug!(?relay.url, "v6 QAD probe"); - let ip_mapped_addrs = self.socket_state.ip_mapped_addrs.clone(); let relay = relay.clone(); let dns_resolver = self.socket_state.dns_resolver.clone(); let quic_client = quic_client.clone(); @@ -492,9 +484,9 @@ impl Client { .child_token() .run_until_cancelled_owned(time::timeout( PROBES_TIMEOUT, - run_probe_v6(ip_mapped_addrs, relay, quic_client, dns_resolver), + run_probe_v6(relay, quic_client, dns_resolver), )) - .instrument(warn_span!("QAD-IPv6", %relay_url)), + .instrument(warn_span!("QADv6", %relay_url)), ); } } @@ -509,6 +501,7 @@ impl Client { loop { // We early-abort the tasks once we have at least `enough_relays` reports, // and at least one ipv4 and one ipv6 report completed (if they were started, see comment above). 
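+            // Once that is the case, the cancellation tokens below abort any probe
+            // tasks that are still in flight.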
+ if reports.len() >= enough_relays && !ipv4_pending && !ipv6_pending { debug!("enough probes: {}", reports.len()); cancel_v4.cancel(); @@ -520,12 +513,14 @@ impl Client { biased; val = v4_buf.join_next(), if !v4_buf.is_empty() => { + let span = warn_span!("QADv4"); + let _guard = span.enter(); ipv4_pending = false; match val { Some(Ok(Some(Ok(res)))) => { match res { Ok((r, conn)) => { - debug!(?r, "got v4 QAD conn"); + debug!(?r, "probe report"); let url = r.relay.clone(); reports.push(ProbeReport::QadIpv4(r)); if self.qad_conns.v4.is_none() { @@ -535,32 +530,34 @@ impl Client { } } Err(err) => { - debug!("probe v4 failed: {err:?}"); + debug!("probe failed: {err:#}"); } } } Some(Err(err)) => { if err.is_panic() { - panic!("probe v4 panicked: {err:?}"); + panic!("probe panicked: {err:#}"); } - warn!("probe v4 failed: {err:?}"); + warn!("probe failed: {err:#}"); } Some(Ok(None)) => { - debug!("probe v4 canceled"); + debug!("probe canceled"); } Some(Ok(Some(Err(time::Elapsed { .. })))) => { - debug!("probe v4 timed out"); + debug!("probe timed out"); } None => {} } } val = v6_buf.join_next(), if !v6_buf.is_empty() => { + let span = warn_span!("QADv6"); + let _guard = span.enter(); ipv6_pending = false; match val { Some(Ok(Some(Ok(res)))) => { match res { Ok((r, conn)) => { - debug!(?r, "got v6 QAD conn"); + debug!(?r, "probe report"); let url = r.relay.clone(); reports.push(ProbeReport::QadIpv6(r)); if self.qad_conns.v6.is_none() { @@ -570,21 +567,21 @@ impl Client { } } Err(err) => { - debug!("probe v6 failed: {err:?}"); + debug!("probe failed: {err:#}"); } } } Some(Err(err)) => { if err.is_panic() { - panic!("probe v6 panicked: {err:?}"); + panic!("probe panicked: {err:#}"); } - warn!("probe v6 failed: {err:?}"); + warn!("probe failed: {err:#}"); } Some(Ok(None)) => { - debug!("probe v6 canceled"); + debug!("probe canceled"); } Some(Ok(Some(Err(time::Elapsed { .. 
})))) => { - debug!("probe v6 timed out"); + debug!("probe timed out"); } None => {} } @@ -765,26 +762,24 @@ impl Client { #[cfg(not(wasm_browser))] async fn run_probe_v4( - ip_mapped_addrs: Option, relay: Arc, quic_client: QuicClient, dns_resolver: DnsResolver, ) -> n0_error::Result<(QadProbeReport, QadConn), QadProbeError> { - let relay_addr_orig = reportgen::get_relay_addr_ipv4(&dns_resolver, &relay) + let relay_addr = reportgen::get_relay_addr_ipv4(&dns_resolver, &relay) .await .map_err(|source| e!(QadProbeError::GetRelayAddr { source }))?; - let relay_addr = - reportgen::maybe_to_mapped_addr(ip_mapped_addrs.as_ref(), relay_addr_orig.into()); - debug!(?relay_addr_orig, ?relay_addr, "relay addr v4"); + trace!(?relay_addr, "resolved relay server address"); let host = relay .url .host_str() .ok_or_else(|| e!(QadProbeError::MissingHost))?; let conn = quic_client - .create_conn(relay_addr, host) + .create_conn(relay_addr.into(), host) .await .map_err(|source| e!(QadProbeError::Quic { source }))?; + let mut receiver = conn.observed_external_addr(); // wait for an addr @@ -811,7 +806,6 @@ async fn run_probe_v4( // that is ivp6 then the address is an [IPv4-Mapped IPv6 Addresses](https://doc.rust-lang.org/beta/std/net/struct.Ipv6Addr.html#ipv4-mapped-ipv6-addresses) let val = val.map(|val| SocketAddr::new(val.ip().to_canonical(), val.port())); let latency = conn.rtt(); - trace!(?val, ?relay_addr, ?latency, "got addr V4"); observer .set(val.map(|addr| QadProbeReport { relay: endpoint.clone(), @@ -839,26 +833,24 @@ async fn run_probe_v4( #[cfg(not(wasm_browser))] async fn run_probe_v6( - ip_mapped_addrs: Option, relay: Arc, quic_client: QuicClient, dns_resolver: DnsResolver, ) -> n0_error::Result<(QadProbeReport, QadConn), QadProbeError> { - let relay_addr_orig = reportgen::get_relay_addr_ipv6(&dns_resolver, &relay) + let relay_addr = reportgen::get_relay_addr_ipv6(&dns_resolver, &relay) .await .map_err(|source| e!(QadProbeError::GetRelayAddr { source }))?; - let relay_addr = - reportgen::maybe_to_mapped_addr(ip_mapped_addrs.as_ref(), relay_addr_orig.into()); - debug!(?relay_addr_orig, ?relay_addr, "relay addr v6"); + trace!(?relay_addr, "resolved relay server address"); let host = relay .url .host_str() .ok_or_else(|| e!(QadProbeError::MissingHost))?; let conn = quic_client - .create_conn(relay_addr, host) + .create_conn(relay_addr.into(), host) .await .map_err(|source| e!(QadProbeError::Quic { source }))?; + let mut receiver = conn.observed_external_addr(); // wait for an addr @@ -885,7 +877,6 @@ async fn run_probe_v6( // that is ivp6 then the address is an [IPv4-Mapped IPv6 Addresses](https://doc.rust-lang.org/beta/std/net/struct.Ipv6Addr.html#ipv4-mapped-ipv6-addresses) let val = val.map(|val| SocketAddr::new(val.ip().to_canonical(), val.port())); let latency = conn.rtt(); - trace!(?val, ?relay_addr, ?latency, "got addr V6"); observer .set(val.map(|addr| QadProbeReport { relay: endpoint.clone(), @@ -982,7 +973,6 @@ mod tests { .insecure_skip_relay_cert_verify(true); let mut client = Client::new( resolver.clone(), - None, relay_map.clone(), opts.clone(), Default::default(), @@ -1183,8 +1173,7 @@ mod tests { println!("test: {}", tt.name); let relay_map = RelayMap::empty(); let opts = Options::default(); - let mut client = - Client::new(resolver.clone(), None, relay_map, opts, Default::default()); + let mut client = Client::new(resolver.clone(), relay_map, opts, Default::default()); for s in &mut tt.steps { // trigger the timer tokio::time::advance(Duration::from_secs(s.after)).await; diff --git 
a/iroh/src/net_report/ip_mapped_addrs.rs b/iroh/src/net_report/ip_mapped_addrs.rs deleted file mode 100644 index 90f1efbc964..00000000000 --- a/iroh/src/net_report/ip_mapped_addrs.rs +++ /dev/null @@ -1,134 +0,0 @@ -use std::{ - collections::BTreeMap, - net::{IpAddr, Ipv6Addr, SocketAddr}, - sync::{ - Arc, - atomic::{AtomicU64, Ordering}, - }, -}; - -use n0_error::{e, stack_error}; - -/// Can occur when converting a [`SocketAddr`] to an [`IpMappedAddr`] -#[stack_error(derive, add_meta)] -#[error("Failed to convert")] -pub struct IpMappedAddrError; - -/// A map fake Ipv6 address with an actual IP address. -/// -/// It is essentially a lookup key for an IP that iroh's magicsocket knows about. -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)] -pub(crate) struct IpMappedAddr(Ipv6Addr); - -/// Counter to always generate unique addresses for [`IpMappedAddr`]. -static IP_ADDR_COUNTER: AtomicU64 = AtomicU64::new(1); - -impl IpMappedAddr { - /// The Prefix/L of our Unique Local Addresses. - const ADDR_PREFIXL: u8 = 0xfd; - /// The Global ID used in our Unique Local Addresses. - const ADDR_GLOBAL_ID: [u8; 5] = [21, 7, 10, 81, 11]; - /// The Subnet ID used in our Unique Local Addresses. - const ADDR_SUBNET: [u8; 2] = [0, 1]; - - /// The dummy port used for all mapped addresses. - const MAPPED_ADDR_PORT: u16 = 12345; - - /// Generates a globally unique fake UDP address. - /// - /// This generates a new IPv6 address in the Unique Local Address range (RFC 4193) - /// which is recognised by iroh as an IP mapped address. - pub(super) fn generate() -> Self { - let mut addr = [0u8; 16]; - addr[0] = Self::ADDR_PREFIXL; - addr[1..6].copy_from_slice(&Self::ADDR_GLOBAL_ID); - addr[6..8].copy_from_slice(&Self::ADDR_SUBNET); - - let counter = IP_ADDR_COUNTER.fetch_add(1, Ordering::Relaxed); - addr[8..16].copy_from_slice(&counter.to_be_bytes()); - - Self(Ipv6Addr::from(addr)) - } - - /// Returns a consistent [`SocketAddr`] for the [`IpMappedAddr`]. - /// - /// This does not have a routable IP address. - /// - /// This uses a made-up, but fixed port number. The [IpMappedAddresses`] map this is - /// made for creates a unique [`IpMappedAddr`] for each IP+port and thus does not use - /// the port to map back to the original [`SocketAddr`]. - pub(crate) fn private_socket_addr(&self) -> SocketAddr { - SocketAddr::new(IpAddr::from(self.0), Self::MAPPED_ADDR_PORT) - } -} - -impl TryFrom for IpMappedAddr { - type Error = IpMappedAddrError; - - fn try_from(value: Ipv6Addr) -> std::result::Result { - let octets = value.octets(); - if octets[0] == Self::ADDR_PREFIXL - && octets[1..6] == Self::ADDR_GLOBAL_ID - && octets[6..8] == Self::ADDR_SUBNET - { - return Ok(Self(value)); - } - Err(e!(IpMappedAddrError)) - } -} - -impl std::fmt::Display for IpMappedAddr { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "IpMappedAddr({})", self.0) - } -} - -/// A Map of [`IpMappedAddresses`] to [`SocketAddr`]. -// TODO(ramfox): before this is ready to be used beyond QAD, we should add -// mechanisms for keeping track of "aliveness" and pruning address, as we do -// with the `EndpointMap` -#[derive(Debug, Clone, Default)] -pub(crate) struct IpMappedAddresses(Arc>); - -#[derive(Debug, Default)] -pub(super) struct Inner { - by_mapped_addr: BTreeMap, - /// Because [`std::net::SocketAddrV6`] contains extra fields besides the IP - /// address and port (ie, flow_info and scope_id), the a [`std::net::SocketAddrV6`] - /// with the same IP addr and port might Hash to something different. 
- /// So to get a hashable key for the map, we are using `(IpAddr, u6)`. - by_ip_port: BTreeMap<(IpAddr, u16), IpMappedAddr>, -} - -impl IpMappedAddresses { - /// Adds a [`SocketAddr`] to the map and returns the generated [`IpMappedAddr`]. - /// - /// If this [`SocketAddr`] already exists in the map, it returns its - /// associated [`IpMappedAddr`]. - /// - /// Otherwise a new [`IpMappedAddr`] is generated for it and returned. - pub(super) fn get_or_register(&self, socket_addr: SocketAddr) -> IpMappedAddr { - let ip_port = (socket_addr.ip(), socket_addr.port()); - let mut inner = self.0.lock().expect("poisoned"); - if let Some(mapped_addr) = inner.by_ip_port.get(&ip_port) { - return *mapped_addr; - } - let ip_mapped_addr = IpMappedAddr::generate(); - inner.by_mapped_addr.insert(ip_mapped_addr, socket_addr); - inner.by_ip_port.insert(ip_port, ip_mapped_addr); - ip_mapped_addr - } - - /// Returns the [`IpMappedAddr`] for the given [`SocketAddr`]. - pub(crate) fn get_mapped_addr(&self, socket_addr: &SocketAddr) -> Option { - let ip_port = (socket_addr.ip(), socket_addr.port()); - let inner = self.0.lock().expect("poisoned"); - inner.by_ip_port.get(&ip_port).copied() - } - - /// Returns the [`SocketAddr`] for the given [`IpMappedAddr`]. - pub(crate) fn get_ip_addr(&self, mapped_addr: &IpMappedAddr) -> Option { - let inner = self.0.lock().expect("poisoned"); - inner.by_mapped_addr.get(mapped_addr).copied() - } -} diff --git a/iroh/src/net_report/report.rs b/iroh/src/net_report/report.rs index dca3047a384..004a140dc66 100644 --- a/iroh/src/net_report/report.rs +++ b/iroh/src/net_report/report.rs @@ -7,7 +7,7 @@ use std::{ use iroh_base::RelayUrl; use serde::{Deserialize, Serialize}; -use tracing::warn; +use tracing::{trace, warn}; use super::{ProbeReport, probes::Probe}; @@ -82,7 +82,6 @@ impl Report { self.udp_v4 = true; - tracing::debug!(?self.global_v4, ?self.mapping_varies_by_dest_ipv4, %ipp,"got"); if let Some(global) = self.global_v4 { if global == ipp { if self.mapping_varies_by_dest_ipv4.is_none() { @@ -95,6 +94,7 @@ impl Report { } else { self.global_v4 = Some(ipp); } + trace!(?self.global_v4, ?self.mapping_varies_by_dest_ipv4, %ipp, "stored report"); } #[cfg(not(wasm_browser))] ProbeReport::QadIpv6(report) => { @@ -109,7 +109,6 @@ impl Report { }; self.udp_v6 = true; - tracing::debug!(?self.global_v6, ?self.mapping_varies_by_dest_ipv6, %ipp,"got"); if let Some(global) = self.global_v6 { if global == ipp { if self.mapping_varies_by_dest_ipv6.is_none() { @@ -122,6 +121,7 @@ impl Report { } else { self.global_v6 = Some(ipp); } + trace!(?self.global_v6, ?self.mapping_varies_by_dest_ipv6, %ipp, "stored report"); } } } diff --git a/iroh/src/net_report/reportgen.rs b/iroh/src/net_report/reportgen.rs index 8b9fe40642f..60f7436f1fe 100644 --- a/iroh/src/net_report/reportgen.rs +++ b/iroh/src/net_report/reportgen.rs @@ -47,6 +47,8 @@ use tokio_util::sync::CancellationToken; use tracing::{Instrument, debug, error, trace, warn, warn_span}; use url::Host; +#[cfg(not(wasm_browser))] +use super::defaults::timeouts::DNS_TIMEOUT; #[cfg(wasm_browser)] use super::portmapper; // We stub the library use super::{ @@ -54,8 +56,6 @@ use super::{ probes::{Probe, ProbePlan}, }; #[cfg(not(wasm_browser))] -use super::{defaults::timeouts::DNS_TIMEOUT, ip_mapped_addrs::IpMappedAddresses}; -#[cfg(not(wasm_browser))] use crate::discovery::dns::DNS_STAGGERING_MS; use crate::{ net_report::defaults::timeouts::{ @@ -110,8 +110,6 @@ pub(crate) struct SocketState { pub(crate) quic_client: Option, /// The DNS resolver to 
use for probes that need to resolve DNS records. pub(crate) dns_resolver: DnsResolver, - /// Optional [`IpMappedAddresses`] used to enable QAD in iroh - pub(crate) ip_mapped_addrs: Option, } impl Client { @@ -199,7 +197,7 @@ pub(super) enum ProbeFinished { impl Actor { async fn run(self) { match time::timeout(OVERALL_REPORT_TIMEOUT, self.run_inner()).await { - Ok(()) => debug!("reportgen actor finished"), + Ok(()) => trace!("reportgen actor finished"), Err(time::Elapsed { .. }) => { warn!("reportgen timed out"); } @@ -218,7 +216,7 @@ impl Actor { /// - Updates the report, cancels unneeded futures. /// - Sends the report to the net_report actor. async fn run_inner(self) { - debug!("reportstate actor starting"); + trace!("reportgen actor starting"); let mut probes = JoinSet::default(); @@ -349,7 +347,7 @@ impl Actor { if_state: IfStateDetails, probes: &mut JoinSet, ) -> CancellationToken { - debug!(?if_state, "local interface details"); + trace!(?if_state, "local interface details"); let plan = match self.last_report { Some(ref report) => { ProbePlan::with_last_report(&self.relay_map, report, &self.protocols) @@ -518,17 +516,6 @@ impl Probe { } } -#[cfg(not(wasm_browser))] -pub(super) fn maybe_to_mapped_addr( - ip_mapped_addrs: Option<&IpMappedAddresses>, - addr: SocketAddr, -) -> SocketAddr { - if let Some(ip_mapped_addrs) = ip_mapped_addrs { - return ip_mapped_addrs.get_or_register(addr).private_socket_addr(); - } - addr -} - #[cfg(not(wasm_browser))] #[stack_error(derive, add_meta)] #[non_exhaustive] @@ -654,8 +641,11 @@ fn get_quic_port(relay: &RelayConfig) -> Option { pub enum GetRelayAddrError { #[error("No valid hostname in the relay URL")] InvalidHostname, - #[error("No suitable relay address found")] - NoAddrFound, + #[error("No suitable relay address found for {url} ({addr_type})")] + NoAddrFound { + url: RelayUrl, + addr_type: &'static str, + }, #[error("DNS lookup failed")] DnsLookup { source: StaggeredError }, #[error("Relay is not suitable")] @@ -708,12 +698,20 @@ async fn relay_lookup_ipv4_staggered( IpAddr::V4(ip) => SocketAddrV4::new(ip, port), IpAddr::V6(_) => unreachable!("bad DNS lookup: {:?}", addr), }) - .ok_or_else(|| e!(GetRelayAddrError::NoAddrFound)), + .ok_or_else(|| { + e!(GetRelayAddrError::NoAddrFound { + url: relay.url.clone(), + addr_type: "A", + }) + }), Err(err) => Err(e!(GetRelayAddrError::DnsLookup, err)), } } Some(url::Host::Ipv4(addr)) => Ok(SocketAddrV4::new(addr, port)), - Some(url::Host::Ipv6(_addr)) => Err(e!(GetRelayAddrError::NoAddrFound)), + Some(url::Host::Ipv6(_addr)) => Err(e!(GetRelayAddrError::NoAddrFound { + url: relay.url.clone(), + addr_type: "A", + })), None => Err(e!(GetRelayAddrError::InvalidHostname)), } } @@ -740,11 +738,19 @@ async fn relay_lookup_ipv6_staggered( IpAddr::V4(_) => unreachable!("bad DNS lookup: {:?}", addr), IpAddr::V6(ip) => SocketAddrV6::new(ip, port, 0, 0), }) - .ok_or_else(|| e!(GetRelayAddrError::NoAddrFound)), + .ok_or_else(|| { + e!(GetRelayAddrError::NoAddrFound { + url: relay.url.clone(), + addr_type: "AAAA", + }) + }), Err(err) => Err(e!(GetRelayAddrError::DnsLookup, err)), } } - Some(url::Host::Ipv4(_addr)) => Err(e!(GetRelayAddrError::NoAddrFound)), + Some(url::Host::Ipv4(_addr)) => Err(e!(GetRelayAddrError::NoAddrFound { + url: relay.url.clone(), + addr_type: "AAAA", + })), Some(url::Host::Ipv6(addr)) => Ok(SocketAddrV6::new(addr, port, 0, 0)), None => Err(e!(GetRelayAddrError::InvalidHostname)), } @@ -889,7 +895,7 @@ mod tests { let quic_client = iroh_relay::quic::QuicClient::new(ep.clone(), 
client_config); let dns_resolver = DnsResolver::default(); - let (report, conn) = super::super::run_probe_v4(None, relay, quic_client, dns_resolver) + let (report, conn) = super::super::run_probe_v4(relay, quic_client, dns_resolver) .await .unwrap(); diff --git a/iroh/src/protocol.rs b/iroh/src/protocol.rs index fcca2939fd5..70f9fdc8afc 100644 --- a/iroh/src/protocol.rs +++ b/iroh/src/protocol.rs @@ -609,7 +609,13 @@ mod tests { use quinn::ApplicationClose; use super::*; - use crate::{RelayMode, endpoint::ConnectionError}; + use crate::{ + RelayMode, + endpoint::{ + BeforeConnectOutcome, ConnectError, ConnectWithOptsError, ConnectionError, + EndpointHooks, + }, + }; #[tokio::test] async fn test_shutdown() -> Result { @@ -649,7 +655,7 @@ mod tests { } #[tokio::test] - async fn test_limiter() -> Result { + async fn test_limiter_router() -> Result { // tracing_subscriber::fmt::try_init().ok(); let e1 = Endpoint::empty_builder(RelayMode::Disabled).bind().await?; // deny all access @@ -673,6 +679,52 @@ mod tests { Ok(()) } + #[tokio::test] + async fn test_limiter_hook() -> Result { + // tracing_subscriber::fmt::try_init().ok(); + #[derive(Debug, Default)] + struct LimitHook; + impl EndpointHooks for LimitHook { + async fn before_connect<'a>( + &'a self, + _remote_addr: &'a iroh_base::EndpointAddr, + alpn: &'a [u8], + ) -> BeforeConnectOutcome { + assert_eq!(alpn, ECHO_ALPN); + + // deny all access + BeforeConnectOutcome::Reject + } + } + + let e1 = Endpoint::empty_builder(RelayMode::Disabled).bind().await?; + + let r1 = Router::builder(e1.clone()).accept(ECHO_ALPN, Echo).spawn(); + + let addr1 = r1.endpoint().addr(); + dbg!(&addr1); + let e2 = Endpoint::empty_builder(RelayMode::Disabled) + .hooks(LimitHook) + .bind() + .await?; + + println!("connecting"); + let conn_err = e2.connect(addr1, ECHO_ALPN).await.unwrap_err(); + + assert!(matches!( + conn_err, + ConnectError::Connect { + source: ConnectWithOptsError::LocallyRejected { .. }, + .. + } + )); + + r1.shutdown().await.anyerr()?; + e2.close().await; + + Ok(()) + } + #[tokio::test] async fn test_graceful_shutdown() -> Result { #[derive(Debug, Clone, Default)]