From 484eb2a32d8156fc1f8d7b660116497a438a2dd4 Mon Sep 17 00:00:00 2001 From: jmwample Date: Thu, 13 Nov 2025 15:32:03 -0700 Subject: [PATCH 01/18] initial work to add v4/v6 controls to nameservers used for dns --- common/http-api-client/src/dns.rs | 212 +++++++++++++++++++++++++++++- 1 file changed, 206 insertions(+), 6 deletions(-) diff --git a/common/http-api-client/src/dns.rs b/common/http-api-client/src/dns.rs index 79f7997fbe1..2bb1cdc0e8e 100644 --- a/common/http-api-client/src/dns.rs +++ b/common/http-api-client/src/dns.rs @@ -115,7 +115,9 @@ pub struct HickoryDnsResolver { /// Overall timeout for dns lookup associated with any individual host resolution. For example, /// use of retries, server_ordering_strategy, etc. ends absolutely if this timeout is reached. overall_dns_timeout: Duration, -} + /// Policy for which Ip version should be used for name_servers. + ns_ip_ver_policy: NameServerIpVersionPolicy, +} impl Default for HickoryDnsResolver { fn default() -> Self { @@ -124,6 +126,7 @@ impl Default for HickoryDnsResolver { fallback: Default::default(), static_base: Default::default(), dont_use_shared: Default::default(), + ns_ip_ver_policy: Default::default(), overall_dns_timeout: Duration::from_secs(10), } } @@ -255,9 +258,9 @@ impl HickoryDnsResolver { // using a closure here is slightly gross, but this makes sure that if the // lazy-init returns an error it can be handled by the client if dont_use_shared { - new_resolver() + new_resolver(self.ns_ip_ver_policy) } else { - Ok(SHARED_RESOLVER.state.get_or_try_init(new_resolver)?.clone()) + Ok(SHARED_RESOLVER.state.get_or_try_init(|| new_resolver(self.ns_ip_ver_policy))?.clone()) } } @@ -316,6 +319,104 @@ impl HickoryDnsResolver { .expect("infallible assign"); self.static_base = Some(Arc::new(cell)); } + + /// Configure the resolver to use only Ipv4 nameservers and to resolve addresses to Ipv4 (A) + /// records only. + /// + /// NOTE: Calling this function will rebuild the inner resolver which means that the + /// cached dns lookups will not carry over and will go to network to resolve again. + pub fn set_ipv4_only(&mut self) { + self.set_nameserver_ip_version_strategy(NameServerIpVersionPolicy::Ipv4Only); + self.set_hostname_ip_version_lookup_strategy(HostnameIpLookupStrategy::A_only); + } + + /// Set the policy relating to nameserver IP version. + /// + /// NOTE: Calling this function will rebuild the inner resolver which means that the + /// cached dns lookups will not carry over and will go to network to resolve again. + pub fn set_nameserver_ip_version_strategy(&mut self, strategy: NameServerIpVersionPolicy) {} + + /// Set the policy for the record type queried when looking up hostnames + pub fn set_hostname_ip_version_lookup_strategy(&mut self, strategy: HostnameIpLookupStrategy) { + // self.state.get_mut() + } +} + +/// Policy options for nameserver IP versions to use when sending DNS queries. +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] +pub enum NameServerIpVersionPolicy { + /// Only send queries to Ipv4 nameservers + Ipv4Only, + /// Only send queries to Ipv6 nameserver + Ipv6Only, + /// Send queries to Ipv4 AND Ipv6 nameservers in parallel + #[default] + Ipv4AndIpv6, +} + +/// Policy options for query types sent when lookup up a hostname. +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] +#[allow(non_camel_case_types)] +pub enum HostnameIpLookupStrategy { + /// Only query for A (Ipv4) records + A_only, + /// Only query for AAAA (Ipv6) records + AAAA_only, + /// Query for A and AAAA in parallel + A_and_AAAA, + /// Query for Ipv6 if that fails, query for Ipv4 + AAAA_then_A, + #[default] + /// Query for Ipv4 if that fails, query for Ipv6 (default) + A_then_AAAA, +} + +impl From for HostnameIpLookupStrategy { + fn from(value: LookupIpStrategy) -> Self { + match value { + LookupIpStrategy::Ipv4AndIpv6 => HostnameIpLookupStrategy::A_and_AAAA, + LookupIpStrategy::Ipv4Only => HostnameIpLookupStrategy::A_only, + LookupIpStrategy::Ipv6Only => HostnameIpLookupStrategy::AAAA_only, + LookupIpStrategy::Ipv4thenIpv6 => HostnameIpLookupStrategy::A_then_AAAA, + LookupIpStrategy::Ipv6thenIpv4 => HostnameIpLookupStrategy::AAAA_then_A, + } + } +} + +impl From<&LookupIpStrategy> for HostnameIpLookupStrategy { + fn from(value: &LookupIpStrategy) -> Self { + match value { + LookupIpStrategy::Ipv4AndIpv6 => HostnameIpLookupStrategy::A_and_AAAA, + LookupIpStrategy::Ipv4Only => HostnameIpLookupStrategy::A_only, + LookupIpStrategy::Ipv6Only => HostnameIpLookupStrategy::AAAA_only, + LookupIpStrategy::Ipv4thenIpv6 => HostnameIpLookupStrategy::A_then_AAAA, + LookupIpStrategy::Ipv6thenIpv4 => HostnameIpLookupStrategy::AAAA_then_A, + } + } +} + +impl From for LookupIpStrategy { + fn from(value: HostnameIpLookupStrategy) -> LookupIpStrategy { + match value { + HostnameIpLookupStrategy::A_and_AAAA => LookupIpStrategy::Ipv4AndIpv6, + HostnameIpLookupStrategy::A_only => LookupIpStrategy::Ipv4Only, + HostnameIpLookupStrategy::AAAA_only => LookupIpStrategy::Ipv6Only, + HostnameIpLookupStrategy::A_then_AAAA => LookupIpStrategy::Ipv4thenIpv6, + HostnameIpLookupStrategy::AAAA_then_A => LookupIpStrategy::Ipv6thenIpv4, + } + } +} + +impl From<&HostnameIpLookupStrategy> for LookupIpStrategy { + fn from(value: &HostnameIpLookupStrategy) -> LookupIpStrategy { + match value { + HostnameIpLookupStrategy::A_and_AAAA => LookupIpStrategy::Ipv4AndIpv6, + HostnameIpLookupStrategy::A_only => LookupIpStrategy::Ipv4Only, + HostnameIpLookupStrategy::AAAA_only => LookupIpStrategy::Ipv6Only, + HostnameIpLookupStrategy::A_then_AAAA => LookupIpStrategy::Ipv4thenIpv6, + HostnameIpLookupStrategy::AAAA_then_A => LookupIpStrategy::Ipv6thenIpv4, + } + } } /// Create a new resolver with a custom DoT based configuration. The options are overridden to look @@ -326,15 +427,82 @@ impl HickoryDnsResolver { /// /// Caches successfully resolved addresses for 30 minutes to prevent continual use of remote lookup. /// This resolver is intended to be used for OUR API endpoints that do not rapidly rotate IPs. -fn new_resolver() -> Result { +fn new_resolver( + ns_ip_version_policy: NameServerIpVersionPolicy, +) -> Result { info!("building new configured resolver"); + let name_servers = match ns_ip_version_policy { + NameServerIpVersionPolicy::Ipv4AndIpv6 => default_nameserver_group(), + NameServerIpVersionPolicy::Ipv4Only => default_nameserver_group_ipv4_only(), + NameServerIpVersionPolicy::Ipv6Only => default_nameserver_group_ipv6_only(), + }; + + configure_and_build_resolver(name_servers) +} + +fn default_nameserver_group() -> NameServerConfigGroup { let mut name_servers = NameServerConfigGroup::quad9_tls(); name_servers.merge(NameServerConfigGroup::quad9_https()); name_servers.merge(NameServerConfigGroup::cloudflare_tls()); name_servers.merge(NameServerConfigGroup::cloudflare_https()); + name_servers +} - configure_and_build_resolver(name_servers) +fn default_nameserver_group_ipv4_only() -> NameServerConfigGroup { + let mut name_servers = NameServerConfigGroup::from_ips_https( + &quad9_ips_v4(), + 443, + "dns.quad9.net".to_string(), + true, + ); + name_servers.merge(NameServerConfigGroup::from_ips_tls( + &quad9_ips_v4(), + 853, + "dns.quad9.net".to_string(), + true, + )); + name_servers.merge(NameServerConfigGroup::from_ips_https( + &cloudflare_ips_v4(), + 443, + "cloudflare-dns.com".to_string(), + true, + )); + name_servers.merge(NameServerConfigGroup::from_ips_tls( + &cloudflare_ips_v4(), + 853, + "cloudflare-dns.com".to_string(), + true, + )); + name_servers +} + +fn default_nameserver_group_ipv6_only() -> NameServerConfigGroup { + let mut name_servers = NameServerConfigGroup::from_ips_https( + &quad9_ips_v6(), + 443, + "dns.quad9.net".to_string(), + true, + ); + name_servers.merge(NameServerConfigGroup::from_ips_tls( + &quad9_ips_v6(), + 853, + "dns.quad9.net".to_string(), + true, + )); + name_servers.merge(NameServerConfigGroup::from_ips_https( + &cloudflare_ips_v6(), + 443, + "cloudflare-dns.com".to_string(), + true, + )); + name_servers.merge(NameServerConfigGroup::from_ips_tls( + &cloudflare_ips_v6(), + 853, + "cloudflare-dns.com".to_string(), + true, + )); + name_servers } fn configure_and_build_resolver( @@ -344,13 +512,45 @@ fn configure_and_build_resolver( let mut resolver_builder = TokioResolver::builder_with_config(config, TokioConnectionProvider::default()); - resolver_builder.options_mut().ip_strategy = LookupIpStrategy::Ipv4AndIpv6; + resolver_builder.options_mut().ip_strategy = LookupIpStrategy::Ipv4thenIpv6; // Cache successful responses for queries received by this resolver for 30 min minimum. resolver_builder.options_mut().positive_min_ttl = Some(Duration::from_secs(1800)); Ok(resolver_builder.build()) } +fn cloudflare_ips_v4() -> Vec { + hickory_resolver::config::CLOUDFLARE_IPS + .iter() + .filter(|ip| ip.is_ipv4()) + .cloned() + .collect() +} + +fn cloudflare_ips_v6() -> Vec { + hickory_resolver::config::CLOUDFLARE_IPS + .iter() + .filter(|ip| ip.is_ipv6()) + .cloned() + .collect() +} + +fn quad9_ips_v4() -> Vec { + hickory_resolver::config::CLOUDFLARE_IPS + .iter() + .filter(|ip| ip.is_ipv4()) + .cloned() + .collect() +} + +fn quad9_ips_v6() -> Vec { + hickory_resolver::config::CLOUDFLARE_IPS + .iter() + .filter(|ip| ip.is_ipv4()) + .cloned() + .collect() +} + /// Create a new resolver with the default configuration, which reads from the system DNS config /// (i.e. `/etc/resolve.conf` in unix). The options are overridden to look up for both IPv4 and IPv6 /// addresses to work with "happy eyeballs" algorithm. From 80c3b28d8f2285c44244af9aa62267678f12260b Mon Sep 17 00:00:00 2001 From: jmwample Date: Tue, 18 Nov 2025 10:08:19 -0700 Subject: [PATCH 02/18] v4/v6 for independent resolers --- common/http-api-client/src/dns.rs | 227 +++++++++++++++++++++++------- 1 file changed, 180 insertions(+), 47 deletions(-) diff --git a/common/http-api-client/src/dns.rs b/common/http-api-client/src/dns.rs index 2bb1cdc0e8e..10ac0a36c2a 100644 --- a/common/http-api-client/src/dns.rs +++ b/common/http-api-client/src/dns.rs @@ -3,34 +3,25 @@ //! DNS resolver configuration for internal lookups. //! -//! The resolver itself is the set combination of the google, cloudflare, and quad9 endpoints -//! supporting DoH and DoT. +//! The resolver itself is the set combination of the cloudflare, and quad9 endpoints supporting DoH +//! and DoT. //! -//! This resolver supports a fallback mechanism where, should the DNS-over-TLS resolution fail, a -//! followup resolution will be done using the hosts configured default (e.g. `/etc/resolve.conf` on -//! linux). This is disabled by default and can be enabled using [`enable_system_fallback`]. +//! This resolver supports an optional fallback mechanism where, should the DNS-over-TLS resolution +//! fail, a followup resolution will be done using the hosts configured default (e.g. +//! `/etc/resolve.conf` on linux). This is disabled by default and can be enabled using +//! [`enable_system_fallback`]. //! -//! Requires the `dns-over-https-rustls`, `webpki-roots` feature for the -//! `hickory-resolver` crate +//! There is also a second optional fallback mechanism that allows a static map to be used as a last +//! resort. This can help when DNS encounters errors due to blocked resolvers or unknown conditions. //! -//! -//! Note: The hickory DoH resolver can cause warning logs about H2 connection failure. This -//! indicates that the long lived https connection was closed by the remote peer and the resolver -//! will have to reconnect. It should not impact actual functionality. -//! -//! code ref: https://github.com/hickory-dns/hickory-dns/blob/06a8b1ce9bd9322d8e6accf857d30257e1274427/crates/proto/src/h2/h2_client_stream.rs#L534 -//! -//! example log: -//! -//! ```txt -//! WARN /home/ubuntu/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/hickory-proto-0.24.3/src/h2/h2_client_stream.rs:493: h2 connection failed: unexpected end of file -//! ``` +//! Requires the `dns-over-https-rustls`, `webpki-roots` feature for the `hickory-resolver` crate #![deny(missing_docs)] use crate::ClientBuilder; use std::{ collections::HashMap, + fmt::Display, net::{IpAddr, SocketAddr}, str::FromStr, sync::{Arc, LazyLock}, @@ -39,7 +30,7 @@ use std::{ use hickory_resolver::{ TokioResolver, - config::{LookupIpStrategy, NameServerConfigGroup, ResolverConfig}, + config::{LookupIpStrategy, NameServerConfigGroup, ResolverConfig, ResolverOpts}, lookup_ip::LookupIpIntoIter, name_server::TokioConnectionProvider, }; @@ -51,6 +42,8 @@ mod constants; mod static_resolver; pub use static_resolver::*; +const DEFAULT_POSITIVE_LOOKUP_CACHE_TTL: Duration = Duration::from_secs(1800); + impl ClientBuilder { /// Override the DNS resolver implementation used by the underlying http client. pub fn dns_resolver(mut self, resolver: Arc) -> Self { @@ -116,8 +109,10 @@ pub struct HickoryDnsResolver { /// use of retries, server_ordering_strategy, etc. ends absolutely if this timeout is reached. overall_dns_timeout: Duration, /// Policy for which Ip version should be used for name_servers. - ns_ip_ver_policy: NameServerIpVersionPolicy, -} + ns_ip_ver_policy: NameServerIpVersionPolicy, + /// Current options used by the resolver and used for rebuilding on preference change. + current_options: Option, +} impl Default for HickoryDnsResolver { fn default() -> Self { @@ -127,6 +122,7 @@ impl Default for HickoryDnsResolver { static_base: Default::default(), dont_use_shared: Default::default(), ns_ip_ver_policy: Default::default(), + current_options: Default::default(), overall_dns_timeout: Duration::from_secs(10), } } @@ -138,7 +134,9 @@ impl Resolve for HickoryDnsResolver { let maybe_fallback = self.fallback.clone(); let maybe_static = self.static_base.clone(); let independent = self.dont_use_shared; + let ns_strategy = self.ns_ip_ver_policy; let overall_dns_timeout = self.overall_dns_timeout; + let options = self.current_options.clone(); Box::pin(async move { resolve( name, @@ -146,6 +144,8 @@ impl Resolve for HickoryDnsResolver { maybe_fallback, maybe_static, independent, + ns_strategy, + options, overall_dns_timeout, ) .await @@ -154,15 +154,19 @@ impl Resolve for HickoryDnsResolver { } } +#[allow(clippy::too_many_arguments)] async fn resolve( name: Name, resolver: Arc>, maybe_fallback: Option>>, maybe_static: Option>>, independent: bool, + ns_strategy: NameServerIpVersionPolicy, + options: Option, overall_dns_timeout: Duration, ) -> Result { - let resolver = resolver.get_or_try_init(|| HickoryDnsResolver::new_resolver(independent))?; + let resolver = resolver + .get_or_try_init(|| HickoryDnsResolver::new_resolver(independent, ns_strategy, options))?; // Attempt a lookup using the primary resolver let resolve_fut = tokio::time::timeout(overall_dns_timeout, resolver.lookup_ip(name.as_str())); @@ -240,6 +244,8 @@ impl HickoryDnsResolver { self.fallback.clone(), self.static_base.clone(), self.dont_use_shared, + self.ns_ip_ver_policy, + self.current_options.clone(), self.overall_dns_timeout, ) .await @@ -254,13 +260,20 @@ impl HickoryDnsResolver { } } - fn new_resolver(dont_use_shared: bool) -> Result { + fn new_resolver( + dont_use_shared: bool, + ns_strategy: NameServerIpVersionPolicy, + options: Option, + ) -> Result { // using a closure here is slightly gross, but this makes sure that if the // lazy-init returns an error it can be handled by the client if dont_use_shared { - new_resolver(self.ns_ip_ver_policy) + new_resolver(ns_strategy, options) } else { - Ok(SHARED_RESOLVER.state.get_or_try_init(|| new_resolver(self.ns_ip_ver_policy))?.clone()) + Ok(SHARED_RESOLVER + .state + .get_or_try_init(|| new_resolver(ns_strategy, options))? + .clone()) } } @@ -326,19 +339,64 @@ impl HickoryDnsResolver { /// NOTE: Calling this function will rebuild the inner resolver which means that the /// cached dns lookups will not carry over and will go to network to resolve again. pub fn set_ipv4_only(&mut self) { - self.set_nameserver_ip_version_strategy(NameServerIpVersionPolicy::Ipv4Only); self.set_hostname_ip_version_lookup_strategy(HostnameIpLookupStrategy::A_only); + self.set_nameserver_ip_version_strategy(NameServerIpVersionPolicy::Ipv4Only); } /// Set the policy relating to nameserver IP version. /// /// NOTE: Calling this function will rebuild the inner resolver which means that the /// cached dns lookups will not carry over and will go to network to resolve again. - pub fn set_nameserver_ip_version_strategy(&mut self, strategy: NameServerIpVersionPolicy) {} + pub fn set_nameserver_ip_version_strategy(&mut self, strategy: NameServerIpVersionPolicy) { + if strategy == self.ns_ip_ver_policy { + // correct strategy is already set. avoid rebuilding and clearing cache. + return; + } + self.ns_ip_ver_policy = strategy; + + self.force_primary_rebuild(); + } + + /// Get the current policy in use by this resolver for nameserver IP version. + pub fn get_nameserver_ip_version_strategy(&self) -> NameServerIpVersionPolicy { + self.ns_ip_ver_policy + } /// Set the policy for the record type queried when looking up hostnames + /// + /// NOTE: Calling this function will rebuild the inner resolver which means that the + /// cached dns lookups will not carry over and will go to network to resolve again. pub fn set_hostname_ip_version_lookup_strategy(&mut self, strategy: HostnameIpLookupStrategy) { - // self.state.get_mut() + if let Some(opts) = &self.current_options + && opts.ip_strategy == strategy.into() + { + // correct strategy is already set. avoid rebuilding and clearing cache. + return; + } + + let mut options = self + .current_options + .clone() + .unwrap_or(Self::default_options()); + options.ip_strategy = strategy.into(); + + self.current_options = Some(options); + self.force_primary_rebuild(); + } + + fn default_options() -> ResolverOpts { + let mut opts = ResolverOpts::default(); + // Always cache successful responses for queries received by this resolver for 30 min minimum. + opts.positive_min_ttl = Some(DEFAULT_POSITIVE_LOOKUP_CACHE_TTL); + + opts + } + + fn force_primary_rebuild(&mut self) { + // if !self.dont_use_shared { + // SHARED_RESOLVER = Default::default(); + // } + self.state = Arc::new(OnceCell::new()); } } @@ -354,6 +412,16 @@ pub enum NameServerIpVersionPolicy { Ipv4AndIpv6, } +impl Display for NameServerIpVersionPolicy { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Ipv4AndIpv6 => write!(f, "ipv4 & ipv6"), + Self::Ipv6Only => write!(f, "ipv6 only"), + Self::Ipv4Only => write!(f, "ipv4 only"), + } + } +} + /// Policy options for query types sent when lookup up a hostname. #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] #[allow(non_camel_case_types)] @@ -429,8 +497,9 @@ impl From<&HostnameIpLookupStrategy> for LookupIpStrategy { /// This resolver is intended to be used for OUR API endpoints that do not rapidly rotate IPs. fn new_resolver( ns_ip_version_policy: NameServerIpVersionPolicy, + options: Option, ) -> Result { - info!("building new configured resolver"); + info!("building new configured {ns_ip_version_policy} resolver"); let name_servers = match ns_ip_version_policy { NameServerIpVersionPolicy::Ipv4AndIpv6 => default_nameserver_group(), @@ -438,7 +507,7 @@ fn new_resolver( NameServerIpVersionPolicy::Ipv6Only => default_nameserver_group_ipv6_only(), }; - configure_and_build_resolver(name_servers) + configure_and_build_resolver(name_servers, options) } fn default_nameserver_group() -> NameServerConfigGroup { @@ -449,6 +518,20 @@ fn default_nameserver_group() -> NameServerConfigGroup { name_servers } +fn configure_and_build_resolver( + name_servers: NameServerConfigGroup, + options: Option, +) -> Result { + let config = ResolverConfig::from_parts(None, Vec::new(), name_servers); + let mut resolver_builder = + TokioResolver::builder_with_config(config, TokioConnectionProvider::default()); + + let options = options.unwrap_or(HickoryDnsResolver::default_options()); + resolver_builder = resolver_builder.with_options(options); + + Ok(resolver_builder.build()) +} + fn default_nameserver_group_ipv4_only() -> NameServerConfigGroup { let mut name_servers = NameServerConfigGroup::from_ips_https( &quad9_ips_v4(), @@ -505,20 +588,6 @@ fn default_nameserver_group_ipv6_only() -> NameServerConfigGroup { name_servers } -fn configure_and_build_resolver( - name_servers: NameServerConfigGroup, -) -> Result { - let config = ResolverConfig::from_parts(None, Vec::new(), name_servers); - let mut resolver_builder = - TokioResolver::builder_with_config(config, TokioConnectionProvider::default()); - - resolver_builder.options_mut().ip_strategy = LookupIpStrategy::Ipv4thenIpv6; - // Cache successful responses for queries received by this resolver for 30 min minimum. - resolver_builder.options_mut().positive_min_ttl = Some(Duration::from_secs(1800)); - - Ok(resolver_builder.build()) -} - fn cloudflare_ips_v4() -> Vec { hickory_resolver::config::CLOUDFLARE_IPS .iter() @@ -546,7 +615,7 @@ fn quad9_ips_v4() -> Vec { fn quad9_ips_v6() -> Vec { hickory_resolver::config::CLOUDFLARE_IPS .iter() - .filter(|ip| ip.is_ipv4()) + .filter(|ip| ip.is_ipv6()) .cloned() .collect() } @@ -628,6 +697,70 @@ mod test { assert!(addrs.contains(&example_ip6)); Ok(()) } + + #[test] + fn address_filter_check() { + // Make sure the nameserver group for Ipv4 is really IPv4 only + let ns_group = default_nameserver_group_ipv4_only(); + let addrs: Vec = ns_group + .into_inner() + .iter() + .map(|cfg| cfg.socket_addr.ip()) + .collect(); + assert!(addrs.iter().all(|addr| addr.is_ipv4())); + + // Make sure the nameserver group for Ipv6 is really IPv6 only + let ns_group = default_nameserver_group_ipv6_only(); + let addrs: Vec = ns_group + .into_inner() + .iter() + .map(|cfg| cfg.socket_addr.ip()) + .collect(); + assert!(addrs.iter().all(|addr| addr.is_ipv6())); + } + + #[tokio::test] + async fn setting_ns_ip_version_works() { + // Make sure that setting IPv4 only on the shared resolver changes the resolver + // set to only use IPv4 nameservers and only do lookups for A records. + let mut resolver = HickoryDnsResolver { + dont_use_shared: true, + ..Default::default() + }; + + let _ = resolver.resolve_str("example.com").await; + + resolver.set_ipv4_only(); + + // setting opts resets resolver initialization + assert!(resolver.state.get().is_none()); + + let _ = resolver.resolve_str("example.com").await; + + // after rebuilding with new options it should have Ipv4 / A only + let lookup_strategy = resolver.state.get().unwrap().options().ip_strategy; + assert_eq!(lookup_strategy, HostnameIpLookupStrategy::A_only.into()); + + let nameservers = resolver.state.get().unwrap().config().name_servers(); + assert!(nameservers.iter().all(|cfg| cfg.socket_addr.is_ipv4())); + + resolver.set_nameserver_ip_version_strategy(NameServerIpVersionPolicy::Ipv6Only); + + // setting opts resets resolver initialization + assert!(resolver.state.get().is_none()); + + let _ = resolver.resolve_str("example.com").await; + + // Make sure that setting the shared resolver to use only Ipv6 nameservers changes + // the set of nameservers to only IPv6. + let nameservers = resolver.state.get().unwrap().config().name_servers(); + assert!(nameservers.iter().all(|cfg| cfg.socket_addr.is_ipv6())); + } + + // #[tokio::test] + // async fn setting_ns_ip_version_for_shared_resolver() { + // let mut resolver = HickoryDnsResolver::default(); + // } } #[cfg(test)] @@ -664,7 +797,7 @@ mod failure_test { ); broken_ns_group.merge(broken_ns_https); - configure_and_build_resolver(broken_ns_group) + configure_and_build_resolver(broken_ns_group, None) } #[tokio::test] From 8879007c65d390cc1cf3557cee3b71f920bf6ee1 Mon Sep 17 00:00:00 2001 From: jmwample Date: Wed, 19 Nov 2025 12:47:57 -0700 Subject: [PATCH 03/18] v4/v6 for shared resolver and pass on docs --- common/http-api-client/src/dns.rs | 130 ++++++++++++++++++++++++------ common/http-api-client/src/lib.rs | 2 +- 2 files changed, 105 insertions(+), 27 deletions(-) diff --git a/common/http-api-client/src/dns.rs b/common/http-api-client/src/dns.rs index 10ac0a36c2a..b97872116ab 100644 --- a/common/http-api-client/src/dns.rs +++ b/common/http-api-client/src/dns.rs @@ -6,13 +6,51 @@ //! The resolver itself is the set combination of the cloudflare, and quad9 endpoints supporting DoH //! and DoT. //! -//! This resolver supports an optional fallback mechanism where, should the DNS-over-TLS resolution -//! fail, a followup resolution will be done using the hosts configured default (e.g. -//! `/etc/resolve.conf` on linux). This is disabled by default and can be enabled using -//! [`enable_system_fallback`]. +//! ```rust +//! use nym_http_api_client::HickoryDnsResolver; +//! # use nym_http_api_client::ResolveError; +//! # type Err = ResolveError; +//! # async fn run() -> Result<(), Err> { +//! let resolver = HickoryDnsResolver::default(); +//! resolver.resolve_str("example.com").await?; +//! # Ok(()) +//! # } +//! ``` //! -//! There is also a second optional fallback mechanism that allows a static map to be used as a last -//! resort. This can help when DNS encounters errors due to blocked resolvers or unknown conditions. +//! ## Fallbacks +//! +//! **System Resolver --** This resolver supports an optional fallback mechanism where, should the +//! DNS-over-TLS resolution fail, a followup resolution will be done using the hosts configured +//! default (e.g. `/etc/resolve.conf` on linux). +//! +//! This is disabled by default and can be enabled using `enable_system_fallback`. +//! +//! **Static Table --** There is also a second optional fallback mechanism that allows a static map to +//! be used as a last resort. This can help when DNS encounters errors due to blocked resolvers or +//! unknown conditions. This is enabled by default, and can be customized if building a new resolver. +//! +//! ## IPv4 / IPv6 +//! +//! The resolver can be modified to control the behavior with respect to IPv4 and IPv6. If using the +//! shared resolver setting these options will apply to future lookups done using the shared +//! resolver as well. +//! +//! Be default the resolver uses both IPv4 and IPv6 nameservers, and is configured to do `A` lookups +//! first, and only do `AAAA` if no `A` record is available. +//! +//! ```rust +//! # use nym_http_api_client::{HickoryDnsResolver, dns::NameServerIpVersionPolicy}; +//! let mut resolver = HickoryDnsResolver::default(); +//! +//! // Set the resolver to only use IPv4 nameservers and +//! // only do lookups for A records. +//! resolver.set_ipv4_only(); +//! +//! // Set the resolver to use only IPv6 nameservers +//! resolver.set_nameserver_ip_version_strategy(NameServerIpVersionPolicy::Ipv6Only); +//! ``` +//! +//! --- //! //! Requires the `dns-over-https-rustls`, `webpki-roots` feature for the `hickory-resolver` crate #![deny(missing_docs)] @@ -24,7 +62,7 @@ use std::{ fmt::Display, net::{IpAddr, SocketAddr}, str::FromStr, - sync::{Arc, LazyLock}, + sync::{Arc, LazyLock, RwLock}, time::Duration, }; @@ -40,7 +78,7 @@ use tracing::*; mod constants; mod static_resolver; -pub use static_resolver::*; +pub(crate) use static_resolver::*; const DEFAULT_POSITIVE_LOOKUP_CACHE_TTL: Duration = Duration::from_secs(1800); @@ -59,12 +97,17 @@ impl ClientBuilder { } } -// n.b. static items do not call [`Drop`] on program termination, so this won't be deallocated. -// this is fine, as the OS can deallocate the terminated program faster than we can free memory -// but tools like valgrind might report "memory leaks" as it isn't obvious this is intentional. -static SHARED_RESOLVER: LazyLock = LazyLock::new(|| { - tracing::debug!("Initializing shared DNS resolver"); - HickoryDnsResolver::default() +// n.b. static items do not call [`Drop`] on program termination, so this won't be deallocated. this +// is fine, as the OS can deallocate the terminated program faster than we can free memory but tools +// like valgrind might report "memory leaks" as it isn't obvious this is intentional. Using RwLock +// for interior mutability to allow safe modification of the shared resolver -- i.e. rebuilding on +// change to desired IPv4/IPv6 config. +static SHARED_RESOLVER: LazyLock>> = LazyLock::new(|| { + tracing::debug!("Initializing shared DNS resolver with interior mutability"); + Arc::new(RwLock::new(HickoryDnsResolver { + dont_use_shared: true, // prevent infinite recursion + ..Default::default() + })) }); #[derive(Debug, thiserror::Error)] @@ -271,6 +314,8 @@ impl HickoryDnsResolver { new_resolver(ns_strategy, options) } else { Ok(SHARED_RESOLVER + .read() + .unwrap() .state .get_or_try_init(|| new_resolver(ns_strategy, options))? .clone()) @@ -280,10 +325,12 @@ impl HickoryDnsResolver { fn new_resolver_system(dont_use_shared: bool) -> Result { // using a closure here is slightly gross, but this makes sure that if the // lazy-init returns an error it can be handled by the client - if dont_use_shared || SHARED_RESOLVER.fallback.is_none() { + if dont_use_shared || SHARED_RESOLVER.read().unwrap().fallback.is_none() { new_resolver_system() } else { Ok(SHARED_RESOLVER + .read() + .unwrap() .fallback .as_ref() .unwrap() @@ -293,7 +340,9 @@ impl HickoryDnsResolver { } fn new_static_fallback(dont_use_shared: bool) -> StaticResolver { - if !dont_use_shared && let Some(ref shared_resolver) = SHARED_RESOLVER.static_base { + if !dont_use_shared + && let Some(ref shared_resolver) = SHARED_RESOLVER.read().unwrap().static_base + { shared_resolver .get_or_init(new_default_static_fallback) .clone() @@ -393,9 +442,13 @@ impl HickoryDnsResolver { } fn force_primary_rebuild(&mut self) { - // if !self.dont_use_shared { - // SHARED_RESOLVER = Default::default(); - // } + if !self.dont_use_shared { + let mut resolver = SHARED_RESOLVER.write().unwrap(); + *resolver = HickoryDnsResolver { + dont_use_shared: true, + ..Default::default() + } + } self.state = Arc::new(OnceCell::new()); } } @@ -721,15 +774,25 @@ mod test { #[tokio::test] async fn setting_ns_ip_version_works() { - // Make sure that setting IPv4 only on the shared resolver changes the resolver - // set to only use IPv4 nameservers and only do lookups for A records. - let mut resolver = HickoryDnsResolver { + let resolver = HickoryDnsResolver { dont_use_shared: true, ..Default::default() }; + resolver_ns_ip_version_test(resolver).await + } + + #[tokio::test] + async fn setting_ns_ip_version_for_shared_resolver() { + let resolver = HickoryDnsResolver::default(); + resolver_ns_ip_version_test(resolver).await + } + + async fn resolver_ns_ip_version_test(mut resolver: HickoryDnsResolver) { let _ = resolver.resolve_str("example.com").await; + // Make sure that setting IPv4Only changes the resolver set to only use IPv4 nameservers and + // only do lookups for A records. resolver.set_ipv4_only(); // setting opts resets resolver initialization @@ -751,15 +814,30 @@ mod test { let _ = resolver.resolve_str("example.com").await; - // Make sure that setting the shared resolver to use only Ipv6 nameservers changes - // the set of nameservers to only IPv6. + // Make sure that setting the resolver to use only Ipv6 nameservers changes the set of + // nameservers to only IPv6. let nameservers = resolver.state.get().unwrap().config().name_servers(); assert!(nameservers.iter().all(|cfg| cfg.socket_addr.is_ipv6())); } + // /// Triple check that ip_strategy in ResolverOpts does NOT impact the IP version of the + // /// selected nameservers. + // /// + // /// => Looking at logs, yes it still uses IPv6 nameservers. // #[tokio::test] - // async fn setting_ns_ip_version_for_shared_resolver() { - // let mut resolver = HickoryDnsResolver::default(); + // async fn resolver_ipv6_triple_check() { + // // tracing_subscriber::fmt() + // // .with_max_level(tracing::Level::DEBUG) + // // .init(); + + // let ns_group = default_nameserver_group(); + // let mut options = ResolverOpts::default(); + // options.ip_strategy = LookupIpStrategy::Ipv4Only; + // options.num_concurrent_reqs= 4; + + // let resolver = configure_and_build_resolver(ns_group, Some(options)).unwrap(); + + // let _ = resolver.lookup_ip("example.com").await.unwrap(); // } } diff --git a/common/http-api-client/src/lib.rs b/common/http-api-client/src/lib.rs index a74a141fe6f..24abaa10ab7 100644 --- a/common/http-api-client/src/lib.rs +++ b/common/http-api-client/src/lib.rs @@ -175,7 +175,7 @@ mod user_agent; pub use user_agent::UserAgent; #[cfg(not(target_arch = "wasm32"))] -mod dns; +pub mod dns; mod path; #[cfg(not(target_arch = "wasm32"))] From 7bb3e9f1b265786194a6c8aeb9a4695798de4254 Mon Sep 17 00:00:00 2001 From: jmwample Date: Fri, 21 Nov 2025 10:31:57 -0700 Subject: [PATCH 04/18] logging improvement --- common/http-api-client/src/dns.rs | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/common/http-api-client/src/dns.rs b/common/http-api-client/src/dns.rs index b97872116ab..355d8685c38 100644 --- a/common/http-api-client/src/dns.rs +++ b/common/http-api-client/src/dns.rs @@ -208,27 +208,26 @@ async fn resolve( options: Option, overall_dns_timeout: Duration, ) -> Result { + let name_str = name.as_str().to_string(); let resolver = resolver .get_or_try_init(|| HickoryDnsResolver::new_resolver(independent, ns_strategy, options))?; + let _span = debug_span!("dns", "q" = name_str); + let _guard = _span.enter(); // Attempt a lookup using the primary resolver - let resolve_fut = tokio::time::timeout(overall_dns_timeout, resolver.lookup_ip(name.as_str())); + let resolve_fut = tokio::time::timeout(overall_dns_timeout, resolver.lookup_ip(&name_str)); let primary_err = match resolve_fut.await { Err(_) => ResolveError::Timeout, Ok(Ok(lookup)) => { + debug!("internal primary resolver found {name_str} -> {lookup:?}",); let addrs: Addrs = Box::new(SocketAddrs { iter: lookup.into_iter(), }); return Ok(addrs); } - Ok(Err(e)) => { - // on failure use the fall back system configured DNS resolver - if !e.is_no_records_found() { - warn!("primary DNS failed w/ error: {e}"); - } - e.into() - } + Ok(Err(e)) => e.into(), }; + warn!("primary DNS failed w/ error: {primary_err}"); // If the primary resolver encountered an error, attempt a lookup using the fallback // resolver if one is configured. @@ -236,9 +235,9 @@ async fn resolve( let resolver = fallback.get_or_try_init(|| HickoryDnsResolver::new_resolver_system(independent))?; - let resolve_fut = - tokio::time::timeout(overall_dns_timeout, resolver.lookup_ip(name.as_str())); + let resolve_fut = tokio::time::timeout(overall_dns_timeout, resolver.lookup_ip(&name_str)); if let Ok(Ok(lookup)) = resolve_fut.await { + debug!("internal fallback resolver found {name_str} -> {lookup:?}",); let addrs: Addrs = Box::new(SocketAddrs { iter: lookup.into_iter(), }); @@ -252,9 +251,10 @@ async fn resolve( debug!("checking static"); let resolver = static_resolver.get_or_init(|| HickoryDnsResolver::new_static_fallback(independent)); - if let Ok(addrs) = resolver.resolve(name).await { - return Ok(addrs); + let _addrs: Vec = addrs.into_iter().collect(); + debug!("internal static table found {} -> {_addrs:?}", name_str); + return Ok(Box::new(_addrs.into_iter())); } } @@ -818,6 +818,10 @@ mod test { // nameservers to only IPv6. let nameservers = resolver.state.get().unwrap().config().name_servers(); assert!(nameservers.iter().all(|cfg| cfg.socket_addr.is_ipv6())); + + // reset to default + resolver.set_hostname_ip_version_lookup_strategy(HostnameIpLookupStrategy::A_then_AAAA); + resolver.set_nameserver_ip_version_strategy(NameServerIpVersionPolicy::Ipv4AndIpv6); } // /// Triple check that ip_strategy in ResolverOpts does NOT impact the IP version of the @@ -880,6 +884,9 @@ mod failure_test { #[tokio::test] async fn dns_lookup_failures() -> Result<(), ResolveError> { + tracing_subscriber::fmt() + .with_max_level(tracing::Level::DEBUG) + .init(); let time_start = std::time::Instant::now(); let r = OnceCell::new(); From 23effac3d8254dc5eb1ac4cf2b2bd7f3dc25f9b0 Mon Sep 17 00:00:00 2001 From: jmwample Date: Fri, 21 Nov 2025 11:04:21 -0700 Subject: [PATCH 05/18] ignore test that could cause issues if interleaved --- common/http-api-client/src/dns.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/common/http-api-client/src/dns.rs b/common/http-api-client/src/dns.rs index 355d8685c38..26db4c334e5 100644 --- a/common/http-api-client/src/dns.rs +++ b/common/http-api-client/src/dns.rs @@ -782,6 +782,7 @@ mod test { resolver_ns_ip_version_test(resolver).await } + #[ignore] // This messes with the settings for the shared resolver which can interleave in negative ways with other tests. #[tokio::test] async fn setting_ns_ip_version_for_shared_resolver() { let resolver = HickoryDnsResolver::default(); From 3235313060968a267c6d5bf84ad10a31b844e3e9 Mon Sep 17 00:00:00 2001 From: jmwample Date: Fri, 21 Nov 2025 11:06:08 -0700 Subject: [PATCH 06/18] fmt --- common/http-api-client/src/dns.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/common/http-api-client/src/dns.rs b/common/http-api-client/src/dns.rs index 26db4c334e5..fff433dcc2d 100644 --- a/common/http-api-client/src/dns.rs +++ b/common/http-api-client/src/dns.rs @@ -782,7 +782,8 @@ mod test { resolver_ns_ip_version_test(resolver).await } - #[ignore] // This messes with the settings for the shared resolver which can interleave in negative ways with other tests. + #[ignore] + // This messes with the settings for the shared resolver which can interleave in negative ways with other tests. #[tokio::test] async fn setting_ns_ip_version_for_shared_resolver() { let resolver = HickoryDnsResolver::default(); From 5cda17fc949f0d40162e95b415204efef14a1f41 Mon Sep 17 00:00:00 2001 From: jmwample Date: Fri, 21 Nov 2025 11:43:57 -0700 Subject: [PATCH 07/18] try default to ipv4 only nameservers --- common/http-api-client/src/dns.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/http-api-client/src/dns.rs b/common/http-api-client/src/dns.rs index fff433dcc2d..f9194fa17dc 100644 --- a/common/http-api-client/src/dns.rs +++ b/common/http-api-client/src/dns.rs @@ -457,11 +457,11 @@ impl HickoryDnsResolver { #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] pub enum NameServerIpVersionPolicy { /// Only send queries to Ipv4 nameservers + #[default] Ipv4Only, /// Only send queries to Ipv6 nameserver Ipv6Only, /// Send queries to Ipv4 AND Ipv6 nameservers in parallel - #[default] Ipv4AndIpv6, } From 1719a1db7fd8079fb547c14039b6920437304e12 Mon Sep 17 00:00:00 2001 From: jmwample Date: Sat, 22 Nov 2025 10:17:22 -0700 Subject: [PATCH 08/18] lower default timeout for DNS lookups --- common/http-api-client/src/dns.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/common/http-api-client/src/dns.rs b/common/http-api-client/src/dns.rs index f9194fa17dc..1b1821062b6 100644 --- a/common/http-api-client/src/dns.rs +++ b/common/http-api-client/src/dns.rs @@ -81,6 +81,8 @@ mod static_resolver; pub(crate) use static_resolver::*; const DEFAULT_POSITIVE_LOOKUP_CACHE_TTL: Duration = Duration::from_secs(1800); +const DEFAULT_OVERALL_LOOKUP_TIMEOUT: Duration = Duration::from_secs(6); +const DEFAULT_QUERY_TIMEOUT: Duration = Duration::from_secs(3); impl ClientBuilder { /// Override the DNS resolver implementation used by the underlying http client. @@ -166,7 +168,7 @@ impl Default for HickoryDnsResolver { dont_use_shared: Default::default(), ns_ip_ver_policy: Default::default(), current_options: Default::default(), - overall_dns_timeout: Duration::from_secs(10), + overall_dns_timeout: DEFAULT_OVERALL_LOOKUP_TIMEOUT, } } } @@ -437,6 +439,7 @@ impl HickoryDnsResolver { let mut opts = ResolverOpts::default(); // Always cache successful responses for queries received by this resolver for 30 min minimum. opts.positive_min_ttl = Some(DEFAULT_POSITIVE_LOOKUP_CACHE_TTL); + opts.timeout = DEFAULT_QUERY_TIMEOUT; opts } @@ -695,8 +698,7 @@ mod test { #[tokio::test] async fn reqwest_with_custom_dns() { - let var_name = HickoryDnsResolver::default(); - let resolver = var_name; + let resolver = HickoryDnsResolver::default(); let client = reqwest::ClientBuilder::new() .dns_resolver(resolver.into()) .build() From e921c52e13b60c3ab47c9ac83537e2799509e4db Mon Sep 17 00:00:00 2001 From: jmwample Date: Sun, 23 Nov 2025 08:36:13 -0700 Subject: [PATCH 09/18] make sure the static fallback is enabled by default --- common/http-api-client/src/dns.rs | 36 +++++++++++++++++-------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/common/http-api-client/src/dns.rs b/common/http-api-client/src/dns.rs index 1b1821062b6..a71bdccc51c 100644 --- a/common/http-api-client/src/dns.rs +++ b/common/http-api-client/src/dns.rs @@ -164,10 +164,10 @@ impl Default for HickoryDnsResolver { Self { state: Default::default(), fallback: Default::default(), - static_base: Default::default(), dont_use_shared: Default::default(), ns_ip_ver_policy: Default::default(), current_options: Default::default(), + static_base: Some(Default::default()), overall_dns_timeout: DEFAULT_OVERALL_LOOKUP_TIMEOUT, } } @@ -211,10 +211,27 @@ async fn resolve( overall_dns_timeout: Duration, ) -> Result { let name_str = name.as_str().to_string(); + + debug!( + "looking up {name_str} - {} {}", + maybe_static.is_some(), + maybe_fallback.is_some() + ); + // If no record has been found and a static map of fallback addresses is configured + // check the table for our entry + if let Some(ref static_resolver) = maybe_static { + debug!("checking static"); + let resolver = + static_resolver.get_or_init(|| HickoryDnsResolver::new_static_fallback(independent)); + if let Ok(addrs) = resolver.resolve(name).await { + let _addrs: Vec = addrs.into_iter().collect(); + debug!("internal static table found {} -> {_addrs:?}", name_str); + return Ok(Box::new(_addrs.into_iter())); + } + } + let resolver = resolver .get_or_try_init(|| HickoryDnsResolver::new_resolver(independent, ns_strategy, options))?; - let _span = debug_span!("dns", "q" = name_str); - let _guard = _span.enter(); // Attempt a lookup using the primary resolver let resolve_fut = tokio::time::timeout(overall_dns_timeout, resolver.lookup_ip(&name_str)); @@ -247,19 +264,6 @@ async fn resolve( } } - // If no record has been found and a static map of fallback addresses is configured - // check the table for our entry - if let Some(ref static_resolver) = maybe_static { - debug!("checking static"); - let resolver = - static_resolver.get_or_init(|| HickoryDnsResolver::new_static_fallback(independent)); - if let Ok(addrs) = resolver.resolve(name).await { - let _addrs: Vec = addrs.into_iter().collect(); - debug!("internal static table found {} -> {_addrs:?}", name_str); - return Ok(Box::new(_addrs.into_iter())); - } - } - Err(primary_err) } From dd7b1fc1239296f65178285798c56c79ec378e42 Mon Sep 17 00:00:00 2001 From: jmwample Date: Sun, 23 Nov 2025 09:41:08 -0700 Subject: [PATCH 10/18] fix bug for quad9 IPs --- common/http-api-client/src/dns.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/common/http-api-client/src/dns.rs b/common/http-api-client/src/dns.rs index a71bdccc51c..51652b6fe65 100644 --- a/common/http-api-client/src/dns.rs +++ b/common/http-api-client/src/dns.rs @@ -217,6 +217,7 @@ async fn resolve( maybe_static.is_some(), maybe_fallback.is_some() ); + // If no record has been found and a static map of fallback addresses is configured // check the table for our entry if let Some(ref static_resolver) = maybe_static { @@ -559,14 +560,15 @@ fn new_resolver( ns_ip_version_policy: NameServerIpVersionPolicy, options: Option, ) -> Result { - info!("building new configured {ns_ip_version_policy} resolver"); - let name_servers = match ns_ip_version_policy { NameServerIpVersionPolicy::Ipv4AndIpv6 => default_nameserver_group(), NameServerIpVersionPolicy::Ipv4Only => default_nameserver_group_ipv4_only(), NameServerIpVersionPolicy::Ipv6Only => default_nameserver_group_ipv6_only(), }; + info!("building new configured {ns_ip_version_policy} resolver"); + debug!("configuring resolver to use nameserver set: {name_servers:?}"); + configure_and_build_resolver(name_servers, options) } @@ -665,7 +667,7 @@ fn cloudflare_ips_v6() -> Vec { } fn quad9_ips_v4() -> Vec { - hickory_resolver::config::CLOUDFLARE_IPS + hickory_resolver::config::QUAD9_IPS .iter() .filter(|ip| ip.is_ipv4()) .cloned() @@ -673,7 +675,7 @@ fn quad9_ips_v4() -> Vec { } fn quad9_ips_v6() -> Vec { - hickory_resolver::config::CLOUDFLARE_IPS + hickory_resolver::config::QUAD9_IPS .iter() .filter(|ip| ip.is_ipv6()) .cloned() @@ -892,9 +894,9 @@ mod failure_test { #[tokio::test] async fn dns_lookup_failures() -> Result<(), ResolveError> { - tracing_subscriber::fmt() - .with_max_level(tracing::Level::DEBUG) - .init(); + // tracing_subscriber::fmt() + // .with_max_level(tracing::Level::DEBUG) + // .init(); let time_start = std::time::Instant::now(); let r = OnceCell::new(); From 9bda4e5d7692d67743f25776786fad204c023e7d Mon Sep 17 00:00:00 2001 From: jmwample Date: Sun, 23 Nov 2025 10:58:34 -0700 Subject: [PATCH 11/18] compile and error fixes --- Cargo.toml | 2 +- common/http-api-client/src/dns.rs | 46 +++++++++++++++++++++++++++-- common/http-api-client/src/tests.rs | 1 + 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 92f5fdbb7b9..3d6cb2e52fe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -265,7 +265,7 @@ generic-array = "0.14.7" getrandom = "0.2.10" handlebars = "3.5.5" hex = "0.4.3" -hickory-resolver = "0.25" +hickory-resolver = "0.25.2" hkdf = "0.12.3" hmac = "0.12.1" http = "1" diff --git a/common/http-api-client/src/dns.rs b/common/http-api-client/src/dns.rs index 51652b6fe65..a4f5cb46589 100644 --- a/common/http-api-client/src/dns.rs +++ b/common/http-api-client/src/dns.rs @@ -80,9 +80,9 @@ mod constants; mod static_resolver; pub(crate) use static_resolver::*; -const DEFAULT_POSITIVE_LOOKUP_CACHE_TTL: Duration = Duration::from_secs(1800); -const DEFAULT_OVERALL_LOOKUP_TIMEOUT: Duration = Duration::from_secs(6); -const DEFAULT_QUERY_TIMEOUT: Duration = Duration::from_secs(3); +pub(crate) const DEFAULT_POSITIVE_LOOKUP_CACHE_TTL: Duration = Duration::from_secs(1800); +pub(crate) const DEFAULT_OVERALL_LOOKUP_TIMEOUT: Duration = Duration::from_secs(6); +pub(crate) const DEFAULT_QUERY_TIMEOUT: Duration = Duration::from_secs(3); impl ClientBuilder { /// Override the DNS resolver implementation used by the underlying http client. @@ -951,4 +951,44 @@ mod failure_test { Ok(()) } + + /// This test is meant to check if shifting the lookup in our configured static table forward + /// means that the http request will succeed, in the situation where the DNS timeout and HTTP + /// request timeout would align IF we had to wait for the DNS lookup to reach its timeout. + #[tokio::test] + async fn reqwest_using_static_fallback() -> Result<(), ResolveError> { + let r = OnceCell::new(); + r.set(build_broken_resolver().expect("failed to build resolver")) + .expect("broken resolver init error"); + + // create a new resolver that won't mess with the shared resolver used by other tests + let resolver = HickoryDnsResolver { + dont_use_shared: true, + state: Arc::new(r), + static_base: Some(Default::default()), + ..Default::default() + }; + build_broken_resolver()?; + + let client = reqwest::ClientBuilder::new() + .dns_resolver(resolver.clone().into()) + .connect_timeout(Duration::from_secs(10)) + .build() + .unwrap(); + + // Because the static table is checked first there is no delay and the lookup succeeds + // immediately. This means that (for hosts with an entry in the static lookup table) there + // should no longer be an issue with timeouts or filling the hickory resolvers query buffer. + let resp = client + .get("https://nymvpn.com/api/public/v1/health") + .send() + .await + .unwrap() + .bytes() + .await + .unwrap(); + + assert!(!resp.is_empty()); + Ok(()) + } } diff --git a/common/http-api-client/src/tests.rs b/common/http-api-client/src/tests.rs index bbfe8df6e01..f8f70788a0a 100644 --- a/common/http-api-client/src/tests.rs +++ b/common/http-api-client/src/tests.rs @@ -95,6 +95,7 @@ async fn api_client_retry() -> Result<(), Box> { "http://127.0.0.1:9".parse()?, // This will fail because of TCP refused (rotate) "https://httpbin.org/status/200".parse()?, // This should succeed ])? + .with_timeout(dns::DEFAULT_OVERALL_LOOKUP_TIMEOUT * 3) .with_retries(3) .build()?; From 474483aadde8a0616bad7cea1f4b7e13964b73cb Mon Sep 17 00:00:00 2001 From: jmwample Date: Sun, 23 Nov 2025 11:16:27 -0700 Subject: [PATCH 12/18] revert change in test, forgot I had DNS blocked --- common/http-api-client/src/tests.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/common/http-api-client/src/tests.rs b/common/http-api-client/src/tests.rs index f8f70788a0a..bbfe8df6e01 100644 --- a/common/http-api-client/src/tests.rs +++ b/common/http-api-client/src/tests.rs @@ -95,7 +95,6 @@ async fn api_client_retry() -> Result<(), Box> { "http://127.0.0.1:9".parse()?, // This will fail because of TCP refused (rotate) "https://httpbin.org/status/200".parse()?, // This should succeed ])? - .with_timeout(dns::DEFAULT_OVERALL_LOOKUP_TIMEOUT * 3) .with_retries(3) .build()?; From 18cd6bdced93f1a7a7a0d01ae2c9b3718ef47e61 Mon Sep 17 00:00:00 2001 From: Jack Wampler Date: Tue, 25 Nov 2025 09:06:39 -0700 Subject: [PATCH 13/18] DNS static table check order (#6229) * add mechanism to dns resolver to control if static table is checked fist or last --- common/http-api-client/src/dns.rs | 99 +++++++++++++++++++++++++------ 1 file changed, 82 insertions(+), 17 deletions(-) diff --git a/common/http-api-client/src/dns.rs b/common/http-api-client/src/dns.rs index a4f5cb46589..f960f957e7f 100644 --- a/common/http-api-client/src/dns.rs +++ b/common/http-api-client/src/dns.rs @@ -150,6 +150,9 @@ pub struct HickoryDnsResolver { fallback: Option>>, static_base: Option>>, dont_use_shared: bool, + /// Toggle to indicate whether `self.static_base` should be checked before the attempting to go + /// to network using the inner resolver (state). + static_base_first: bool, /// Overall timeout for dns lookup associated with any individual host resolution. For example, /// use of retries, server_ordering_strategy, etc. ends absolutely if this timeout is reached. overall_dns_timeout: Duration, @@ -168,6 +171,7 @@ impl Default for HickoryDnsResolver { ns_ip_ver_policy: Default::default(), current_options: Default::default(), static_base: Some(Default::default()), + static_base_first: false, overall_dns_timeout: DEFAULT_OVERALL_LOOKUP_TIMEOUT, } } @@ -179,6 +183,7 @@ impl Resolve for HickoryDnsResolver { let maybe_fallback = self.fallback.clone(); let maybe_static = self.static_base.clone(); let independent = self.dont_use_shared; + let static_base_first = self.static_base_first; let ns_strategy = self.ns_ip_ver_policy; let overall_dns_timeout = self.overall_dns_timeout; let options = self.current_options.clone(); @@ -189,6 +194,7 @@ impl Resolve for HickoryDnsResolver { maybe_fallback, maybe_static, independent, + static_base_first, ns_strategy, options, overall_dns_timeout, @@ -206,6 +212,7 @@ async fn resolve( maybe_fallback: Option>>, maybe_static: Option>>, independent: bool, + static_base_first: bool, ns_strategy: NameServerIpVersionPolicy, options: Option, overall_dns_timeout: Duration, @@ -218,13 +225,14 @@ async fn resolve( maybe_fallback.is_some() ); - // If no record has been found and a static map of fallback addresses is configured - // check the table for our entry - if let Some(ref static_resolver) = maybe_static { + // If we are configured to check the static map first, and a static map of addresses is + // configured -- check the table for our entry. + if static_base_first && let Some(ref static_resolver) = maybe_static { debug!("checking static"); + let qname = Name::from_str(&name_str).unwrap(); let resolver = static_resolver.get_or_init(|| HickoryDnsResolver::new_static_fallback(independent)); - if let Ok(addrs) = resolver.resolve(name).await { + if let Ok(addrs) = resolver.resolve(qname).await { let _addrs: Vec = addrs.into_iter().collect(); debug!("internal static table found {} -> {_addrs:?}", name_str); return Ok(Box::new(_addrs.into_iter())); @@ -265,6 +273,21 @@ async fn resolve( } } + // If no record has been found, we are configured to check the static table as a fallback, and + // static map of fallback addresses is configured -- check the table for our entry. + if !static_base_first && let Some(ref static_resolver) = maybe_static { + debug!("checking static"); + // this unwrap cannot fail as we serialize name_str from a valid Name + let qname = Name::from_str(&name_str).unwrap(); + let resolver = + static_resolver.get_or_init(|| HickoryDnsResolver::new_static_fallback(independent)); + if let Ok(addrs) = resolver.resolve(qname).await { + let _addrs: Vec = addrs.into_iter().collect(); + debug!("internal static table found {} -> {_addrs:?}", name_str); + return Ok(Box::new(_addrs.into_iter())); + } + } + Err(primary_err) } @@ -294,6 +317,7 @@ impl HickoryDnsResolver { self.fallback.clone(), self.static_base.clone(), self.dont_use_shared, + self.static_base_first, self.ns_ip_ver_policy, self.current_options.clone(), self.overall_dns_timeout, @@ -378,15 +402,40 @@ impl HickoryDnsResolver { /// Get the current map of hostname to address in use by the fallback static lookup if one /// exists. pub fn get_static_fallbacks(&self) -> Option>> { + if !self.dont_use_shared { + return Some( + SHARED_RESOLVER + .read() + .unwrap() + .static_base + .as_ref()? + .get()? + .get_addrs(), + ); + } Some(self.static_base.as_ref()?.get()?.get_addrs()) } /// Set (or overwrite) the map of addresses used in the fallback static hostname lookup pub fn set_static_fallbacks(&mut self, addrs: HashMap>) { - let cell = OnceCell::new(); - cell.set(StaticResolver::new(addrs)) - .expect("infallible assign"); - self.static_base = Some(Arc::new(cell)); + let resolver_table = Arc::new(OnceCell::with_value(StaticResolver::new(addrs))); + self.static_base = Some(resolver_table.clone()); + + if !self.dont_use_shared { + SHARED_RESOLVER.write().unwrap().static_base = Some(resolver_table); + } + } + + /// Change whether the resolver checks the static table of fallback addresses first or last in + /// the flow (i.e.`true` = before using the internal resolver, `false` = as a fallback after the + /// internal resolver). If the resolver has no table of static addressed configured, this + /// setting has no impact on the lookup flow. + pub fn set_check_static_fallback_first(&mut self, setting: bool) { + self.static_base_first = setting; + + if !self.dont_use_shared { + SHARED_RESOLVER.write().unwrap().static_base_first = setting + } } /// Configure the resolver to use only Ipv4 nameservers and to resolve addresses to Ipv4 (A) @@ -858,7 +907,10 @@ mod test { #[cfg(test)] mod failure_test { use super::*; - use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; + use std::{ + net::{IpAddr, Ipv4Addr, Ipv6Addr}, + time::Instant, + }; /// IP addresses guaranteed to fail attempts to resolve /// @@ -923,20 +975,17 @@ mod failure_test { #[tokio::test] async fn fallback_to_static() -> Result<(), ResolveError> { - let r = OnceCell::new(); - r.set(build_broken_resolver().expect("failed to build resolver")) - .expect("broken resolver init error"); - // create a new resolver that won't mess with the shared resolver used by other tests - let resolver = HickoryDnsResolver { + let mut resolver = HickoryDnsResolver { dont_use_shared: true, - state: Arc::new(r), + state: Arc::new(OnceCell::with_value(build_broken_resolver().unwrap())), static_base: Some(Default::default()), overall_dns_timeout: Duration::from_secs(5), ..Default::default() }; - build_broken_resolver()?; + // do a lookup where we check the static table as a fallback + let start = Instant::now(); // successful lookup using fallback to static resolver let domain = "nymvpn.com"; let _ = resolver @@ -944,7 +993,23 @@ mod failure_test { .await .expect("failed to resolve address in static lookup"); - // unsuccessful lookup - primary times out, and not in + let lookup_duration = Instant::now() - start; + assert!(lookup_duration > Duration::from_secs(5)); + + // do a lookup where we check the static table first + resolver.set_check_static_fallback_first(true); + let start = Instant::now(); + // successful lookup using fallback to static resolver + let domain = "nymvpn.com"; + let _ = resolver + .resolve_str(domain) + .await + .expect("failed to resolve address in static lookup"); + + let lookup_duration = Instant::now() - start; + assert!(lookup_duration < Duration::from_millis(50)); + + // unsuccessful lookup - primary times out, and not in static table let domain = "non-existent.nymtech.net"; let result = resolver.resolve_str(domain).await; assert!(result.is_err_and(|e| matches!(e, ResolveError::Timeout))); From 5395fef258fee45993ef8f509531894c6240b11e Mon Sep 17 00:00:00 2001 From: jmwample Date: Mon, 24 Nov 2025 12:34:47 -0700 Subject: [PATCH 14/18] add mechanism for rebuilding resolver w/ nameservers that pass a test --- common/http-api-client/src/dns.rs | 246 ++++++++++++++++++++++++++-- common/http-api-client/src/tests.rs | 2 +- 2 files changed, 229 insertions(+), 19 deletions(-) diff --git a/common/http-api-client/src/dns.rs b/common/http-api-client/src/dns.rs index f960f957e7f..12c4f80ae22 100644 --- a/common/http-api-client/src/dns.rs +++ b/common/http-api-client/src/dns.rs @@ -68,12 +68,15 @@ use std::{ use hickory_resolver::{ TokioResolver, - config::{LookupIpStrategy, NameServerConfigGroup, ResolverConfig, ResolverOpts}, + config::{ + LookupIpStrategy, NameServerConfig, NameServerConfigGroup, ResolverConfig, ResolverOpts, + }, lookup_ip::LookupIpIntoIter, name_server::TokioConnectionProvider, }; use once_cell::sync::OnceCell; use reqwest::dns::{Addrs, Name, Resolve, Resolving}; +use tokio::task::JoinSet; use tracing::*; mod constants; @@ -84,6 +87,8 @@ pub(crate) const DEFAULT_POSITIVE_LOOKUP_CACHE_TTL: Duration = Duration::from_se pub(crate) const DEFAULT_OVERALL_LOOKUP_TIMEOUT: Duration = Duration::from_secs(6); pub(crate) const DEFAULT_QUERY_TIMEOUT: Duration = Duration::from_secs(3); +const RECONFIGURE_ERROR_MSG: &str = "attempted to reconfigure with no working nameservers"; + impl ClientBuilder { /// Override the DNS resolver implementation used by the underlying http client. pub fn dns_resolver(mut self, resolver: Arc) -> Self { @@ -124,6 +129,8 @@ pub enum ResolveError { Timeout, #[error("hostname not found in static lookup table")] StaticLookupMiss, + #[error("configuration error: {0}")] + ConfigError(String), } impl ResolveError { @@ -508,6 +515,103 @@ impl HickoryDnsResolver { } self.state = Arc::new(OnceCell::new()); } + + /// Do a trial resolution using each nameserver individually to test which are working and which + /// fail to complete a lookup. + pub async fn trial_nameservers(&self) -> Result<(), ResolveError> { + let trial_results = if !self.dont_use_shared { + SHARED_RESOLVER + .read() + .unwrap() + .trial_nameservers_inner() + .await + } else { + self.trial_nameservers_inner().await + }; + + for (ns, result) in trial_results { + if let Err(e) = result { + warn!("trial {ns:?} errored: {e}"); + } else { + info!("trial {ns:?} succeeded"); + } + } + Ok(()) + } + + /// Do a trial resolution using each nameserver individually to test which are working and which + /// fail to complete a lookup. + async fn trial_nameservers_inner(&self) -> Vec<(NameServerConfig, Result<(), ResolveError>)> { + let name_servers = self.state.get().unwrap().config().name_servers(); + + let mut trial_lookups = JoinSet::new(); + + for name_server in name_servers { + let ns = name_server.clone(); + trial_lookups.spawn(async { (ns.clone(), trial_lookup(ns, "example.com").await) }); + } + + trial_lookups.join_all().await + } + + /// Do a trial resolution using each nameserver individually to test which are working and which + /// fail to complete a lookup. If one or more of the resolutions succeeds, rebuild the resolver + /// using only the nameservers that successfully completed the lookup. + /// + /// If no nameservers successfully complete the lookup return an error and leave the current + /// configured resolver set as is. + pub async fn trial_nameservers_and_reconfigure(&mut self) -> Result<(), ResolveError> { + let trial_results = if self.dont_use_shared { + self.trial_nameservers_inner().await + } else { + // take a read lock here as we don't want to hold a write lock while we are doing trials + SHARED_RESOLVER + .read() + .unwrap() + .trial_nameservers_inner() + .await + }; + + let mut working_nameservers = Vec::new(); + for (ns, result) in trial_results { + if let Err(e) = result { + warn!("trial {ns:?} errored: {e}"); + } else { + info!("trial {ns:?} succeeded"); + working_nameservers.push(ns); + } + } + + if working_nameservers.is_empty() { + return Err(ResolveError::ConfigError(RECONFIGURE_ERROR_MSG.to_string())); + } + + let new_resolver = + configure_and_build_resolver(working_nameservers, self.current_options.clone())?; + + if self.dont_use_shared { + self.state = Arc::new(OnceCell::with_value(new_resolver)); + } else { + // take a write lock on the shared resolver only once we are ready to make changes + SHARED_RESOLVER.write().unwrap().state = Arc::new(OnceCell::with_value(new_resolver)); + } + + Ok(()) + } +} + +/// Create an independent resolver that has only the provided nameserver and do one lookup for the +/// provided query target. +async fn trial_lookup(name_server: NameServerConfig, query: &str) -> Result<(), ResolveError> { + info!("running ns trial {name_server:?} query={query}"); + + let resolver = configure_and_build_resolver(vec![name_server], None)?; + + match tokio::time::timeout(DEFAULT_OVERALL_LOOKUP_TIMEOUT, resolver.ipv4_lookup(query)).await { + Ok(Ok(_)) => Ok(()), + Ok(Err(e)) => Err(e.into()), + Err(_) => Err(ResolveError::Timeout), + } } /// Policy options for nameserver IP versions to use when sending DNS queries. @@ -629,10 +733,13 @@ fn default_nameserver_group() -> NameServerConfigGroup { name_servers } -fn configure_and_build_resolver( - name_servers: NameServerConfigGroup, +fn configure_and_build_resolver( + name_servers: G, options: Option, -) -> Result { +) -> Result +where + G: Into, +{ let config = ResolverConfig::from_parts(None, Vec::new(), name_servers); let mut resolver_builder = TokioResolver::builder_with_config(config, TokioConnectionProvider::default()); @@ -946,19 +1053,12 @@ mod failure_test { #[tokio::test] async fn dns_lookup_failures() -> Result<(), ResolveError> { - // tracing_subscriber::fmt() - // .with_max_level(tracing::Level::DEBUG) - // .init(); let time_start = std::time::Instant::now(); - let r = OnceCell::new(); - r.set(build_broken_resolver().expect("failed to build resolver")) - .expect("broken resolver init error"); - // create a new resolver that won't mess with the shared resolver used by other tests let resolver = HickoryDnsResolver { dont_use_shared: true, - state: Arc::new(r), + state: Arc::new(OnceCell::with_value(build_broken_resolver().unwrap())), overall_dns_timeout: Duration::from_secs(5), ..Default::default() }; @@ -1022,18 +1122,13 @@ mod failure_test { /// request timeout would align IF we had to wait for the DNS lookup to reach its timeout. #[tokio::test] async fn reqwest_using_static_fallback() -> Result<(), ResolveError> { - let r = OnceCell::new(); - r.set(build_broken_resolver().expect("failed to build resolver")) - .expect("broken resolver init error"); - // create a new resolver that won't mess with the shared resolver used by other tests let resolver = HickoryDnsResolver { dont_use_shared: true, - state: Arc::new(r), + state: Arc::new(OnceCell::with_value(build_broken_resolver().unwrap())), static_base: Some(Default::default()), ..Default::default() }; - build_broken_resolver()?; let client = reqwest::ClientBuilder::new() .dns_resolver(resolver.clone().into()) @@ -1056,4 +1151,119 @@ mod failure_test { assert!(!resp.is_empty()); Ok(()) } + + #[tokio::test] + async fn trial_nameservers() { + let good_cf_ip = IpAddr::V4(Ipv4Addr::new(1, 1, 1, 1)); + + let mut ns_ips = GUARANTEED_BROKEN_IPS_1.to_vec(); + ns_ips.push(good_cf_ip); + + let broken_ns_https = NameServerConfigGroup::from_ips_https( + &ns_ips, + 443, + "cloudflare-dns.com".to_string(), + true, + ); + + let inner = configure_and_build_resolver(broken_ns_https, None).unwrap(); + + // create a new resolver that won't mess with the shared resolver used by other tests + let resolver = HickoryDnsResolver { + dont_use_shared: true, + state: Arc::new(OnceCell::with_value(inner)), + static_base: Some(Default::default()), + ..Default::default() + }; + + for (ns, result) in resolver.trial_nameservers_inner().await { + if ns.socket_addr.ip() == good_cf_ip { + assert!(result.is_ok()) + } else { + assert!(result.is_err()) + } + } + } + + #[tokio::test] + async fn trial_nameservers_reconfigure_none_working() { + // create a new resolver that won't mess with the shared resolver used by other tests + let mut resolver = HickoryDnsResolver { + dont_use_shared: true, + state: Arc::new(OnceCell::with_value(build_broken_resolver().unwrap())), + static_base: Some(Default::default()), + overall_dns_timeout: Duration::from_secs(5), + ..Default::default() + }; + + let res = resolver.trial_nameservers_and_reconfigure().await; + assert!(res.is_err_and( + |e| matches!(e, ResolveError::ConfigError(msg) if msg == RECONFIGURE_ERROR_MSG.to_string()) + )); + } + + #[tokio::test] + async fn trial_nameservers_independent_reconfigure() { + let good_cf_ip = IpAddr::V4(Ipv4Addr::new(1, 1, 1, 1)); + + let mut ns_ips = GUARANTEED_BROKEN_IPS_1.to_vec(); + ns_ips.push(good_cf_ip); + + let broken_ns_https = NameServerConfigGroup::from_ips_https( + &ns_ips, + 443, + "cloudflare-dns.com".to_string(), + true, + ); + + let inner = configure_and_build_resolver(broken_ns_https, None).unwrap(); + + // create a new resolver that won't mess with the shared resolver used by other tests + let mut resolver = HickoryDnsResolver { + dont_use_shared: true, + state: Arc::new(OnceCell::with_value(inner)), + static_base: Some(Default::default()), + ..Default::default() + }; + + resolver.trial_nameservers_and_reconfigure().await.unwrap(); + + let ns_set = resolver.state.get().unwrap().config().name_servers(); + let addrs: Vec = ns_set.iter().map(|cfg| cfg.socket_addr.ip()).collect(); + assert_eq!(addrs.len(), 1); + assert!(addrs.contains(&good_cf_ip)); + } + + /// This test ensures that calling `trial_nameservers_and_reconfigure` on a resolver using the + /// shared resolver results in the shared resolver updating its nameserver set to use only the + /// working nameservers. From the caller perspective this should have the same result. + #[tokio::test] + // ignore this test as changes to the shared resolver can cause unexpected behavior when + // interleaved with other tests + #[ignore] + async fn trial_nameservers_shared_reconfigure() { + let good_cf_ip = IpAddr::V4(Ipv4Addr::new(1, 1, 1, 1)); + + let mut ns_ips = GUARANTEED_BROKEN_IPS_1.to_vec(); + ns_ips.push(good_cf_ip); + + let broken_ns_https = NameServerConfigGroup::from_ips_https( + &ns_ips, + 443, + "cloudflare-dns.com".to_string(), + true, + ); + + let inner = configure_and_build_resolver(broken_ns_https, None).unwrap(); + SHARED_RESOLVER.write().unwrap().state = Arc::new(OnceCell::with_value(inner)); + + let mut resolver = HickoryDnsResolver::default(); + resolver.trial_nameservers_and_reconfigure().await.unwrap(); + + let binding = SHARED_RESOLVER.read().unwrap(); + let ns_set = binding.state.get().unwrap().config().name_servers(); + let addrs: Vec = ns_set.iter().map(|cfg| cfg.socket_addr.ip()).collect(); + assert_eq!(addrs.len(), 1); + assert!(addrs.contains(&good_cf_ip)); + } } diff --git a/common/http-api-client/src/tests.rs b/common/http-api-client/src/tests.rs index bbfe8df6e01..8af4371adcc 100644 --- a/common/http-api-client/src/tests.rs +++ b/common/http-api-client/src/tests.rs @@ -91,7 +91,7 @@ fn sanitizing_urls() { #[tokio::test] async fn api_client_retry() -> Result<(), Box> { let client = ClientBuilder::new_with_urls(vec![ - "http://broken.nym.test".parse()?, // This will fail because of DNS (rotate) + "http://broken.nym.test".parse()?, // This should fail because of DNS NXDomain (rotate) "http://127.0.0.1:9".parse()?, // This will fail because of TCP refused (rotate) "https://httpbin.org/status/200".parse()?, // This should succeed ])? From 0510c097a959cfd6d9aaada8aa37af53ea5eac02 Mon Sep 17 00:00:00 2001 From: jmwample Date: Mon, 24 Nov 2025 15:23:03 -0700 Subject: [PATCH 15/18] fix compile error and unclear behavior when using shared resolver --- common/http-api-client/src/dns.rs | 67 +++++++++++++------------------ 1 file changed, 29 insertions(+), 38 deletions(-) diff --git a/common/http-api-client/src/dns.rs b/common/http-api-client/src/dns.rs index 12c4f80ae22..47bf6a373ae 100644 --- a/common/http-api-client/src/dns.rs +++ b/common/http-api-client/src/dns.rs @@ -519,17 +519,9 @@ impl HickoryDnsResolver { /// Do a trial resolution using each nameserver individually to test which are working and which /// fail to complete a lookup. pub async fn trial_nameservers(&self) -> Result<(), ResolveError> { - let trial_results = if !self.dont_use_shared { - SHARED_RESOLVER - .read() - .unwrap() - .trial_nameservers_inner() - .await - } else { - self.trial_nameservers_inner().await - }; + let name_servers = self.get_nameservers(); - for (ns, result) in trial_results { + for (ns, result) in trial_nameservers_inner(&name_servers).await { if let Err(e) = result { warn!("trial {ns:?} errored: {e}"); } else { @@ -539,19 +531,12 @@ impl HickoryDnsResolver { Ok(()) } - /// Do a trial resolution using each nameserver individually to test which are working and which - /// fail to complete a lookup. - async fn trial_nameservers_inner(&self) -> Vec<(NameServerConfig, Result<(), ResolveError>)> { - let name_servers = self.state.get().unwrap().config().name_servers(); - - let mut trial_lookups = JoinSet::new(); - - for name_server in name_servers { - let ns = name_server.clone(); - trial_lookups.spawn(async { (ns.clone(), trial_lookup(ns, "example.com").await) }); + fn get_nameservers(&self) -> Vec { + if self.dont_use_shared { + self.state.get().unwrap().config().name_servers().to_vec() + } else { + SHARED_RESOLVER.read().unwrap().get_nameservers() } - - trial_lookups.join_all().await } /// Do a trial resolution using each nameserver individually to test which are working and which @@ -561,19 +546,10 @@ impl HickoryDnsResolver { /// If no nameservers successfully complete the lookup return an error and leave the current /// configured resolver set as is. pub async fn trial_nameservers_and_reconfigure(&mut self) -> Result<(), ResolveError> { - let trial_results = if self.dont_use_shared { - self.trial_nameservers_inner().await - } else { - // take a read lock here as we don't want to hold a write lock while we are doing trials - SHARED_RESOLVER - .read() - .unwrap() - .trial_nameservers_inner() - .await - }; + let name_servers = self.get_nameservers(); let mut working_nameservers = Vec::new(); - for (ns, result) in trial_results { + for (ns, result) in trial_nameservers_inner(&name_servers).await { if let Err(e) = result { warn!("trial {ns:?} errored: {e}"); } else { @@ -589,9 +565,8 @@ impl HickoryDnsResolver { let new_resolver = configure_and_build_resolver(working_nameservers, self.current_options.clone())?; - if self.dont_use_shared { - self.state = Arc::new(OnceCell::with_value(new_resolver)); - } else { + self.state = Arc::new(OnceCell::with_value(new_resolver.clone())); + if !self.dont_use_shared { // take a write lock on the shared resolver only once we are ready to make changes SHARED_RESOLVER.write().unwrap().state = Arc::new(OnceCell::with_value(new_resolver)); } @@ -600,6 +575,21 @@ impl HickoryDnsResolver { } } +/// Do a trial resolution using each nameserver individually to test which are working and which +/// fail to complete a lookup. +async fn trial_nameservers_inner( + name_servers: &[NameServerConfig], +) -> Vec<(NameServerConfig, Result<(), ResolveError>)> { + let mut trial_lookups = JoinSet::new(); + + for name_server in name_servers { + let ns = name_server.clone(); + trial_lookups.spawn(async { (ns.clone(), trial_lookup(ns, "example.com").await) }); + } + + trial_lookups.join_all().await +} + /// Create an independent resolver that has only the provided nameserver and do one lookup for the /// provided query target. async fn trial_lookup(name_server: NameServerConfig, query: &str) -> Result<(), ResolveError> { @@ -1176,7 +1166,8 @@ mod failure_test { ..Default::default() }; - for (ns, result) in resolver.trial_nameservers_inner().await { + let name_servers = resolver.state.get().unwrap().config().name_servers(); + for (ns, result) in trial_nameservers_inner(name_servers).await { if ns.socket_addr.ip() == good_cf_ip { assert!(result.is_ok()) } else { @@ -1198,7 +1189,7 @@ mod failure_test { let res = resolver.trial_nameservers_and_reconfigure().await; assert!(res.is_err_and( - |e| matches!(e, ResolveError::ConfigError(msg) if msg == RECONFIGURE_ERROR_MSG.to_string()) + |e| matches!(e, ResolveError::ConfigError(msg) if msg == RECONFIGURE_ERROR_MSG) )); } From e2fd15d6f8b87de69401c9f4783c4310ef81dcf6 Mon Sep 17 00:00:00 2001 From: jmwample Date: Tue, 25 Nov 2025 09:41:22 -0700 Subject: [PATCH 16/18] prevent error on uninitialized resolver --- common/http-api-client/src/dns.rs | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/common/http-api-client/src/dns.rs b/common/http-api-client/src/dns.rs index 47bf6a373ae..88d0547b4db 100644 --- a/common/http-api-client/src/dns.rs +++ b/common/http-api-client/src/dns.rs @@ -519,7 +519,7 @@ impl HickoryDnsResolver { /// Do a trial resolution using each nameserver individually to test which are working and which /// fail to complete a lookup. pub async fn trial_nameservers(&self) -> Result<(), ResolveError> { - let name_servers = self.get_nameservers(); + let name_servers = self.get_nameservers()?; for (ns, result) in trial_nameservers_inner(&name_servers).await { if let Err(e) = result { @@ -531,12 +531,24 @@ impl HickoryDnsResolver { Ok(()) } - fn get_nameservers(&self) -> Vec { - if self.dont_use_shared { - self.state.get().unwrap().config().name_servers().to_vec() - } else { - SHARED_RESOLVER.read().unwrap().get_nameservers() - } + fn get_nameservers(&self) -> Result, ResolveError> { + if !self.dont_use_shared { + return SHARED_RESOLVER.read().unwrap().get_nameservers(); + }; + let name_servers = self + .state + .get_or_try_init(|| { + HickoryDnsResolver::new_resolver( + self.dont_use_shared, + self.ns_ip_ver_policy, + self.current_options.clone(), + ) + })? + .config() + .name_servers() + .to_vec(); + + Ok(name_servers) } /// Do a trial resolution using each nameserver individually to test which are working and which @@ -546,7 +558,7 @@ impl HickoryDnsResolver { /// If no nameservers successfully complete the lookup return an error and leave the current /// configured resolver set as is. pub async fn trial_nameservers_and_reconfigure(&mut self) -> Result<(), ResolveError> { - let name_servers = self.get_nameservers(); + let name_servers = self.get_nameservers()?; let mut working_nameservers = Vec::new(); for (ns, result) in trial_nameservers_inner(&name_servers).await { From 77a1732cc8744228e989ee02c031d52f06491e2a Mon Sep 17 00:00:00 2001 From: jmwample Date: Mon, 1 Dec 2025 14:56:42 -0700 Subject: [PATCH 17/18] change multiple calls to trial reconfigure to retry from the full available set rather that whittling down --- common/http-api-client/src/dns.rs | 52 ++++++++++++++----------------- 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/common/http-api-client/src/dns.rs b/common/http-api-client/src/dns.rs index 88d0547b4db..e5057afaac1 100644 --- a/common/http-api-client/src/dns.rs +++ b/common/http-api-client/src/dns.rs @@ -167,6 +167,10 @@ pub struct HickoryDnsResolver { ns_ip_ver_policy: NameServerIpVersionPolicy, /// Current options used by the resolver and used for rebuilding on preference change. current_options: Option, + + /// Set of nameservers used for this resolver before any filtering is applied. Used internally + /// and for testing. + default_nameserver_config_group: NameServerConfigGroup, } impl Default for HickoryDnsResolver { @@ -180,6 +184,7 @@ impl Default for HickoryDnsResolver { static_base: Some(Default::default()), static_base_first: false, overall_dns_timeout: DEFAULT_OVERALL_LOOKUP_TIMEOUT, + default_nameserver_config_group: default_nameserver_group(), } } } @@ -517,11 +522,9 @@ impl HickoryDnsResolver { } /// Do a trial resolution using each nameserver individually to test which are working and which - /// fail to complete a lookup. + /// fail to complete a lookup. This will always try the full set of default configured resolvers. pub async fn trial_nameservers(&self) -> Result<(), ResolveError> { - let name_servers = self.get_nameservers()?; - - for (ns, result) in trial_nameservers_inner(&name_servers).await { + for (ns, result) in trial_nameservers_inner(&self.default_nameserver_config_group).await { if let Err(e) = result { warn!("trial {ns:?} errored: {e}"); } else { @@ -531,37 +534,17 @@ impl HickoryDnsResolver { Ok(()) } - fn get_nameservers(&self) -> Result, ResolveError> { - if !self.dont_use_shared { - return SHARED_RESOLVER.read().unwrap().get_nameservers(); - }; - let name_servers = self - .state - .get_or_try_init(|| { - HickoryDnsResolver::new_resolver( - self.dont_use_shared, - self.ns_ip_ver_policy, - self.current_options.clone(), - ) - })? - .config() - .name_servers() - .to_vec(); - - Ok(name_servers) - } - /// Do a trial resolution using each nameserver individually to test which are working and which /// fail to complete a lookup. If one or more of the resolutions succeeds, rebuild the resolver /// using only the nameservers that successfully completed the lookup. /// + /// This will always try the full set of default configured resolvers. + /// /// If no nameservers successfully complete the lookup return an error and leave the current /// configured resolver set as is. pub async fn trial_nameservers_and_reconfigure(&mut self) -> Result<(), ResolveError> { - let name_servers = self.get_nameservers()?; - let mut working_nameservers = Vec::new(); - for (ns, result) in trial_nameservers_inner(&name_servers).await { + for (ns, result) in trial_nameservers_inner(&self.default_nameserver_config_group).await { if let Err(e) = result { warn!("trial {ns:?} errored: {e}"); } else { @@ -1190,12 +1173,22 @@ mod failure_test { #[tokio::test] async fn trial_nameservers_reconfigure_none_working() { + let broken_ns_group = NameServerConfigGroup::from_ips_https( + GUARANTEED_BROKEN_IPS_1, + 443, + "cloudflare-dns.com".to_string(), + true, + ); + + let inner = configure_and_build_resolver(broken_ns_group.clone(), None).unwrap(); + // create a new resolver that won't mess with the shared resolver used by other tests let mut resolver = HickoryDnsResolver { dont_use_shared: true, - state: Arc::new(OnceCell::with_value(build_broken_resolver().unwrap())), + state: Arc::new(OnceCell::with_value(inner)), static_base: Some(Default::default()), overall_dns_timeout: Duration::from_secs(5), + default_nameserver_config_group: broken_ns_group, ..Default::default() }; @@ -1219,13 +1212,14 @@ mod failure_test { true, ); - let inner = configure_and_build_resolver(broken_ns_https, None).unwrap(); + let inner = configure_and_build_resolver(broken_ns_https.clone(), None).unwrap(); // create a new resolver that won't mess with the shared resolver used by other tests let mut resolver = HickoryDnsResolver { dont_use_shared: true, state: Arc::new(OnceCell::with_value(inner)), static_base: Some(Default::default()), + default_nameserver_config_group: broken_ns_https, ..Default::default() }; From 6f16bd97a4e7b50bdbf4985752111558aafff52e Mon Sep 17 00:00:00 2001 From: jmwample Date: Tue, 2 Dec 2025 11:02:22 -0700 Subject: [PATCH 18/18] add self-managed tracking for nameserver --- common/http-api-client/src/dns.rs | 172 +++++++++++++++--------------- 1 file changed, 84 insertions(+), 88 deletions(-) diff --git a/common/http-api-client/src/dns.rs b/common/http-api-client/src/dns.rs index e5057afaac1..fa38f5857e3 100644 --- a/common/http-api-client/src/dns.rs +++ b/common/http-api-client/src/dns.rs @@ -63,7 +63,7 @@ use std::{ net::{IpAddr, SocketAddr}, str::FromStr, sync::{Arc, LazyLock, RwLock}, - time::Duration, + time::{Duration, Instant}, }; use hickory_resolver::{ @@ -168,9 +168,63 @@ pub struct HickoryDnsResolver { /// Current options used by the resolver and used for rebuilding on preference change. current_options: Option, - /// Set of nameservers used for this resolver before any filtering is applied. Used internally - /// and for testing. - default_nameserver_config_group: NameServerConfigGroup, + /// Set of nameservers used for this resolver before any filtering is applied. + // Used internally and for testing. + default_nameserver_config_group: NsConfigGroupWithStatus, +} + +#[derive(Debug, Clone)] +struct NsConfigGroupWithStatus { + inner: Vec<(NameServerConfig, NsStatus)>, +} + +impl NsConfigGroupWithStatus { + fn into_ns_group(self) -> NameServerConfigGroup { + self.inner + .iter() + .map(|entry| entry.0.clone()) + .collect::>() + .into() + } + + fn nameserver_configs(&self) -> Vec { + self.inner.iter().map(|(cfg, _)| cfg.clone()).collect() + } +} + +impl From<&[NameServerConfig]> for NsConfigGroupWithStatus { + fn from(value: &[NameServerConfig]) -> Self { + let inner = value + .iter() + .map(|cfg| (cfg.clone(), NsStatus::Untested)) + .collect(); + + Self { inner } + } +} + +impl From> for NsConfigGroupWithStatus { + fn from(value: Vec) -> Self { + let inner = value + .iter() + .map(|cfg| (cfg.clone(), NsStatus::Untested)) + .collect(); + + Self { inner } + } +} + +impl From for NsConfigGroupWithStatus { + fn from(value: NameServerConfigGroup) -> Self { + Self::from(&value as &[NameServerConfig]) + } +} + +#[derive(Debug, Clone)] +enum NsStatus { + Untested, + Working(Instant), + Failed(Instant), } impl Default for HickoryDnsResolver { @@ -524,7 +578,8 @@ impl HickoryDnsResolver { /// Do a trial resolution using each nameserver individually to test which are working and which /// fail to complete a lookup. This will always try the full set of default configured resolvers. pub async fn trial_nameservers(&self) -> Result<(), ResolveError> { - for (ns, result) in trial_nameservers_inner(&self.default_nameserver_config_group).await { + let nameservers = self.default_nameserver_config_group.nameserver_configs(); + for (ns, result) in trial_nameservers_inner(&nameservers).await { if let Err(e) = result { warn!("trial {ns:?} errored: {e}"); } else { @@ -543,8 +598,10 @@ impl HickoryDnsResolver { /// If no nameservers successfully complete the lookup return an error and leave the current /// configured resolver set as is. pub async fn trial_nameservers_and_reconfigure(&mut self) -> Result<(), ResolveError> { + let nameservers = self.default_nameserver_config_group.nameserver_configs(); + let mut working_nameservers = Vec::new(); - for (ns, result) in trial_nameservers_inner(&self.default_nameserver_config_group).await { + for (ns, result) in trial_nameservers_inner(&nameservers).await { if let Err(e) = result { warn!("trial {ns:?} errored: {e}"); } else { @@ -698,11 +755,12 @@ fn new_resolver( ns_ip_version_policy: NameServerIpVersionPolicy, options: Option, ) -> Result { - let name_servers = match ns_ip_version_policy { + let name_servers: NameServerConfigGroup = match ns_ip_version_policy { NameServerIpVersionPolicy::Ipv4AndIpv6 => default_nameserver_group(), NameServerIpVersionPolicy::Ipv4Only => default_nameserver_group_ipv4_only(), NameServerIpVersionPolicy::Ipv6Only => default_nameserver_group_ipv6_only(), - }; + } + .into_ns_group(); info!("building new configured {ns_ip_version_policy} resolver"); debug!("configuring resolver to use nameserver set: {name_servers:?}"); @@ -710,12 +768,12 @@ fn new_resolver( configure_and_build_resolver(name_servers, options) } -fn default_nameserver_group() -> NameServerConfigGroup { +fn default_nameserver_group() -> NsConfigGroupWithStatus { let mut name_servers = NameServerConfigGroup::quad9_tls(); name_servers.merge(NameServerConfigGroup::quad9_https()); name_servers.merge(NameServerConfigGroup::cloudflare_tls()); name_servers.merge(NameServerConfigGroup::cloudflare_https()); - name_servers + name_servers.into() } fn configure_and_build_resolver( @@ -735,92 +793,30 @@ where Ok(resolver_builder.build()) } -fn default_nameserver_group_ipv4_only() -> NameServerConfigGroup { - let mut name_servers = NameServerConfigGroup::from_ips_https( - &quad9_ips_v4(), - 443, - "dns.quad9.net".to_string(), - true, - ); - name_servers.merge(NameServerConfigGroup::from_ips_tls( - &quad9_ips_v4(), - 853, - "dns.quad9.net".to_string(), - true, - )); - name_servers.merge(NameServerConfigGroup::from_ips_https( - &cloudflare_ips_v4(), - 443, - "cloudflare-dns.com".to_string(), - true, - )); - name_servers.merge(NameServerConfigGroup::from_ips_tls( - &cloudflare_ips_v4(), - 853, - "cloudflare-dns.com".to_string(), - true, - )); - name_servers -} - -fn default_nameserver_group_ipv6_only() -> NameServerConfigGroup { - let mut name_servers = NameServerConfigGroup::from_ips_https( - &quad9_ips_v6(), - 443, - "dns.quad9.net".to_string(), - true, - ); - name_servers.merge(NameServerConfigGroup::from_ips_tls( - &quad9_ips_v6(), - 853, - "dns.quad9.net".to_string(), - true, - )); - name_servers.merge(NameServerConfigGroup::from_ips_https( - &cloudflare_ips_v6(), - 443, - "cloudflare-dns.com".to_string(), - true, - )); - name_servers.merge(NameServerConfigGroup::from_ips_tls( - &cloudflare_ips_v6(), - 853, - "cloudflare-dns.com".to_string(), - true, - )); - name_servers -} - -fn cloudflare_ips_v4() -> Vec { - hickory_resolver::config::CLOUDFLARE_IPS +fn filter_ipv4(nameservers: impl AsRef<[NameServerConfig]>) -> Vec { + nameservers + .as_ref() .iter() - .filter(|ip| ip.is_ipv4()) + .filter(|ns| ns.socket_addr.is_ipv4()) .cloned() .collect() } -fn cloudflare_ips_v6() -> Vec { - hickory_resolver::config::CLOUDFLARE_IPS +fn filter_ipv6(nameservers: impl AsRef<[NameServerConfig]>) -> Vec { + nameservers + .as_ref() .iter() - .filter(|ip| ip.is_ipv6()) + .filter(|ns| ns.socket_addr.is_ipv6()) .cloned() .collect() } -fn quad9_ips_v4() -> Vec { - hickory_resolver::config::QUAD9_IPS - .iter() - .filter(|ip| ip.is_ipv4()) - .cloned() - .collect() +fn default_nameserver_group_ipv4_only() -> NsConfigGroupWithStatus { + filter_ipv4(default_nameserver_group().nameserver_configs()).into() } -fn quad9_ips_v6() -> Vec { - hickory_resolver::config::QUAD9_IPS - .iter() - .filter(|ip| ip.is_ipv6()) - .cloned() - .collect() +fn default_nameserver_group_ipv6_only() -> NsConfigGroupWithStatus { + filter_ipv6(default_nameserver_group().nameserver_configs()).into() } /// Create a new resolver with the default configuration, which reads from the system DNS config @@ -905,7 +901,7 @@ mod test { // Make sure the nameserver group for Ipv4 is really IPv4 only let ns_group = default_nameserver_group_ipv4_only(); let addrs: Vec = ns_group - .into_inner() + .into_ns_group() .iter() .map(|cfg| cfg.socket_addr.ip()) .collect(); @@ -914,7 +910,7 @@ mod test { // Make sure the nameserver group for Ipv6 is really IPv6 only let ns_group = default_nameserver_group_ipv6_only(); let addrs: Vec = ns_group - .into_inner() + .into_ns_group() .iter() .map(|cfg| cfg.socket_addr.ip()) .collect(); @@ -1188,7 +1184,7 @@ mod failure_test { state: Arc::new(OnceCell::with_value(inner)), static_base: Some(Default::default()), overall_dns_timeout: Duration::from_secs(5), - default_nameserver_config_group: broken_ns_group, + default_nameserver_config_group: broken_ns_group.into(), ..Default::default() }; @@ -1219,7 +1215,7 @@ mod failure_test { dont_use_shared: true, state: Arc::new(OnceCell::with_value(inner)), static_base: Some(Default::default()), - default_nameserver_config_group: broken_ns_https, + default_nameserver_config_group: broken_ns_https.into(), ..Default::default() };