|
| 1 | +use std::sync::Arc; |
| 2 | + |
| 3 | +use diesel::PgConnection; |
| 4 | +use thiserror::Error; |
| 5 | +use typomania::{ |
| 6 | + checks::{Bitflips, Omitted, SwappedWords, Typos}, |
| 7 | + Harness, |
| 8 | +}; |
| 9 | + |
| 10 | +use super::{config, database::TopCrates}; |
| 11 | + |
/// Name of the environment variable that [`Cache::from_env`] reads to obtain the comma-separated
/// list of e-mail addresses to notify.
static NOTIFICATION_EMAILS_ENV: &str = "TYPOSQUAT_NOTIFICATION_EMAILS";
| 13 | + |
| 14 | +/// A cache containing everything we need to run typosquatting checks. |
| 15 | +/// |
| 16 | +/// Specifically, this includes a corpus of popular crates attached to a typomania harness, and a |
| 17 | +/// list of e-mail addresses that we'll send notifications to if potential typosquatting is |
| 18 | +/// discovered. |
| 19 | +pub struct Cache { |
| 20 | + emails: Vec<String>, |
| 21 | + harness: Option<Harness<TopCrates>>, |
| 22 | +} |
| 23 | + |
| 24 | +impl Cache { |
| 25 | + /// Instantiates a new [`Cache`] from the environment. |
| 26 | + /// |
| 27 | + /// This reads the `NOTIFICATION_EMAILS_ENV` environment variable to get the list of e-mail |
| 28 | + /// addresses to send notifications to, then invokes [`Cache::new`] to read popular crates from |
| 29 | + /// the database. |
| 30 | + #[instrument(skip_all, err)] |
| 31 | + pub fn from_env(conn: &mut PgConnection) -> Result<Self, Error> { |
| 32 | + let emails: Vec<String> = crates_io_env_vars::var(NOTIFICATION_EMAILS_ENV) |
| 33 | + .map_err(|e| Error::Environment { |
| 34 | + name: NOTIFICATION_EMAILS_ENV.into(), |
| 35 | + source: Arc::new(e), |
| 36 | + })? |
| 37 | + .unwrap_or_default() |
| 38 | + .split(',') |
| 39 | + .map(|s| s.trim().to_owned()) |
| 40 | + .filter(|s| !s.is_empty()) |
| 41 | + .collect(); |
| 42 | + |
| 43 | + if emails.is_empty() { |
| 44 | + // If we're not notifying anyone, then there's really not much to do here. |
| 45 | + warn!("$TYPOSQUAT_NOTIFICATION_EMAILS is not set; no typosquatting notifications will be sent"); |
| 46 | + Ok(Self { |
| 47 | + emails, |
| 48 | + harness: None, |
| 49 | + }) |
| 50 | + } else { |
| 51 | + // Otherwise, let's go get the top crates and build a corpus. |
| 52 | + Self::new(emails, conn) |
| 53 | + } |
| 54 | + } |
| 55 | + |
| 56 | + /// Instantiates a cache by querying popular crates and building them into a typomania harness. |
| 57 | + /// |
| 58 | + /// This relies on configuration in the `super::config` module. |
| 59 | + pub fn new(emails: Vec<String>, conn: &mut PgConnection) -> Result<Self, Error> { |
| 60 | + let top = TopCrates::new(conn, config::TOP_CRATES)?; |
| 61 | + |
| 62 | + Ok(Self { |
| 63 | + emails, |
| 64 | + harness: Some( |
| 65 | + Harness::builder() |
| 66 | + .with_check(Bitflips::new( |
| 67 | + config::CRATE_NAME_ALPHABET, |
| 68 | + top.crates.keys().map(String::as_str), |
| 69 | + )) |
| 70 | + .with_check(Omitted::new(config::CRATE_NAME_ALPHABET)) |
| 71 | + .with_check(SwappedWords::new("-_")) |
| 72 | + .with_check(Typos::new(config::TYPOS.iter().map(|(c, typos)| { |
| 73 | + (*c, typos.iter().map(|ss| ss.to_string()).collect()) |
| 74 | + }))) |
| 75 | + .build(top), |
| 76 | + ), |
| 77 | + }) |
| 78 | + } |
| 79 | + |
| 80 | + pub fn get_harness(&self) -> Option<&Harness<TopCrates>> { |
| 81 | + self.harness.as_ref() |
| 82 | + } |
| 83 | + |
| 84 | + pub fn iter_emails(&self) -> impl Iterator<Item = &str> { |
| 85 | + self.emails.iter().map(String::as_str) |
| 86 | + } |
| 87 | +} |
| 88 | + |
| 89 | +// Because the error returned from Cache::new() gets memoised in the environment, we either need to |
| 90 | +// return it by reference from Environment::typosquat_cache() or we need to be able to clone it. |
| 91 | +// We'll do some Arc wrapping in the variants below to ensure that everything is clonable while not |
| 92 | +// destroying the source metadata. |
| 93 | +#[derive(Error, Debug, Clone)] |
| 94 | +pub enum Error { |
| 95 | + #[error("error reading environment variable {name}: {source:?}")] |
| 96 | + Environment { |
| 97 | + name: String, |
| 98 | + #[source] |
| 99 | + source: Arc<anyhow::Error>, |
| 100 | + }, |
| 101 | + |
| 102 | + #[error("error getting top crates: {0:?}")] |
| 103 | + TopCrates(#[source] Arc<diesel::result::Error>), |
| 104 | +} |
| 105 | + |
| 106 | +impl From<diesel::result::Error> for Error { |
| 107 | + fn from(value: diesel::result::Error) -> Self { |
| 108 | + Self::TopCrates(Arc::new(value)) |
| 109 | + } |
| 110 | +} |
0 commit comments