|
| 1 | +use std::ops::Deref; |
| 2 | +use std::result::Result as StdResult; |
| 3 | +use std::sync::LazyLock; |
| 4 | + |
| 5 | +use mlua::{Lua, MetaMethod, Result, String as LuaString, Table, UserData, UserDataMethods, Value, Variadic}; |
| 6 | +use ouroboros::self_referencing; |
| 7 | +use quick_cache::sync::Cache; |
| 8 | + |
/// Capacity passed to the shared regex cache when it is first created.
///
/// The value is a tunable default rather than a hard requirement.
const REGEX_CACHE_SIZE: usize = 256;
| 11 | + |
| 12 | +#[derive(Clone, Debug)] |
| 13 | +pub struct Regex(regex::bytes::Regex); |
| 14 | + |
| 15 | +impl Deref for Regex { |
| 16 | + type Target = regex::bytes::Regex; |
| 17 | + |
| 18 | + #[inline] |
| 19 | + fn deref(&self) -> &Self::Target { |
| 20 | + &self.0 |
| 21 | + } |
| 22 | +} |
| 23 | + |
| 24 | +// Global cache for regexes shared across all Lua states. |
| 25 | +static CACHE: LazyLock<Cache<String, Regex>> = LazyLock::new(|| Cache::new(REGEX_CACHE_SIZE)); |
| 26 | + |
| 27 | +impl Regex { |
| 28 | + /// Creates a new cached regex or retrieves it from the cache if it already exists. |
| 29 | + pub fn new(_: &Lua, re: &str) -> StdResult<Self, regex::Error> { |
| 30 | + if let Some(re) = CACHE.get(re) { |
| 31 | + return Ok(re); |
| 32 | + } |
| 33 | + let regex = Self(regex::bytes::Regex::new(&re)?); |
| 34 | + CACHE.insert(re.to_string(), regex.clone()); |
| 35 | + Ok(regex) |
| 36 | + } |
| 37 | +} |
| 38 | + |
| 39 | +impl UserData for Regex { |
| 40 | + fn register(registry: &mut mlua::UserDataRegistry<Self>) { |
| 41 | + registry.add_method("is_match", |_, this, text: LuaString| { |
| 42 | + Ok(this.0.is_match(&text.as_bytes())) |
| 43 | + }); |
| 44 | + |
| 45 | + registry.add_method("match", |lua, this, text: LuaString| { |
| 46 | + let text = (*text.as_bytes()).into(); |
| 47 | + let caps = Captures::try_new(text, |text| this.0.captures(text).ok_or(())); |
| 48 | + match caps { |
| 49 | + Ok(caps) => Ok(Value::UserData(lua.create_userdata(caps)?)), |
| 50 | + Err(_) => Ok(Value::Nil), |
| 51 | + } |
| 52 | + }); |
| 53 | + |
| 54 | + // Returns low level information about raw offsets of each submatch. |
| 55 | + registry.add_method("captures_read", |lua, this, text: LuaString| { |
| 56 | + let mut locs = this.capture_locations(); |
| 57 | + match this.captures_read(&mut locs, &text.as_bytes()) { |
| 58 | + Some(_) => Ok(Value::UserData(lua.create_userdata(CaptureLocations(locs))?)), |
| 59 | + None => Ok(Value::Nil), |
| 60 | + } |
| 61 | + }); |
| 62 | + |
| 63 | + registry.add_method("split", |lua, this, text: LuaString| { |
| 64 | + lua.create_sequence_from(this.split(&text.as_bytes()).map(LuaString::wrap)) |
| 65 | + }); |
| 66 | + |
| 67 | + registry.add_method("splitn", |lua, this, (text, limit): (LuaString, usize)| { |
| 68 | + lua.create_sequence_from(this.splitn(&text.as_bytes(), limit).map(LuaString::wrap)) |
| 69 | + }); |
| 70 | + |
| 71 | + registry.add_method("replace", |lua, this, (text, rep): (LuaString, LuaString)| { |
| 72 | + lua.create_string(this.replace(&text.as_bytes(), &*rep.as_bytes())) |
| 73 | + }); |
| 74 | + } |
| 75 | +} |
| 76 | + |
| 77 | +#[self_referencing] |
| 78 | +struct Captures { |
| 79 | + text: Box<[u8]>, |
| 80 | + |
| 81 | + #[borrows(text)] |
| 82 | + #[covariant] |
| 83 | + caps: regex::bytes::Captures<'this>, |
| 84 | +} |
| 85 | + |
| 86 | +impl UserData for Captures { |
| 87 | + fn register(registry: &mut mlua::UserDataRegistry<Self>) { |
| 88 | + registry.add_meta_method(MetaMethod::Index, |lua, this, key: Value| match key { |
| 89 | + Value::String(s) => { |
| 90 | + let name = s.to_string_lossy(); |
| 91 | + this.borrow_caps() |
| 92 | + .name(&name) |
| 93 | + .map(|v| lua.create_string(v.as_bytes())) |
| 94 | + .transpose() |
| 95 | + } |
| 96 | + Value::Integer(i) => this |
| 97 | + .borrow_caps() |
| 98 | + .get(i as usize) |
| 99 | + .map(|v| lua.create_string(v.as_bytes())) |
| 100 | + .transpose(), |
| 101 | + _ => Ok(None), |
| 102 | + }) |
| 103 | + } |
| 104 | +} |
| 105 | + |
| 106 | +struct CaptureLocations(regex::bytes::CaptureLocations); |
| 107 | + |
| 108 | +impl UserData for CaptureLocations { |
| 109 | + fn register(registry: &mut mlua::UserDataRegistry<Self>) { |
| 110 | + // Returns the total number of capture groups. |
| 111 | + registry.add_method("len", |_, this, ()| Ok(this.0.len())); |
| 112 | + |
| 113 | + // Returns the start and end positions of the Nth capture group. |
| 114 | + registry.add_method("get", |_, this, i: usize| match this.0.get(i) { |
| 115 | + // We add 1 to the start position because Lua is 1-indexed. |
| 116 | + // End position is non-inclusive, so we don't need to add 1. |
| 117 | + Some((start, end)) => Ok(Variadic::from_iter([start + 1, end])), |
| 118 | + None => Ok(Variadic::new()), |
| 119 | + }); |
| 120 | + } |
| 121 | +} |
| 122 | + |
| 123 | +struct RegexSet(regex::bytes::RegexSet); |
| 124 | + |
| 125 | +impl Deref for RegexSet { |
| 126 | + type Target = regex::bytes::RegexSet; |
| 127 | + |
| 128 | + #[inline] |
| 129 | + fn deref(&self) -> &Self::Target { |
| 130 | + &self.0 |
| 131 | + } |
| 132 | +} |
| 133 | + |
| 134 | +impl UserData for RegexSet { |
| 135 | + fn register(registry: &mut mlua::UserDataRegistry<Self>) { |
| 136 | + registry.add_function("new", |_, patterns: Vec<String>| { |
| 137 | + let set = lua_try!(regex::bytes::RegexSet::new(patterns).map(RegexSet)); |
| 138 | + Ok(Ok(set)) |
| 139 | + }); |
| 140 | + |
| 141 | + registry.add_method("is_match", |_, this, text: LuaString| { |
| 142 | + Ok(this.is_match(&text.as_bytes())) |
| 143 | + }); |
| 144 | + |
| 145 | + registry.add_method("len", |_, this, ()| Ok(this.len())); |
| 146 | + |
| 147 | + registry.add_method("matches", |_, this, text: LuaString| { |
| 148 | + Ok(this |
| 149 | + .matches(&text.as_bytes()) |
| 150 | + .iter() |
| 151 | + .map(|i| i + 1) |
| 152 | + .collect::<Vec<_>>()) |
| 153 | + }); |
| 154 | + } |
| 155 | +} |
| 156 | + |
| 157 | +/// Compiles a regular expression. |
| 158 | +/// |
| 159 | +/// Once compiled, it can be used repeatedly to search, split or replace substrings in a text. |
| 160 | +fn regex_new(lua: &Lua, re: LuaString) -> Result<StdResult<Regex, String>> { |
| 161 | + let re = re.to_str()?; |
| 162 | + Ok(Ok(lua_try!(Regex::new(lua, &re)))) |
| 163 | +} |
| 164 | + |
| 165 | +/// Escapes a string so that it can be used as a literal in a regular expression. |
| 166 | +fn regex_escape(_: &Lua, text: LuaString) -> Result<String> { |
| 167 | + Ok(regex::escape(&text.to_str()?)) |
| 168 | +} |
| 169 | + |
| 170 | +/// Returns true if there is a match for the regex anywhere in the given text. |
| 171 | +fn regex_is_match(lua: &Lua, (re, text): (LuaString, LuaString)) -> Result<StdResult<bool, String>> { |
| 172 | + let re = re.to_str()?; |
| 173 | + let re = lua_try!(Regex::new(lua, &re)); |
| 174 | + Ok(Ok(re.is_match(&text.as_bytes()))) |
| 175 | +} |
| 176 | + |
| 177 | +/// Returns all matches of the regex in the given text or nil if there is no match. |
| 178 | +fn regex_match(lua: &Lua, (re, text): (LuaString, LuaString)) -> Result<StdResult<Value, String>> { |
| 179 | + let re = re.to_str()?; |
| 180 | + let re = lua_try!(Regex::new(lua, &re)); |
| 181 | + match re.captures(&text.as_bytes()) { |
| 182 | + Some(caps) => { |
| 183 | + let mut it = caps.iter().map(|om| om.map(|m| LuaString::wrap(m.as_bytes()))); |
| 184 | + let first = it.next().unwrap(); |
| 185 | + let table = lua.create_sequence_from(it)?; |
| 186 | + table.raw_set(0, first)?; |
| 187 | + Ok(Ok(Value::Table(table))) |
| 188 | + } |
| 189 | + None => Ok(Ok(Value::Nil)), |
| 190 | + } |
| 191 | +} |
| 192 | + |
| 193 | +/// A loader for the `regex` module. |
| 194 | +fn loader(lua: &Lua) -> Result<Table> { |
| 195 | + let t = lua.create_table()?; |
| 196 | + t.set("new", lua.create_function(regex_new)?)?; |
| 197 | + t.set("escape", lua.create_function(regex_escape)?)?; |
| 198 | + t.set("is_match", lua.create_function(regex_is_match)?)?; |
| 199 | + t.set("match", lua.create_function(regex_match)?)?; |
| 200 | + t.set("RegexSet", lua.create_proxy::<RegexSet>()?)?; |
| 201 | + Ok(t) |
| 202 | +} |
| 203 | + |
| 204 | +/// Registers the `regex` module in the given Lua state. |
| 205 | +pub fn register(lua: &Lua, name: Option<&str>) -> Result<Table> { |
| 206 | + let name = name.unwrap_or("@regex"); |
| 207 | + let value = loader(lua)?; |
| 208 | + lua.register_module(name, &value)?; |
| 209 | + Ok(value) |
| 210 | +} |
0 commit comments