|
18 | 18 | use std::collections::HashMap; |
19 | 19 | use std::fmt::Display; |
20 | 20 | use std::str::FromStr; |
| 21 | +use std::sync::Arc; |
21 | 22 |
|
22 | | -use opendal::Configurator; |
| 23 | +use async_trait::async_trait; |
| 24 | +use bytes::Bytes; |
23 | 25 | use opendal::services::AzdlsConfig; |
| 26 | +use opendal::{Configurator, Operator}; |
24 | 27 | use url::Url; |
25 | 28 |
|
| 29 | +use crate::io::{ |
| 30 | + Extensions, FileMetadata, FileRead, FileWrite, InputFile, OutputFile, Storage, |
| 31 | + StorageBuilder, |
| 32 | +}; |
26 | 33 | use crate::{Error, ErrorKind, Result, ensure_data_valid}; |
27 | 34 |
|
28 | 35 | /// A connection string. |
@@ -125,7 +132,7 @@ pub(crate) fn azdls_create_operator<'a>( |
125 | 132 | /// paths are expected to contain the `dfs` storage service. |
126 | 133 | /// - `wasb[s]` is used to refer to files in Blob Storage directly; paths are |
127 | 134 | /// expected to contain the `blob` storage service. |
128 | | -#[derive(Debug, PartialEq)] |
| 135 | +#[derive(Debug, Clone, PartialEq)] |
129 | 136 | pub(crate) enum AzureStorageScheme { |
130 | 137 | Abfs, |
131 | 138 | Abfss, |
@@ -597,3 +604,116 @@ mod tests { |
597 | 604 | } |
598 | 605 | } |
599 | 606 | } |
| 607 | + |
| 608 | +/// Azure Data Lake Storage implementation using OpenDAL |
| 609 | +#[derive(Debug, Clone)] |
| 610 | +pub struct OpenDALAzdlsStorage { |
| 611 | + /// Because Azdls accepts multiple possible schemes, we store the full |
| 612 | + /// passed scheme here to later validate schemes passed via paths. |
| 613 | + configured_scheme: AzureStorageScheme, |
| 614 | + config: Arc<AzdlsConfig>, |
| 615 | +} |
| 616 | + |
| 617 | +impl OpenDALAzdlsStorage { |
| 618 | + /// Creates operator from path. |
| 619 | + fn create_operator<'a>(&self, path: &'a str) -> Result<(Operator, &'a str)> { |
| 620 | + let (op, relative_path) = azdls_create_operator(path, &self.config, &self.configured_scheme)?; |
| 621 | + let op = op.layer(opendal::layers::RetryLayer::new()); |
| 622 | + Ok((op, relative_path)) |
| 623 | + } |
| 624 | +} |
| 625 | + |
| 626 | +#[async_trait] |
| 627 | +impl Storage for OpenDALAzdlsStorage { |
| 628 | + async fn exists(&self, path: &str) -> Result<bool> { |
| 629 | + let (op, relative_path) = self.create_operator(path)?; |
| 630 | + Ok(op.exists(relative_path).await?) |
| 631 | + } |
| 632 | + |
| 633 | + async fn metadata(&self, path: &str) -> Result<FileMetadata> { |
| 634 | + let (op, relative_path) = self.create_operator(path)?; |
| 635 | + let meta = op.stat(relative_path).await?; |
| 636 | + Ok(FileMetadata { |
| 637 | + size: meta.content_length(), |
| 638 | + }) |
| 639 | + } |
| 640 | + |
| 641 | + async fn read(&self, path: &str) -> Result<Bytes> { |
| 642 | + let (op, relative_path) = self.create_operator(path)?; |
| 643 | + Ok(op.read(relative_path).await?.to_bytes()) |
| 644 | + } |
| 645 | + |
| 646 | + async fn reader(&self, path: &str) -> Result<Box<dyn FileRead>> { |
| 647 | + let (op, relative_path) = self.create_operator(path)?; |
| 648 | + Ok(Box::new(op.reader(relative_path).await?)) |
| 649 | + } |
| 650 | + |
| 651 | + async fn write(&self, path: &str, bs: Bytes) -> Result<()> { |
| 652 | + let mut writer = self.writer(path).await?; |
| 653 | + writer.write(bs).await?; |
| 654 | + writer.close().await |
| 655 | + } |
| 656 | + |
| 657 | + async fn writer(&self, path: &str) -> Result<Box<dyn FileWrite>> { |
| 658 | + let (op, relative_path) = self.create_operator(path)?; |
| 659 | + Ok(Box::new(op.writer(relative_path).await?)) |
| 660 | + } |
| 661 | + |
| 662 | + async fn delete(&self, path: &str) -> Result<()> { |
| 663 | + let (op, relative_path) = self.create_operator(path)?; |
| 664 | + Ok(op.delete(relative_path).await?) |
| 665 | + } |
| 666 | + |
| 667 | + async fn remove_dir_all(&self, path: &str) -> Result<()> { |
| 668 | + let (op, relative_path) = self.create_operator(path)?; |
| 669 | + let path = if relative_path.ends_with('/') { |
| 670 | + relative_path.to_string() |
| 671 | + } else { |
| 672 | + format!("{relative_path}/") |
| 673 | + }; |
| 674 | + Ok(op.remove_all(&path).await?) |
| 675 | + } |
| 676 | + |
| 677 | + fn new_input(&self, path: &str) -> Result<InputFile> { |
| 678 | + Ok(InputFile { |
| 679 | + storage: Arc::new(self.clone()), |
| 680 | + path: path.to_string(), |
| 681 | + }) |
| 682 | + } |
| 683 | + |
| 684 | + fn new_output(&self, path: &str) -> Result<OutputFile> { |
| 685 | + Ok(OutputFile { |
| 686 | + storage: Arc::new(self.clone()), |
| 687 | + path: path.to_string(), |
| 688 | + }) |
| 689 | + } |
| 690 | +} |
| 691 | + |
| 692 | +/// Builder for Azure Data Lake Storage |
| 693 | +#[derive(Debug)] |
| 694 | +pub struct OpenDALAzdlsStorageBuilder; |
| 695 | + |
| 696 | +impl StorageBuilder for OpenDALAzdlsStorageBuilder { |
| 697 | + fn build( |
| 698 | + &self, |
| 699 | + props: HashMap<String, String>, |
| 700 | + _extensions: Extensions, |
| 701 | + ) -> Result<Arc<dyn Storage>> { |
| 702 | + // Get the scheme string from the props or use default |
| 703 | + let scheme_str = props |
| 704 | + .get("scheme_str") |
| 705 | + .cloned() |
| 706 | + .unwrap_or_else(|| "abfs".to_string()); |
| 707 | + |
| 708 | + // Parse the scheme |
| 709 | + let scheme = scheme_str.parse::<AzureStorageScheme>()?; |
| 710 | + |
| 711 | + // Parse Azdls config from props |
| 712 | + let config = azdls_config_parse(props)?; |
| 713 | + |
| 714 | + Ok(Arc::new(OpenDALAzdlsStorage { |
| 715 | + configured_scheme: scheme, |
| 716 | + config: Arc::new(config), |
| 717 | + })) |
| 718 | + } |
| 719 | +} |
0 commit comments