From aa7e27aedeea90bffe3703b2674d4e8ac8556e15 Mon Sep 17 00:00:00 2001 From: haochengxia Date: Thu, 23 Oct 2025 14:47:29 -0400 Subject: [PATCH 1/3] Clean up all explicit DataLoader usage --- docs/src/en/getting_started/quickstart.md | 17 ++++++----------- examples/plugin_cache/s3fifo.py | 9 ++++----- examples/trace_analysis.py | 6 ++---- src/exception.cpp | 4 ++-- src/libCacheSim | 2 +- 5 files changed, 15 insertions(+), 23 deletions(-) diff --git a/docs/src/en/getting_started/quickstart.md b/docs/src/en/getting_started/quickstart.md index 3e7771a..c3ddaa6 100644 --- a/docs/src/en/getting_started/quickstart.md +++ b/docs/src/en/getting_started/quickstart.md @@ -91,11 +91,10 @@ With libcachesim installed, you can start cache simulation for some eviction alg The above example demonstrates the basic workflow of using `libcachesim` for cache simulation: -1. Use `DataLoader` to download a cache trace file from an S3 bucket. -2. Open and efficiently process the trace file with `TraceReader`. -3. Initialize a cache object (here, `S3FIFO`) with a specified cache size (e.g., 1MB). -4. Run the simulation on the entire trace using `process_trace` to obtain object and byte miss ratios. -5. Optionally, process only a portion of the trace by specifying `start_req` and `max_req` for partial simulation. +1. Open and efficiently process the trace file with `TraceReader`. +2. Initialize a cache object (here, `S3FIFO`) with a specified cache size (e.g., 1MB). +3. Run the simulation on the entire trace using `process_trace` to obtain object and byte miss ratios. +4. Optionally, process only a portion of the trace by specifying `start_req` and `max_req` for partial simulation. This workflow applies to most cache algorithms and trace types, making it easy to get started and customize your experiments. @@ -108,10 +107,7 @@ Here is an example demonstrating how to use `TraceAnalyzer`. 
import libcachesim as lcs # Step 1: Get one trace from S3 bucket - URI = "cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst" - dl = lcs.DataLoader() - dl.load(URI) - + URI = "s3://cache-datasets/cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst" reader = lcs.TraceReader( trace = dl.get_cache_path(URI), trace_type = lcs.TraceType.ORACLE_GENERAL_TRACE, @@ -143,8 +139,7 @@ Here is an example demonstrating how to use `TraceAnalyzer`. The above code demonstrates how to perform trace analysis using `libcachesim`. The workflow is as follows: -1. Download a trace file from an S3 bucket using `DataLoader`. -2. Open the trace file with `TraceReader`, specifying the trace type and any reader initialization parameters. +1. Open the trace file with `TraceReader`, specifying the trace type and any reader initialization parameters. The URI starting with `s3://`, will download a trace file from an S3 bucket. 3. Configure the analysis options with `AnalysisOption` to enable or disable specific analyses (such as request rate, size, etc.). 4. Optionally, set additional analysis parameters with `AnalysisParam`. 5. Create a `TraceAnalyzer` object with the reader, output directory, and the chosen options and parameters. 
diff --git a/examples/plugin_cache/s3fifo.py b/examples/plugin_cache/s3fifo.py index aa1fcdf..0176983 100644 --- a/examples/plugin_cache/s3fifo.py +++ b/examples/plugin_cache/s3fifo.py @@ -193,17 +193,16 @@ def cache_free_hook(cache): cache_name="S3FIFO", ) -URI = "cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst" -dl = lcs.DataLoader() -dl.load(URI) +URI = "s3://cache-datasets/cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst" -# Step 2: Open trace and process efficiently +# Open trace reader = lcs.TraceReader( - trace=dl.get_cache_path(URI), + trace=URI, trace_type=lcs.TraceType.ORACLE_GENERAL_TRACE, reader_init_params=lcs.ReaderInitParam(ignore_obj_size=True), ) +# Use native S3FIFO for reference ref_s3fifo = S3FIFO(cache_size=1024, small_size_ratio=0.1, ghost_size_ratio=0.9, move_to_main_threshold=2) # for req in reader: diff --git a/examples/trace_analysis.py b/examples/trace_analysis.py index 0318171..49fb300 100644 --- a/examples/trace_analysis.py +++ b/examples/trace_analysis.py @@ -1,12 +1,10 @@ import libcachesim as lcs # Step 1: Get one trace from S3 bucket -URI = "cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst" -dl = lcs.DataLoader() -dl.load(URI) +URI = "s3://cache-datasets/cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst" reader = lcs.TraceReader( - trace=dl.get_cache_path(URI), + trace=URI, trace_type=lcs.TraceType.ORACLE_GENERAL_TRACE, reader_init_params=lcs.ReaderInitParam(ignore_obj_size=False), ) diff --git a/src/exception.cpp b/src/exception.cpp index de3195e..078d9c4 100644 --- a/src/exception.cpp +++ b/src/exception.cpp @@ -21,9 +21,9 @@ void register_exception(py::module& m) { try { if (p) std::rethrow_exception(p); } catch (const CacheException& e) { - exc_cache(e.what()); + py::set_error(exc_cache, e.what()); } catch (const ReaderException& e) { - exc_reader(e.what()); + py::set_error(exc_reader, e.what()); } }); diff --git a/src/libCacheSim b/src/libCacheSim index 
06bddc5..91f703a 160000 --- a/src/libCacheSim +++ b/src/libCacheSim @@ -1 +1 @@ -Subproject commit 06bddc566194d58931feb26d343ea2111c20860f +Subproject commit 91f703a0bb9bcb728cf48ae9f1df03a8d096db21 From 15e8979873f9503e20a7beced4cff4370210a8fd Mon Sep 17 00:00:00 2001 From: haochengxia Date: Thu, 23 Oct 2025 15:00:04 -0400 Subject: [PATCH 2/3] Combine translator --- docs/src/en/getting_started/quickstart.md | 10 +++++----- src/exception.cpp | 17 +++++++++++------ 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/docs/src/en/getting_started/quickstart.md b/docs/src/en/getting_started/quickstart.md index c3ddaa6..c3f9f63 100644 --- a/docs/src/en/getting_started/quickstart.md +++ b/docs/src/en/getting_started/quickstart.md @@ -109,7 +109,7 @@ Here is an example demonstrating how to use `TraceAnalyzer`. # Step 1: Get one trace from S3 bucket URI = "s3://cache-datasets/cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst" reader = lcs.TraceReader( - trace = dl.get_cache_path(URI), + trace = URI, trace_type = lcs.TraceType.ORACLE_GENERAL_TRACE, reader_init_params = lcs.ReaderInitParam(ignore_obj_size=False) ) @@ -140,10 +140,10 @@ Here is an example demonstrating how to use `TraceAnalyzer`. The above code demonstrates how to perform trace analysis using `libcachesim`. The workflow is as follows: 1. Open the trace file with `TraceReader`, specifying the trace type and any reader initialization parameters. The URI starting with `s3://`, will download a trace file from an S3 bucket. -3. Configure the analysis options with `AnalysisOption` to enable or disable specific analyses (such as request rate, size, etc.). -4. Optionally, set additional analysis parameters with `AnalysisParam`. -5. Create a `TraceAnalyzer` object with the reader, output directory, and the chosen options and parameters. -6. Run the analysis with `analyzer.run()`. +2. 
Configure the analysis options with `AnalysisOption` to enable or disable specific analyses (such as request rate, size, etc.). +3. Optionally, set additional analysis parameters with `AnalysisParam`. +4. Create a `TraceAnalyzer` object with the reader, output directory, and the chosen options and parameters. +5. Run the analysis with `analyzer.run()`. After running, you can access the analysis results, such as summary statistics (`stat`) or detailed results (e.g., `example_analysis.size`). diff --git a/src/exception.cpp b/src/exception.cpp index 078d9c4..33698f5 100644 --- a/src/exception.cpp +++ b/src/exception.cpp @@ -17,36 +17,41 @@ void register_exception(py::module& m) { static py::exception exc_cache(m, "CacheException"); static py::exception exc_reader(m, "ReaderException"); + // Single exception translator with catch blocks ordered from most-specific to least-specific py::register_exception_translator([](std::exception_ptr p) { try { if (p) std::rethrow_exception(p); } catch (const CacheException& e) { + // Custom exception: CacheException py::set_error(exc_cache, e.what()); } catch (const ReaderException& e) { + // Custom exception: ReaderException py::set_error(exc_reader, e.what()); - } - }); - - py::register_exception_translator([](std::exception_ptr p) { - try { - if (p) std::rethrow_exception(p); } catch (const std::bad_alloc& e) { + // Memory allocation error PyErr_SetString(PyExc_MemoryError, e.what()); } catch (const std::invalid_argument& e) { + // Invalid argument error PyErr_SetString(PyExc_ValueError, e.what()); } catch (const std::out_of_range& e) { + // Out of range error PyErr_SetString(PyExc_IndexError, e.what()); } catch (const std::domain_error& e) { + // Domain error PyErr_SetString(PyExc_ValueError, ("Domain error: " + std::string(e.what())).c_str()); } catch (const std::overflow_error& e) { + // Overflow error PyErr_SetString(PyExc_OverflowError, e.what()); } catch (const std::range_error& e) { + // Range error 
PyErr_SetString(PyExc_ValueError, ("Range error: " + std::string(e.what())).c_str()); } catch (const std::runtime_error& e) { + // Generic runtime error PyErr_SetString(PyExc_RuntimeError, e.what()); } catch (const std::exception& e) { + // Catch-all for any other std::exception PyErr_SetString(PyExc_RuntimeError, ("C++ exception: " + std::string(e.what())).c_str()); } From 730a60e46208d052af5572e37b49f11da6a25778 Mon Sep 17 00:00:00 2001 From: haochengxia Date: Thu, 23 Oct 2025 15:03:06 -0400 Subject: [PATCH 3/3] Fix badge link of MKDocs --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 527106e..201dc52 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # libCacheSim Python Binding [![Build](https://github.com/cacheMon/libCacheSim-python/actions/workflows/build.yml/badge.svg)](https://github.com/cacheMon/libCacheSim-python/actions/workflows/build.yml) -[![Documentation](https://github.com/cacheMon/libCacheSim-python/actions/workflows/docs.yml/badge.svg)](docs.libcachesim.com/python) +[![Documentation](https://github.com/cacheMon/libCacheSim-python/actions/workflows/docs.yml/badge.svg)](https://github.com/cacheMon/libCacheSim-python/actions/workflows/docs.yml) libCacheSim is fast with the features from [underlying libCacheSim lib](https://github.com/1a1a11a/libCacheSim):