11# An example of plugin for s3fifo
2+
3+ # NOTE(haocheng): this example shows that, with the plugin system, a cache can be assembled like Lego blocks
4+ # Happy caching!
5+
6+ import libcachesim as lcs
27from collections import OrderedDict
3- from libcachesim import PluginCache , CommonCacheParams , Request , S3FIFO , SyntheticReader
8+ from collections import deque
9+ from libcachesim import PluginCache , CommonCacheParams , Request , S3FIFO , FIFO , SyntheticReader
410
511# NOTE(haocheng): for now, we only support ignoring object size
612class StandaloneS3FIFO :
@@ -9,160 +15,157 @@ def __init__(self,
915 ghost_size_ratio : float = 0.9 ,
1016 move_to_main_threshold : int = 2 ,
1117 cache_size : int = 1024 ):
12- # S3-FIFO uses three queues with OrderedDict for O(1) operations
13- self .small_fifo = OrderedDict ()
14- self .main_fifo = OrderedDict ()
15- self .ghost_fifo = OrderedDict ()
16-
17- # Size limits
18- self .small_max_size = int (small_size_ratio * cache_size )
19- self .main_max_size = int (cache_size - small_size_ratio * cache_size )
20- self .ghost_max_size = int (ghost_size_ratio * cache_size )
18+ self .cache_size = cache_size
19+ small_fifo_size = int (small_size_ratio * cache_size )
20+ main_fifo_size = cache_size - small_fifo_size
21+ ghost_fifo_size = int (ghost_size_ratio * cache_size )
22+
23+ self .small_set = set ()
24+ self .main_set = set ()
25+ self .ghost_set = deque (maxlen = ghost_fifo_size )
26+
27+ self .small_fifo = FIFO (small_fifo_size )
28+ self .main_fifo = FIFO (main_fifo_size )
29+ self .ghost_fifo = FIFO (ghost_fifo_size )
2130
2231 # Frequency tracking
23- self .small_freq = {}
24- self .main_freq = {}
25- self .ghost_freq = {}
32+ self .freq = {}
2633
2734 # Other parameters
2835 self .max_freq = 3
2936 self .move_to_main_threshold = move_to_main_threshold
3037
31- def cache_hit (self , obj_id ):
32- """
33- Cache hit can happen in two cases:
34- 1. Small FIFO cache hit (small_fifo)
35- 2. Main FIFO cache hit (main_fifo)
36- """
37- if obj_id in self .main_fifo :
38- self .main_freq [obj_id ] += 1
39- elif obj_id in self .small_fifo :
40- self .small_freq [obj_id ] += 1
41- else :
42- print (f"Cache hit for obj_id { obj_id } but not found in any queue" )
43- print (f"small_fifo: { list (self .small_fifo .keys ())} " )
44- print (f"main_fifo: { list (self .main_fifo .keys ())} " )
45- print (f"ghost_fifo: { list (self .ghost_fifo .keys ())} " )
46- assert False , "Cache hit should happen in small_fifo or main_fifo"
38+ self .has_evicted = False # Mark if we start to evict, only after full we will start eviction
39+ self .hit_on_ghost = False
40+
def cache_hit(self, req: Request):
    """Record a hit on ``req`` by bumping its frequency counter.

    The counter in ``self.freq`` is incremented once for each resident queue
    (small first, then main) whose ``find`` reports the object.

    Args:
        req: The request that hit; only ``req.obj_id`` is read here.

    Raises:
        KeyError: If a queue reports the object but ``self.freq`` has no
            entry for it.
    """
    # Both queues are always probed, in the same order as before; the
    # previously unused ``hit_small`` / ``hit_main`` locals are gone.
    # NOTE(review): update_cache=False presumably makes find() a pure lookup
    # that does not touch queue state - confirm against the FIFO API.
    for queue in (self.small_fifo, self.main_fifo):
        if queue.find(req, update_cache=False):
            self.freq[req.obj_id] += 1
4749
def cache_miss(self, req: Request):
    """Admit a missed object into the main or small queue.

    Steps, in order:

    1. Unless ``self.hit_on_ghost`` is already set (``cache_evict`` may set
       it for the same request), probe the ghost queue. On a ghost hit the
       flag is set and the id is dropped from ``ghost_fifo`` and
       ``ghost_set``.
    2. On a ghost hit the object is inserted into the main queue and the
       flag is cleared.
    3. Otherwise, an object whose ``obj_size`` is at least the small queue's
       ``cache_size`` is ignored. Before the first eviction
       (``self.has_evicted`` is false), once the small queue is full the
       object goes to main; in every other case it goes to small.
    4. The frequency counter of the inserted object is reset to 0.

    Args:
        req: The request that missed.
    """
    if not self.hit_on_ghost:
        ghost_entry = self.ghost_fifo.find(req, update_cache=False)
        if ghost_entry is not None:
            self.hit_on_ghost = True
            self.ghost_fifo.remove(req.obj_id)
            # ghost_set is a bounded deque that only mirrors ghost_fifo and is
            # never consulted for decisions. It can already have dropped this
            # id on its own (maxlen), and deque.remove raises ValueError for a
            # missing value, so a stale mirror must not abort the request.
            try:
                self.ghost_set.remove(req.obj_id)
            except ValueError:
                pass

    if self.hit_on_ghost:
        # Seen recently (ghost hit): admit straight into the main queue.
        obj = self.main_fifo.insert(req)
        self.main_set.add(req.obj_id)
        self.hit_on_ghost = False
    else:
        small = self.small_fifo
        if req.obj_size >= small.cache_size:
            # Too large for the small queue: do not admit it at all.
            return

        # Warm-up: until the first eviction happens, overflow from a full
        # small queue goes directly into main.
        if not self.has_evicted and small.get_occupied_byte() >= small.cache_size:
            obj = self.main_fifo.insert(req)
            self.main_set.add(obj.obj_id)
        else:
            obj = small.insert(req)
            self.small_set.add(obj.obj_id)

    # Every freshly admitted object starts with a zero hit count.
    self.freq[obj.obj_id] = 0
8679
def cache_evict_small(self, req: Request):
    """Evict one object from the small queue, promoting reused ones to main.

    Candidates are taken from ``self.small_fifo.to_evict`` until one is
    really evicted or the small queue is empty. A candidate whose frequency
    reached ``self.move_to_main_threshold`` is inserted into the main queue
    with its frequency reset to 0; any other candidate is recorded in the
    ghost queue and counts as the real eviction. In both cases the candidate
    is removed from the small queue.

    Args:
        req: The request that triggered eviction; forwarded to ``to_evict``.

    Returns:
        The id of the object evicted to the ghost queue, or ``None`` if the
        small queue was drained by promotions only.
    """
    real_evicted_id = None
    while real_evicted_id is None and self.small_fifo.get_occupied_byte() > 0:
        candidate = self.small_fifo.to_evict(req)
        # Same sanity check cache_evict_main performs on its candidate.
        assert candidate is not None
        candidate_id = candidate.obj_id  # keep the id before mutating queues

        # The destination queue gets a unit-size request for the same id.
        moved_req = Request(obj_id=candidate_id, obj_size=1)
        if self.freq[candidate_id] >= self.move_to_main_threshold:
            # Reused often enough: promote to main and restart its count.
            self.main_fifo.insert(moved_req)
            self.main_set.add(candidate_id)
            self.freq[candidate_id] = 0
        else:
            # Not reused enough: remember it in the ghost queue.
            self.ghost_fifo.get(moved_req)
            self.ghost_set.append(candidate_id)
            real_evicted_id = candidate_id

        removed = self.small_fifo.remove(candidate_id)
        self.small_set.remove(candidate_id)
        assert removed, "Should be able to remove"
    return real_evicted_id
105103
def cache_evict_main(self, req: Request):
    """Evict one object from the main queue, giving reused ones another lap.

    Candidates are taken from ``self.main_fifo.to_evict`` until one is
    evicted or the main queue is empty. A candidate with frequency >= 1 is
    removed and reinserted with its frequency capped at ``self.max_freq``
    and then decremented; a candidate with frequency 0 is removed for good.

    Args:
        req: The request that triggered eviction; forwarded to ``to_evict``.

    Returns:
        The id of the evicted object, or ``None`` if nothing was evicted.
        An id that was only reinserted is never returned, matching
        ``cache_evict_small``, so the caller cannot drop bookkeeping for an
        object that is still resident.
    """
    real_evicted_id = None
    while real_evicted_id is None and self.main_fifo.get_occupied_byte() > 0:
        candidate = self.main_fifo.to_evict(req)
        assert candidate is not None
        candidate_id = candidate.obj_id  # keep the id before mutating queues
        freq = self.freq[candidate_id]

        self.main_fifo.remove(candidate_id)
        if freq >= 1:
            # Second chance: reinsert with a decremented frequency. The id
            # stays in main_set because its membership does not change.
            self.main_fifo.insert(Request(obj_id=candidate_id, obj_size=1))
            self.freq[candidate_id] = min(freq, self.max_freq) - 1
        else:
            self.main_set.remove(candidate_id)
            real_evicted_id = candidate_id
    return real_evicted_id
125126
def cache_evict(self, req: Request):
    """Evict one object from the main or the small queue.

    The ghost queue is probed for ``req`` first, unless ``self.hit_on_ghost``
    is already set. On a ghost hit the flag is set and the id is dropped
    from ``ghost_fifo`` and ``ghost_set``; ``cache_miss`` reads that flag.
    The main queue is the victim when it holds more than its ``cache_size``
    or when the small queue is empty; otherwise the small queue is.

    Args:
        req: The request that triggered eviction.

    Returns:
        The id of the evicted object, or ``None`` if this call evicted
        nothing (for example the small queue only promoted objects to main).
    """
    if not self.hit_on_ghost:
        ghost_entry = self.ghost_fifo.find(req, update_cache=False)
        if ghost_entry is not None:
            self.hit_on_ghost = True
            self.ghost_fifo.remove(req.obj_id)
            # ghost_set is a bounded deque that only mirrors ghost_fifo and is
            # never consulted for decisions. It can already have dropped this
            # id on its own (maxlen), and deque.remove raises ValueError for a
            # missing value, so a stale mirror must not abort the eviction.
            try:
                self.ghost_set.remove(req.obj_id)
            except ValueError:
                pass

    # From now on cache_miss stops sending small-queue overflow to main.
    self.has_evicted = True

    main_over_capacity = (
        self.main_fifo.get_occupied_byte() > self.main_fifo.cache_size
    )
    if main_over_capacity or self.small_fifo.get_occupied_byte() == 0:
        evicted_id = self.cache_evict_main(req)
    else:
        evicted_id = self.cache_evict_small(req)

    if evicted_id is not None:
        # The object left the cache, so its hit counter is dropped with it.
        del self.freq[evicted_id]

    return evicted_id
137147
def cache_remove(self, obj_id):
    """Remove ``obj_id`` from every queue and from the bookkeeping.

    ``remove`` is called on the small, ghost and main queues regardless of
    earlier results. For each queue that reported a removal, the matching
    id container (``small_set``, ``ghost_set``, ``main_set``) is cleaned as
    well, and the frequency counter is dropped when the object was resident
    in small or main. Previously only the queues were updated, which left
    stale ids behind in the containers and in ``self.freq``.

    Args:
        obj_id: Id of the object to remove.

    Returns:
        ``True`` if at least one queue held the object, ``False`` otherwise.
    """
    # No short-circuiting: all three queues must be asked. bool() keeps the
    # result a real boolean even if remove() returns None or an int.
    in_small = bool(self.small_fifo.remove(obj_id))
    in_ghost = bool(self.ghost_fifo.remove(obj_id))
    in_main = bool(self.main_fifo.remove(obj_id))

    if in_small:
        self.small_set.discard(obj_id)
    if in_main:
        self.main_set.discard(obj_id)
    if in_ghost:
        # ghost_set is a bounded deque and may already have dropped the id;
        # its remove() raises ValueError for a missing value.
        try:
            self.ghost_set.remove(obj_id)
        except ValueError:
            pass
    if in_small or in_main:
        self.freq.pop(obj_id, None)

    return in_small or in_ghost or in_main
153154
def cache_init_hook(common_cache_params: CommonCacheParams):
    """Create the policy state object used by the other hooks.

    Args:
        common_cache_params: Cache parameters; only ``cache_size`` is read.

    Returns:
        A new ``StandaloneS3FIFO`` sized to ``common_cache_params.cache_size``
        with all other settings left at their defaults.
    """
    capacity = common_cache_params.cache_size
    return StandaloneS3FIFO(cache_size=capacity)
156157
def cache_hit_hook(cache, request: Request):
    """Forward a cache hit to the policy object.

    Args:
        cache: The object returned by ``cache_init_hook``.
        request: The request that hit; passed through unchanged.
    """
    cache.cache_hit(request)
159160
def cache_miss_hook(cache, request: Request):
    """Forward a cache miss to the policy object.

    Args:
        cache: The object returned by ``cache_init_hook``.
        request: The request that missed; passed through unchanged.
    """
    cache.cache_miss(request)
162163
def cache_eviction_hook(cache, request: Request):
    """Evict exactly one object and return its id.

    ``cache.cache_evict`` may return ``None`` when a call only moved objects
    between queues, so it is retried until it reports a real eviction.

    Args:
        cache: The object returned by ``cache_init_hook``.
        request: The request that triggered the eviction.

    Returns:
        The id of the evicted object.

    Raises:
        RuntimeError: If ``cache_evict`` evicted nothing and both the small
            and the main queue are empty, i.e. retrying could never succeed.
    """
    while True:
        evicted_id = cache.cache_evict(request)
        if evicted_id is not None:
            return evicted_id
        # A None result is only progress while something is still resident;
        # with both queues empty the old loop would have spun forever.
        if (cache.small_fifo.get_occupied_byte() == 0
                and cache.main_fifo.get_occupied_byte() == 0):
            raise RuntimeError("eviction requested but the cache holds no objects")
166169
def cache_remove_hook(cache, obj_id):
    """Forward an explicit removal to the policy object.

    The result of ``cache.cache_remove`` is discarded; the hook returns
    ``None``.

    Args:
        cache: The object returned by ``cache_init_hook``.
        obj_id: Id of the object to remove.
    """
    cache.cache_remove(obj_id)
@@ -176,7 +179,7 @@ def cache_free_hook(cache):
176179 cache .main_freq .clear ()
177180
178181cache = PluginCache (
179- cache_size = 1024 * 1024 ,
182+ cache_size = 1024 ,
180183 cache_init_hook = cache_init_hook ,
181184 cache_hit_hook = cache_hit_hook ,
182185 cache_miss_hook = cache_miss_hook ,
@@ -185,20 +188,29 @@ def cache_free_hook(cache):
185188 cache_free_hook = cache_free_hook ,
186189 cache_name = "S3FIFO" )
187190
# Step 1: Fetch the trace into the local cache.
URI = "cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst"
dl = lcs.DataLoader()
dl.load(URI)

# Step 2: Open the trace; object sizes are ignored because the plugin above
# does not support them.
reader = lcs.TraceReader(
    trace=dl.get_cache_path(URI),
    trace_type=lcs.TraceType.ORACLE_GENERAL_TRACE,
    reader_init_params=lcs.ReaderInitParam(ignore_obj_size=True),
)

# Reference implementation configured with the same parameters as the
# plugin's defaults.
ref_s3fifo = S3FIFO(
    cache_size=1024,
    small_size_ratio=0.1,
    ghost_size_ratio=0.9,
    move_to_main_threshold=2,
)

# Step 3: Replay the trace through both caches and compare miss ratios.
# NOTE(review): the same reader is handed to both process_trace calls; this
# relies on process_trace starting from the beginning of the trace each time -
# confirm.
req_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
ref_req_miss_ratio, ref_byte_miss_ratio = ref_s3fifo.process_trace(reader)
print(f"Plugin req miss ratio: {req_miss_ratio}, ref req miss ratio: {ref_req_miss_ratio}")
print(f"Plugin byte miss ratio: {byte_miss_ratio}, ref byte miss ratio: {ref_byte_miss_ratio}")

# Exact equality is intended: both caches should make identical decisions.
assert req_miss_ratio == ref_req_miss_ratio, (
    f"Request miss ratio mismatch: {req_miss_ratio} != {ref_req_miss_ratio}"
)
assert byte_miss_ratio == ref_byte_miss_ratio, (
    f"Byte miss ratio mismatch: {byte_miss_ratio} != {ref_byte_miss_ratio}"
)
print("All requests processed successfully. Plugin cache matches reference S3FIFO cache.")
0 commit comments