Make memory utilization fairer between baseline/rackscale benches; prepare to use hugepages for qemu memory (not done yet)

hunhoffe · hunhoffe · commit aa86b14d3b6d · 2023-07-30T12:14:48.000-07:00
diff --git a/kernel/src/arch/x86_64/rackscale/registration.rs b/kernel/src/arch/x86_64/rackscale/registration.rs
@@ -20,7 +20,7 @@ use crate::memory::backends::AllocatorStatistics;
 use crate::memory::mcache::MCache;
 use crate::memory::shmem_affinity::mid_to_shmem_affinity;
 use crate::memory::{Frame, PAddr, LARGE_PAGE_SIZE};
-use crate::transport::shmem::get_affinity_shmem;
+use crate::transport::shmem::{get_affinity_shmem, get_affinity_shmem_by_mid};
 
 #[derive(Debug, Default)]
 pub(crate) struct ClientRegistrationRequest {
@@ -117,6 +117,16 @@ pub(crate) fn register_client(hdr: &mut RPCHeader, payload: &mut [u8]) -> Result
             }
         };
 
+        // Make sure the controller and the client are seeing the same shmem addresses.
+        {
+            let shmem_region = get_affinity_shmem_by_mid(req.mid);
+            assert_eq!(
+                shmem_region.base.as_u64(),
+                req.shmem_region_base,
+                "Controller did not assign shmem region the same address as the client"
+            );
+        }
+
         // Create shmem memory manager
         let frame = Frame::new(
             PAddr::from(req.shmem_region_base),
diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs
@@ -117,6 +117,7 @@ fn rackscale_fxmark_benchmark(transport: RackscaleTransport) {
     test.controller_match_fn = controller_match_fn;
     test.transport = transport;
     test.use_affinity_shmem = cfg!(feature = "affinity-shmem");
+    test.use_qemu_huge_pages = cfg!(feature = "affinity-shmem");
     test.file_name = file_name.clone();
     test.arg = Some(config);
 
@@ -144,9 +145,7 @@ fn rackscale_fxmark_benchmark(transport: RackscaleTransport) {
         cmd_fn,
         baseline_timeout_fn: timeout_fn,
         rackscale_timeout_fn: timeout_fn,
-        controller_mem_fn: mem_fn,
-        client_mem_fn: mem_fn,
-        baseline_mem_fn: mem_fn,
+        mem_fn,
     };
 
     if cfg!(feature = "baseline") {
@@ -284,6 +283,7 @@ fn rackscale_vmops_benchmark(transport: RackscaleTransport, benchtype: VMOpsBenc
     test.controller_match_fn = controller_match_fn;
     test.transport = transport;
     test.use_affinity_shmem = cfg!(feature = "affinity-shmem");
+    test.use_qemu_huge_pages = cfg!(feature = "affinity-shmem");
     test.file_name = file_name.clone();
     test.arg = Some(benchtype);
 
@@ -308,9 +308,7 @@ fn rackscale_vmops_benchmark(transport: RackscaleTransport, benchtype: VMOpsBenc
         cmd_fn,
         baseline_timeout_fn,
         rackscale_timeout_fn,
-        controller_mem_fn: mem_fn,
-        client_mem_fn: mem_fn,
-        baseline_mem_fn: mem_fn,
+        mem_fn,
     };
 
     if cfg!(feature = "baseline") {
@@ -411,6 +409,7 @@ fn s11_rackscale_shmem_leveldb_benchmark() {
     test.controller_match_fn = controller_match_fn;
     test.transport = RackscaleTransport::Shmem;
     test.use_affinity_shmem = cfg!(feature = "affinity-shmem");
+    test.use_qemu_huge_pages = cfg!(feature = "affinity-shmem");
     test.file_name = file_name.to_string();
     test.arg = Some(config);
     test.run_dhcpd_for_baseline = true;
@@ -440,9 +439,7 @@ fn s11_rackscale_shmem_leveldb_benchmark() {
         cmd_fn,
         baseline_timeout_fn,
         rackscale_timeout_fn,
-        controller_mem_fn: mem_fn,
-        client_mem_fn: mem_fn,
-        baseline_mem_fn: mem_fn,
+        mem_fn,
     };
 
     if cfg!(feature = "baseline") {
@@ -601,6 +598,7 @@ fn rackscale_memcached_benchmark(transport: RackscaleTransport) {
     test.transport = transport;
     test.shmem_size *= 2;
     test.use_affinity_shmem = cfg!(feature = "affinity-shmem");
+    test.use_qemu_huge_pages = cfg!(feature = "affinity-shmem");
     test.file_name = file_name.to_string();
     test.arg = Some(config);
     test.run_dhcpd_for_baseline = true;
@@ -630,9 +628,7 @@ fn rackscale_memcached_benchmark(transport: RackscaleTransport) {
         cmd_fn,
         baseline_timeout_fn,
         rackscale_timeout_fn,
-        controller_mem_fn: mem_fn,
-        client_mem_fn: mem_fn,
-        baseline_mem_fn: mem_fn,
+        mem_fn,
     };
 
     if cfg!(feature = "baseline") {
@@ -682,6 +678,7 @@ fn rackscale_monetdb_benchmark(transport: RackscaleTransport) {
     test.controller_match_fn = controller_match_fn;
     test.transport = transport;
     test.use_affinity_shmem = cfg!(feature = "affinity-shmem");
+    test.use_qemu_huge_pages = cfg!(feature = "affinity-shmem");
     test.file_name = file_name.to_string();
     test.arg = None;
     test.run_dhcpd_for_baseline = true;
@@ -710,9 +707,7 @@ fn rackscale_monetdb_benchmark(transport: RackscaleTransport) {
         cmd_fn,
         baseline_timeout_fn,
         rackscale_timeout_fn,
-        controller_mem_fn: mem_fn,
-        client_mem_fn: mem_fn,
-        baseline_mem_fn: mem_fn,
+        mem_fn,
     };
 
     if cfg!(feature = "baseline") {
diff --git a/kernel/testutils/src/rackscale_runner.rs b/kernel/testutils/src/rackscale_runner.rs
@@ -58,14 +58,12 @@ where
     built: Built<'static>,
     /// Timeout for the controller process
     pub controller_timeout: u64,
-    /// Amount of non-shmem QEMU memory given to the controller
-    pub controller_memory: usize,
     /// Function that is called after the controller is spawned to match output of the controller process
     pub controller_match_fn: RackscaleMatchFn<T>,
     /// Timeout for each client process
     pub client_timeout: u64,
-    /// Amount of non-shmem QEMU memory given to each client
-    pub client_memory: usize,
+    /// Amount of non-shmem QEMU memory given to each QEMU instance
+    pub memory: usize,
     /// Function that is called after each client is spawned to match output of the client process
     pub client_match_fn: RackscaleMatchFn<T>,
     /// Number of client machines to spawn
@@ -90,6 +88,8 @@ where
     pub arg: Option<T>,
     /// Run DHCPD in baseline test
     pub run_dhcpd_for_baseline: bool,
+    /// Use huge pages for qemu memory. This requires pre-alloc'ing them on the host before running.
+    pub use_qemu_huge_pages: bool,
 }
 
 impl<T: Clone + Send + 'static> RackscaleRun<T> {
@@ -109,11 +109,10 @@ impl<T: Clone + Send + 'static> RackscaleRun<T> {
 
         RackscaleRun {
             controller_timeout: 60_000,
-            controller_memory: 1024,
             controller_match_fn: blank_match_fn,
             client_timeout: 60_000,
-            client_memory: 1024,
             client_match_fn: blank_match_fn,
+            memory: 1024,
             kernel_test,
             built,
             num_clients: 1,
@@ -127,6 +126,7 @@ impl<T: Clone + Send + 'static> RackscaleRun<T> {
             cmd: "".to_string(),
             arg: None,
             run_dhcpd_for_baseline: false,
+            use_qemu_huge_pages: false,
         }
     }
 
@@ -186,10 +186,11 @@ impl<T: Clone + Send + 'static> RackscaleRun<T> {
         let controller_placement_cores = placement_cores.clone();
         let state = self.clone();
         let controller_tx_build_timer = tx_build_timer_mut.clone();
+        let use_large_pages = self.use_qemu_huge_pages;
         let controller = std::thread::Builder::new()
             .name("Controller".to_string())
             .spawn(move || {
-                let cmdline_controller =
+                let mut cmdline_controller =
                     RunnerArgs::new_with_build(&controller_kernel_test, &state.built)
                         .timeout(state.controller_timeout)
                         .transport(state.transport)
@@ -200,12 +201,16 @@ impl<T: Clone + Send + 'static> RackscaleRun<T> {
                         .no_network_setup()
                         .workers(state.num_clients + 1)
                         .use_vmxnet3()
-                        .memory(state.controller_memory)
+                        .memory(state.memory)
                         .nodes(1)
                         .cores(controller_cores)
                         .node_offset(controller_placement_cores[0].0)
                         .setaffinity(controller_placement_cores[0].1.clone());
 
+                if use_large_pages {
+                    cmdline_controller = cmdline_controller.large_pages().prealloc();
+                }
+
                 let mut output = String::new();
                 let qemu_run = || -> Result<WaitStatus> {
                     let mut p = spawn_nrk(&cmdline_controller)?;
@@ -287,10 +292,11 @@ impl<T: Clone + Send + 'static> RackscaleRun<T> {
             let client_placement_cores = placement_cores.clone();
             let state = self.clone();
             let client_tx_build_timer = tx_build_timer_mut.clone();
+            let use_large_pages = self.use_qemu_huge_pages;
             let client = std::thread::Builder::new()
                 .name(format!("Client{}", i + 1))
                 .spawn(move || {
-                    let cmdline_client =
+                    let mut cmdline_client =
                         RunnerArgs::new_with_build(&client_kernel_test, &state.built)
                             .timeout(state.client_timeout)
                             .transport(state.transport)
@@ -301,14 +307,18 @@ impl<T: Clone + Send + 'static> RackscaleRun<T> {
                             .no_network_setup()
                             .workers(state.num_clients + 1)
                             .cores(state.cores_per_client)
-                            .memory(state.client_memory)
+                            .memory(state.memory)
                             .nobuild() // Use single build for all for consistency
                             .use_vmxnet3()
                             .cmd(&client_cmd)
                             .nodes(1)
                             .node_offset(client_placement_cores[i + 1].0)
                             .setaffinity(client_placement_cores[i + 1].1.clone());
 
+                    if use_large_pages {
+                        cmdline_client = cmdline_client.large_pages().prealloc();
+                    }
+
                     let mut output = String::new();
                     let qemu_run = || -> Result<WaitStatus> {
                         let mut p = spawn_nrk(&cmdline_client)?;
@@ -425,16 +435,20 @@ impl<T: Clone + Send + 'static> RackscaleRun<T> {
             setup_network(self.num_clients + 1);
         }
 
-        let cmdline_baseline = RunnerArgs::new_with_build(&self.kernel_test, &self.built)
+        let mut cmdline_baseline = RunnerArgs::new_with_build(&self.kernel_test, &self.built)
             .timeout(self.controller_timeout)
-            .memory(self.controller_memory)
+            .memory(self.memory)
             .workers(1)
             .cores(self.cores_per_client * self.num_clients)
             .cmd(&self.cmd)
             .no_network_setup()
             .nodes(self.num_clients)
             .setaffinity(all_placement_cores);
 
+        if self.use_qemu_huge_pages {
+            cmdline_baseline = cmdline_baseline.large_pages().prealloc();
+        }
+
         let mut output = String::new();
         let mut qemu_run = || -> Result<WaitStatus> {
             let dhcpd_server = if self.run_dhcpd_for_baseline {
@@ -472,12 +486,8 @@ pub struct RackscaleBench<T: Clone + Send + 'static> {
     pub rackscale_timeout_fn: fn(usize) -> u64,
     // Function to calculate the timeout. Takes as argument number of application cores
     pub baseline_timeout_fn: fn(usize) -> u64,
-    // Function to calculate controller (and baseline) memory. Takes as argument number of application cores and is_smoke
-    pub controller_mem_fn: fn(usize, bool) -> usize,
-    // Function to calculate client memory. Takes as argument number of application cores and is_smoke
-    pub client_mem_fn: fn(usize, bool) -> usize,
-    // Function to calculate baseline nros memory. Takes as argument number of application cores and is_smoke
-    pub baseline_mem_fn: fn(usize, bool) -> usize,
+    // Function to calculate memory (excepting controller memory). Takes as argument number of application cores and is_smoke
+    pub mem_fn: fn(usize, bool) -> usize,
 }
 
 impl<T: Clone + Send + 'static> RackscaleBench<T> {
@@ -565,11 +575,11 @@ impl<T: Clone + Send + 'static> RackscaleBench<T> {
 
             // Caclulate memory for each component
             if !is_baseline {
-                test_run.controller_memory = (self.controller_mem_fn)(total_cores, is_smoke);
-                test_run.client_memory = (self.client_mem_fn)(total_cores, is_smoke);
+                test_run.memory = ((self.mem_fn)(total_cores, is_smoke) / test_run.num_clients)
+                    - test_run.shmem_size;
+                assert!(test_run.memory > 0);
             } else {
-                test_run.controller_memory = (self.baseline_mem_fn)(total_cores, is_smoke);
-                test_run.client_memory = test_run.controller_memory;
+                test_run.memory = (self.mem_fn)(total_cores, is_smoke);
             }
 
             if is_baseline {
diff --git a/lib/rpc/src/transport/smoltcp.rs b/lib/rpc/src/transport/smoltcp.rs