Finished global core id impl

hunhoffe · hunhoffe · commit a9bd768c424c · 2022-11-17T20:56:42.000-08:00
diff --git a/kernel/src/arch/x86_64/rackscale/controller.rs b/kernel/src/arch/x86_64/rackscale/controller.rs
@@ -65,6 +65,7 @@ lazy_static! {
     };
 }
 
+// List of hwthreads of all the clients in the rack
 lazy_static! {
     pub(crate) static ref HWTHREADS: Arc<Mutex<Vec<CpuThread>>> = {
         let mut hwthreads = Vec::try_with_capacity(get_num_clients() as usize)
@@ -73,6 +74,19 @@ lazy_static! {
     };
 }
 
+// Keep track of which hwthreads have been allocated. Index corresponds to gtid of hwthread
+lazy_static! {
+    pub(crate) static ref HWTHREADS_BUSY: Arc<Mutex<Vec<Option<bool>>>> = {
+        // Assume each client has about 8 cores, for now
+        let mut hwthreads_busy = Vec::try_with_capacity(get_num_clients() as usize * 8)
+            .expect("Failed to create vector for rack cpu threads");
+        for i in 0..(get_num_clients() as usize * 30) {
+            hwthreads_busy.push(None);
+        }
+        Arc::new(Mutex::new(hwthreads_busy))
+    };
+}
+
 // Keep track of unfulfilled core assignments
 lazy_static! {
     pub(crate) static ref UNFULFILLED_CORE_ASSIGNMENTS: Arc<Mutex<Vec<Box<VecDeque<RequestCoreReq>>>>> = {
diff --git a/kernel/src/arch/x86_64/rackscale/processops/request_core.rs b/kernel/src/arch/x86_64/rackscale/processops/request_core.rs
@@ -9,23 +9,27 @@ use core2::io::Write;
 use rpc::rpc::*;
 use rpc::RPCClient;
 
-use super::super::dcm::resource_alloc::dcm_resource_alloc;
-use super::super::kernelrpc::*;
-use crate::arch::rackscale::controller::get_local_pid;
-use crate::arch::rackscale::controller::UNFULFILLED_CORE_ASSIGNMENTS;
 use crate::error::KError;
 use crate::fs::cnrfs::MlnrKernelNode;
 use crate::fs::{cnrfs, NrLock};
 use crate::memory::VAddr;
 use crate::nr;
 use crate::nr::KernelNode;
 
+use super::super::client::{get_local_client_id, get_num_clients};
+use super::super::controller::{
+    get_local_pid, HWTHREADS, HWTHREADS_BUSY, UNFULFILLED_CORE_ASSIGNMENTS,
+};
+use super::super::dcm::resource_alloc::dcm_resource_alloc;
+use super::super::kernelrpc::*;
+use super::super::systemops::{gtid_to_local, local_to_gtid};
+
 #[derive(Debug, Clone, Copy)]
 pub(crate) struct RequestCoreReq {
     pub core_id: u64,
     pub entry_point: u64,
 }
-unsafe_abomonate!(RequestCoreReq: core_id, entry_point);
+unsafe_abomonate!(RequestCoreReq: entry_point);
 
 #[derive(Debug)]
 pub(crate) struct RequestCoreWorkRes {
@@ -66,10 +70,6 @@ pub(crate) fn rpc_request_core(
             return Err(RPCError::ExtraData);
         }
         info!("RequestCore() {:?}", res);
-
-        // TODO: could optimize for local case and call local function here
-        // for now, will handle all the same (i.e., client ask for work from controller)
-
         return res.ret;
     } else {
         return Err(RPCError::MalformedResponse);
@@ -95,24 +95,48 @@ pub(crate) fn handle_request_core(hdr: &mut RPCHeader, payload: &mut [u8]) -> Re
         }
     };
 
-    let node = dcm_resource_alloc(local_pid, true);
-
-    // Add request to be handled later.
-    // TODO: handle local differently? For now, for simplicity, handle all the same
-    // TODO: check capacity of core assignments?
-    log::info!("Logged unfulfilled core assignment for {:?}", node);
-    {
-        let mut core_request_vec = UNFULFILLED_CORE_ASSIGNMENTS.lock();
-        let mut deque = core_request_vec
-            .get_mut(node as usize)
-            .expect("failed to fetch core assignment deque for node");
-        deque.push_back(*core_req);
+    let client_id = dcm_resource_alloc(local_pid, true);
+
+    // controller chooses a core id - right now, sequentially for cores on the client_id.
+    let num_clients = get_num_clients();
+    let mut rack_hwthreads_busy = HWTHREADS_BUSY.lock();
+    let mut index = 0;
+    loop {
+        //
+        match rack_hwthreads_busy[local_to_gtid(index, client_id)] {
+            // thread is busy, keep looking
+            Some(true) => index += 1,
+            // found an empty thread! set to busy and break
+            Some(false) => {
+                rack_hwthreads_busy[index] = Some(true);
+                break;
+            }
+            // Ran out of threads for client; DCM should not have allowed this to happen
+            None => panic!(
+                "Should never happen - no empty hwthreads found for client {:?}",
+                client_id
+            ),
+        }
     }
 
+    let rack_hwthreads = HWTHREADS.lock();
+    let gtid = rack_hwthreads[index].id;
+    log::info!("Chose thread id {:?} for request", gtid);
+
     // Construct and return result
     let res = KernelRpcRes {
-        ret: convert_return(Ok((node, 0))),
+        ret: convert_return(Ok((gtid as u64, 0))),
     };
+
+    // can handle request locally if same node otherwise must queue for remote node to handle
+    if client_id != hdr.client_id {
+        log::info!("Logged unfulfilled core assignment for {:?}", client_id);
+        let mut core_request_vec = UNFULFILLED_CORE_ASSIGNMENTS.lock();
+        let mut deque = core_request_vec
+            .get_mut(client_id as usize)
+            .expect("failed to fetch core assignment deque for client_id");
+        deque.push_back(*core_req);
+    }
     construct_ret(hdr, payload, res)
 }
 
@@ -143,7 +167,10 @@ pub(crate) fn request_core_work(rpc_client: &mut dyn RPCClient) -> () {
             // But not sure if we can do that here because 1) client syscalls are ferried to the controller
             // and 2) how do you run it in the context of the (correct) remote process?
             // for now, copied & modified code from original syscall impl
-            let gtid: usize = core_request.core_id.try_into().unwrap();
+            let gtid: usize = gtid_to_local(
+                core_request.core_id.try_into().unwrap(),
+                get_local_client_id(),
+            );
             let mut affinity = None;
             for thread in atopology::MACHINE_TOPOLOGY.threads() {
                 if thread.id == gtid {
diff --git a/kernel/src/arch/x86_64/rackscale/registration.rs b/kernel/src/arch/x86_64/rackscale/registration.rs
@@ -14,7 +14,8 @@ use rpc::rpc::{ClientId, RPCError, RPCHeader};
 use rpc::RPCClient;
 
 use super::dcm::node_registration::dcm_register_node;
-use crate::arch::rackscale::controller::{HWTHREADS, SHMEM_MANAGERS};
+use crate::arch::rackscale::client::get_num_clients;
+use crate::arch::rackscale::controller::{HWTHREADS, HWTHREADS_BUSY, SHMEM_MANAGERS};
 use crate::arch::rackscale::systemops::{local_to_gtid, local_to_node_id, local_to_package_id};
 use crate::error::KResult;
 use crate::memory::LARGE_PAGE_SIZE;
@@ -126,7 +127,8 @@ pub(crate) fn register_client(
             if remaining.len() == 0 {
                 // Register client resources with DCM, DCM doesn't care about pids, so
                 // send w/ dummy pid
-                let client_id = dcm_register_node(0, req.num_cores, memslices);
+                // TODO: register with one less core, assume init process uses that 1 core
+                let client_id = dcm_register_node(0, req.num_cores - 1, memslices);
                 info!("Registered client DCM, assigned client_id={:?}", client_id);
 
                 // Create shmem memory manager
@@ -145,8 +147,23 @@ pub(crate) fn register_client(
 
                 // Record information about the hardware threads
                 info!("hwthreads: {:?}", hwthreads);
+
                 let mut rack_threads = HWTHREADS.lock();
+                let mut rack_threads_busy = HWTHREADS_BUSY.lock();
+
+                // Make sure there's enough room to store data on whether core is busy or no
+                let num_clients = get_num_clients() as usize;
+                if rack_threads_busy.capacity() < hwthreads.len() * num_clients + client_id as usize
+                {
+                    rack_threads_busy
+                        .resize_with(hwthreads.len() * num_clients + client_id as usize, || None);
+                }
+
                 for hwthread in hwthreads {
+                    // set all threads to not busy
+                    rack_threads_busy[local_to_gtid(hwthread.id, client_id)] = Some(false);
+
+                    // add thread to global state with global values made globally unique
                     rack_threads.push(CpuThread {
                         // these are global values to make sure no conflicts across rack
                         id: local_to_gtid(hwthread.id, client_id),
@@ -157,6 +174,8 @@ pub(crate) fn register_client(
                         thread_id: hwthread.thread_id,
                     });
                 }
+                // Let's assume init process is running on hwthread 0 on the client so set that to busy
+                rack_threads_busy[local_to_gtid(0, client_id)] = Some(true);
 
                 Ok(client_id)
             } else {
diff --git a/kernel/src/arch/x86_64/rackscale/syscalls.rs b/kernel/src/arch/x86_64/rackscale/syscalls.rs
@@ -2,6 +2,7 @@ use alloc::boxed::Box;
 use alloc::string::String;
 
 use kpi::io::{FileFlags, FileModes};
+use rpc::rpc::ClientId;
 
 use crate::arch::process::{current_pid, Ring3Process};
 use crate::error::KResult;
@@ -12,7 +13,7 @@ use crate::process::{KernArcBuffer, UserSlice};
 use crate::syscalls::{FsDispatch, ProcessDispatch, SystemCallDispatch, SystemDispatch};
 
 use super::super::syscall::{Arch86SystemCall, Arch86SystemDispatch, Arch86VSpaceDispatch};
-use super::client::RPC_CLIENT;
+use super::client::{get_local_client_id, RPC_CLIENT};
 use super::fileops::close::rpc_close;
 use super::fileops::delete::rpc_delete;
 use super::fileops::getinfo::rpc_getinfo;
@@ -25,6 +26,7 @@ use super::processops::print::rpc_log;
 use super::processops::release_physical::rpc_release_physical;
 use super::processops::request_core::rpc_request_core;
 use super::systemops::get_hardware_threads::rpc_get_hardware_threads;
+use super::systemops::{gtid_to_local, is_gtid_local, local_to_gtid};
 
 pub(crate) struct Arch86LwkSystemCall {
     pub(crate) local: Arch86SystemCall,
@@ -46,7 +48,14 @@ impl SystemDispatch<u64> for Arch86LwkSystemCall {
     }
 
     fn get_core_id(&self) -> KResult<(u64, u64)> {
-        self.local.get_core_id()
+        // map local core ID to rackscale global core ID - since mapping is deterministic on number of
+        // clients we can do this without making an RPC call
+        self.local.get_core_id().and_then(|(core_id, n)| {
+            Ok((
+                local_to_gtid(core_id as usize, get_local_client_id()) as u64,
+                n,
+            ))
+        })
     }
 }
 
@@ -158,7 +167,20 @@ impl ProcessDispatch<u64> for Arch86LwkSystemCall {
     fn request_core(&self, core_id: u64, entry_point: u64) -> KResult<(u64, u64)> {
         let mut client = RPC_CLIENT.lock();
         let pid = crate::arch::process::current_pid()?;
-        rpc_request_core(&mut **client, pid, core_id, entry_point).map_err(|e| e.into())
+        let ret = rpc_request_core(&mut **client, pid, core_id, entry_point).map_err(|e| e.into());
+
+        // request core locally if that's what was assigned this request
+        let client_id = get_local_client_id();
+        if let Ok((gtid, n)) = ret {
+            if is_gtid_local(gtid as usize, client_id) {
+                self.local
+                    .request_core(gtid_to_local(gtid as usize, client_id) as u64, entry_point)
+            } else {
+                ret
+            }
+        } else {
+            ret
+        }
     }
 
     fn allocate_physical(&self, page_size: u64, affinity: u64) -> KResult<(u64, u64)> {
diff --git a/kernel/src/arch/x86_64/rackscale/systemops/get_hardware_threads.rs b/kernel/src/arch/x86_64/rackscale/systemops/get_hardware_threads.rs
@@ -44,17 +44,15 @@ pub(crate) fn rpc_get_hardware_threads(
 
         if let Ok((data_len, n)) = res.ret {
             if data_len as usize <= remaining.len() && data_len <= vaddr_buf_len {
-                log::info!("There's a match! Writing into usesprace now");
                 let mut user_slice =
                     UserSlice::new(pid, UVAddr::try_from(vaddr_buf)?, data_len as usize)?;
                 NrProcess::<Ring3Process>::write_to_userspace(
                     &mut user_slice,
                     &remaining[..data_len as usize],
                 )?;
-                log::info!("Returning value...");
                 Ok((data_len, n))
             } else {
-                log::info!(
+                log::debug!(
                     "Bad payload data: data_len: {:?} remaining.len(): {:?} vaddr_buf_len: {:?}",
                     data_len,
                     remaining.len(),
@@ -91,7 +89,7 @@ pub(crate) fn handle_get_hardware_threads(
     let additional_data = end - start;
     unsafe { encode(&*rack_threads, &mut &mut payload[start..end]) }
         .expect("Failed to encode hardware thread vector");
-    log::info!(
+    log::trace!(
         "Sending back {:?} bytes of data ({:?} hwthreads)",
         additional_data,
         rack_threads.len()
diff --git a/kernel/tests/s06_rackscale_tests.rs b/kernel/tests/s06_rackscale_tests.rs
@@ -79,6 +79,7 @@ fn s06_rackscale_phys_alloc_test() {
             .tap("tap2")
             .no_network_setup()
             .workers(2)
+            .nobuild()
             .use_vmxnet3();
 
         let mut output = String::new();
@@ -185,6 +186,7 @@ fn rackscale_fs_test(is_shmem: bool) {
             .tap("tap2")
             .no_network_setup()
             .workers(2)
+            .nobuild()
             .use_vmxnet3();
 
         let mut output = String::new();
@@ -264,6 +266,7 @@ fn s06_rackscale_shmem_fs_prop_test() {
             .tap("tap2")
             .no_network_setup()
             .workers(2)
+            .nobuild()
             .use_vmxnet3();
 
         let mut output = String::new();
@@ -349,6 +352,7 @@ fn s06_rackscale_shmem_multiinstance() {
                 .tap(&tap)
                 .no_network_setup()
                 .workers(clients + 1)
+                .nobuild()
                 .use_vmxnet3();
 
             let mut output = String::new();
@@ -434,7 +438,6 @@ fn rackscale_userspace_multicore_test(is_shmem: bool) {
             let mut dcm = spawn_dcm(1, timeout)?;
             let mut p = spawn_nrk(&cmdline_controller)?;
 
-            //output += p.exp_string("Finished sending requests!")?.as_str();
             output += p.exp_eof()?.as_str();
 
             dcm.send_control('c')?;
@@ -463,13 +466,14 @@ fn rackscale_userspace_multicore_test(is_shmem: bool) {
             .workers(2)
             .cores(client_num_cores)
             .memory(4096)
+            .nobuild()
             .use_vmxnet3();
 
         let mut output = String::new();
         let mut qemu_run = || -> Result<WaitStatus> {
             let mut p = spawn_nrk(&cmdline_client)?;
 
-            for _i in 0..client_num_cores {
+            for _i in 0..(client_num_cores - 1) {
                 let r = p.exp_regex(r#"init: Hello from core (\d+)"#)?;
                 output += r.0.as_str();
                 output += r.1.as_str();
@@ -529,7 +533,7 @@ fn s06_rackscale_shmem_request_core_remote_test() {
         let mut qemu_run = || -> Result<WaitStatus> {
             let mut dcm = spawn_dcm(1, timeout)?;
             let mut p = spawn_nrk(&cmdline_controller)?;
-
+            output += p.exp_string("handle_request_core_work()")?.as_str();
             output += p.exp_eof()?.as_str();
 
             dcm.send_control('c')?;
@@ -551,7 +555,7 @@ fn s06_rackscale_shmem_request_core_remote_test() {
             .tap("tap2")
             .no_network_setup()
             .workers(3)
-            .cores(2)
+            .cores(1)
             .memory(4096)
             .nobuild() // Use single build for all for consistency
             .use_vmxnet3();
@@ -562,6 +566,7 @@ fn s06_rackscale_shmem_request_core_remote_test() {
             output += p
                 .exp_string("Client finished processing core work request")?
                 .as_str();
+            output += p.exp_string("vibrio::upcalls: Got a new core")?.as_str();
             p.process.exit()
         };
 
@@ -580,19 +585,20 @@ fn s06_rackscale_shmem_request_core_remote_test() {
             .tap("tap4")
             .no_network_setup()
             .workers(3)
+            .cores(2)
+            .memory(4096)
             .nobuild() // Use build from previous client for consistency
             .use_vmxnet3();
 
         let mut output = String::new();
         let mut qemu_run = || -> Result<WaitStatus> {
             let mut p = spawn_nrk(&cmdline_client)?;
-            p.exp_string("request_core_remote_test OK")?;
-            output = p.exp_eof()?;
-            output += p.exp_eof()?.as_str();
+            output += p.exp_string("Spawned core on CoreToken")?.as_str();
+            output += p.exp_string("request_core_remote_test OK")?.as_str();
             p.process.exit()
         };
 
-        check_for_successful_exit(&cmdline_client, qemu_run(), output);
+        let _ignore = qemu_run();
     });
 
     controller.join().unwrap();
diff --git a/usr/init/src/init.rs b/usr/init/src/init.rs