Skip to content

Commit e2f97a3

Browse files
committed
Fix: return correct partitions with get_node() (#133)
1 parent cd61c1f commit e2f97a3

File tree

2 files changed: +193 additions, -169 deletions

pyslurm/pyslurm.pyx

Lines changed: 189 additions & 168 deletions
Original file line number | Diff line number | Diff line change
@@ -3286,8 +3286,7 @@ cdef class node:
32863286
uint32_t i
32873287
list all_nodes
32883288

3289-
rc = slurm.slurm_load_node(<time_t> NULL, &self._Node_ptr,
3290-
self._ShowFlags)
3289+
rc = slurm.slurm_load_node(<time_t> NULL, &self._Node_ptr, self._ShowFlags)
32913290

32923291
if rc == slurm.SLURM_SUCCESS:
32933292
all_nodes = []
@@ -3340,189 +3339,211 @@ cdef class node:
33403339
uint16_t err_cpus
33413340
uint16_t alloc_cpus
33423341
uint32_t i
3342+
uint32_t j
33433343
uint64_t alloc_mem
33443344
uint32_t node_state
33453345
slurm.node_info_t *record
33463346
dict Host_dict
3347+
char time_str[32]
3348+
char tmp_str[128]
3349+
int last_inx = 0
33473350

3348-
if nodeID is None:
3349-
rc = slurm.slurm_load_node(<time_t> NULL, &self._Node_ptr,
3350-
self._ShowFlags)
3351-
else:
3352-
b_nodeID = nodeID.encode("UTF-8")
3353-
rc = slurm.slurm_load_node_single(&self._Node_ptr, b_nodeID, self._ShowFlags)
3351+
rc = slurm.slurm_load_node(<time_t>NULL, &self._Node_ptr, self._ShowFlags)
33543352

3355-
if rc == slurm.SLURM_SUCCESS:
3356-
self._NodeDict = {}
3357-
self._lastUpdate = self._Node_ptr.last_update
3358-
node_scaling = self._Node_ptr.node_scaling
3359-
last_update = self._Node_ptr.last_update
3360-
3361-
rc_part = slurm.slurm_load_partitions(<time_t> NULL, &self._Part_ptr,
3362-
slurm.SHOW_ALL)
3363-
3364-
if rc_part == slurm.SLURM_SUCCESS:
3365-
slurm.slurm_populate_node_partitions(self._Node_ptr, self._Part_ptr)
3366-
3367-
for i in range(self._Node_ptr.record_count):
3368-
record = &self._Node_ptr.node_array[i]
3369-
Host_dict = {}
3370-
cloud_str = ""
3371-
comp_str = ""
3372-
drain_str = ""
3373-
power_str = ""
3374-
err_cpus = 0
3375-
alloc_cpus = 0
3376-
cpus_per_node = 1
3377-
3378-
if record.name is NULL:
3353+
if rc != slurm.SLURM_SUCCESS:
3354+
apiError = slurm.slurm_get_errno()
3355+
raise ValueError(slurm.stringOrNone(slurm.slurm_strerror(apiError), ''), apiError)
3356+
3357+
rc_part = slurm.slurm_load_partitions(<time_t>NULL, &self._Part_ptr, slurm.SHOW_ALL)
3358+
3359+
if rc_part != slurm.SLURM_SUCCESS:
3360+
self._Part_ptr = NULL
3361+
slurm.slurm_perror("slurm_load_partitions error")
3362+
3363+
slurm.slurm_populate_node_partitions(self._Node_ptr, self._Part_ptr)
3364+
3365+
self._lastUpdate = self._Node_ptr.last_update
3366+
self._NodeDict = {}
3367+
3368+
for j in range(self._Node_ptr.record_count):
3369+
if nodeID:
3370+
i = (j + last_inx) % self._Node_ptr.record_count
3371+
if self._Node_ptr.node_array[i].name == NULL or (
3372+
nodeID.encode("UTF-8") != self._Node_ptr.node_array[i].name):
33793373
continue
3374+
elif self._Node_ptr.node_array[j].name == NULL:
3375+
continue
3376+
else:
3377+
i = j
3378+
3379+
record = &self._Node_ptr.node_array[i]
3380+
3381+
Host_dict = {}
3382+
cloud_str = ""
3383+
comp_str = ""
3384+
drain_str = ""
3385+
power_str = ""
3386+
err_cpus = 0
3387+
alloc_cpus = 0
3388+
3389+
if record.name is NULL:
3390+
continue
3391+
3392+
total_used = record.cpus
3393+
3394+
Host_dict[u'arch'] = slurm.stringOrNone(record.arch, '')
3395+
Host_dict[u'boards'] = record.boards
3396+
Host_dict[u'boot_time'] = record.boot_time
3397+
Host_dict[u'cores'] = record.cores
3398+
Host_dict[u'core_spec_cnt'] = record.core_spec_cnt
3399+
Host_dict[u'cores_per_socket'] = record.cores
3400+
# TODO: cpu_alloc, cpu_tot
3401+
Host_dict[u'cpus'] = record.cpus
3402+
3403+
# FIXME
3404+
#if record.cpu_bind:
3405+
# slurm.slurm_sprint_cpu_bind_type(tmp_str, record.cpu_bind)
3406+
# Host_dict[u'cpu_bind'] = slurm.stringOrNone(tmp_str, '')
3407+
3408+
Host_dict[u'cpu_load'] = slurm.int32orNone(record.cpu_load)
3409+
Host_dict[u'cpu_spec_list'] = slurm.listOrNone(record.cpu_spec_list, '')
3410+
Host_dict[u'features'] = slurm.listOrNone(record.features, '')
3411+
Host_dict[u'features_active'] = slurm.listOrNone(record.features_act, '')
3412+
Host_dict[u'free_mem'] = slurm.int64orNone(record.free_mem)
3413+
Host_dict[u'gres'] = slurm.listOrNone(record.gres, ',')
3414+
Host_dict[u'gres_drain'] = slurm.listOrNone(record.gres_drain, '')
3415+
Host_dict[u'gres_used'] = self.parse_gres(
3416+
slurm.stringOrNone(record.gres_used, '')
3417+
)
33803418

3381-
total_used = record.cpus
3382-
if (node_scaling):
3383-
cpus_per_node = total_used / node_scaling
3384-
3385-
Host_dict[u'arch'] = slurm.stringOrNone(record.arch, '')
3386-
Host_dict[u'boards'] = record.boards
3387-
Host_dict[u'boot_time'] = record.boot_time
3388-
Host_dict[u'cores'] = record.cores
3389-
Host_dict[u'core_spec_cnt'] = record.core_spec_cnt
3390-
Host_dict[u'cpus'] = record.cpus
3391-
Host_dict[u'cpu_load'] = slurm.int32orNone(record.cpu_load)
3392-
Host_dict[u'cpu_spec_list'] = slurm.listOrNone(record.cpu_spec_list, '')
3393-
Host_dict[u'features'] = slurm.listOrNone(record.features, '')
3394-
Host_dict[u'features_active'] = slurm.listOrNone(record.features_act, '')
3395-
Host_dict[u'free_mem'] = slurm.int64orNone(record.free_mem)
3396-
Host_dict[u'gres'] = slurm.listOrNone(record.gres, ',')
3397-
Host_dict[u'gres_drain'] = slurm.listOrNone(record.gres_drain, '')
3398-
Host_dict[u'gres_used'] = self.parse_gres(
3399-
slurm.stringOrNone(record.gres_used, '')
3400-
)
3419+
if record.mcs_label == NULL:
3420+
Host_dict[u'mcs_label'] = None
3421+
else:
3422+
Host_dict[u'mcs_label'] = record.mcs_label
34013423

3402-
if record.mcs_label == NULL:
3403-
Host_dict[u'mcs_label'] = None
3404-
else:
3405-
Host_dict[u'mcs_label'] = record.mcs_label
3424+
Host_dict[u'mem_spec_limit'] = record.mem_spec_limit
3425+
Host_dict[u'name'] = slurm.stringOrNone(record.name, '')
34063426

3407-
Host_dict[u'mem_spec_limit'] = record.mem_spec_limit
3408-
Host_dict[u'name'] = slurm.stringOrNone(record.name, '')
3409-
Host_dict[u'node_addr'] = slurm.stringOrNone(record.node_addr, '')
3410-
Host_dict[u'node_hostname'] = slurm.stringOrNone(record.node_hostname, '')
3411-
Host_dict[u'os'] = slurm.stringOrNone(record.os, '')
3427+
# TODO: next_state
3428+
Host_dict[u'node_addr'] = slurm.stringOrNone(record.node_addr, '')
3429+
Host_dict[u'node_hostname'] = slurm.stringOrNone(record.node_hostname, '')
3430+
Host_dict[u'os'] = slurm.stringOrNone(record.os, '')
34123431

3413-
if record.owner == slurm.NO_VAL:
3414-
Host_dict[u'owner'] = None
3415-
else:
3416-
Host_dict[u'owner'] = record.owner
3417-
3418-
Host_dict[u'partitions'] = slurm.listOrNone(record.partitions, ',')
3419-
Host_dict[u'real_memory'] = record.real_memory
3420-
Host_dict[u'slurmd_start_time'] = record.slurmd_start_time
3421-
Host_dict[u'sockets'] = record.sockets
3422-
Host_dict[u'threads'] = record.threads
3423-
Host_dict[u'tmp_disk'] = record.tmp_disk
3424-
Host_dict[u'weight'] = record.weight
3425-
Host_dict[u'tres_fmt_str'] = slurm.stringOrNone(record.tres_fmt_str, '')
3426-
Host_dict[u'version'] = slurm.stringOrNone(record.version, '')
3427-
3428-
Host_dict[u'reason'] = slurm.stringOrNone(record.reason, '')
3429-
if record.reason_time == 0:
3430-
Host_dict[u'reason_time'] = None
3431-
else:
3432-
Host_dict[u'reason_time'] = record.reason_time
3432+
if record.owner == slurm.NO_VAL:
3433+
Host_dict[u'owner'] = None
3434+
else:
3435+
Host_dict[u'owner'] = record.owner
3436+
3437+
Host_dict[u'partitions'] = slurm.listOrNone(record.partitions, ',')
3438+
Host_dict[u'real_memory'] = record.real_memory
3439+
Host_dict[u'slurmd_start_time'] = record.slurmd_start_time
3440+
Host_dict[u'sockets'] = record.sockets
3441+
Host_dict[u'threads'] = record.threads
3442+
Host_dict[u'tmp_disk'] = record.tmp_disk
3443+
Host_dict[u'weight'] = record.weight
3444+
Host_dict[u'tres_fmt_str'] = slurm.stringOrNone(record.tres_fmt_str, '')
3445+
Host_dict[u'version'] = slurm.stringOrNone(record.version, '')
3446+
3447+
Host_dict[u'reason'] = slurm.stringOrNone(record.reason, '')
3448+
if record.reason_time == 0:
3449+
Host_dict[u'reason_time'] = None
3450+
else:
3451+
Host_dict[u'reason_time'] = record.reason_time
34333452

3434-
if record.reason_uid == slurm.NO_VAL:
3435-
Host_dict[u'reason_uid'] = None
3436-
else:
3437-
Host_dict[u'reason_uid'] = record.reason_uid
3453+
if record.reason_uid == slurm.NO_VAL:
3454+
Host_dict[u'reason_uid'] = None
3455+
else:
3456+
Host_dict[u'reason_uid'] = record.reason_uid
34383457

3439-
# Power Managment
3440-
Host_dict[u'power_mgmt'] = {}
3441-
if (not record.power or (record.power.cap_watts == slurm.NO_VAL)):
3442-
Host_dict[u'power_mgmt'][u"cap_watts"] = None
3443-
else:
3444-
Host_dict[u'power_mgmt'][u"cap_watts"] = record.power.cap_watts
3445-
3446-
# Energy statistics
3447-
Host_dict[u'energy'] = {}
3448-
if (not record.energy or record.energy.current_watts == slurm.NO_VAL):
3449-
Host_dict[u'energy'][u'current_watts'] = 0
3450-
Host_dict[u'energy'][u'base_consumed_energy'] = 0
3451-
Host_dict[u'energy'][u'consumed_energy'] = 0
3452-
else:
3453-
Host_dict[u'energy'][u'current_watts'] = record.energy.current_watts
3454-
Host_dict[u'energy'][u'base_consumed_energy'] = int(record.energy.base_consumed_energy)
3455-
Host_dict[u'energy'][u'consumed_energy'] = int(record.energy.consumed_energy)
3456-
3457-
Host_dict[u'energy'][u'base_watts'] = record.energy.base_watts
3458-
Host_dict[u'energy'][u'previous_consumed_energy'] = int(record.energy.previous_consumed_energy)
3459-
3460-
node_state = record.node_state
3461-
if (node_state & NODE_STATE_CLOUD):
3462-
node_state &= (~NODE_STATE_CLOUD)
3463-
cloud_str = "+CLOUD"
3464-
3465-
if (node_state & NODE_STATE_COMPLETING):
3466-
node_state &= (~NODE_STATE_COMPLETING)
3467-
comp_str = "+COMPLETING"
3468-
3469-
if (node_state & NODE_STATE_DRAIN):
3470-
node_state &= (~NODE_STATE_DRAIN)
3471-
drain_str = "+DRAIN"
3472-
3473-
if (node_state & NODE_STATE_FAIL):
3474-
node_state &= (~NODE_STATE_FAIL)
3475-
drain_str = "+FAIL"
3476-
3477-
if (node_state & NODE_STATE_POWER_SAVE):
3478-
node_state &= (~NODE_STATE_POWER_SAVE)
3479-
power_str = "+POWER"
3480-
3481-
slurm.slurm_get_select_nodeinfo(record.select_nodeinfo,
3482-
SELECT_NODEDATA_SUBCNT,
3483-
NODE_STATE_ALLOCATED,
3484-
&alloc_cpus)
3485-
3486-
Host_dict[u'alloc_cpus'] = alloc_cpus
3487-
total_used -= alloc_cpus
3488-
3489-
slurm.slurm_get_select_nodeinfo(record.select_nodeinfo,
3490-
SELECT_NODEDATA_SUBCNT,
3491-
NODE_STATE_ERROR, &err_cpus)
3492-
3493-
Host_dict[u'err_cpus'] = err_cpus
3494-
total_used -= err_cpus
3495-
3496-
if (alloc_cpus and err_cpus) or (total_used and
3497-
(total_used != record.cpus)):
3498-
node_state &= NODE_STATE_FLAGS
3499-
node_state |= NODE_STATE_MIXED
3500-
3501-
Host_dict[u'state'] = (
3502-
slurm.stringOrNone(slurm.slurm_node_state_string(node_state), '') +
3503-
slurm.stringOrNone(cloud_str, '') +
3504-
slurm.stringOrNone(comp_str, '') +
3505-
slurm.stringOrNone(drain_str, '') +
3506-
slurm.stringOrNone(power_str, '')
3507-
)
3458+
# Power Management
3459+
Host_dict[u'power_mgmt'] = {}
3460+
if (not record.power or (record.power.cap_watts == slurm.NO_VAL)):
3461+
Host_dict[u'power_mgmt'][u"cap_watts"] = None
3462+
else:
3463+
Host_dict[u'power_mgmt'][u"cap_watts"] = record.power.cap_watts
3464+
3465+
# Energy statistics
3466+
Host_dict[u'energy'] = {}
3467+
if (not record.energy or record.energy.current_watts == slurm.NO_VAL):
3468+
Host_dict[u'energy'][u'current_watts'] = 0
3469+
Host_dict[u'energy'][u'base_consumed_energy'] = 0
3470+
Host_dict[u'energy'][u'consumed_energy'] = 0
3471+
else:
3472+
Host_dict[u'energy'][u'current_watts'] = record.energy.current_watts
3473+
Host_dict[u'energy'][u'base_consumed_energy'] = int(record.energy.base_consumed_energy)
3474+
Host_dict[u'energy'][u'consumed_energy'] = int(record.energy.consumed_energy)
3475+
3476+
Host_dict[u'energy'][u'base_watts'] = record.energy.base_watts
3477+
Host_dict[u'energy'][u'previous_consumed_energy'] = int(record.energy.previous_consumed_energy)
3478+
3479+
node_state = record.node_state
3480+
if (node_state & NODE_STATE_CLOUD):
3481+
node_state &= (~NODE_STATE_CLOUD)
3482+
cloud_str = "+CLOUD"
3483+
3484+
if (node_state & NODE_STATE_COMPLETING):
3485+
node_state &= (~NODE_STATE_COMPLETING)
3486+
comp_str = "+COMPLETING"
3487+
3488+
if (node_state & NODE_STATE_DRAIN):
3489+
node_state &= (~NODE_STATE_DRAIN)
3490+
drain_str = "+DRAIN"
3491+
3492+
if (node_state & NODE_STATE_FAIL):
3493+
node_state &= (~NODE_STATE_FAIL)
3494+
drain_str = "+FAIL"
3495+
3496+
if (node_state & NODE_STATE_POWER_SAVE):
3497+
node_state &= (~NODE_STATE_POWER_SAVE)
3498+
power_str = "+POWER"
3499+
3500+
slurm.slurm_get_select_nodeinfo(record.select_nodeinfo,
3501+
SELECT_NODEDATA_SUBCNT,
3502+
NODE_STATE_ALLOCATED,
3503+
&alloc_cpus)
3504+
3505+
Host_dict[u'alloc_cpus'] = alloc_cpus
3506+
total_used -= alloc_cpus
3507+
3508+
slurm.slurm_get_select_nodeinfo(record.select_nodeinfo,
3509+
SELECT_NODEDATA_SUBCNT,
3510+
NODE_STATE_ERROR, &err_cpus)
3511+
3512+
Host_dict[u'err_cpus'] = err_cpus
3513+
total_used -= err_cpus
3514+
3515+
if (alloc_cpus and err_cpus) or (total_used and
3516+
(total_used != record.cpus)):
3517+
node_state &= NODE_STATE_FLAGS
3518+
node_state |= NODE_STATE_MIXED
3519+
3520+
Host_dict[u'state'] = (
3521+
slurm.stringOrNone(slurm.slurm_node_state_string(node_state), '') +
3522+
slurm.stringOrNone(cloud_str, '') +
3523+
slurm.stringOrNone(comp_str, '') +
3524+
slurm.stringOrNone(drain_str, '') +
3525+
slurm.stringOrNone(power_str, '')
3526+
)
35083527

3509-
slurm.slurm_get_select_nodeinfo(record.select_nodeinfo,
3510-
SELECT_NODEDATA_MEM_ALLOC,
3511-
NODE_STATE_ALLOCATED, &alloc_mem)
3528+
slurm.slurm_get_select_nodeinfo(record.select_nodeinfo,
3529+
SELECT_NODEDATA_MEM_ALLOC,
3530+
NODE_STATE_ALLOCATED, &alloc_mem)
35123531

3513-
Host_dict[u'alloc_mem'] = alloc_mem
3532+
Host_dict[u'alloc_mem'] = alloc_mem
35143533

3515-
b_name = slurm.stringOrNone(record.name, '')
3516-
self._NodeDict[b_name] = Host_dict
3534+
b_name = slurm.stringOrNone(record.name, '')
3535+
self._NodeDict[b_name] = Host_dict
3536+
3537+
if nodeID:
3538+
last_inx = i
3539+
break
3540+
3541+
slurm.slurm_free_node_info_msg(self._Node_ptr)
3542+
slurm.slurm_free_partition_info_msg(self._Part_ptr)
3543+
self._Node_ptr = NULL
3544+
self._Part_ptr = NULL
3545+
return self._NodeDict
35173546

3518-
slurm.slurm_free_node_info_msg(self._Node_ptr)
3519-
slurm.slurm_free_partition_info_msg(self._Part_ptr)
3520-
self._Node_ptr = NULL
3521-
self._Part_ptr = NULL
3522-
return self._NodeDict
3523-
else:
3524-
apiError = slurm.slurm_get_errno()
3525-
raise ValueError(slurm.stringOrNone(slurm.slurm_strerror(apiError), ''), apiError)
35263547

35273548
cpdef update(self, dict node_dict):
35283549
u"""Update slurm node information.

0 commit comments

Comments
 (0)