Skip to content

Commit 8dfd125

Browse files
Pearl1594 (Pearl Dsilva)
authored and committed
FR563 - Capacity recalculation on migration - considering overcommit ratio (#21)
https://shapeblue.atlassian.net/browse/FRO-916 Co-authored-by: Pearl Dsilva <pearl.dsilva@shapeblue.com>
1 parent 85a8a80 commit 8dfd125

File tree

4 files changed

+151
-46
lines changed

4 files changed

+151
-46
lines changed

engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2391,6 +2391,7 @@ protected void migrate(final VMInstanceVO vm, final long srcHostId, final Deploy
23912391
}
23922392

23932393
final VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm, null, _offeringDao.findById(vm.getId(), vm.getServiceOfferingId()), null, null);
2394+
23942395
_networkMgr.prepareNicForMigration(profile, dest);
23952396
volumeMgr.prepareForMigration(profile, dest);
23962397
profile.setConfigDriveLabel(VmConfigDriveLabel.value());

server/src/com/cloud/api/query/dao/HostJoinDaoImpl.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,15 @@
3939

4040
import com.cloud.api.ApiDBUtils;
4141
import com.cloud.api.query.vo.HostJoinVO;
42+
import com.cloud.dc.ClusterDetailsDao;
4243
import com.cloud.gpu.HostGpuGroupsVO;
4344
import com.cloud.gpu.VGPUTypesVO;
4445
import com.cloud.host.Host;
4546
import com.cloud.host.HostStats;
4647
import com.cloud.host.dao.HostDetailsDao;
4748
import com.cloud.hypervisor.Hypervisor;
4849
import com.cloud.storage.StorageStats;
50+
import com.cloud.utils.NumbersUtil;
4951
import com.cloud.utils.db.GenericDaoBase;
5052
import com.cloud.utils.db.SearchBuilder;
5153
import com.cloud.utils.db.SearchCriteria;
@@ -64,6 +66,8 @@ public class HostJoinDaoImpl extends GenericDaoBase<HostJoinVO, Long> implements
6466
@Inject
6567
private HAConfigDao haConfigDao;
6668
@Inject
69+
protected ClusterDetailsDao _clusterDetailsDao;
70+
@Inject
6771
private OutOfBandManagementDao outOfBandManagementDao;
6872

6973
private final SearchBuilder<HostJoinVO> hostSearch;
@@ -183,7 +187,9 @@ public HostResponse newHostResponse(HostJoinVO host, EnumSet<HostDetails> detail
183187
Float cpuWithOverprovisioning = host.getCpus() * host.getSpeed() * ApiDBUtils.getCpuOverprovisioningFactor(host.getClusterId());
184188
String cpuAlloc = decimalFormat.format(((float)cpu / cpuWithOverprovisioning * 100f)) + "%";
185189
hostResponse.setCpuAllocated(cpuAlloc);
186-
hostResponse.setCpuWithOverprovisioning(cpuWithOverprovisioning.toString());
190+
final float clusterCpuOvercommitRatio = NumbersUtil.parseFloat(_clusterDetailsDao.findDetail(host.getClusterId(), "cpuOvercommitRatio").getValue(), ApiDBUtils.getCpuOverprovisioningFactor(host.getClusterId()));
191+
String cpuWithOverprovisioningStr = Float.toString(host.getCpus() * host.getSpeed() * clusterCpuOvercommitRatio);
192+
hostResponse.setCpuWithOverprovisioning(cpuWithOverprovisioningStr);
187193
}
188194

189195
if (details.contains(HostDetails.all) || details.contains(HostDetails.stats)) {
@@ -332,7 +338,9 @@ public HostForMigrationResponse newHostForMigrationResponse(HostJoinVO host, Enu
332338
Float cpuWithOverprovisioning = new Float(host.getCpus() * host.getSpeed() * ApiDBUtils.getCpuOverprovisioningFactor(host.getClusterId()));
333339
String cpuAlloc = decimalFormat.format(((float)cpu / cpuWithOverprovisioning * 100f)).toString() + "%";
334340
hostResponse.setCpuAllocated(cpuAlloc);
335-
hostResponse.setCpuWithOverprovisioning(cpuWithOverprovisioning.toString());
341+
final float clusterCpuOvercommitRatio = NumbersUtil.parseFloat(_clusterDetailsDao.findDetail(host.getClusterId(), "cpuOvercommitRatio").getValue(), ApiDBUtils.getCpuOverprovisioningFactor(host.getClusterId()));
342+
String cpuWithOverprovisioningStr = Float.toString(host.getCpus() * host.getSpeed() * clusterCpuOvercommitRatio);
343+
hostResponse.setCpuWithOverprovisioning(cpuWithOverprovisioningStr);
336344
}
337345

338346
if (details.contains(HostDetails.all) || details.contains(HostDetails.stats)) {

server/src/com/cloud/capacity/CapacityManagerImpl.java

Lines changed: 29 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@
4848
import com.cloud.configuration.Config;
4949
import com.cloud.dc.ClusterDetailsDao;
5050
import com.cloud.dc.ClusterDetailsVO;
51-
import com.cloud.dc.ClusterVO;
5251
import com.cloud.dc.dao.ClusterDao;
5352
import com.cloud.deploy.DeploymentClusterPlanner;
5453
import com.cloud.event.UsageEventVO;
@@ -264,6 +263,9 @@ public void doInTransactionWithoutResult(TransactionStatus status) {
264263
@Override
265264
public void allocateVmCapacity(VirtualMachine vm, final boolean fromLastHost) {
266265

266+
if (vm == null) {
267+
return;
268+
}
267269
final long hostId = vm.getHostId();
268270
HostVO host = _hostDao.findById(hostId);
269271
final long clusterId = host.getClusterId();
@@ -274,18 +276,22 @@ public void allocateVmCapacity(VirtualMachine vm, final boolean fromLastHost) {
274276

275277
CapacityVO capacityCpu = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_CPU);
276278
CapacityVO capacityMem = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_MEMORY);
277-
278279
if (capacityCpu == null || capacityMem == null || svo == null) {
279280
return;
280281
}
281282

282-
final int cpu = svo.getCpu() * svo.getSpeed();
283-
final long ram = svo.getRamSize() * 1024L * 1024L;
283+
final int cpu = (int) (svo.getCpu() * svo.getSpeed());
284+
final long ram = (long) (svo.getRamSize() * 1024L * 1024L);
284285

285286
try {
286287
final long capacityCpuId = capacityCpu.getId();
287288
final long capacityMemId = capacityMem.getId();
288289

290+
// Update the over commit ratio of the VM to reflect the same value as that of the cluster to which it has been migrated to / deployed on.
291+
VMInstanceVO vmInstanceVO = _vmDao.findById(vm.getId());
292+
_userVmDetailsDao.addDetail(vmInstanceVO.getId(), "cpuOvercommitRatio", String.valueOf(cpuOvercommitRatio), true);
293+
_userVmDetailsDao.addDetail(vmInstanceVO.getId(), "memoryOvercommitRatio", String.valueOf(memoryOvercommitRatio), true);
294+
289295
Transaction.execute(new TransactionCallbackNoReturn() {
290296
@Override
291297
public void doInTransactionWithoutResult(TransactionStatus status) {
@@ -591,35 +597,19 @@ public void updateCapacityForHost(final Host host) {
591597
s_logger.debug("Found " + vms.size() + " VMs on host " + host.getId());
592598
}
593599

594-
ClusterVO cluster = _clusterDao.findById(host.getClusterId());
595-
ClusterDetailsVO clusterDetailCpu = _clusterDetailsDao.findDetail(cluster.getId(), "cpuOvercommitRatio");
596-
ClusterDetailsVO clusterDetailRam = _clusterDetailsDao.findDetail(cluster.getId(), "memoryOvercommitRatio");
597-
Float clusterCpuOvercommitRatio = Float.parseFloat(clusterDetailCpu.getValue());
598-
Float clusterRamOvercommitRatio = Float.parseFloat(clusterDetailRam.getValue());
599600
for (VMInstanceVO vm : vms) {
600601
Float cpuOvercommitRatio = 1.0f;
601602
Float ramOvercommitRatio = 1.0f;
602603
Map<String, String> vmDetails = _userVmDetailsDao.listDetailsKeyPairs(vm.getId());
603-
String vmDetailCpu = vmDetails.get("cpuOvercommitRatio");
604-
String vmDetailRam = vmDetails.get("memoryOvercommitRatio");
605-
if (vmDetailCpu != null) {
606-
//if vmDetail_cpu is not null it means it is running in a overcommited cluster.
607-
cpuOvercommitRatio = Float.parseFloat(vmDetailCpu);
608-
ramOvercommitRatio = Float.parseFloat(vmDetailRam);
609-
}
610604
ServiceOffering so = offeringsMap.get(vm.getServiceOfferingId());
611605
if (so.isDynamic()) {
612606
usedMemory +=
613-
((Integer.parseInt(vmDetails.get(UsageEventVO.DynamicParameters.memory.name())) * 1024L * 1024L) / ramOvercommitRatio) *
614-
clusterRamOvercommitRatio;
607+
(Integer.parseInt(vmDetails.get(UsageEventVO.DynamicParameters.memory.name())) * 1024L * 1024L);
615608
usedCpu +=
616-
((Integer.parseInt(vmDetails.get(UsageEventVO.DynamicParameters.cpuNumber.name())) * Integer.parseInt(vmDetails.get(UsageEventVO.DynamicParameters.cpuSpeed.name()))) / cpuOvercommitRatio) *
617-
clusterCpuOvercommitRatio;
618-
usedCpuCore += Integer.parseInt(vmDetails.get(UsageEventVO.DynamicParameters.cpuNumber.name()));
609+
(Integer.parseInt(vmDetails.get(UsageEventVO.DynamicParameters.cpuNumber.name())) * Integer.parseInt(vmDetails.get(UsageEventVO.DynamicParameters.cpuSpeed.name())));
619610
} else {
620-
usedMemory += ((so.getRamSize() * 1024L * 1024L) / ramOvercommitRatio) * clusterRamOvercommitRatio;
621-
usedCpu += ((so.getCpu() * so.getSpeed()) / cpuOvercommitRatio) * clusterCpuOvercommitRatio;
622-
usedCpuCore += so.getCpu();
611+
usedMemory += (so.getRamSize() * 1024L * 1024L);
612+
usedCpu += (so.getCpu() * so.getSpeed());
623613
}
624614
}
625615

@@ -632,27 +622,16 @@ public void updateCapacityForHost(final Host host) {
632622
Float ramOvercommitRatio = 1.0f;
633623
long secondsSinceLastUpdate = (DateUtil.currentGMTTime().getTime() - vm.getUpdateTime().getTime()) / 1000;
634624
if (secondsSinceLastUpdate < _vmCapacityReleaseInterval) {
635-
UserVmDetailVO vmDetailCpu = _userVmDetailsDao.findDetail(vm.getId(), "cpuOvercommitRatio");
636-
UserVmDetailVO vmDetailRam = _userVmDetailsDao.findDetail(vm.getId(), "memoryOvercommitRatio");
637-
if (vmDetailCpu != null) {
638-
//if vmDetail_cpu is not null it means it is running in a overcommited cluster.
639-
cpuOvercommitRatio = Float.parseFloat(vmDetailCpu.getValue());
640-
ramOvercommitRatio = Float.parseFloat(vmDetailRam.getValue());
641-
}
642625
ServiceOffering so = offeringsMap.get(vm.getServiceOfferingId());
643626
Map<String, String> vmDetails = _userVmDetailsDao.listDetailsKeyPairs(vm.getId());
644627
if (so.isDynamic()) {
645-
reservedMemory +=
646-
((Integer.parseInt(vmDetails.get(UsageEventVO.DynamicParameters.memory.name())) * 1024L * 1024L) / ramOvercommitRatio) *
647-
clusterRamOvercommitRatio;
648-
reservedCpu +=
649-
((Integer.parseInt(vmDetails.get(UsageEventVO.DynamicParameters.cpuNumber.name())) * Integer.parseInt(vmDetails.get(UsageEventVO.DynamicParameters.cpuSpeed.name()))) / cpuOvercommitRatio) *
650-
clusterCpuOvercommitRatio;
651-
reservedCpuCore += Integer.parseInt(vmDetails.get(UsageEventVO.DynamicParameters.cpuNumber.name()));
628+
reservedMemory +=
629+
((Integer.parseInt(vmDetails.get(UsageEventVO.DynamicParameters.memory.name())) * 1024L * 1024L));
630+
reservedCpu +=
631+
(Integer.parseInt(vmDetails.get(UsageEventVO.DynamicParameters.cpuNumber.name())) * Integer.parseInt(vmDetails.get(UsageEventVO.DynamicParameters.cpuSpeed.name())));
652632
} else {
653-
reservedMemory += ((so.getRamSize() * 1024L * 1024L) / ramOvercommitRatio) * clusterRamOvercommitRatio;
654-
reservedCpu += (so.getCpu() * so.getSpeed() / cpuOvercommitRatio) * clusterCpuOvercommitRatio;
655-
reservedCpuCore += so.getCpu();
633+
reservedMemory += (so.getRamSize() * 1024L * 1024L);
634+
reservedCpu += (so.getCpu() * so.getSpeed());
656635
}
657636
} else {
658637
// signal if not done already, that the VM has been stopped for skip.counting.hours,
@@ -885,6 +864,15 @@ public boolean postStateTransitionEvent(StateMachine2.Transition<State, Event> t
885864
allocateVmCapacity(vm, fromLastHost);
886865
}
887866

867+
if (oldState == State.Migrating && newState == State.Running) {
868+
boolean fromLastHost = false;
869+
if (vm.getHostId().equals(vm.getLastHostId())) {
870+
s_logger.debug("VM starting again on the last host it was stopped on");
871+
fromLastHost = true;
872+
}
873+
allocateVmCapacity(vm, fromLastHost);
874+
}
875+
888876
if (newState == State.Stopped) {
889877
if (vm.getType() == VirtualMachine.Type.User) {
890878

test/integration/smoke/test_vm_life_cycle.py

Lines changed: 111 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
""" BVT tests for Virtual Machine Life Cycle
1818
"""
1919
# Import Local Modules
20-
from marvin.cloudstackTestCase import cloudstackTestCase
20+
from marvin.cloudstackTestCase import cloudstackTestCase, unittest
2121
from marvin.cloudstackAPI import (recoverVirtualMachine,
2222
destroyVirtualMachine,
2323
attachIso,
@@ -40,8 +40,10 @@
4040
DiskOffering)
4141
from marvin.lib.common import (get_domain,
4242
get_zone,
43-
get_template,
44-
list_hosts)
43+
list_clusters,
44+
list_hosts,
45+
list_storage_pools,
46+
get_template)
4547
from marvin.codes import FAILED, PASS
4648
from nose.plugins.attrib import attr
4749
# Import System modules
@@ -786,6 +788,112 @@ def test_10_attachAndDetach_iso(self):
786788
)
787789
return
788790

791+
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg", "security"], required_hardware="false")
def test_11_livemigrate_VM_across_cluster_vmware(self):
    """Live-migrate a VM between two VMware clusters and check its over-commit details.

    Steps:
      1. Skip unless at least two VMware clusters exist, at least two of them
         have a host and at least two have a storage pool, and the first two
         clusters differ in CPU or memory over-commit ratio.
      2. Deploy a VM on a host of the first cluster.
      3. Migrate it to a host of the second cluster.
      4. Read the VM's 'cpuOvercommitRatio' and 'memoryOvercommitRatio' rows
         from user_vm_details and assert they equal the second cluster's ratios.
    """
    try:
        self.list_vmware_clusters = list_clusters(self.apiclient, hypervisor="vmware")
    except Exception as e:
        raise unittest.SkipTest(e)

    # NOTE(review): the marvin list helpers appear to return None (not [])
    # for an empty result - normalise so len()/indexing cannot blow up.
    clusters = self.list_vmware_clusters or []
    if len(clusters) < 2:
        self.skipTest("The setup doesn't have more than one cluster, cannot execute live migration across cluster")

    count_host = 0
    count_pool = 0
    for cluster in clusters:
        if len(list_hosts(self.apiclient, clusterid=cluster.id) or []) >= 1:
            count_host += 1
        if len(list_storage_pools(self.apiclient, clusterid=cluster.id) or []) >= 1:
            count_pool += 1
    if count_host < 2 or count_pool < 2:
        # Implicit concatenation instead of a backslash continuation, so the
        # message does not pick up the source file's indentation.
        self.skipTest("The setup doesn't have enough pools or enough hosts. "
                      "To run these tests the setup must have atleast 2 clusters, "
                      "each having min 1 host and 1 storage pools")

    source_cluster = clusters[0]
    dest_cluster = clusters[1]

    # get each cluster's cpu and memory over commit ratios
    cluster_1_cpu_oc = source_cluster.cpuovercommitratio
    cluster_1_ram_oc = source_cluster.memoryovercommitratio
    cluster_2_cpu_oc = dest_cluster.cpuovercommitratio
    cluster_2_ram_oc = dest_cluster.memoryovercommitratio

    if cluster_1_cpu_oc == cluster_2_cpu_oc and cluster_1_ram_oc == cluster_2_ram_oc:
        self.skipTest("The 2 clusters have same memory and cpu over commit ratios, skipping test")

    # clusterid must be the cluster's id, not the cluster object itself
    hosts_c1 = Host.list(
        self.apiclient,
        zoneid=self.zone.id,
        type='Routing',
        clusterid=source_cluster.id
    )
    hosts_c2 = Host.list(
        self.apiclient,
        zoneid=self.zone.id,
        type='Routing',
        clusterid=dest_cluster.id
    )
    if not hosts_c1 or not hosts_c2:
        self.skipTest("The first two clusters do not both have a routing host, cannot execute live migration across cluster")

    target_host = hosts_c1[0]
    migrate_host = hosts_c2[0]

    self.vm_to_migrate = VirtualMachine.create(
        self.apiclient,
        self.services["small"],
        accountid=self.account.name,
        domainid=self.account.domainid,
        serviceofferingid=self.small_offering.id,
        mode=self.services["mode"],
        hostid=target_host.id
    )
    # Register for cleanup right away so the VM is removed even if a later step fails.
    self.cleanup.append(self.vm_to_migrate)

    self.debug("Migrating VM-ID: %s to Host: %s" % (
        self.vm_to_migrate.id,
        migrate_host.id
    ))

    try:
        self.vm_to_migrate.migrate(self.apiclient, migrate_host.id)
    except Exception as e:
        self.fail("Failed to migrate instance: %s" % e)

    def fetch_vm_detail(detail_name):
        # NOTE(review): assumes the API-facing VM id is the UUID while
        # user_vm_details.vm_id holds the internal numeric id, hence the
        # lookup through vm_instance - confirm against the schema.
        qresultset = self.dbclient.execute(
            "select value from user_vm_details "
            "where vm_id = (select id from vm_instance where uuid = '%s') "
            "and name = '%s';"
            % (self.vm_to_migrate.id, detail_name)
        )
        self.assertEqual(
            isinstance(qresultset, list),
            True,
            "Check DB query result set for valid data"
        )
        self.assertNotEqual(
            len(qresultset),
            0,
            "Check DB Query result set"
        )
        self.debug("Query result: %s" % str(qresultset))
        return qresultset[0][0]

    # verify if the overcommit ratios are modified; compare numerically so
    # that e.g. "2" and "2.0" are treated as the same ratio
    self.assertEqual(
        float(fetch_vm_detail("cpuOvercommitRatio")),
        float(cluster_2_cpu_oc),
        "VM's cpu over-commit ratio not updated on migration"
    )
    self.assertEqual(
        float(fetch_vm_detail("memoryOvercommitRatio")),
        float(cluster_2_ram_oc),
        "VM's memory over-commit ratio not updated on migration"
    )
789897

790898
class TestSecuredVmMigration(cloudstackTestCase):
791899

0 commit comments

Comments
 (0)