Skip to content

Latest commit

 

History

History
220 lines (218 loc) · 8.91 KB

CostManagement.md

File metadata and controls

220 lines (218 loc) · 8.91 KB

Kusto query

// Analysis window: the six hours leading up to the moment the query runs.
let queryEndTime = now();
let queryStartTime = ago(6h);
// Divisor applied to each relative request/limit difference before the largest one is picked.
let tollerance = 1;
// Maps the sentinel value -1 to a real null; every other input is returned unchanged.
let tonullneg1 = (arg0: real) {
    case(arg0 == -1.0, real(null), arg0)
};
// One row per Computer carrying an allocatable-memory and an allocatable-CPU sample
// taken from Perf records of the last 10 minutes. Computers that reported only one
// of the two counters in that window do not survive the join.
let NODE_LIMITS = Perf
    | where TimeGenerated > ago(10m)
    | where CounterName in ("memoryAllocatableBytes", "cpuAllocatableNanoCores")
    | as nodeCounters
    // Left side: an arbitrary memory sample per node.
    | where CounterName == "memoryAllocatableBytes"
    | summarize memoryAllocatableBytes = any(CounterValue) by Computer
    // Right side: an arbitrary CPU sample per node, read from the same filtered rows.
    | join kind = innerunique (
        nodeCounters
        | where CounterName == "cpuAllocatableNanoCores"
        | summarize cpuAllocatableNanoCores = any(CounterValue) by Computer
        )
        on Computer
    | project memoryAllocatableBytes, cpuAllocatableNanoCores, Computer;
//
// Per-container statistics for every K8SContainer counter of interest, cached with
// materialize() so that each later reference reuses the same result.
//
// Output: one row per (ClusterName, Namespace, ControllerName, ControllerKind,
// ContainerName, CounterName) with the sample count, max / p90 / p99 of CounterValue,
// the first and last sample time, and four "has*" columns that hold -1 when any sample
// of that counter equalled the node's allocatable capacity.
let perfdata = materialize(Perf
    | project
        TimeGenerated,
        ObjectName,
        InstanceName,
        _ResourceId,
        CounterName,
        CounterValue,
        Computer
    | where TimeGenerated between (queryStartTime .. queryEndTime)
    | where ObjectName == 'K8SContainer'
    // Limit/request counters are only read from the last hour of the window;
    // usage counters are read from the whole window.
    | where (CounterName in ('memoryLimitBytes', 'memoryRequestBytes', 'cpuLimitNanoCores', 'cpuRequestNanoCores')
            and TimeGenerated > (queryEndTime - 1h))
        or CounterName in ('memoryRssBytes', 'cpuUsageNanoCores')
    // InstanceName is "<prefix>/<cluster>/<podUid>/<container>". Split it once and pick the
    // index of the cluster segment from the shape of the prefix: 8 for a full managed-cluster
    // resource id, 4 for any other '/subscriptions/' id, 0 when there is no prefix.
    | extend instanceParts = split(InstanceName, '/')
    | extend keyOffset = case(
        InstanceName contains '/providers/microsoft.containerservice/managedclusters', 8,
        InstanceName contains '/subscriptions/', 4,
        0)
    | extend
        ClusterName = tostring(instanceParts[keyOffset]),
        PodUid = tostring(instanceParts[keyOffset + 1]),
        ContainerName = tostring(instanceParts[keyOffset + 2])
    | project-away instanceParts, keyOffset
    | where isnotempty(PodUid)
    // Attach the node's allocatable memory / CPU to every sample.
    | join kind = inner NODE_LIMITS on Computer
    // Attach pod metadata from the inventory of the last 10 minutes of the window;
    // samples whose PodUid is absent from that inventory are dropped by the inner join.
    | join kind = inner (
        KubePodInventory
        | where TimeGenerated between ((queryEndTime - 10m) .. queryEndTime)
        | summarize any(Name, Namespace, ControllerName, ControllerKind, ClusterId) by PodUid
        | project
            PodName = any_Name,
            ControllerNameMaybe = any_ControllerName,
            ControllerKindMaybe = any_ControllerKind,
            Namespace = any_Namespace,
            PodUid
        )
        on PodUid
    | project-away PodUid1
    // Not all pods have controllers: substitute placeholder values so they still group.
    // isempty() is the right test here; isnull() is always false for string columns.
    | extend ControllerName = iff(isempty(ControllerNameMaybe), strcat(PodName, " (dummy value)"), ControllerNameMaybe)
    | extend ControllerKind = iff(isempty(ControllerKindMaybe), "single pod (dummy value)", ControllerKindMaybe)
    | project-away ControllerKindMaybe, ControllerNameMaybe
    // A sample equal to the node's allocatable capacity is flagged with -1 (read downstream
    // as "no limit / no request set"). All four columns are computed for every row; the
    // consumer picks the one matching the row's CounterName.
    | extend hasMemLimit = iff(CounterValue == memoryAllocatableBytes, -1., CounterValue) // no limit check
    | extend hasMemRequest = iff(CounterValue == memoryAllocatableBytes, -1., CounterValue) // no request check
    | extend hasCpuRequest = iff(CounterValue == cpuAllocatableNanoCores, -1., CounterValue) // no request check
    | extend hasCpuLimit = iff(CounterValue == cpuAllocatableNanoCores, -1., CounterValue) // no limit check
    // min() lets a single -1 flag win over any real sample value in the group.
    | summarize measurement_counts = count(), _max = max(CounterValue), p90=percentile(CounterValue, 90), p99=percentile(CounterValue, 99), measurementEndTime = max(TimeGenerated), measurementStartTime = min(TimeGenerated),
        hasMemLimit=min(hasMemLimit), hasMemRequest=min(hasMemRequest), hasCpuRequest=min(hasCpuRequest), hasCpuLimit=min(hasCpuLimit)
        by
        ClusterName,
        Namespace,
        ControllerName,
        ControllerKind,
        ContainerName,
        CounterName
    );
//
// Rows of the cached perfdata result that belong to a single counter.
let counterRows = (counter: string) {
    perfdata
    | where CounterName == counter
};
// Pivot the per-counter rows into one wide row per container. Every join matches on the
// same five identifying columns, and the right-hand duplicates of those columns (suffix 1)
// are removed immediately afterwards.
//
// Memory limit
counterRows('memoryLimitBytes')
| project
    memLimitCount = measurement_counts,
    memLimitVal = tonullneg1(hasMemLimit),
    ClusterName, Namespace, ControllerName, ControllerKind, ContainerName
// Memory request
| join kind = fullouter (
    counterRows('memoryRequestBytes')
    | project
        memRequestCount = measurement_counts,
        memRequestVal = tonullneg1(hasMemRequest),
        ClusterName, Namespace, ControllerName, ControllerKind, ContainerName
    )
    on ClusterName, Namespace, ControllerName, ControllerKind, ContainerName
| project-away ClusterName1, Namespace1, ControllerName1, ControllerKind1, ContainerName1
// Memory usage (RSS) statistics, plus the time span the samples cover
| join kind = fullouter (
    counterRows('memoryRssBytes')
    | project
        mem_measurement_counts = measurement_counts,
        mem_max = _max,
        mem_p90 = p90,
        mem_p99 = p99,
        measurementEndTime = measurementEndTime,
        measurementStartTime = measurementStartTime,
        ClusterName, Namespace, ControllerName, ControllerKind, ContainerName
    )
    on ClusterName, Namespace, ControllerName, ControllerKind, ContainerName
| project-away ClusterName1, Namespace1, ControllerName1, ControllerKind1, ContainerName1
// CPU request
| join kind = fullouter (
    counterRows('cpuRequestNanoCores')
    | project
        cpuRequestCount = measurement_counts,
        cpuRequestVal = tonullneg1(hasCpuRequest),
        ClusterName, Namespace, ControllerName, ControllerKind, ContainerName
    )
    on ClusterName, Namespace, ControllerName, ControllerKind, ContainerName
| project-away ClusterName1, Namespace1, ControllerName1, ControllerKind1, ContainerName1
// CPU limit
| join kind = fullouter (
    counterRows('cpuLimitNanoCores')
    | project
        cpuLimitCount = measurement_counts,
        cpuLimitVal = tonullneg1(hasCpuLimit),
        ClusterName, Namespace, ControllerName, ControllerKind, ContainerName
    )
    on ClusterName, Namespace, ControllerName, ControllerKind, ContainerName
| project-away ClusterName1, Namespace1, ControllerName1, ControllerKind1, ContainerName1
// CPU usage statistics
| join kind = fullouter (
    counterRows('cpuUsageNanoCores')
    | project
        cpu_measurement_counts = measurement_counts,
        cpu_max = _max,
        cpu_p90 = p90,
        cpu_p99 = p99,
        ClusterName, Namespace, ControllerName, ControllerKind, ContainerName
    )
    on ClusterName, Namespace, ControllerName, ControllerKind, ContainerName
| project-away ClusterName1, Namespace1, ControllerName1, ControllerKind1, ContainerName1
//
// Ensure there are enough measurements: more than two limit samples for both memory and
// CPU, and at least one usage sample for each. Rows missing any of these counts are
// dropped as well, because a comparison against null is not true.
| where cpuLimitCount > 2
| where memLimitCount > 2
| where mem_measurement_counts >= 1
| where cpu_measurement_counts >= 1
// Suggested settings are derived from the 99th-percentile usage.
| extend
    suggestedMemRequest = mem_p99 * 1.5,
    suggestedMemLimit = mem_p99 * 3,
    suggestedCpuRequest = cpu_p99 * 1.5 + 5,
    suggestedCpuLimit = cpu_p99 * 3 + 5
// Relative distance of each configured value from its suggestion.
| extend
    diffMemRequest = abs(suggestedMemRequest - memRequestVal) / suggestedMemRequest,
    diffMemLimit = abs(suggestedMemLimit - memLimitVal) / suggestedMemLimit,
    diffCpuRequest = abs(suggestedCpuRequest - cpuRequestVal) / suggestedCpuRequest,
    diffCpuLimit = abs(suggestedCpuLimit - cpuLimitVal) / suggestedCpuLimit
// Largest of the four distances, each scaled by the tolerance.
| extend distAboveTolerance = max_of(diffMemRequest / tollerance, diffMemLimit / tollerance, diffCpuRequest / tollerance, diffCpuLimit / tollerance)
// True when at least one of the four configured values is null.
| extend hasnulls = not(isnotnull(cpuRequestVal) and isnotnull(cpuLimitVal) and isnotnull(memRequestVal) and isnotnull(memLimitVal))
// No colour value for containers with a null setting; log scale of the distance otherwise.
| extend colorKey = case(hasnulls, real(null), log10(distAboveTolerance))
// Selector for "set" / "notset" / "all"; with the literal "all" on the left every row passes.
// NOTE(review): the repeated "all" literal looks like a substituted parameter value - confirm.
| where (hasnulls and ("all" == "notset" or "all" == "all")) or (not(hasnulls) and ("all" == "set" or "all" == "all"))
// Base64 key built from the container's cluster/namespace/kind/controller/name path.
| extend containerKey = base64_encode_tostring(strcat(ClusterName, "/", Namespace, "/", ControllerKind, "/", ControllerName, "/", ContainerName))
// Final column set. The four configured request/limit values come last, with null
// replaced by -1.0.
| project
    ClusterName,
    Namespace,
    ControllerName,
    ControllerKind,
    ContainerName,
    mem_p90,
    mem_p99,
    mem_max,
    cpu_p90,
    cpu_p99,
    cpu_max,
    suggestedMemRequest,
    suggestedMemLimit,
    suggestedCpuRequest,
    suggestedCpuLimit,
    distAboveTolerance,
    colorKey,
    containerKey,
    memRequestVal = coalesce(memRequestVal, -1.0),
    memLimitVal = coalesce(memLimitVal, -1.0),
    cpuRequestVal = coalesce(cpuRequestVal, -1.0),
    cpuLimitVal = coalesce(cpuLimitVal, -1.0)