Tencent Cloud

文档反馈

容器监控图表指标

最后更新时间:2024-08-07 21:55:37

    集群监控概览

    图表名称
    查询语句
    使用的指标
    配置文件
    CPU Requests Commitment
    sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster"}) / sum(kube_node_status_allocatable_cpu_cores{cluster="$cluster"})
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    kube_node_status_allocatable_cpu_cores
    kube-state-metrics
    CPU Limits Commitment
    sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster"}) / sum(kube_node_status_allocatable_cpu_cores{cluster="$cluster"})
    kube_pod_container_resource_limits_cpu_cores
    kube-state-metrics
    kube_node_status_allocatable_cpu_cores
    kube-state-metrics
    Memory Utilisation
    1 - sum(:node_memory_MemAvailable_bytes:sum{cluster="$cluster"}) / sum(node_memory_MemTotal_bytes{cluster="$cluster"})
    node_memory_MemAvailable_bytes
    node-exporter
    node_memory_MemTotal_bytes
    node-exporter
    Memory Requests Commitment
    sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{cluster="$cluster"})
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    kube_node_status_allocatable_memory_bytes
    kube-state-metrics
    Memory Limits Commitment
    sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{cluster="$cluster"})
    kube_pod_container_resource_limits_memory_bytes
    kube-state-metrics
    kube_node_status_allocatable_memory_bytes
    kube-state-metrics
    Node Count
    count(kube_node_info{cluster="$cluster"})
    kube_node_info
    kube-state-metrics
    Pod Count
    count(kube_pod_info{cluster="$cluster"})
    kube_pod_info
    kube-state-metrics
    Node Request CPU Average Percent
    avg(sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster"})by (node)/sum(kube_node_status_capacity_cpu_cores{cluster="$cluster"})by(node))
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    kube_node_status_capacity_cpu_cores
    kube-state-metrics
    Node Request Memory Average Percent
    avg(sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster"})by (node)/sum(kube_node_status_capacity_memory_bytes{cluster="$cluster"})by(node))
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    kube_node_status_capacity_memory_bytes
    kube-state-metrics
    API Server Success Request Percent
    sum(irate(apiserver_request_total{cluster="$cluster",code=~"20.*",verb=~"GET|LIST"}[5m]))/sum(irate(apiserver_request_total{cluster="$cluster",verb=~"GET|LIST"}[5m]))
    apiserver_request_total
    kube-apiserver
    apiserver_request_total
    kube-apiserver
    Namespace Overview
    count(kube_pod_info{cluster="$cluster"}) by (namespace)
    kube_pod_info
    kube-state-metrics
    count(kube_service_info{cluster="$cluster"}) by(namespace)
    kube_service_info
    kube-state-metrics
    count(kube_pod_container_info{cluster="$cluster"}) by(namespace)
    kube_pod_container_info
    kube-state-metrics
    count(kube_configmap_info{cluster="$cluster"}) by(namespace)
    kube_configmap_info
    kube-state-metrics
    count(kube_secret_info{cluster="$cluster"}) by(namespace)
    kube_secret_info
    kube-state-metrics
    count(kube_deployment_created{cluster="$cluster"}) by (namespace)
    kube_deployment_created
    kube-state-metrics
    count(kube_statefulset_created{cluster="$cluster"}) by (namespace)
    kube_statefulset_created
    kube-state-metrics
    count(kube_job_created{cluster="$cluster"}) by (namespace)
    kube_job_created
    kube-state-metrics
    count(kube_cronjob_created{cluster="$cluster"}) by (namespace)
    kube_cronjob_created
    kube-state-metrics
    count(kube_pod_status_ready{cluster="$cluster",condition="false"}==1) by(namespace) - (count(kube_pod_status_phase{cluster="$cluster",phase="Succeeded"}==1) by(namespace) or vector(0)) or count(kube_pod_status_ready{cluster="$cluster",condition="false"}==1) by(namespace)
    kube_pod_status_ready
    kube-state-metrics
    kube_pod_status_phase
    kube-state-metrics
    kube_pod_status_ready
    kube-state-metrics
    count(kube_deployment_status_replicas_ready{cluster="$cluster"}<kube_deployment_spec_replicas{cluster="$cluster"}) by (namespace)
    kube_deployment_status_replicas_ready
    kube-state-metrics
    kube_deployment_spec_replicas
    kube-state-metrics
    count(kube_statefulset_status_replicas_ready{cluster="$cluster"}<kube_statefulset_replicas{cluster="$cluster"}) by (namespace)
    kube_statefulset_status_replicas_ready
    kube-state-metrics
    kube_statefulset_replicas
    kube-state-metrics
    count(kube_daemonset_status_number_unavailable{cluster="$cluster"}>0)by(namespace)
    kube_daemonset_status_number_unavailable
    kube-state-metrics
    count(kube_job_status_failed{cluster="$cluster"} == 1) by (namespace)
    kube_job_status_failed
    kube-state-metrics
    count(kube_daemonset_created{cluster="$cluster"}) by (namespace)
    kube_daemonset_created
    kube-state-metrics
    count(kube_persistentvolumeclaim_info{cluster="$cluster"}) by (namespace)
    kube_persistentvolumeclaim_info
    kube-state-metrics
    CPU Usage
    sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", container!="POD", container!=""}) by (namespace)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    CPU Quota
    sum(kube_pod_owner{cluster="$cluster"}) by (namespace)
    kube_pod_owner
    kube-state-metrics
    count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster"}) by (workload, namespace)) by (namespace)
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", container!="POD", container!=""}) by (namespace)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster"}) by (namespace)
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", container!="POD", container!=""}) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster"}) by (namespace)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster"}) by (namespace)
    kube_pod_container_resource_limits_cpu_cores
    kube-state-metrics
    sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", container!="POD", container!=""}) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster"}) by (namespace)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    kube_pod_container_resource_limits_cpu_cores
    kube-state-metrics
    Memory Usage (working_set)
    sum(container_memory_working_set_bytes{cluster="$cluster", container!="", container!="POD"}) by (namespace)
    container_memory_working_set_bytes
    cadvisor
    Memory Requests
    sum(kube_pod_owner{cluster="$cluster"}) by (namespace)
    kube_pod_owner
    kube-state-metrics
    count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster"}) by (workload, namespace)) by (namespace)
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum(container_memory_rss{cluster="$cluster", container!="", container!="POD"}) by (namespace)
    container_memory_rss
    cadvisor
    sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster"}) by (namespace)
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    sum(container_memory_rss{cluster="$cluster", container!="", container!="POD"}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster"}) by (namespace)
    container_memory_rss
    cadvisor
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster"}) by (namespace)
    kube_pod_container_resource_limits_memory_bytes
    kube-state-metrics
    sum(container_memory_rss{cluster="$cluster", container!="", container!="POD"}) by (namespace) / sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster"}) by (namespace)
    container_memory_rss
    cadvisor
    kube_pod_container_resource_limits_memory_bytes
    kube-state-metrics
    Node Memory Usage (Top 10)
    sum(label_replace(topk(10, 1-(node_memory_MemAvailable_bytes{cluster="$cluster"} / node_memory_MemTotal_bytes{cluster="$cluster"})), "node_ip", "$1", "instance", "(.*)"))by(node_ip)
    node_memory_MemAvailable_bytes
    node-exporter
    node_memory_MemTotal_bytes
    node-exporter
    Node CPU Usage (Top 10)
    topk(10, sum(label_replace(1 - sum(rate(node_cpu_seconds_total{cluster="$cluster",mode="idle"}[1m])) by (instance) / sum(rate(node_cpu_seconds_total{cluster="$cluster"}[1m])) by (instance),"host_ip","$1","instance","(.*)"))by(host_ip))
    node_cpu_seconds_total
    node-exporter
    node_cpu_seconds_total
    node-exporter
    Node Disk Usage (Top 10)
    topk(10, sum(label_replace(1-node_filesystem_free_bytes{cluster="$cluster",mountpoint="/"}/node_filesystem_size_bytes{cluster="$cluster",mountpoint="/",fstype!="rootfs"},"host_ip","$1","instance","(.*)"))by(host_ip))
    node_filesystem_free_bytes
    node-exporter
    node_filesystem_size_bytes
    node-exporter
    Node Network In (Top 10)
    topk(10, sum(label_replace(max(irate(node_network_receive_bytes_total{cluster="$cluster"}[1m])) by (instance),"host_ip","$1","instance","(.*)"))by(host_ip))
    node_network_receive_bytes_total
    node-exporter
    Node Network Out (Top 10)
    topk(10, sum(label_replace(max(irate(node_network_transmit_bytes_total{cluster="$cluster"}[1m])) by (instance),"host_ip","$1","instance","(.*)"))by(host_ip))
    node_network_transmit_bytes_total
    node-exporter
    Node Sockets Count(Top 10)
    topk(10, sum(label_replace(max(node_sockstat_TCP_alloc{cluster="$cluster"}) by (instance),"host_ip","$1","instance","(.*)"))by(host_ip))
    node_sockstat_TCP_alloc
    node-exporter
    Container Memory Usage(Top10)
    topk(10, sum (container_memory_working_set_bytes{cluster="$cluster",container !="",container!="POD"}) by (container))
    container_memory_working_set_bytes
    cadvisor
    Container Memory Usage/Limit(Top10)
    topk(10, avg(container_memory_working_set_bytes{cluster="$cluster",container!=""}/(container_spec_memory_limit_bytes{cluster="$cluster"}!=0)) by (container, pod, namespace))
    container_memory_working_set_bytes
    cadvisor
    container_spec_memory_limit_bytes
    cadvisor
    Container CPU Usage(Top10)
    topk(10, sum(rate(container_cpu_usage_seconds_total{cluster="$cluster",container !="",container!="POD"}[2m])) by (container))
    container_cpu_usage_seconds_total
    cadvisor
    Container Network
    topk(10, sum(irate(container_network_receive_bytes_total{cluster="$cluster",image!="",container!="",container!="POD"}[2m])) by (pod))
    container_network_receive_bytes_total
    cadvisor
    -topk(10, sum(irate(container_network_transmit_bytes_total{cluster="$cluster",image!="",container!="",container!="POD"}[2m])) by (pod))
    container_network_transmit_bytes_total
    cadvisor
    Container Memory Usage/Limit (Top 10)
    topk(10, avg(container_memory_working_set_bytes{cluster="$cluster",container!=""}/(container_spec_memory_limit_bytes{cluster="$cluster"}!=0)) by (container, pod, namespace))
    container_memory_working_set_bytes
    cadvisor
    container_spec_memory_limit_bytes
    cadvisor
    Container CPU Usage (Top 10)
    topk(10, sum(irate(container_cpu_usage_seconds_total{cluster="$cluster",container!="",container!="POD"}[1m])) by (container,pod,namespace)or on() vector(0))
    container_cpu_usage_seconds_total
    cadvisor
    Container Socket Count(Top 10)
    topk(10, sum(container_sockets{cluster="$cluster",container!=""}) by (container,pod,namespace)or on() vector(0))
    container_sockets
    cadvisor

    集群 Namespace 大盘

    图表名称
    查询语句
    使用的指标
    配置文件
    CPU Usage
    sum(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"}[2m]))
    container_cpu_usage_seconds_total
    cadvisor
    CPU Usage/Request(%)
    sum(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"}[2m]))/sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace=~"$namespace", unit="core", resource="cpu"})
    container_cpu_usage_seconds_total
    cadvisor
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    CPU Usage/Limit(%)
    sum(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"}[2m]))/sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster",namespace=~"$namespace", unit="core", resource="cpu"}) or on() vector(0)
    container_cpu_usage_seconds_total
    cadvisor
    kube_pod_container_resource_limits_cpu_cores
    kube-state-metrics
    CPU Request
    sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace=~"$namespace", unit="core", resource="cpu"})
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    CPU Limit
    sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster",namespace=~"$namespace", unit="core", resource="cpu"})
    kube_pod_container_resource_limits_cpu_cores
    kube-state-metrics
    Cluster Available
    sum(sum(kube_node_status_capacity{resource="cpu",cluster="$cluster",namespace=~"$namespace"}) by (node) + sum(kube_node_spec_unschedulable{cluster="$cluster",namespace=~"$namespace"}==0) by(node))
    kube_node_status_capacity
    kube-state-metrics
    kube_node_spec_unschedulable
    kube-state-metrics
    StatefulSet Created
    count(kube_statefulset_created{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
    kube_statefulset_created
    kube-state-metrics
    Pod Created
    count(kube_pod_info{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
    kube_pod_info
    kube-state-metrics
    Containers
    count(kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
    kube_pod_container_info
    kube-state-metrics
    DaemonSet Created
    count(kube_daemonset_created{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
    kube_daemonset_created
    kube-state-metrics
    Job Created
    count(kube_job_info{cluster="$cluster",namespace="$namespace"})or on() vector(0)
    kube_job_info
    kube-state-metrics
    Job Active
    count(kube_job_status_active{cluster="$cluster",namespace="$namespace"}==1)or on() vector(0)
    kube_job_status_active
    kube-state-metrics
    Cron Job Created
    count(kube_cronjob_created{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
    kube_cronjob_created
    kube-state-metrics
    Cron Job Active
    count(kube_cronjob_status_active{cluster="$cluster",namespace="$namespace"}==1) or on() vector(0)
    kube_cronjob_status_active
    kube-state-metrics
    Unbound PVC
    count(kube_persistentvolumeclaim_status_phase{phase!="Bound", cluster="$cluster",namespace="$namespace"}==1) or on() vector(0)
    kube_persistentvolumeclaim_status_phase
    kube-state-metrics
    PersistentVolumeClaim Created
    count(kube_persistentvolumeclaim_info{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
    kube_persistentvolumeclaim_info
    kube-state-metrics
    Service Created
    count(kube_service_info{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
    kube_service_info
    kube-state-metrics
    LoadBalancer Created
    count(kube_service_spec_type{type="LoadBalancer", cluster="$cluster",namespace="$namespace"}) or on() vector(0)
    kube_service_spec_type
    kube-state-metrics
    Ingress Created
    count(kube_ingress_info{cluster="$cluster",namespace="$namespace"})or on() vector(0)
    kube_ingress_info
    kube-state-metrics
    ConfigMap Created
    count(kube_configmap_info{cluster="$cluster",namespace="$namespace"})
    kube_configmap_info
    kube-state-metrics
    Secret Created
    count(kube_secret_info{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
    kube_secret_info
    kube-state-metrics
    PVC Storage Requests Total
    sum(kube_persistentvolumeclaim_resource_requests_storage_bytes{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
    kube_persistentvolumeclaim_resource_requests_storage_bytes
    kube-state-metrics
    Pod NotReady
    count(kube_pod_status_ready{condition="false", cluster="$cluster",namespace="$namespace"}==1) by(namespace) - (count(kube_pod_status_phase{phase="Succeeded", cluster="$cluster",namespace="$namespace"}==1) by(namespace) or vector(0)) or count(kube_pod_status_ready{condition="false", cluster="$cluster",namespace="$namespace"}==1) by(namespace)
    kube_pod_status_ready
    kube-state-metrics
    kube_pod_status_phase
    kube-state-metrics
    kube_pod_status_ready
    kube-state-metrics
    Pod UnSchedulable
    count(kube_pod_status_unschedulable{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
    kube_pod_status_unschedulable
    kube-state-metrics
    Deployment NotReady
    count(sum(kube_deployment_status_replicas_ready{cluster="$cluster",namespace="$namespace"}) by (deployment)<sum(kube_deployment_spec_replicas{cluster="$cluster",namespace="$namespace"}) by (deployment)) or on() vector(0)
    kube_deployment_status_replicas_ready
    kube-state-metrics
    kube_deployment_spec_replicas
    kube-state-metrics
    Daemonset NotReady
    count(kube_daemonset_status_number_unavailable{cluster="$cluster",namespace="$namespace"}>0) or on() vector(0)
    kube_daemonset_status_number_unavailable
    kube-state-metrics
    Job Failed
    count(kube_job_status_failed{cluster="$cluster",namespace="$namespace"} == 1)
    kube_job_status_failed
    kube-state-metrics
    CPU Usage
    sum(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"}[2m])) or on() vector(0)
    container_cpu_usage_seconds_total
    cadvisor
    CPU Quota
    sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}) by (pod)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"}) by (pod)
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"}) by (pod)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"}) by (pod)
    kube_pod_container_resource_limits_cpu_cores
    kube-state-metrics
    sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"}) by (pod)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    kube_pod_container_resource_limits_cpu_cores
    kube-state-metrics
    Memory Usage
    sum(container_memory_working_set_bytes{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"}) or on() vector(0)
    container_memory_working_set_bytes
    cadvisor
    Memory Usage/Request(%)
    sum(container_memory_working_set_bytes{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"})/sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace=~"$namespace", unit="byte", resource="memory"}) or on() vector(0)
    container_memory_working_set_bytes
    cadvisor
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    Memory Usage/Limit(%)
    sum(container_memory_working_set_bytes{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"})/sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster",namespace=~"$namespace", unit="byte", resource="memory"}) or on() vector(0)
    container_memory_working_set_bytes
    cadvisor
    kube_pod_container_resource_limits_memory_bytes
    kube-state-metrics
    Memory Request
    sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace=~"$namespace", unit="byte", resource="memory"})
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    Memory Limit
    sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster",namespace=~"$namespace", unit="byte", resource="memory"})
    kube_pod_container_resource_limits_memory_bytes
    kube-state-metrics
    Cluster Available
    sum(sum(kube_node_status_capacity{resource="memory"}) by (node) + sum(kube_node_spec_unschedulable==0) by(node)) or on() vector(0)
    kube_node_status_capacity
    kube-state-metrics
    kube_node_spec_unschedulable
    kube-state-metrics
    Memory Usage (w/o cache)
    sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"}) by (pod)
    container_memory_working_set_bytes
    cadvisor
    scalar(kube_resourcequota{cluster="$cluster", namespace="$namespace", type="hard",resource="requests.memory"})
    kube_resourcequota
    kube-state-metrics
    scalar(kube_resourcequota{cluster="$cluster", namespace="$namespace", type="hard",resource="limits.memory"})
    kube_resourcequota
    kube-state-metrics
    Memory Quota
    sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace",container!="", container!="POD"}) by (pod)
    container_memory_working_set_bytes
    cadvisor
    sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"}) by (pod)
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace",container!="", container!="POD"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace="$namespace"}) by (pod)
    container_memory_working_set_bytes
    cadvisor
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"}) by (pod)
    kube_pod_container_resource_limits_memory_bytes
    kube-state-metrics
    sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace",container!="", container!="POD"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{namespace="$namespace"}) by (pod)
    container_memory_working_set_bytes
    cadvisor
    kube_pod_container_resource_limits_memory_bytes
    kube-state-metrics
    sum(container_memory_rss{cluster="$cluster", namespace="$namespace",container!="", container!="POD"}) by (pod)
    container_memory_rss
    cadvisor
    sum(container_memory_cache{cluster="$cluster", namespace="$namespace",container!="", container!="POD"}) by (pod)
    container_memory_cache
    cadvisor
    sum(container_memory_swap{cluster="$cluster", namespace="$namespace",container!="", container!="POD"}) by (pod)
    container_memory_swap
    cadvisor
    Containers
    group by (image, container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"})
    kube_pod_container_info
    kube-state-metrics
    group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() max by (container,pod)(kube_pod_container_status_running{cluster="$cluster",namespace="$namespace"})
    kube_pod_container_info
    kube-state-metrics
    kube_pod_container_status_running
    kube-state-metrics
    group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() max by (container,pod)(kube_pod_container_status_restarts_total{cluster="$cluster",namespace="$namespace"})
    kube_pod_container_info
    kube-state-metrics
    kube_pod_container_status_restarts_total
    kube-state-metrics
    group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() max(irate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[1m])) by (pod,container)
    kube_pod_container_info
    kube-state-metrics
    container_cpu_usage_seconds_total
    cadvisor
    group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() (max(irate(container_cpu_usage_seconds_total{container!="",container!="POD",cluster="$cluster",namespace="$namespace"}[1m])) by (container,pod) / (max(container_spec_cpu_quota{container!="",container!="POD",cluster="$cluster",namespace=~"$namespace"}/100000 > 0) by (container,pod)))
    kube_pod_container_info
    kube-state-metrics
    container_cpu_usage_seconds_total
    cadvisor
    container_spec_cpu_quota
    cadvisor
    group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() sum by (container,pod)(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace="$namespace"})
    kube_pod_container_info
    kube-state-metrics
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() (max(irate(container_cpu_usage_seconds_total{container!="",container!="POD",cluster="$cluster",namespace="$namespace"}[1m])) by (container,pod) / (max by (container,pod)(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace="$namespace"})))
    kube_pod_container_info
    kube-state-metrics
    container_cpu_usage_seconds_total
    cadvisor
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() sum by (container,pod)(kube_pod_container_resource_limits{resource="cpu",cluster="$cluster",namespace="$namespace"})
    kube_pod_container_info
    kube-state-metrics
    kube_pod_container_resource_limits
    kube-state-metrics
    group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() max(container_memory_working_set_bytes{container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,container)
    kube_pod_container_info
    kube-state-metrics
    container_memory_working_set_bytes
    cadvisor
    group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() (max(container_memory_working_set_bytes{container!="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,container) / max(container_spec_memory_limit_bytes{container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod, container) < 1)
    kube_pod_container_info
    kube-state-metrics
    container_memory_working_set_bytes
    cadvisor
    container_spec_memory_limit_bytes
    cadvisor
    group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() sum by (container,pod)(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace="$namespace"})
    kube_pod_container_info
    kube-state-metrics
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() (max(container_memory_working_set_bytes{container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,container) / max by (container,pod)(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace="$namespace"}))
    kube_pod_container_info
    kube-state-metrics
    container_memory_working_set_bytes
    cadvisor
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics

    API Server(独立集群)

    图表名称
    查询语句
    使用的指标
    配置文件
    Availability > 99.000%
    1 - ( ( sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_count{cluster="$cluster"}[5m])) - sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_bucket{le="1", cluster="$cluster"}[5m])) ) + sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",code=~"5..", cluster="$cluster"}[5m]) or vector(0)) ) / sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",cluster="$cluster"}[5m]))
    apiserver_request_duration_seconds_count
    kube-apiserver
    apiserver_request_duration_seconds_bucket
    kube-apiserver
    apiserver_request_total
    kube-apiserver
    ErrorBudget > 99.000%
    100 * (1 - ( ( sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_count{cluster="$cluster"}[5m])) - sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_bucket{le="1", cluster="$cluster"}[5m])) ) + sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",code=~"5..", cluster="$cluster"}[5m]) or vector(0)) ) / sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",cluster="$cluster"}[5m])) -0.990000)
    apiserver_request_duration_seconds_count
    kube-apiserver
    apiserver_request_duration_seconds_bucket
    kube-apiserver
    apiserver_request_total
    kube-apiserver
    Read Availability
    1 - ( ( sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_count{verb=~"LIST|GET", cluster="$cluster"}[5m])) - sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",le="1", cluster="$cluster"}[5m])) ) + sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5..", cluster="$cluster"}[5m]) or vector(0)) ) / sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET", cluster="$cluster"}[5m]))
    apiserver_request_duration_seconds_count
    kube-apiserver
    apiserver_request_duration_seconds_bucket
    kube-apiserver
    apiserver_request_total
    kube-apiserver
    Read SLI - Requests
    sum by (code) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",cluster="$cluster"}[5m]))
    apiserver_request_total
    kube-apiserver
    Read SLI - Errors
    sum by (resource) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5..",cluster="$cluster"}[5m]))/ sum by (resource) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",cluster="$cluster"}[5m]))
    apiserver_request_total
    kube-apiserver
    Read SLI - Duration
    histogram_quantile(0.99, sum by (le, resource,cluster,cluster_type) (rate(apiserver_request_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",cluster="$cluster"}[5m]))) > 0
    apiserver_request_duration_seconds_bucket
    kube-apiserver
    Write Availability
    1 - ( ( sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE", cluster="$cluster"}[5m])) - sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1", cluster="$cluster"}[5m])) ) + sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5..", cluster="$cluster"}[5m]) or vector(0)) ) / sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE", cluster="$cluster"}[5m]))
    apiserver_request_duration_seconds_count
    kube-apiserver
    apiserver_request_duration_seconds_bucket
    kube-apiserver
    apiserver_request_total
    kube-apiserver
    Write SLI - Requests
    sum by (code) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",cluster="$cluster"}[5m]))
    apiserver_request_total
    kube-apiserver
    Write SLI - Errors
    sum by (resource) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5..",cluster="$cluster"}[5m]))/ sum by (resource) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",cluster="$cluster"}[5m]))
    apiserver_request_total
    kube-apiserver
    Write SLI - Duration
    histogram_quantile(0.99, sum by (le, resource,cluster,cluster_type) (rate(apiserver_request_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",cluster="$cluster"}[5m]))) > 0
    apiserver_request_duration_seconds_bucket
    kube-apiserver
    Work Queue Add Rate
    sum(rate(workqueue_adds_total{job="kube-apiserver", instance=~"$instance", cluster=~"$cluster"}[5m])) by (instance, name)
    workqueue_adds_total
    kubelet
    Work Queue Depth
    sum(rate(workqueue_depth{job="kube-apiserver", instance=~"$instance", cluster=~"$cluster"}[5m])) by (instance, name)
    workqueue_depth
    kubelet
    Work Queue Latency
    histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job="kube-apiserver", instance=~"$instance", cluster=~"$cluster"}[5m])) by (instance, name, le))
    workqueue_queue_duration_seconds_bucket
    kubelet
    Memory
    process_resident_memory_bytes{job="kube-apiserver",instance=~"$instance", cluster=~"$cluster"}
    process_resident_memory_bytes
    node-exporter
    CPU usage
    rate(process_cpu_seconds_total{job="kube-apiserver",instance=~"$instance", cluster=~"$cluster"}[5m])
    process_cpu_seconds_total
    node-exporter

    Controller Manager(独立集群)

    图表名称
    查询语句
    使用的指标
    配置文件
    Up
    sum(up{cluster=~"$cluster",job="kube-controller-manager"})
    up
    kubelet
    Work Queue Add Rate
    sum(rate(workqueue_adds_total{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance"}[5m])) by (instance, name)
    workqueue_adds_total
    kubelet
    Work Queue Depth
    sum(rate(workqueue_depth{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance"}[5m])) by (instance, name)
    workqueue_depth
    kubelet
    Work Queue Latency
    histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance"}[5m])) by (instance, name, le))
    workqueue_queue_duration_seconds_bucket
    kubelet
    Kube API Request Rate
    sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance",code=~"2.."}[5m]))
    rest_client_requests_total
    kubelet
    sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance",code=~"3.."}[5m]))
    rest_client_requests_total
    kubelet
    sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance",code=~"4.."}[5m]))
    rest_client_requests_total
    kubelet
    sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance",code=~"5.."}[5m]))
    rest_client_requests_total
    kubelet
    Post Request Latency 99th Quantile
    histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance", verb="POST"}[5m])) by (verb, url, le))
    rest_client_request_duration_seconds_bucket
    kubelet
    Get Request Latency 99th Quantile
    histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance", verb="GET"}[5m])) by (verb, url, le))
    rest_client_request_duration_seconds_bucket
    kubelet
    Memory
    process_resident_memory_bytes{cluster=~"$cluster",job="kube-controller-manager",instance=~"$instance"}
    process_resident_memory_bytes
    node-exporter
    CPU usage
    rate(process_cpu_seconds_total{cluster=~"$cluster",job="kube-controller-manager",instance=~"$instance"}[5m])
    process_cpu_seconds_total
    node-exporter

    Kubelet

    图表名称
    查询语句
    使用的指标
    配置文件
    Up
    sum(up{cluster="$cluster", job="kubelet"})
    up
    kubelet
    Running Pods
    sum(kubelet_running_pods{cluster="$cluster", job="kubelet", instance=~"$instance"})
    kubelet_running_pods
    kubelet
    Running Container
    sum(kubelet_running_containers{cluster="$cluster", job="kubelet", instance=~"$instance"})
    kubelet_running_containers
    kubelet
    Actual Volume Count
    sum(volume_manager_total_volumes{cluster="$cluster", job="kubelet", instance=~"$instance", state="actual_state_of_world"})
    volume_manager_total_volumes
    kubelet
    Desired Volume Count
    sum(volume_manager_total_volumes{cluster="$cluster", job="kubelet", instance=~"$instance",state="desired_state_of_world"})
    volume_manager_total_volumes
    kubelet
    Config Error Count
    sum(rate(kubelet_node_config_error{cluster="$cluster", job="kubelet", instance=~"$instance"}[5m]))
    kubelet_node_config_error
    kubelet
    Operation Rate
    sum(rate(kubelet_runtime_operations_total{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (operation_type, instance)
    kubelet_runtime_operations_total
    kubelet
    Operation Error Rate
    sum(rate(kubelet_runtime_operations_errors_total{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, operation_type)
    kubelet_runtime_operations_errors_total
    kubelet
    Operation duration 99th quantile
    histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, operation_type, le))
    kubelet_runtime_operations_duration_seconds_bucket
    kubelet
    Pod Start Rate
    sum(rate(kubelet_pod_start_duration_seconds_count{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance)
    kubelet_pod_start_duration_seconds_count
    kubelet
    sum(rate(kubelet_pod_worker_duration_seconds_count{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance)
    kubelet_pod_worker_duration_seconds_count
    kubelet
    Pod Start Duration
    histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_bucket{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, le))
    kubelet_pod_start_duration_seconds_bucket
    kubelet
    histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, le))
    kubelet_pod_worker_duration_seconds_bucket
    kubelet
    Storage Operation Rate
    sum(rate(storage_operation_duration_seconds_count{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, operation_name, volume_plugin)
    storage_operation_duration_seconds_count
    kubelet
    Storage Operation Error Rate
    sum(rate(storage_operation_errors_total{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, operation_name, volume_plugin)
    storage_operation_errors_total
    kubelet
    Storage Operation Duration 99th quantile
    histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster="$cluster", job="kubelet", instance=~"$instance"}[5m])) by (instance, operation_name, volume_plugin, le))
    storage_operation_duration_seconds_bucket
    kubelet
    Cgroup manager operation rate
    sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster="$cluster", job="kubelet", instance=~"$instance"}[5m])) by (instance, operation_type)
    kubelet_cgroup_manager_duration_seconds_count
    kubelet
    Cgroup manager 99th quantile
    histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster="$cluster", job="kubelet", instance=~"$instance"}[5m])) by (instance, operation_type, le))
    kubelet_cgroup_manager_duration_seconds_bucket
    kubelet
    PLEG relist rate
    sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster="$cluster", job="kubelet", instance=~"$instance"}[5m])) by (instance)
    kubelet_pleg_relist_duration_seconds_count
    kubelet
    PLEG relist interval
    histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, le))
    kubelet_pleg_relist_interval_seconds_bucket
    kubelet
    PLEG relist duration
    histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, le))
    kubelet_pleg_relist_duration_seconds_bucket
    kubelet
    RPC Rate
    sum(rate(rest_client_requests_total{cluster="$cluster",job="kubelet", instance=~"$instance",code=~"2.."}[5m]))
    rest_client_requests_total
    kubelet
    sum(rate(rest_client_requests_total{cluster="$cluster",job="kubelet", instance=~"$instance",code=~"3.."}[5m]))
    rest_client_requests_total
    kubelet
    Request duration 99th quantile
    histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster="$cluster",job="kubelet", instance=~"$instance"}[5m])) by (instance, verb, url, le))
    rest_client_request_duration_seconds_bucket
    kubelet
    Memory
    process_resident_memory_bytes{cluster="$cluster",job="kubelet",instance=~"$instance"}
    process_resident_memory_bytes
    node-exporter
    CPU usage
    rate(process_cpu_seconds_total{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])
    process_cpu_seconds_total
    node-exporter
    Goroutines
    go_goroutines{cluster="$cluster",job="kubelet",instance=~"$instance"}
    go_goroutines
    node-exporter

    Proxy(非默认安装组件)

    图表名称
    查询语句
    使用的指标
    配置文件
    Up
    sum(up{job="kube-proxy"})
    up
    kubelet
    Rules Sync Rate
    sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{job="kube-proxy", instance=~"$instance"}[5m]))
    kubeproxy_sync_proxy_rules_duration_seconds_count
    kube-proxy
    Rule Sync Latency 99th Quantile
    histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{job="kube-proxy", instance=~"$instance"}[5m]))
    kubeproxy_sync_proxy_rules_duration_seconds_bucket
    kube-proxy
    Network Programming Rate
    sum(rate(kubeproxy_network_programming_duration_seconds_count{job="kube-proxy", instance=~"$instance"}[5m]))
    kubeproxy_network_programming_duration_seconds_count
    kube-proxy
    Network Programming Latency 99th Quantile
    histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{job="kube-proxy", instance=~"$instance"}[5m])) by (instance, le))
    kubeproxy_network_programming_duration_seconds_bucket
    kube-proxy
    Kube API Request Rate
    sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"2.."}[5m]))
    rest_client_requests_total
    kubelet
    sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"3.."}[5m]))
    rest_client_requests_total
    kubelet
    sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"4.."}[5m]))
    rest_client_requests_total
    kubelet
    sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"5.."}[5m]))
    rest_client_requests_total
    kubelet
    Post Request Latency 99th Quantile
    histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job="kube-proxy",instance=~"$instance",verb="POST"}[5m])) by (verb, url, le))
    rest_client_request_duration_seconds_bucket
    kubelet
    Kube API Request Rate
    sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"2.."}[5m]))
    rest_client_requests_total
    kubelet
    sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"3.."}[5m]))
    rest_client_requests_total
    kubelet
    sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"4.."}[5m]))
    rest_client_requests_total
    kubelet
    sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"5.."}[5m]))
    rest_client_requests_total
    kubelet
    Post Request Latency 99th Quantile
    histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job="kube-proxy",instance=~"$instance",verb="POST"}[5m])) by (verb, url, le))
    rest_client_request_duration_seconds_bucket
    kubelet
    Get Request Latency 99th Quantile
    histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job="kube-proxy", instance=~"$instance", verb="GET"}[5m])) by (verb, url, le))
    rest_client_request_duration_seconds_bucket
    kubelet
    Memory
    process_resident_memory_bytes{job="kube-proxy",instance=~"$instance"}
    process_resident_memory_bytes
    node-exporter
    CPU usage
    rate(process_cpu_seconds_total{job="kube-proxy",instance=~"$instance"}[5m])
    process_cpu_seconds_total
    node-exporter

    Scheduler(独立集群)

    图表名称
    查询语句
    使用的指标
    配置文件
    Up
    sum(up{cluster=~"$cluster", job="kube-scheduler"})
    up
    kubelet
    Kube API Request Rate
    sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance",code=~"2.."}[5m]))
    rest_client_requests_total
    kubelet
    sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance",code=~"3.."}[5m]))
    rest_client_requests_total
    kubelet
    sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance",code=~"4.."}[5m]))
    rest_client_requests_total
    kubelet
    sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance",code=~"5.."}[5m]))
    rest_client_requests_total
    kubelet
    Post Request Latency 99th Quantile
    histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance", verb="POST"}[5m])) by (verb, url, le))
    rest_client_request_duration_seconds_bucket
    kubelet
    Get Request Latency 99th Quantile
    histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance", verb="GET"}[5m])) by (verb, url, le))
    rest_client_request_duration_seconds_bucket
    kubelet
    Memory
    process_resident_memory_bytes{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance"}
    process_resident_memory_bytes
    node-exporter
    CPU usage
    rate(process_cpu_seconds_total{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance"}[5m])
    process_cpu_seconds_total
    node-exporter

    集群节点监控详情

    图表名称
    查询语句
    使用的指标
    配置文件
    服务器资源总览表
    node_uname_info{job=~"$job", cluster=~"$cluster"} - 0
    node_uname_info
    node-exporter
    node_memory_MemTotal_bytes{job=~"$job",cluster=~"$cluster"} - 0
    node_memory_MemTotal_bytes
    node-exporter
    count(node_cpu_seconds_total{job=~"$job",mode='system',cluster=~"$cluster"}) by (instance)
    node_cpu_seconds_total
    node-exporter
    sum(time() - node_boot_time_seconds{job=~"$job",cluster=~"$cluster"})by(instance)
    node_boot_time_seconds
    node-exporter
    max((node_filesystem_size_bytes{job=~"$job",cluster=~"$cluster",fstype=~"ext.?|xfs"}-node_filesystem_free_bytes{job=~"$job",cluster=~"$cluster",fstype=~"ext.?|xfs"}) *100/(node_filesystem_avail_bytes {job=~"$job",cluster=~"$cluster",fstype=~"ext.?|xfs"}+(node_filesystem_size_bytes{job=~"$job",cluster=~"$cluster",fstype=~"ext.?|xfs"}-node_filesystem_free_bytes{job=~"$job",cluster=~"$cluster",fstype=~"ext.?|xfs"})))by(instance)
    node_filesystem_size_bytes
    node-exporter
    node_filesystem_avail_bytes
    node-exporter
    node_filesystem_free_bytes
    node-exporter
    (1 - avg(irate(node_cpu_seconds_total{job=~"$job",mode="idle",cluster=~"$cluster"}[5m])) by (instance)) * 100
    node_cpu_seconds_total
    node-exporter
    (1 - (node_memory_MemAvailable_bytes{job=~"$job",cluster=~"$cluster"} / (node_memory_MemTotal_bytes{job=~"$job",cluster=~"$cluster"})))* 100
    node_memory_MemAvailable_bytes
    node-exporter
    node_memory_MemTotal_bytes
    node-exporter
    node_load5{job=~"$job",cluster=~"$cluster"}
    node_load5
    node-exporter
    max(irate(node_disk_written_bytes_total{job=~"$job",cluster=~"$cluster"}[5m])) by (instance)
    node_disk_written_bytes_total
    node-exporter
    max(irate(node_network_receive_bytes_total{job=~"$job",cluster=~"$cluster"}[5m])*8) by (instance)
    node_network_receive_bytes_total
    node-exporter
    max(irate(node_network_transmit_bytes_total{job=~"$job",cluster=~"$cluster"}[5m])*8) by (instance)
    node_network_transmit_bytes_total
    node-exporter
    node_load5{job=~"$job",cluster=~"$cluster"}
    node_load5
    node-exporter
    整体总负载与整体平均 CPU 使用率
    count(node_cpu_seconds_total{job=~"$job",cluster=~"$cluster", mode='system'})
    node_cpu_seconds_total
    node-exporter
    sum(node_load5{job=~"$job",cluster=~"$cluster"})
    node_load5
    node-exporter
    avg(1 - avg(irate(node_cpu_seconds_total{job=~"$job",mode="idle",cluster=~"$cluster"}[5m])) by (instance)) * 100
    node_cpu_seconds_total
    node-exporter
    整体总内存与整体平均内存使用率
    sum(node_memory_MemTotal_bytes{job=~"$job",cluster=~"$cluster"})
    node_memory_MemTotal_bytes
    node-exporter
    sum(node_memory_MemTotal_bytes{job=~"$job",cluster=~"$cluster"} - node_memory_MemAvailable_bytes{job=~"$job",cluster=~"$cluster"})
    node_memory_MemTotal_bytes
    node-exporter
    node_memory_MemAvailable_bytes
    node-exporter
    (sum(node_memory_MemTotal_bytes{job=~"$job",cluster=~"$cluster"} - node_memory_MemAvailable_bytes{job=~"$job",cluster=~"$cluster"}) / sum(node_memory_MemTotal_bytes{job=~"$job",cluster=~"$cluster"}))*100
    node_memory_MemTotal_bytes
    node-exporter
    node_memory_MemAvailable_bytes
    node-exporter
    整体总磁盘与整体平均磁盘使用率
    sum(avg(node_filesystem_size_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance))
    node_filesystem_size_bytes
    node-exporter
    sum(avg(node_filesystem_size_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance)) - sum(avg(node_filesystem_free_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance))
    node_filesystem_size_bytes
    node-exporter
    node_filesystem_free_bytes
    node-exporter
    (sum(avg(node_filesystem_size_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance)) - sum(avg(node_filesystem_free_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance))) *100/(sum(avg(node_filesystem_avail_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance))+(sum(avg(node_filesystem_size_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance)) - sum(avg(node_filesystem_free_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance))))
    node_filesystem_size_bytes
    node-exporter
    node_filesystem_free_bytes
    node-exporter
    node_filesystem_avail_bytes
    node-exporter
    运行时间
    avg(time() - node_boot_time_seconds{instance=~"$node",cluster=~"$cluster"})
    node_boot_time_seconds
    node-exporter
    CPU 核数
    count(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node", mode='system'})
    node_cpu_seconds_total
    node-exporter
    总内存
    sum(node_memory_MemTotal_bytes{cluster=~"$cluster",instance=~"$node"})
    node_memory_MemTotal_bytes
    node-exporter
    总 CPU 使用率
    100 - (avg(irate(node_cpu_seconds_total{instance=~"$node",mode="idle",cluster=~"$cluster"}[5m])) * 100)
    node_cpu_seconds_total
    node-exporter
    内存使用率
    (1 - (node_memory_MemAvailable_bytes{instance=~"$node",cluster=~"$cluster"} / (node_memory_MemTotal_bytes{instance=~"$node",cluster=~"$cluster"})))* 100
    node_memory_MemAvailable_bytes
    node-exporter
    node_memory_MemTotal_bytes
    node-exporter
    最大分区使用率
    (node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint="$maxmount"}-node_filesystem_free_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint="$maxmount"})*100 /(node_filesystem_avail_bytes {cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint="$maxmount"}+(node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint="$maxmount"}-node_filesystem_free_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint="$maxmount"}))
    node_filesystem_size_bytes
    node-exporter
    node_filesystem_free_bytes
    node-exporter
    node_filesystem_avail_bytes
    node-exporter
    CPU iowait
    avg(irate(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node",mode="iowait"}[5m])) * 100
    node_cpu_seconds_total
    node-exporter
    各分区可用空间
    node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-0
    node_filesystem_size_bytes
    node-exporter
    node_filesystem_avail_bytes {cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-0
    node_filesystem_avail_bytes
    node-exporter
    (node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}) *100/(node_filesystem_avail_bytes {cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}+(node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}))
    node_filesystem_size_bytes
    node-exporter
    node_filesystem_free_bytes
    node-exporter
    node_filesystem_avail_bytes
    node-exporter
    CPU 使用率
    avg(irate(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node",mode="system"}[5m])) by (instance) *100
    node_cpu_seconds_total
    node-exporter
    avg(irate(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node",mode="user"}[5m])) by (instance) *100
    node_cpu_seconds_total
    node-exporter
    avg(irate(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node",mode="iowait"}[5m])) by (instance) *100
    node_cpu_seconds_total
    node-exporter
    (1 - avg(irate(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node",mode="idle"}[5m])) by (instance))*100
    node_cpu_seconds_total
    node-exporter
    内存信息
    node_memory_MemTotal_bytes{cluster=~"$cluster",instance=~"$node"}
    node_memory_MemTotal_bytes
    node-exporter
    node_memory_MemTotal_bytes{cluster=~"$cluster",instance=~"$node"} - node_memory_MemAvailable_bytes{cluster=~"$cluster",instance=~"$node"}
    node_memory_MemTotal_bytes
    node-exporter
    node_memory_MemAvailable_bytes
    node-exporter
    node_memory_MemAvailable_bytes{cluster=~"$cluster",instance=~"$node"}
    node_memory_MemAvailable_bytes
    node-exporter
    (1 - (node_memory_MemAvailable_bytes{cluster=~"$cluster",instance=~"$node"} / (node_memory_MemTotal_bytes{cluster=~"$cluster",instance=~"$node"})))* 100
    node_memory_MemAvailable_bytes
    node-exporter
    node_memory_MemTotal_bytes
    node-exporter
    每秒网络带宽使用
    irate(node_network_receive_bytes_total{cluster=~"$cluster",instance=~'$node',device=~"$device"}[5m])*8
    node_network_receive_bytes_total
    node-exporter
    irate(node_network_transmit_bytes_total{cluster=~"$cluster",instance=~'$node',device=~"$device"}[5m])*8
    node_network_transmit_bytes_total
    node-exporter
    系统平均负载
    node_load1{cluster=~"$cluster",instance=~"$node"}
    node_load1
    node-exporter
    node_load5{cluster=~"$cluster",instance=~"$node"}
    node_load5
    node-exporter
    node_load15{cluster=~"$cluster",instance=~"$node"}
    node_load15
    node-exporter
    sum(count(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node", mode='system'}) by (cpu,instance)) by(instance)
    node_cpu_seconds_total
    node-exporter
    每秒磁盘读写容量
    irate(node_disk_read_bytes_total{cluster=~"$cluster",instance=~"$node"}[5m])
    node_disk_read_bytes_total
    node-exporter
    irate(node_disk_written_bytes_total{cluster=~"$cluster",instance=~"$node"}[5m])
    node_disk_written_bytes_total
    node-exporter
    磁盘使用率
    (node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}) *100/(node_filesystem_avail_bytes {cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}+(node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}))
    node_filesystem_size_bytes
    node-exporter
    node_filesystem_free_bytes
    node-exporter
    node_filesystem_avail_bytes
    node-exporter
    node_filesystem_files_free{cluster=~"$cluster",instance=~'$node',fstype=~"ext.?|xfs"} / node_filesystem_files{cluster=~"$cluster",instance=~'$node',fstype=~"ext.?|xfs"}
    node_filesystem_files_free
    node-exporter
    node_filesystem_files
    node-exporter
    磁盘读写速率(IOPS)
    irate(node_disk_reads_completed_total{cluster=~"$cluster",instance=~"$node"}[5m])
    node_disk_reads_completed_total
    node-exporter
    irate(node_disk_writes_completed_total{cluster=~"$cluster",instance=~"$node"}[5m])
    node_disk_writes_completed_total
    node-exporter
    node_disk_io_now{cluster=~"$cluster",instance=~"$node"}
    node_disk_io_now
    node-exporter
    每1秒内 I/O 操作耗时占比
    irate(node_disk_io_time_seconds_total{cluster=~"$cluster",instance=~"$node"}[5m])
    node_disk_io_time_seconds_total
    node-exporter
    每次 IO 读写的耗时
    irate(node_disk_read_time_seconds_total{cluster=~"$cluster",instance=~"$node"}[5m]) / irate(node_disk_reads_completed_total{cluster=~"$cluster",instance=~"$node"}[5m])
    node_disk_read_time_seconds_total
    node-exporter
    node_disk_reads_completed_total
    node-exporter
    irate(node_disk_write_time_seconds_total{cluster=~"$cluster",instance=~"$node"}[5m]) / irate(node_disk_writes_completed_total{cluster=~"$cluster",instance=~"$node"}[5m])
    node_disk_write_time_seconds_total
    node-exporter
    node_disk_writes_completed_total
    node-exporter
    irate(node_disk_io_time_seconds_total{cluster=~"$cluster",instance=~"$node"}[5m])
    node_disk_io_time_seconds_total
    node-exporter
    irate(node_disk_io_time_weighted_seconds_total{cluster=~"$cluster",instance=~"$node"}[5m])
    node_disk_io_time_weighted_seconds_total
    node-exporter
    网络 Socket 连接信息
    node_netstat_Tcp_CurrEstab{cluster=~"$cluster",instance=~'$node'}
    node_netstat_Tcp_CurrEstab
    node-exporter
    node_sockstat_TCP_tw{cluster=~"$cluster",instance=~'$node'}
    node_sockstat_TCP_tw
    node-exporter
    node_sockstat_sockets_used{cluster=~"$cluster",instance=~'$node'}
    node_sockstat_sockets_used
    node-exporter
    node_sockstat_UDP_inuse{cluster=~"$cluster",instance=~'$node'}
    node_sockstat_UDP_inuse
    node-exporter
    node_sockstat_TCP_alloc{cluster=~"$cluster",instance=~'$node'}
    node_sockstat_TCP_alloc
    node-exporter
    irate(node_netstat_Tcp_PassiveOpens{cluster=~"$cluster",instance=~'$node'}[5m])
    node_netstat_Tcp_PassiveOpens
    node-exporter
    irate(node_netstat_Tcp_ActiveOpens{cluster=~"$cluster",instance=~'$node'}[5m])
    node_netstat_Tcp_ActiveOpens
    node-exporter
    irate(node_netstat_Tcp_InSegs{cluster=~"$cluster",instance=~'$node'}[5m])
    node_netstat_Tcp_InSegs
    node-exporter
    irate(node_netstat_Tcp_OutSegs{cluster=~"$cluster",instance=~'$node'}[5m])
    node_netstat_Tcp_OutSegs
    node-exporter
    irate(node_netstat_Tcp_RetransSegs{cluster=~"$cluster",instance=~'$node'}[5m])
    node_netstat_Tcp_RetransSegs
    node-exporter
    打开的文件描述符(左)/每秒上下文切换次数(右)
    node_filefd_allocated{cluster=~"$cluster",instance=~"$node"}
    node_filefd_allocated
    node-exporter
    irate(node_context_switches_total{cluster=~"$cluster",instance=~"$node"}[5m])
    node_context_switches_total
    node-exporter
    (node_filefd_allocated{cluster=~"$cluster",instance=~"$node"}/node_filefd_maximum{cluster=~"$cluster",instance=~"$node"}) *100
    node_filefd_allocated
    node-exporter
    node_filefd_maximum
    node-exporter

    节点 Pod 监控

    图表名称
    查询语句
    使用的指标
    配置文件
    Pods
    count(kube_pod_info{node=~"$node"})
    kube_pod_info
    kube-state-metrics
    Pod Request Memory
    sum(kube_pod_container_resource_requests_memory_bytes{node=~"$node"})by(node)
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    Pod Request CPU Cores
    sum(kube_pod_container_resource_requests_cpu_cores{node=~"$node"})by(node)
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    CPU Usage
    sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", node=~"$node", container!="POD", container!=""}) by (pod)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    CPU Quota
    sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", node=~"$node", container!="POD", container!=""}) by (pod)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", node=~"$node"}) by (pod)
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", node=~"$node", container!="POD", container!=""}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", node=~"$node"}) by (pod)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", node=~"$node"}) by (pod)
    kube_pod_container_resource_limits_cpu_cores
    kube-state-metrics
    sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", node=~"$node", container!="POD", container!=""}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", node=~"$node"}) by (pod)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    kube_pod_container_resource_limits_cpu_cores
    kube-state-metrics
    Memory Usage
    sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster="$cluster", node=~"$node", container!="", container!="POD"}) by (pod)
    node_namespace_pod_container:container_memory_working_set_bytes
    预聚合指标
    Memory Quota
    sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster="$cluster", node=~"$node",container!="", container!="POD"}) by (pod)
    node_namespace_pod_container:container_memory_working_set_bytes
    预聚合指标
    sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", node=~"$node"}) by (pod)
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster="$cluster", node=~"$node",container!="", container!="POD"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{node=~"$node"}) by (pod)
    node_namespace_pod_container:container_memory_working_set_bytes
    预聚合指标
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", node=~"$node"}) by (pod)
    kube_pod_container_resource_limits_memory_bytes
    kube-state-metrics
    sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster="$cluster", node=~"$node",container!="", container!="POD"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{node=~"$node"}) by (pod)
    node_namespace_pod_container:container_memory_working_set_bytes
    预聚合指标
    kube_pod_container_resource_limits_memory_bytes
    kube-state-metrics
    Pod List
    group (kube_pod_info{host_ip="$node"})by(created_by_kind, created_by_name,host_network,pod_ip,pod,priority_class,namespace)
    kube_pod_info
    kube-state-metrics
    min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() max(kube_pod_status_phase{}==1) by (pod, phase)
    kube_pod_info
    kube-state-metrics
    kube_pod_status_phase
    kube-state-metrics
    min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() sum(container_memory_working_set_bytes) by (pod)
    kube_pod_info
    kube-state-metrics
    container_memory_working_set_bytes
    cadvisor
    min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() sum(rate(container_cpu_usage_seconds_total{image!=""}[5m])) by (pod)
    kube_pod_info
    kube-state-metrics
    container_cpu_usage_seconds_total
    cadvisor
    min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() max(time()-kube_pod_start_time) by (pod)
    kube_pod_info
    kube-state-metrics
    kube_pod_start_time
    kube-state-metrics
    min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) max(kube_pod_status_ready{condition="true"}) by (pod) or on() vector(0)
    kube_pod_info
    kube-state-metrics
    kube_pod_status_ready
    kube-state-metrics
    min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() max(rate(container_network_receive_bytes_total{image!=""}[5m])) by (pod) or on() vector(0)
    kube_pod_info
    kube-state-metrics
    container_network_receive_bytes_total
    cadvisor
    min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() max(rate(container_network_transmit_bytes_total{image!=""}[5m])) by (pod) or on() vector(0)
    kube_pod_info
    kube-state-metrics
    container_network_transmit_bytes_total
    cadvisor
    min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() max(rate(container_fs_reads_bytes_total{container!="POD", container!=""}[5m])) by (pod) or on() vector(0)
    kube_pod_info
    kube-state-metrics
    container_fs_reads_bytes_total
    cadvisor
    min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() max(rate(container_fs_writes_bytes_total{container!="POD", container!=""}[5m])) by (pod) or on() vector(0)
    kube_pod_info
    kube-state-metrics
    container_fs_writes_bytes_total
    cadvisor

    工作负载监控概览

    图表名称
    查询语句
    使用的指标
    配置文件
    CPU Usage
    sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    scalar(kube_resourcequota{cluster="$cluster", namespace="$namespace", type="hard",resource="requests.cpu"})
    kube_resourcequota
    kube-state-metrics
    scalar(kube_resourcequota{cluster="$cluster", namespace="$namespace", type="hard",resource="limits.cpu"})
    kube_resourcequota
    kube-state-metrics
    CPU Quota
    count(namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"}) by (workload, workload_type)
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type) /sum( kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
    kube_pod_container_resource_limits_cpu_cores
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type) /sum( kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    kube_pod_container_resource_limits_cpu_cores
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    Memory Usage
    sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
    container_memory_working_set_bytes
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    scalar(kube_resourcequota{cluster="$cluster", namespace="$namespace", type="hard",resource="requests.memory"})
    kube_resourcequota
    kube-state-metrics
    scalar(kube_resourcequota{cluster="$cluster", namespace="$namespace", type="hard",resource="limits.memory"})
    kube_resourcequota
    kube-state-metrics
    Memory Quota
    count(namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"}) by (workload, workload_type)
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
    container_memory_working_set_bytes
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type) /sum( kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
    container_memory_working_set_bytes
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
    kube_pod_container_resource_limits_memory_bytes
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type) /sum( kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
    container_memory_working_set_bytes
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    kube_pod_container_resource_limits_memory_bytes
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标

    Deployment

    图表名称
    查询语句
    使用的指标
    配置文件
    Age
    time() - max(kube_deployment_created{cluster="$cluster",namespace="$namespace",deployment="$workload"})
    kube_deployment_created
    kube-state-metrics
    Replicas(Pods)-Request
    max(kube_deployment_spec_replicas{deployment="$workload",cluster="$cluster",namespace="$namespace"})
    kube_deployment_spec_replicas
    kube-state-metrics
    Replicas(Pods)-Ready
    max(kube_deployment_status_replicas_ready{deployment="$workload",cluster="$cluster",namespace="$namespace"})
    kube_deployment_status_replicas_ready
    kube-state-metrics
    Replica Trend
    max(kube_deployment_spec_replicas{deployment="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
    kube_deployment_spec_replicas
    kube-state-metrics
    max(kube_deployment_status_replicas{deployment="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
    kube_deployment_status_replicas
    kube-state-metrics
    min(kube_deployment_status_replicas_ready{deployment="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
    kube_deployment_status_replicas_ready
    kube-state-metrics
    min(kube_deployment_status_replicas_available{deployment="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
    kube_deployment_status_replicas_available
    kube-state-metrics
    min(kube_deployment_status_replicas_updated{deployment="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
    kube_deployment_status_replicas_updated
    kube-state-metrics
    min(kube_deployment_status_replicas_unavailable{deployment="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
    kube_deployment_status_replicas_unavailable
    kube-state-metrics
    CPU Usage
    sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    CPU Quota
    sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod) /sum( kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
    kube_pod_container_resource_limits_cpu_cores
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod) /sum( kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    kube_pod_container_resource_limits_cpu_cores
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    CPU Limit-Total
    sum(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() sum(kube_pod_container_resource_limits_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"}) by (pod))
    kube_pod_info
    kube-state-metrics
    kube_replicaset_owner
    kube-state-metrics
    kube_pod_container_resource_limits_cpu_cores
    kube-state-metrics
    CPU Request-Total
    sum(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() sum(kube_pod_container_resource_requests_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"}) by (pod))
    kube_pod_info
    kube-state-metrics
    kube_replicaset_owner
    kube-state-metrics
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    CPU Info
    label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container)
    kube_pod_info
    kube-state-metrics
    kube_replicaset_owner
    kube-state-metrics
    container_cpu_usage_seconds_total
    cadvisor
    max(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}) by (pod, container))by(container)
    kube_pod_info
    kube-state-metrics
    kube_replicaset_owner
    kube-state-metrics
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    max(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}) by (pod, container))by(container)
    kube_pod_info
    kube-state-metrics
    kube_replicaset_owner
    kube-state-metrics
    kube_pod_container_resource_limits_cpu_cores
    kube-state-metrics
    CPU Usage/Limit (%)
    label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container) / max by(container, pod) (kube_pod_container_resource_limits_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"})
    kube_pod_info
    kube-state-metrics
    kube_replicaset_owner
    kube-state-metrics
    container_cpu_usage_seconds_total
    cadvisor
    kube_pod_container_resource_limits_cpu_cores
    kube-state-metrics
    CPU Usage/Request(%)
    label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container) / max by(container, pod) (kube_pod_container_resource_requests_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"})
    kube_pod_info
    kube-state-metrics
    kube_replicaset_owner
    kube-state-metrics
    container_cpu_usage_seconds_total
    cadvisor
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    CPU User Time(%)
    avg(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() (max(rate(container_cpu_user_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod,container) / max(rate(container_cpu_user_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])+rate(container_cpu_system_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod,container))) by (pod,container)
    kube_pod_info
    kube-state-metrics
    kube_replicaset_owner
    kube-state-metrics
    container_cpu_user_seconds_total
    cadvisor
    container_cpu_user_seconds_total
    cadvisor
    container_cpu_system_seconds_total
    cadvisor
    Memory Usage
    sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
    container_memory_working_set_bytes
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    Memory Quota
    sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
    container_memory_working_set_bytes
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod) /sum( kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
    container_memory_working_set_bytes
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
    kube_pod_container_resource_limits_memory_bytes
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod) /sum( kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
    container_memory_working_set_bytes
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    kube_pod_container_resource_limits_memory_bytes
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    Memory Limit-Total
    sum(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() sum(container_spec_memory_limit_bytes{cluster="$cluster",namespace="$namespace",container!=""}) by (pod))
    kube_pod_info
    kube-state-metrics
    kube_replicaset_owner
    kube-state-metrics
    container_spec_memory_limit_bytes
    cadvisor
    Memory Request-Total
    sum(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() sum(kube_pod_container_resource_requests_memory_bytes{resource="memory", cluster="$cluster",namespace="$namespace"}) by (pod))
    kube_pod_info
    kube-state-metrics
    kube_replicaset_owner
    kube-state-metrics
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    Memory Info
    label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max by(container, pod) (container_memory_working_set_bytes{cluster="$cluster",namespace="$namespace", container!="", image!="", container!="POD"})
    kube_pod_info
    kube-state-metrics
    kube_replicaset_owner
    kube-state-metrics
    container_memory_working_set_bytes
    cadvisor
    max(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max by(container, pod) (kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace="$namespace"}))by(container)
    kube_pod_info
    kube-state-metrics
    kube_replicaset_owner
    kube-state-metrics
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    max(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max by(container, pod) (kube_pod_container_resource_limits_memory_bytes{cluster="$cluster",namespace="$namespace"}))by(container)
    kube_pod_info
    kube-state-metrics
    kube_replicaset_owner
    kube-state-metrics
    kube_pod_container_resource_limits_memory_bytes
    kube-state-metrics
    Memory Usage/Limit(%)
    label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max by(container, pod) (container_memory_working_set_bytes{cluster="$cluster",namespace="$namespace", container!="", image!="", container!="POD"})/max by(container, pod) (kube_pod_container_resource_limits_memory_bytes{resource="memory", cluster="$cluster",namespace="$namespace"})
    kube_pod_info
    kube-state-metrics
    kube_replicaset_owner
    kube-state-metrics
    container_memory_working_set_bytes
    cadvisor
    kube_pod_container_resource_limits_memory_bytes
    kube-state-metrics
    Memory Usage/Request(%)
    label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max by(container, pod) (container_memory_working_set_bytes{cluster="$cluster",namespace="$namespace", container!="", image!="", container!="POD"})/max by(container, pod) (kube_pod_container_resource_requests_memory_bytes{resource="memory", cluster="$cluster",namespace="$namespace"})
    kube_pod_info
    kube-state-metrics
    kube_replicaset_owner
    kube-state-metrics
    container_memory_working_set_bytes
    cadvisor
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    Sockets
    sum(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() sum(container_sockets{cluster="$cluster",namespace="$namespace",container!=""}) by (pod))
    kube_pod_info
    kube-state-metrics
    kube_replicaset_owner
    kube-state-metrics
    container_sockets
    cadvisor
    Network In
    sum(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() sum(rate(container_network_receive_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod))
    kube_pod_info
    kube-state-metrics
    kube_replicaset_owner
    kube-state-metrics
    container_network_receive_bytes_total
    cadvisor
    Network Out
    sum(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() sum(rate(container_network_transmit_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod))
    kube_pod_info
    kube-state-metrics
    kube_replicaset_owner
    kube-state-metrics
    container_network_transmit_bytes_total
    cadvisor
    Network Errors
    sum(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() (sum(container_network_receive_errors_total{cluster="$cluster",namespace="$namespace"}) by (pod) + sum(container_network_transmit_errors_total{cluster="$cluster",namespace="$namespace"}) by (pod)))
    kube_pod_info
    kube-state-metrics
    kube_replicaset_owner
    kube-state-metrics
    container_network_receive_errors_total
    cadvisor
    container_network_transmit_errors_total
    cadvisor
    Network IO
    label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(rate(container_network_receive_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod)
    kube_pod_info
    kube-state-metrics
    kube_replicaset_owner
    kube-state-metrics
    container_network_receive_bytes_total
    cadvisor
    label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(rate(container_network_transmit_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod)
    kube_pod_info
    kube-state-metrics
    kube_replicaset_owner
    kube-state-metrics
    container_network_transmit_bytes_total
    cadvisor
    File System Read
    label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(rate(container_fs_reads_bytes_total{cluster="$cluster",namespace="$namespace", container!="POD", container!=""}[5m])) by (pod,container)
    kube_pod_info
    kube-state-metrics
    kube_replicaset_owner
    kube-state-metrics
    container_fs_reads_bytes_total
    cadvisor
    File System Write
    label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(rate(container_fs_writes_bytes_total{cluster="$cluster",namespace="$namespace", container!="POD", container!=""}[5m])) by (pod,container)
    kube_pod_info
    kube-state-metrics
    kube_replicaset_owner
    kube-state-metrics
    container_fs_writes_bytes_total
    cadvisor

    StatefulSet

    图表名称
    查询语句
    使用的指标
    配置文件
    Generation
    max(kube_statefulset_metadata_generation{cluster="$cluster",namespace="$namespace", statefulset="$workload"})
    kube_statefulset_metadata_generation
    kube-state-metrics
    Replicas(Pods)-Request
    max(kube_statefulset_replicas{statefulset="$workload",cluster="$cluster",namespace="$namespace"})
    kube_statefulset_replicas
    kube-state-metrics
    Replicas(Pods)-Ready
    max(kube_statefulset_status_replicas_ready{statefulset="$workload",cluster="$cluster",namespace="$namespace"})
    kube_statefulset_status_replicas_ready
    kube-state-metrics
    Age
    time() - max(kube_statefulset_created{cluster="$cluster",namespace="$namespace",statefulset="$workload"})
    kube_statefulset_created
    kube-state-metrics
    Replica Trend
    max(kube_statefulset_replicas{statefulset="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
    kube_statefulset_replicas
    kube-state-metrics
    max(kube_statefulset_status_replicas{statefulset="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
    kube_statefulset_status_replicas
    kube-state-metrics
    min(kube_statefulset_status_replicas_ready{statefulset="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
    kube_statefulset_status_replicas_ready
    kube-state-metrics
    min(kube_statefulset_status_replicas_available{statefulset="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
    kube_statefulset_status_replicas_available
    kube-state-metrics
    min(kube_statefulset_status_replicas_updated{statefulset="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
    kube_statefulset_status_replicas_updated
    kube-state-metrics
    CPU Usage
    sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    CPU Quota
    sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod) /sum( kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
    kube_pod_container_resource_limits_cpu_cores
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod) /sum( kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    kube_pod_container_resource_limits_cpu_cores
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    CPU Limit-Total
    sum(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() sum(kube_pod_container_resource_limits_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"}) by (pod))
    kube_pod_info
    kube-state-metrics
    kube_pod_container_resource_limits_cpu_cores
    kube-state-metrics
    CPU Request-Total
    sum(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() sum(kube_pod_container_resource_requests_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"}) by (pod))
    kube_pod_info
    kube-state-metrics
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    CPU Info
    group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container,image)
    kube_pod_info
    kube-state-metrics
    container_cpu_usage_seconds_total
    cadvisor
    group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster",namespace="$namespace"}) by (pod, container,image)
    kube_pod_info
    kube-state-metrics
    kube_pod_container_resource_limits_cpu_cores
    kube-state-metrics
    group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace="$namespace"}) by (pod, container,image)
    kube_pod_info
    kube-state-metrics
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    CPU Usage/Limit (%)
    group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container) / max by(container, pod) (kube_pod_container_resource_limits_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"})
    kube_pod_info
    kube-state-metrics
    container_cpu_usage_seconds_total
    cadvisor
    kube_pod_container_resource_limits_cpu_cores
    kube-state-metrics
    CPU Usage/Request(%)
    group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container) / max by(container, pod) (kube_pod_container_resource_requests_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"})
    kube_pod_info
    kube-state-metrics
    container_cpu_usage_seconds_total
    cadvisor
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    CPU User Time(%)
    avg(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() (max(rate(container_cpu_user_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container,image) / max(rate(container_cpu_user_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])+rate(container_cpu_system_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod,container,image))) by (pod,container,image)
    kube_pod_info
    kube-state-metrics
    container_cpu_user_seconds_total
    cadvisor
    container_cpu_user_seconds_total
    cadvisor
    container_cpu_system_seconds_total
    cadvisor
    Memory Usage
    sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
    container_memory_working_set_bytes
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    Memory Quota
    sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
    container_memory_working_set_bytes
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod) /sum( kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
    container_memory_working_set_bytes
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
    kube_pod_container_resource_limits_memory_bytes
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod) /sum( kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
    container_memory_working_set_bytes
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    kube_pod_container_resource_limits_memory_bytes
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    Memory Limit-Total
    sum(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() sum(container_spec_memory_limit_bytes{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}) by (pod))
    kube_pod_info
    kube-state-metrics
    container_spec_memory_limit_bytes
    cadvisor
    Memory Request-Total
    sum(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() sum(kube_pod_container_resource_requests_memory_bytes{resource="memory", cluster="$cluster",namespace="$namespace"}) by (pod))
    kube_pod_info
    kube-state-metrics
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    Memory Info
    avg(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max by(container, pod, image) (container_memory_working_set_bytes{cluster="$cluster",namespace="$namespace", container!="", image!="", container!="POD"}))by (container, pod, image)
    kube_pod_info
    kube-state-metrics
    container_memory_working_set_bytes
    cadvisor
    max(avg(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max by(container, pod, image) (kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace="$namespace"}))by (container, pod))by(container)
    kube_pod_info
    kube-state-metrics
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    max(avg(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max by(container, pod) (kube_pod_container_resource_limits_memory_bytes{cluster="$cluster",namespace="$namespace"}))by (container, pod))by(container)
    kube_pod_info
    kube-state-metrics
    kube_pod_container_resource_limits_memory_bytes
    kube-state-metrics
    Memory Usage/Limit(%)
    group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max by(container, pod) (container_memory_working_set_bytes{cluster="$cluster",namespace="$namespace", container!="", image!="", container!="POD"})/max by(container, pod) (kube_pod_container_resource_limits_memory_bytes{resource="memory", cluster="$cluster",namespace="$namespace"})
    kube_pod_info
    kube-state-metrics
    container_memory_working_set_bytes
    cadvisor
    kube_pod_container_resource_limits_memory_bytes
    kube-state-metrics
    Memory Usage/Request(%)
    group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max by(container, pod) (container_memory_working_set_bytes{cluster="$cluster",namespace="$namespace", container!="", image!="", container!="POD"})/max by(container, pod) (kube_pod_container_resource_requests_memory_bytes{resource="memory", cluster="$cluster",namespace="$namespace"})
    kube_pod_info
    kube-state-metrics
    container_memory_working_set_bytes
    cadvisor
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    Sockets
    sum(sum(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() sum(container_sockets{cluster="$cluster",namespace="$namespace",container!=""}) by (pod))
    kube_pod_info
    kube-state-metrics
    container_sockets
    cadvisor
    Network In
    sum(sum(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() sum(rate(container_network_receive_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod))
    kube_pod_info
    kube-state-metrics
    container_network_receive_bytes_total
    cadvisor
    Network Out
    sum(sum(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() sum(rate(container_network_transmit_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod))
    kube_pod_info
    kube-state-metrics
    container_network_transmit_bytes_total
    cadvisor
    Network Errors
    sum(sum(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() (sum(container_network_receive_errors_total{cluster="$cluster",namespace="$namespace"}) by (pod) + sum(container_network_transmit_errors_total{cluster="$cluster",namespace="$namespace"}) by (pod)))
    kube_pod_info
    kube-state-metrics
    container_network_receive_errors_total
    cadvisor
    container_network_transmit_errors_total
    cadvisor
    Network IO
    sum(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max(rate(container_network_receive_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod)
    kube_pod_info
    kube-state-metrics
    container_network_receive_bytes_total
    cadvisor
    -sum(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max(rate(container_network_transmit_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod)
    kube_pod_info
    kube-state-metrics
    container_network_transmit_bytes_total
    cadvisor

    DaemonSet

    图表名称
    查询语句
    使用的指标
    配置文件
    CPU Usage
    sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    CPU Quota
    sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod) /sum( kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
    kube_pod_container_resource_limits_cpu_cores
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod) /sum( kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    kube_pod_container_resource_limits_cpu_cores
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    Memory Usage
    sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
    container_memory_working_set_bytes
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    Memory Quota
    sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
    container_memory_working_set_bytes
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod) /sum( kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
    container_memory_working_set_bytes
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
    kube_pod_container_resource_limits_memory_bytes
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod) /sum(
    kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
    container_memory_working_set_bytes
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    kube_pod_container_resource_limits_memory_bytes
    kube-state-metrics
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标

    集群 Pod 监控

    图表名称
    查询语句
    使用的指标
    配置文件
    Age
    time() - max(kube_pod_created{pod=~"$pod",cluster="$cluster",namespace="$namespace"})
    kube_pod_created
    kube-state-metrics
    Restart Count-Last 1 Hour
    ceil(sum (increase(kube_pod_container_status_restarts_total{pod=~"$pod",cluster="$cluster",namespace="$namespace"}[1h])))
    kube_pod_container_status_restarts_total
    kube-state-metrics
    Requests-CPU
    sum(kube_pod_container_resource_requests_cpu_cores{pod=~"$pod"}) or vector(0)
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    Requests-Memory
    sum(kube_pod_container_resource_requests_memory_bytes{pod=~"$pod"}) or vector(0)
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    Limits-CPU
    sum(kube_pod_container_resource_limits_cpu_cores{pod=~"$pod"}) or vector(0)
    kube_pod_container_resource_limits_cpu_cores
    kube-state-metrics
    Limits-Memory
    sum(kube_pod_container_resource_limits_memory_bytes{pod=~"$pod"}) or vector(0)
    kube_pod_container_resource_limits_memory_bytes
    kube-state-metrics
    Containers
    group by (image, container,pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace", pod=~"$pod"})
    kube_pod_container_info
    kube-state-metrics
    sum by (container,pod)(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace="$namespace", pod=~"$pod"})
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    sum by (container,pod)(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace="$namespace", pod=~"$pod"})
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    max by (container,pod)(kube_pod_container_status_running{cluster="$cluster",namespace="$namespace", pod=~"$pod"})
    kube_pod_container_status_running
    kube-state-metrics
    sum by (container,pod)(kube_pod_container_resource_limits{resource="cpu",cluster="$cluster",namespace="$namespace", pod=~"$pod"})
    kube_pod_container_resource_limits
    kube-state-metrics
    sum by (container,pod)(kube_pod_container_resource_limits{resource="memory",cluster="$cluster",namespace="$namespace", pod=~"$pod"})
    kube_pod_container_resource_limits
    kube-state-metrics
    max by (container,pod)(kube_pod_container_status_restarts_total{cluster="$cluster",namespace="$namespace", pod=~"$pod"})
    kube_pod_container_status_restarts_total
    kube-state-metrics
    CPU Usage (%)
    max(irate(container_cpu_usage_seconds_total{pod=~"$pod",container!="",container!="POD",cluster="$cluster",namespace=~"$namespace"}[1m])) by (container,namespace,pod) / max(container_spec_cpu_quota{pod=~"$pod",container!="",container!="POD",cluster="$cluster",namespace=~"$namespace"}/100000) by (container,namespace,pod) or on() vector(0)
    container_cpu_usage_seconds_total
    cadvisor
    container_spec_cpu_quota
    cadvisor
    CPU Usage By Cores
    max(irate(container_cpu_usage_seconds_total{pod=~"$pod",container!="",container!="POD",cluster="$cluster",namespace=~"$namespace"}[1m])) by (pod,container,namespace)or on() vector(0)
    container_cpu_usage_seconds_total
    cadvisor
    CPU Load (10s)
    max(container_cpu_load_average_10s{namespace=~"$namespace", pod=~"$pod", container!="", container!="POD"} / 1000)by(pod,container)
    container_cpu_load_average_10s
    cadvisor
    CPU Throttled Percent
    max (rate (container_cpu_cfs_throttled_seconds_total{image!="", container!="", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (container,pod) / max (rate (container_cpu_cfs_periods_total{image!="", container!="", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (container,pod) or on() vector(0)
    container_cpu_cfs_throttled_seconds_total
    cadvisor
    container_cpu_cfs_periods_total
    cadvisor
    CPU Quota
    sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container) / sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    kube_pod_container_resource_requests_cpu_cores
    kube-state-metrics
    sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)
    kube_pod_container_resource_limits_cpu_cores
    kube-state-metrics
    sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container) / sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)
    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    预聚合指标
    kube_pod_container_resource_limits_cpu_cores
    kube-state-metrics
    Memory Usage (WSS)
    max(container_memory_working_set_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container)
    container_memory_working_set_bytes
    cadvisor
    Memory Usage
    max(container_memory_usage_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container)
    container_memory_usage_bytes
    cadvisor
    Memory Usage (RSS)
    max(container_memory_rss{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container) or on() vector(0)
    container_memory_rss
    cadvisor
    Memory Cache
    max(container_memory_cache{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container)
    container_memory_cache
    cadvisor
    Usage WSS/Limit (%)
    (max(container_memory_working_set_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container)/ max(container_spec_memory_limit_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace, container) * 100) <= 100 or on() vector(0)
    container_memory_working_set_bytes
    cadvisor
    container_spec_memory_limit_bytes
    cadvisor
    Usage/Limit (%)
    (max(container_memory_usage_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container)/ max(container_spec_memory_limit_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace, container) * 100) <= 100 or on() vector(0)
    container_memory_usage_bytes
    cadvisor
    container_spec_memory_limit_bytes
    cadvisor
    Usage RSS/Limit (%)
    (max(container_memory_rss{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container)/ max(container_spec_memory_limit_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace, container) * 100) <= 100 or on() vector(0)
    container_memory_rss
    cadvisor
    container_spec_memory_limit_bytes
    cadvisor
    Memory Failcnt
    max (increase(container_memory_failcnt{cluster="$cluster",namespace=~"$namespace", pod=~"$pod", container!=""}[1m])) by (pod,container)
    container_memory_failcnt
    cadvisor
    Memory Quota
    sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)
    container_memory_working_set_bytes
    cadvisor
    sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", pod="$pod", container!="", container!="POD"}) by (container) / sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)
    container_memory_working_set_bytes
    cadvisor
    kube_pod_container_resource_requests_memory_bytes
    kube-state-metrics
    sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container)
    kube_pod_container_resource_limits_memory_bytes
    kube-state-metrics
    sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", pod="$pod", container!="", container!="POD"}) by (container) / sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)
    container_memory_working_set_bytes
    cadvisor
    kube_pod_container_resource_limits_memory_bytes
    kube-state-metrics
    Network Input
    max (rate (container_network_receive_bytes_total{image!="",cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by(pod)
    container_network_receive_bytes_total
    cadvisor
    Network Output
    max (rate (container_network_transmit_bytes_total{image!="",cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m]))by(pod)
    container_network_transmit_bytes_total
    cadvisor
    Network Input Error (%)
    max (increase (container_network_receive_packets_dropped_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface) / max (increase (container_network_receive_packets_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface)
    container_network_receive_packets_dropped_total
    cadvisor
    container_network_receive_packets_total
    cadvisor
    max (increase (container_network_receive_errors_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface) / max (increase (container_network_receive_packets_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface)
    container_network_receive_errors_total
    cadvisor
    container_network_receive_packets_total
    cadvisor
    Network Output Error (%)
    max (increase (container_network_transmit_packets_dropped_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface) / max (increase (container_network_transmit_packets_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface)
    container_network_transmit_packets_dropped_total
    cadvisor
    container_network_transmit_packets_total
    cadvisor
    max (increase (container_network_transmit_errors_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface) / max (increase (container_network_transmit_packets_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface)
    container_network_transmit_errors_total
    cadvisor
    container_network_transmit_packets_total
    cadvisor
    File System Read
    max (rate(container_fs_reads_bytes_total{cluster="$cluster",namespace=~"$namespace", pod=~"$pod", container!=""}[1m]))by (container,pod)
    container_fs_reads_bytes_total
    cadvisor
    File System Write
    max (rate(container_fs_writes_bytes_total{cluster="$cluster",namespace=~"$namespace", pod=~"$pod", container!=""}[1m])) by (container,pod)
    container_fs_writes_bytes_total
    cadvisor
    Network Socket
    max(container_sockets{cluster="$cluster",namespace=~"$namespace", pod=~"$pod", container!=""}) by (container,pod)
    container_sockets
    cadvisor
    Process Number
    max(container_processes{cluster="$cluster",namespace=~"$namespace", pod=~"$pod", container!=""}) by (container,pod)
    container_processes
    cadvisor

    集群网络监控

    图表名称
    查询语句
    使用的指标
    配置文件
    Current Rate of Bytes Received
    sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
    container_network_receive_bytes_total
    cadvisor
    Current Rate of Bytes Transmitted
    sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
    container_network_transmit_bytes_total
    cadvisor
    Current Status
    sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
    container_network_receive_bytes_total
    cadvisor
    sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
    container_network_transmit_bytes_total
    cadvisor
    sort_desc(avg(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
    container_network_receive_bytes_total
    cadvisor
    sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
    container_network_transmit_bytes_total
    cadvisor
    sort_desc(sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
    container_network_receive_packets_total
    cadvisor
    sort_desc(sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
    container_network_transmit_packets_total
    cadvisor
    sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
    container_network_receive_packets_dropped_total
    cadvisor
    sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
    container_network_transmit_packets_dropped_total
    cadvisor
    Average Rate of Bytes Received
    sort_desc(avg(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
    container_network_receive_bytes_total
    cadvisor
    Average Rate of Bytes Transmitted
    sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
    container_network_transmit_bytes_total
    cadvisor
    Receive Bandwidth
    sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
    container_network_receive_bytes_total
    cadvisor
    Transmit Bandwidth
    sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
    container_network_transmit_bytes_total
    cadvisor
    Rate of Received Packets
    sort_desc(sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
    container_network_receive_packets_total
    cadvisor
    Rate of Transmitted Packets
    sort_desc(sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
    container_network_transmit_packets_total
    cadvisor
    Rate of Received Packets Dropped
    sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
    container_network_receive_packets_dropped_total
    cadvisor
    Rate of Transmitted Packets Dropped
    sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
    container_network_transmit_packets_dropped_total
    cadvisor
    Rate of TCP Retransmits out of all sent segments
    sort_desc(sum(rate(node_netstat_Tcp_RetransSegs{cluster=~"$cluster"}[$interval:$resolution]) / rate(node_netstat_Tcp_OutSegs{cluster=~"$cluster"}[$interval:$resolution])) by (instance))
    node_netstat_Tcp_RetransSegs
    node-exporter
    node_netstat_Tcp_OutSegs
    node-exporter
    Rate of TCP SYN Retransmits out of all retransmits
    sort_desc(sum(rate(node_netstat_TcpExt_TCPSynRetrans{cluster=~"$cluster"}[$interval:$resolution]) / rate(node_netstat_Tcp_RetransSegs{cluster=~"$cluster"}[$interval:$resolution])) by (instance))
    node_netstat_TcpExt_TCPSynRetrans
    node-exporter
    node_netstat_Tcp_RetransSegs
    node-exporter

    命名空间 Pods 网络监控

    图表名称
    查询语句
    使用的指标
    配置文件
    Current Rate of Bytes Received
    sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]))
    container_network_receive_bytes_total
    cadvisor
    Current Rate of Bytes Transmitted
    sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]))
    container_network_transmit_bytes_total
    cadvisor
    Current Status
    sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
    container_network_receive_bytes_total
    cadvisor
    sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
    container_network_transmit_bytes_total
    cadvisor
    sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
    container_network_receive_packets_total
    cadvisor
    sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
    container_network_transmit_packets_total
    cadvisor
    sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
    container_network_receive_packets_dropped_total
    cadvisor
    sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
    container_network_transmit_packets_dropped_total
    cadvisor
    Receive Bandwidth
    sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
    container_network_receive_bytes_total
    cadvisor
    Transmit Bandwidth
    sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
    container_network_transmit_bytes_total
    cadvisor
    Rate of Received Packets
    sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
    container_network_receive_packets_total
    cadvisor
    Rate of Transmitted Packets
    sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
    container_network_transmit_packets_total
    cadvisor
    Rate of Received Packets Dropped
    sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
    container_network_receive_packets_dropped_total
    cadvisor
    Rate of Transmitted Packets Dropped
    sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster", namespace=~"$namespace"}[5m])) by (pod)
    container_network_transmit_packets_dropped_total
    cadvisor

    命名空间工作负载网络监控

    图表名称
    查询语句
    使用的指标
    配置文件
    Current Rate of Bytes Received
    sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
    container_network_receive_bytes_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    Current Rate of Bytes Transmitted
    sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
    container_network_transmit_bytes_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    Current Status
    sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
    container_network_receive_bytes_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
    container_network_transmit_bytes_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sort_desc(avg(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
    container_network_receive_bytes_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
    container_network_transmit_bytes_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sort_desc(sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
    container_network_receive_packets_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sort_desc(sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
    container_network_transmit_packets_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
    container_network_receive_packets_dropped_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
    container_network_transmit_packets_dropped_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    Average Rate of Bytes Received
    sort_desc(avg(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
    container_network_receive_bytes_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    Average Rate of Bytes Transmitted
    sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
    container_network_transmit_bytes_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    Receive Bandwidth
    sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
    container_network_receive_bytes_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    Transmit Bandwidth
    sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
    container_network_transmit_bytes_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    Rate of Received Packets
    sort_desc(sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
    container_network_receive_packets_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    Rate of Transmitted Packets
    sort_desc(sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
    container_network_transmit_packets_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    Rate of Received Packets Dropped
    sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
    container_network_receive_packets_dropped_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    Rate of Transmitted Packets Dropped
    sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
    container_network_transmit_packets_dropped_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标

    Pod 网络监控

    图表名称
    查询语句
    使用的指标
    配置文件
    Current Rate of Bytes Received
    sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m]))
    container_network_receive_bytes_total
    cadvisor
    Current Rate of Bytes Transmitted
    sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m]))
    container_network_transmit_bytes_total
    cadvisor
    Receive Bandwidth
    sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m])) by (pod)
    container_network_receive_bytes_total
    cadvisor
    Transmit Bandwidth
    sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m])) by (pod)
    container_network_transmit_bytes_total
    cadvisor
    Rate of Received Packets
    sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m])) by (pod)
    container_network_receive_packets_total
    cadvisor
    Rate of Transmitted Packets
    sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m])) by (pod)
    container_network_transmit_packets_total
    cadvisor
    Rate of Received Packets Dropped
    sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m])) by (pod)
    container_network_receive_packets_dropped_total
    cadvisor
    Rate of Transmitted Packets Dropped
    sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m])) by (pod)
    container_network_transmit_packets_dropped_total
    cadvisor

    工作负载网络监控

    图表名称
    查询语句
    使用的指标
    配置文件
    Current Rate of Bytes Received
    sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
    container_network_receive_bytes_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    Current Rate of Bytes Transmitted
    sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
    container_network_transmit_bytes_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    Average Rate of Bytes Received
    sort_desc(avg(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
    container_network_receive_bytes_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    Average Rate of Bytes Transmitted
    sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
    container_network_transmit_bytes_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    Receive Bandwidth
    sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
    container_network_receive_bytes_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    Transmit Bandwidth
    sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
    container_network_transmit_bytes_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    
    
    Rate of Received Packets
    sort_desc(sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
    container_network_receive_packets_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    Rate of Transmitted Packets
    sort_desc(sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
    container_network_transmit_packets_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    Rate of Received Packets Dropped
    sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
    * on (namespace,pod)
    group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
    container_network_receive_packets_dropped_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标
    Rate of Transmitted Packets Dropped
    sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
    * on (namespace,pod)
    group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
    container_network_transmit_packets_dropped_total
    cadvisor
    namespace_workload_pod:kube_pod_owner:relabel
    预聚合指标

    PVC 存储监控

    图表名称
    查询语句
    使用的指标
    配置文件
    Volume Space Usage
    ( sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"}) - sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"}) )
    kubelet_volume_stats_capacity_bytes
    kubelet
    kubelet_volume_stats_available_bytes
    kubelet
    sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"})
    kubelet_volume_stats_available_bytes
    kubelet
    Volume Space Usage
    ( kubelet_volume_stats_capacity_bytes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"} - kubelet_volume_stats_available_bytes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"} ) / kubelet_volume_stats_capacity_bytes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"} * 100
    kubelet_volume_stats_capacity_bytes
    kubelet
    kubelet_volume_stats_available_bytes
    kubelet
    kubelet_volume_stats_capacity_bytes
    kubelet
    Volume inodes Usage
    sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"})
    kubelet_volume_stats_inodes_used
    kubelet
    ( sum without(instance, node) (kubelet_volume_stats_inodes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"}) - sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"}) )
    kubelet_volume_stats_inodes
    kubelet
    kubelet_volume_stats_inodes_used
    kubelet
    Volume inodes Usage
    kubelet_volume_stats_inodes_used{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"} / kubelet_volume_stats_inodes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"} * 100
    kubelet_volume_stats_inodes_used
    kubelet
    kubelet_volume_stats_inodes
    kubelet
    
    联系我们

    联系我们,为您的业务提供专属服务。

    技术支持

    如果您想寻求进一步的帮助,请通过工单与我们联络。我们提供 7x24 小时的工单服务。

    7x24 电话支持