可直接复制以下命令到容器内shell中执行:
# Write the monitoring script to /tmp/mem-monitor.sh. The heredoc delimiter is
# quoted ("EOF"), so the script text is stored verbatim: the invoking shell
# performs no variable or command expansion on it.
cat << "EOF" > /tmp/mem-monitor.sh
#!/bin/bash
# Samples memory (RSS) and CPU usage of java/node processes and prints a
# change summary when the monitoring window ends.
#
# Usage: mem-monitor.sh [DURATION] [INTERVAL]
#   DURATION  total monitoring time in whole seconds (default: 60)
#   INTERVAL  pause between samples, handed to sleep (default: 5)

# Checks that the two command-line values are usable.
#   $1 - duration: must be a non-negative integer. It is compared with integer
#        arithmetic, and a non-numeric value would keep the sampling loop from
#        ever reaching its exit condition.
#   $2 - interval: a number, optionally fractional, optionally followed by one
#        of the s/m/h/d unit suffixes understood by GNU sleep.
# Returns 0 when both values are acceptable, 1 otherwise.
validate_args() {
  [[ $1 =~ ^[0-9]+$ ]] || return 1
  [[ $2 =~ ^[0-9]+([.][0-9]+)?[smhd]?$ ]] || return 1
  return 0
}

DURATION=${1:-60}
INTERVAL=${2:-5}

validate_args "$DURATION" "$INTERVAL" || {
  echo "用法: $0 [监控时长(秒, 整数)] [采集间隔(秒)]" >&2
  exit 2
}
# Detects how many CPU cores the container may use.
#   $1 - cgroup mount point (optional, default /sys/fs/cgroup)
# Prints the limit as a number without trailing zeros (for example 2, 0.5 or
# 1.5). The fractional part is kept on purpose: rounding a quota below half a
# core to 0 would make every later "divide by cores" step fail.
# Sources, in order:
#   1. cgroup v1: cpu/cpu.cfs_quota_us and cpu/cpu.cfs_period_us
#   2. cgroup v2: cpu.max, whose single line is "<quota|max> <period>"
#   3. nproc, or 1 when nproc is unavailable (also used when no quota is set)
detect_cpu_cores() {
  local root=${1:-/sys/fs/cgroup}
  local quota="" period=""

  if [ -f "$root/cpu/cpu.cfs_quota_us" ] && [ -f "$root/cpu/cpu.cfs_period_us" ]; then
    quota=$(cat "$root/cpu/cpu.cfs_quota_us" 2>/dev/null)
    period=$(cat "$root/cpu/cpu.cfs_period_us" 2>/dev/null)
  elif [ -f "$root/cpu.max" ]; then
    read -r quota period < "$root/cpu.max"
  fi

  # "-1" (v1), "max" (v2) and empty values all mean that no quota applies.
  [[ $quota =~ ^[0-9]+$ ]] || quota=0
  [[ $period =~ ^[0-9]+$ ]] || period=0

  if [ "$quota" -gt 0 ] && [ "$period" -gt 0 ]; then
    awk -v q="$quota" -v p="$period" "BEGIN { printf \"%g\", q / p }"
  else
    nproc 2>/dev/null || echo 1
  fi
}

CPU_CORES=$(detect_cpu_cores)
# Prints the cumulative CPU time consumed by the container, in nanoseconds.
#   $1 - cgroup mount point (optional, default /sys/fs/cgroup)
# Sources, in order:
#   1. cgroup v1: cpuacct/cpuacct.usage or cpu/cpuacct.usage (nanoseconds)
#   2. cgroup v2: the usage_usec field of cpu.stat (microseconds, converted)
# Prints 0 when none of these files exists.
get_cpu_usage() {
  local root=${1:-/sys/fs/cgroup}
  local usec

  if [ -f "$root/cpuacct/cpuacct.usage" ]; then
    cat "$root/cpuacct/cpuacct.usage"
  elif [ -f "$root/cpu/cpuacct.usage" ]; then
    cat "$root/cpu/cpuacct.usage"
  elif [ -f "$root/cpu.stat" ]; then
    usec=$(awk "\$1 == \"usage_usec\" { print \$2; exit }" "$root/cpu.stat")
    echo "$(( ${usec:-0} * 1000 ))"
  else
    echo "0"
  fi
}
# Start-up banner: title, the effective duration/interval, the detected CPU
# allowance, the start time, and a prompt telling the operator to trigger the
# workload while sampling runs. One printf call emits each argument on its
# own line.
printf "%s\n" \
  "==========================================" \
  " 进程内存 & CPU 监控脚本" \
  "==========================================" \
  "监控时长: ${DURATION}秒 | 采集间隔: ${INTERVAL}秒" \
  "容器 CPU: ${CPU_CORES} 核 (100% = 用满配额)" \
  "开始时间: $(date +"%Y-%m-%d %H:%M:%S")" \
  "==========================================" \
  "" \
  ">>> 请在监控期间触发你的操作 <<<" \
  ""
# Per-run CSV scratch file. Every sample is appended to it and the summary at
# the end of the script is computed from it.
# mktemp picks an unpredictable name; the PID-based name is only a fallback
# for systems without mktemp.
TMP_FILE=$(mktemp /tmp/mem_monitor_XXXXXX 2>/dev/null) || TMP_FILE="/tmp/mem_monitor_$$.csv"
# Remove the scratch file when the script exits so repeated runs do not leave
# files behind in /tmp.
trap "rm -f -- \"\$TMP_FILE\"" EXIT
echo "timestamp,pid,rss_kb,cpu_raw,cmd" > "$TMP_FILE"
# Record the container memory usage at start so the final report can show how
# much it changed. The cgroup v1 file is tried first, then the cgroup v2 one.
for mem_file in /sys/fs/cgroup/memory/memory.usage_in_bytes /sys/fs/cgroup/memory.current; do
  if [ -f "$mem_file" ]; then
    container_usage=$(cat "$mem_file")
    echo "容器内存基线: $((container_usage/1024/1024))MB"
    break
  fi
done

# Initialise the CPU baseline: cumulative CPU time plus the wall-clock time
# (both in nanoseconds) that the first snapshot is measured against.
PREV_CPU_USAGE=$(get_cpu_usage)
PREV_TIME=$(date +%s%N)
# Not every date implementation expands %N. When the result is not purely
# numeric, fall back to whole seconds scaled to nanoseconds so the later
# integer arithmetic still works.
[[ $PREV_TIME =~ ^[0-9]+$ ]] || PREV_TIME=$(( $(date +%s) * 1000000000 ))
# Converts a size in KB to MB with one decimal place.
#   $1 - size in KB; may be negative, and an empty value counts as 0
# Prints the result without a trailing newline.
# The value is handed to awk with -v instead of being pasted into the program
# text, so an empty or unexpected argument cannot break the awk syntax.
to_mb() {
  awk -v kb="${1:-0}" "BEGIN { printf \"%.1f\", kb / 1024 }"
}
# Computes the container-wide CPU utilisation since the previous sample.
#   $1 - current cumulative CPU time in nanoseconds
#   $2 - current wall-clock time in nanoseconds
# Globals read: PREV_CPU_USAGE, PREV_TIME (previous sample), CPU_CORES
# Prints the percentage with one decimal, where 100 means the whole CPU
# allowance was used: (cpu_time_delta / wall_time_delta) / CPU_CORES * 100.
# Prints 0.0 when no wall-clock time has passed.
calc_container_cpu() {
  local current_usage=$1
  local current_time=$2
  local usage_diff=$(( current_usage - PREV_CPU_USAGE ))
  local time_diff=$(( current_time - PREV_TIME ))

  if [ "$time_diff" -gt 0 ]; then
    # A missing or zero core count is treated as 1 to avoid dividing by zero.
    awk -v used="$usage_diff" -v elapsed="$time_diff" -v cores="$CPU_CORES" \
      "BEGIN { if (cores + 0 <= 0) cores = 1; printf \"%.1f\", (used / elapsed) / cores * 100 }"
  else
    echo "0.0"
  fi
}
# Prints one sampling snapshot and appends its rows to the CSV scratch file.
#   $1 - timestamp shown in the header and stored with every CSV row
#   $2 - label describing this snapshot
# Globals read:    CPU_CORES, TMP_FILE
# Globals written: PREV_CPU_USAGE, PREV_TIME (baseline for the next snapshot)
# Output: a header with the container memory (when a cgroup memory file is
#         present) and container CPU, then up to 20 java/node processes
#         ordered by CPU, each with RSS in MB and CPU relative to CPU_CORES.
show_snapshot() {
  local timestamp=$1
  local label=$2
  local current_cpu_usage current_time container_cpu
  local mem_file usage=""

  # Container-wide CPU used since the previous snapshot.
  current_cpu_usage=$(get_cpu_usage)
  current_time=$(date +%s%N)
  # Fall back to whole seconds when this date does not expand %N.
  [[ $current_time =~ ^[0-9]+$ ]] || current_time=$(( $(date +%s) * 1000000000 ))
  container_cpu=$(calc_container_cpu "$current_cpu_usage" "$current_time")

  # Move the baseline forward so the next snapshot measures its own interval.
  PREV_CPU_USAGE=$current_cpu_usage
  PREV_TIME=$current_time

  echo "[$timestamp] $label"
  echo "--------------------------------------------------------------------"

  # cgroup v1 memory file first, then the cgroup v2 equivalent.
  for mem_file in /sys/fs/cgroup/memory/memory.usage_in_bytes /sys/fs/cgroup/memory.current; do
    if [ -f "$mem_file" ]; then
      usage=$(cat "$mem_file")
      break
    fi
  done
  if [ -n "$usage" ]; then
    echo " 容器内存: $((usage/1024/1024))MB | 容器总CPU: ${container_cpu}%"
  else
    echo " 容器总CPU: ${container_cpu}%"
  fi

  printf " %-8s %10s %12s %s\n" "PID" "RSS(MB)" "CPU(实际%)" "命令"
  printf " %-8s %10s %12s %s\n" "---" "-------" "----------" "----"

  # NOTE(review): the procps ps manual describes %cpu as CPU time divided by
  # the time the process has been running, i.e. a lifetime average rather than
  # usage within this sampling interval - confirm this is what is wanted.
  # The loop runs in a pipeline subshell, so its variables stay private.
  ps -eo pid,rss,%cpu,args --sort=-%cpu \
    | grep -iE "java|node" \
    | grep -v "grep" \
    | head -20 \
    | while read -r pid rss cpu_raw cmd; do
        # An RSS of 0 means there is nothing to report for this entry.
        [ "$rss" = "0" ] && continue
        cmd=${cmd:0:55}

        # ps counts one fully used core as 100, so divide by the core
        # allowance to express usage relative to the container quota.
        cpu_real=$(awk -v c="$cpu_raw" -v n="$CPU_CORES" \
          "BEGIN { if (n + 0 <= 0) n = 1; printf \"%.1f\", c / n }")

        cpu_mark=""
        if awk -v v="$cpu_real" "BEGIN { exit (v > 80) ? 0 : 1 }"; then
          cpu_mark="🔴"
        elif awk -v v="$cpu_real" "BEGIN { exit (v > 50) ? 0 : 1 }"; then
          cpu_mark="🟡"
        fi

        printf " %-8s %10s %10s%% %s %s\n" \
          "$pid" "$(to_mb "$rss")" "$cpu_real" "$cpu_mark" "$cmd"

        # Commas would split the CSV row, so they are stored as semicolons.
        printf "%s\n" "$timestamp,$pid,$rss,$cpu_raw,${cmd//,/;}" >> "$TMP_FILE"
      done
  echo ""
}
# Sampling loop: one snapshot immediately, then one after every INTERVAL
# until DURATION seconds have passed since START_TIME.
START_TIME=$(date +%s)
ITERATION=0
show_snapshot "$(date +"%H:%M:%S")" "开始"

while true; do
  CURRENT_TIME=$(date +%s)
  ELAPSED=$((CURRENT_TIME - START_TIME))
  # Continue only while it can be positively confirmed that time remains. An
  # unset or malformed DURATION makes the test fail, which ends the loop
  # instead of letting it run forever.
  (( ELAPSED < DURATION )) 2>/dev/null || break
  # Stop rather than spin without pausing if sleep rejects the interval.
  sleep "$INTERVAL" || break
  ITERATION=$((ITERATION + 1))
  # Refresh the elapsed time after sleeping so the label matches the moment
  # the snapshot is actually taken.
  ELAPSED=$(( $(date +%s) - START_TIME ))
  show_snapshot "$(date +"%H:%M:%S")" "第 ${ITERATION} 次采集 (${ELAPSED}s)"
done
echo "=========================================="
echo " 变化分析"
echo "=========================================="
echo ""

# Per-PID aggregates built from the CSV samples:
#   first_rss / last_rss / max_rss - RSS in KB at first sample, last sample, peak
#   cpu_sum / cpu_count            - sum of raw ps %cpu values and sample count
declare -A first_rss last_rss max_rss cpu_sum cpu_count

# Reads the sample CSV and fills the per-PID aggregate arrays.
#   $1 - path of the CSV (columns: timestamp,pid,rss_kb,cpu_raw,cmd)
# The header row and rows without a PID or a numeric RSS are skipped.
# Returns 0 without touching the arrays when the file cannot be read.
load_samples() {
  local csv=$1
  local timestamp pid rss cpu_raw cmd

  [ -r "$csv" ] || return 0
  while IFS=, read -r timestamp pid rss cpu_raw cmd; do
    [ "$pid" = "pid" ] && continue
    [ -n "$pid" ] || continue
    [[ $rss =~ ^[0-9]+$ ]] || continue

    [ -z "${first_rss[$pid]}" ] && first_rss[$pid]=$rss
    last_rss[$pid]=$rss
    if [ -z "${max_rss[$pid]}" ] || [ "$rss" -gt "${max_rss[$pid]}" ]; then
      max_rss[$pid]=$rss
    fi
    # %cpu values are decimals, so the running sum is kept by awk.
    cpu_sum[$pid]=$(awk -v a="${cpu_sum[$pid]:-0}" -v b="$cpu_raw" "BEGIN { print a + b }")
    cpu_count[$pid]=$(( ${cpu_count[$pid]:-0} + 1 ))
  done < "$csv"
}

# Prints one table row per PID: start, end and peak RSS in MB, the average CPU
# relative to CPU_CORES, and the RSS change with a severity marker
# (more than +10 MB red, more than +5 MB yellow, below -5 MB green, else white).
# Globals read: the aggregate arrays above and CPU_CORES.
print_change_table() {
  local key start end max change avg_cpu_real marker

  printf "%-8s %10s %10s %10s %12s %s\n" "PID" "开始(MB)" "结束(MB)" "最大(MB)" "平均CPU(实际%)" "内存变化"
  printf "%-8s %10s %10s %10s %12s %s\n" "---" "--------" "--------" "--------" "------------" "--------"

  for key in "${!first_rss[@]}"; do
    start=${first_rss[$key]}
    end=${last_rss[$key]}
    max=${max_rss[$key]}
    change=$((end - start))
    # Average and core scaling are done in one step so rounding happens once.
    avg_cpu_real=$(awk -v s="${cpu_sum[$key]}" -v c="${cpu_count[$key]}" -v n="$CPU_CORES" \
      "BEGIN { if (c + 0 <= 0) c = 1; if (n + 0 <= 0) n = 1; printf \"%.1f\", s / c / n }")

    if [ "$change" -gt 10240 ]; then
      marker="🔴 +$(to_mb "$change")"
    elif [ "$change" -gt 5120 ]; then
      marker="🟡 +$(to_mb "$change")"
    elif [ "$change" -lt -5120 ]; then
      marker="🟢 $(to_mb "$change")"
    else
      marker="⚪ $(to_mb "$change")"
    fi

    printf "%-8s %10s %10s %10s %12s %sMB\n" \
      "$key" "$(to_mb "$start")" "$(to_mb "$end")" "$(to_mb "$max")" "${avg_cpu_real}%" "$marker"
  done
}

load_samples "$TMP_FILE"
print_change_table
echo ""

# Prints how much the container memory usage changed since the baseline.
#   $1 - baseline usage in bytes (recorded when monitoring started)
#   $2 - cgroup file holding the current usage in bytes
# Prints nothing when the baseline is missing, the file does not exist, or
# the file content is not a plain number. The difference is reported in whole MB.
print_container_mem_change() {
  local baseline=$1
  local usage_file=$2
  local current

  [[ $baseline =~ ^[0-9]+$ ]] || return 0
  [ -f "$usage_file" ] || return 0
  current=$(cat "$usage_file")
  [[ $current =~ ^[0-9]+$ ]] || return 0
  echo "容器内存变化: $(( (current - baseline) / 1024 / 1024 ))MB"
}

# cgroup v1 memory file first, then the cgroup v2 equivalent.
for mem_file in /sys/fs/cgroup/memory/memory.usage_in_bytes /sys/fs/cgroup/memory.current; do
  if [ -f "$mem_file" ]; then
    print_container_mem_change "${container_usage:-}" "$mem_file"
    break
  fi
done
echo ""
echo "========== 结论 =========="

# Prints the final verdict from the per-PID aggregates.
# Globals read: first_rss, last_rss, cpu_sum, cpu_count (associative arrays
#               keyed by PID) and CPU_CORES.
# Output: the PID with the largest RSS growth and the PID with the highest
#         average CPU (relative to CPU_CORES), followed by warnings when the
#         growth exceeds 10 MB or the CPU exceeds 80% / 100% of the allowance.
#         When no samples exist, or no process grew / used CPU, that is stated
#         instead of printing an empty PID.
print_conclusion() {
  local key increase avg_real
  local max_increase=0 max_mem_pid=""
  local max_cpu=0 max_cpu_pid=""

  if [ "${#first_rss[@]}" -eq 0 ]; then
    echo "⚪ 未采集到 java/node 进程样本"
    return 0
  fi

  for key in "${!first_rss[@]}"; do
    increase=$(( ${last_rss[$key]:-0} - ${first_rss[$key]:-0} ))
    if [ "$increase" -gt "$max_increase" ]; then
      max_increase=$increase
      max_mem_pid=$key
    fi

    avg_real=$(awk -v s="${cpu_sum[$key]:-0}" -v c="${cpu_count[$key]:-0}" -v n="$CPU_CORES" \
      "BEGIN { if (c + 0 <= 0) c = 1; if (n + 0 <= 0) n = 1; printf \"%.1f\", s / c / n }")
    if awk -v a="$avg_real" -v b="$max_cpu" "BEGIN { exit (a > b) ? 0 : 1 }"; then
      max_cpu=$avg_real
      max_cpu_pid=$key
    fi
  done

  if [ -n "$max_mem_pid" ]; then
    echo "📊 内存增长最多: PID $max_mem_pid (+$(to_mb "$max_increase")MB)"
  else
    echo "📊 内存增长最多: 无 (没有进程出现内存增长)"
  fi
  if [ -n "$max_cpu_pid" ]; then
    echo "📊 CPU 占用最高: PID $max_cpu_pid (${max_cpu}% 容器配额)"
  else
    echo "📊 CPU 占用最高: 无 (所有进程平均 CPU 为 0)"
  fi

  if [ "$max_increase" -gt 10240 ]; then
    echo ""
    echo "🔴 PID $max_mem_pid 内存增长较大,可能存在泄漏"
  fi
  if awk -v v="$max_cpu" "BEGIN { exit (v > 80) ? 0 : 1 }"; then
    echo "🔴 PID $max_cpu_pid CPU 占用超过 80%,检查是否有死循环或密集计算"
  fi
  if awk -v v="$max_cpu" "BEGIN { exit (v > 100) ? 0 : 1 }"; then
    echo "🔴🔴 PID $max_cpu_pid CPU 已超配额,会被 K8s 限流!"
  fi
}

print_conclusion
echo "=========================================="
echo "监控完成: $(date +"%Y-%m-%d %H:%M:%S")"
echo "=========================================="
EOF
# Make the script executable and run it. The first argument (60) is the
# monitoring duration in seconds, the second (5) the sampling interval in seconds.
chmod +x /tmp/mem-monitor.sh && /tmp/mem-monitor.sh 60 5