mirror of
https://github.com/torvalds/linux.git
synced 2025-12-07 20:06:24 +00:00
hung_task: add hung_task_sys_info sysctl to dump sys info on task-hung
When task-hung happens, developers may need different kinds of system information (call-stacks, memory info, locks, etc.) to help debugging. Add 'hung_task_sys_info' sysctl knob to take human readable string like "tasks,mem,timers,locks,ftrace,...", and when task-hung happens, all requested information will be dumped. (refer kernel/sys_info.c for more details). Meanwhile, the newly introduced sys_info() call is used to unify some existing info-dumping knobs. [feng.tang@linux.alibaba.com: maintain consistecy established behavior, per Lance and Petr] Link: https://lkml.kernel.org/r/aRncJo1mA5Zk77Hr@U-2FWC9VHC-2323.local Link: https://lkml.kernel.org/r/20251113111039.22701-3-feng.tang@linux.alibaba.com Signed-off-by: Feng Tang <feng.tang@linux.alibaba.com> Suggested-by: Petr Mladek <pmladek@suse.com> Reviewed-by: Petr Mladek <pmladek@suse.com> Reviewed-by: Lance Yang <lance.yang@linux.dev> Cc: Jonathan Corbet <corbet@lwn.net> Cc: "Paul E . McKenney" <paulmck@kernel.org> Cc: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
@@ -422,6 +422,11 @@ the system boot.
|
||||
|
||||
This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
|
||||
|
||||
hung_task_sys_info
|
||||
==================
|
||||
A comma separated list of extra system information to be dumped when
|
||||
hung task is detected, for example, "tasks,mem,timers,locks,...".
|
||||
Refer 'panic_sys_info' section below for more details.
|
||||
|
||||
hung_task_timeout_secs
|
||||
======================
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include <linux/sched/sysctl.h>
|
||||
#include <linux/hung_task.h>
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/sys_info.h>
|
||||
|
||||
#include <trace/events/sched.h>
|
||||
|
||||
@@ -59,12 +60,17 @@ static unsigned long __read_mostly sysctl_hung_task_check_interval_secs;
|
||||
static int __read_mostly sysctl_hung_task_warnings = 10;
|
||||
|
||||
static int __read_mostly did_panic;
|
||||
static bool hung_task_show_lock;
|
||||
static bool hung_task_call_panic;
|
||||
static bool hung_task_show_all_bt;
|
||||
|
||||
static struct task_struct *watchdog_task;
|
||||
|
||||
/*
|
||||
* A bitmask to control what kinds of system info to be printed when
|
||||
* a hung task is detected, it could be task, memory, lock etc. Refer
|
||||
* include/linux/sys_info.h for detailed bit definition.
|
||||
*/
|
||||
static unsigned long hung_task_si_mask;
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* Should we dump all CPUs backtraces in a hung task event?
|
||||
@@ -236,7 +242,6 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout,
|
||||
|
||||
if (sysctl_hung_task_panic && total_hung_task >= sysctl_hung_task_panic) {
|
||||
console_verbose();
|
||||
hung_task_show_lock = true;
|
||||
hung_task_call_panic = true;
|
||||
}
|
||||
|
||||
@@ -259,10 +264,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout,
|
||||
" disables this message.\n");
|
||||
sched_show_task(t);
|
||||
debug_show_blocker(t, timeout);
|
||||
hung_task_show_lock = true;
|
||||
|
||||
if (sysctl_hung_task_all_cpu_backtrace)
|
||||
hung_task_show_all_bt = true;
|
||||
if (!sysctl_hung_task_warnings)
|
||||
pr_info("Future hung task reports are suppressed, see sysctl kernel.hung_task_warnings\n");
|
||||
}
|
||||
@@ -304,6 +306,8 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
|
||||
unsigned long last_break = jiffies;
|
||||
struct task_struct *g, *t;
|
||||
unsigned long prev_detect_count = sysctl_hung_task_detect_count;
|
||||
int need_warning = sysctl_hung_task_warnings;
|
||||
unsigned long si_mask = hung_task_si_mask;
|
||||
|
||||
/*
|
||||
* If the system crashed already then all bets are off,
|
||||
@@ -312,7 +316,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
|
||||
if (test_taint(TAINT_DIE) || did_panic)
|
||||
return;
|
||||
|
||||
hung_task_show_lock = false;
|
||||
|
||||
rcu_read_lock();
|
||||
for_each_process_thread(g, t) {
|
||||
|
||||
@@ -328,14 +332,19 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
|
||||
}
|
||||
unlock:
|
||||
rcu_read_unlock();
|
||||
if (hung_task_show_lock)
|
||||
debug_show_all_locks();
|
||||
|
||||
if (hung_task_show_all_bt) {
|
||||
hung_task_show_all_bt = false;
|
||||
trigger_all_cpu_backtrace();
|
||||
if (!(sysctl_hung_task_detect_count - prev_detect_count))
|
||||
return;
|
||||
|
||||
if (need_warning || hung_task_call_panic) {
|
||||
si_mask |= SYS_INFO_LOCKS;
|
||||
|
||||
if (sysctl_hung_task_all_cpu_backtrace)
|
||||
si_mask |= SYS_INFO_ALL_BT;
|
||||
}
|
||||
|
||||
sys_info(si_mask);
|
||||
|
||||
if (hung_task_call_panic)
|
||||
panic("hung_task: blocked tasks");
|
||||
}
|
||||
@@ -434,6 +443,13 @@ static const struct ctl_table hung_task_sysctls[] = {
|
||||
.mode = 0444,
|
||||
.proc_handler = proc_doulongvec_minmax,
|
||||
},
|
||||
{
|
||||
.procname = "hung_task_sys_info",
|
||||
.data = &hung_task_si_mask,
|
||||
.maxlen = sizeof(hung_task_si_mask),
|
||||
.mode = 0644,
|
||||
.proc_handler = sysctl_sys_info_handler,
|
||||
},
|
||||
};
|
||||
|
||||
static void __init hung_task_sysctl_init(void)
|
||||
|
||||
Reference in New Issue
Block a user