Merge tag 'mm-nonmm-stable-2025-10-02-15-29' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull non-MM updates from Andrew Morton:

 - "ida: Remove the ida_simple_xxx() API" from Christophe Jaillet
   completes the removal of this legacy IDR API

 - "panic: introduce panic status function family" from Jinchao Wang
   provides a number of cleanups to the panic code and its various
   helpers, which were rather ad-hoc and scattered all over the place

 - "tools/delaytop: implement real-time keyboard interaction support"
   from Fan Yu adds a few nice user-facing usability changes to the
   delaytop monitoring tool

 - "efi: Fix EFI boot with kexec handover (KHO)" from Evangelos
   Petrongonas fixes a panic which was happening with the combination of
   EFI and KHO

 - "Squashfs: performance improvement and a sanity check" from Phillip
   Lougher teaches squashfs's lseek() about SEEK_DATA/SEEK_HOLE. A mere
   150x speedup was measured for a well-chosen microbenchmark

 - plus another 50-odd singleton patches all over the place

* tag 'mm-nonmm-stable-2025-10-02-15-29' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (75 commits)
  Squashfs: reject negative file sizes in squashfs_read_inode()
  kallsyms: use kmalloc_array() instead of kmalloc()
  MAINTAINERS: update Sibi Sankar's email address
  Squashfs: add SEEK_DATA/SEEK_HOLE support
  Squashfs: add additional inode sanity checking
  lib/genalloc: fix device leak in of_gen_pool_get()
  panic: remove CONFIG_PANIC_ON_OOPS_VALUE
  ocfs2: fix double free in user_cluster_connect()
  checkpatch: suppress strscpy warnings for userspace tools
  cramfs: fix incorrect physical page address calculation
  kernel: prevent prctl(PR_SET_PDEATHSIG) from racing with parent process exit
  Squashfs: fix uninit-value in squashfs_get_parent
  kho: only fill kimage if KHO is finalized
  ocfs2: avoid extra calls to strlen() after ocfs2_sprintf_system_inode_name()
  kernel/sys.c: fix the racy usage of task_lock(tsk->group_leader) in sys_prlimit64() paths
  sched/task.h: fix the wrong comment on task_lock() nesting with tasklist_lock
  coccinelle: platform_no_drv_owner: handle also built-in drivers
  coccinelle: of_table: handle SPI device ID tables
  lib/decompress: use designated initializers for struct compress_format
  efi: support booting with kexec handover (KHO)
  ...
.mailmap
@@ -721,7 +721,8 @@ Shuah Khan <shuah@kernel.org> <shuahkhan@gmail.com>
 Shuah Khan <shuah@kernel.org> <shuah.khan@hp.com>
 Shuah Khan <shuah@kernel.org> <shuahkh@osg.samsung.com>
 Shuah Khan <shuah@kernel.org> <shuah.kh@samsung.com>
-Sibi Sankar <quic_sibis@quicinc.com> <sibis@codeaurora.org>
+Sibi Sankar <sibi.sankar@oss.qualcomm.com> <sibis@codeaurora.org>
+Sibi Sankar <sibi.sankar@oss.qualcomm.com> <quic_sibis@quicinc.com>
 Sid Manning <quic_sidneym@quicinc.com> <sidneym@codeaurora.org>
 Simon Arlott <simon@octiron.net> <simon@fire.lp0.eu>
 Simona Vetter <simona.vetter@ffwll.ch> <daniel.vetter@ffwll.ch>
@@ -134,47 +134,72 @@ The above command can be used with -v to get more debug information.

 After the system starts, use `delaytop` to get the system-wide delay information,
 which includes system-wide PSI information and Top-N high-latency tasks.
 Note: PSI support requires `CONFIG_PSI=y` and `psi=1` for full functionality.

-`delaytop` supports sorting by CPU latency in descending order by default,
-displays the top 20 high-latency tasks by default, and refreshes the latency
-data every 2 seconds by default.
+`delaytop` is an interactive tool for monitoring system pressure and task delays.
+It supports multiple sorting options, display modes, and real-time keyboard controls.

-Get PSI information and Top-N tasks delay, since system boot::
+Basic usage with default settings (sorts by CPU delay, shows top 20 tasks, refreshes every 2 seconds)::

	bash# ./delaytop
	System Pressure Information: (avg10/avg60/avg300/total)
-	CPU some:    0.0%/   0.0%/   0.0%/     345(ms)
+	CPU some:    0.0%/   0.0%/   0.0%/  106137(ms)
	CPU full:    0.0%/   0.0%/   0.0%/       0(ms)
	Memory full: 0.0%/   0.0%/   0.0%/       0(ms)
	Memory some: 0.0%/   0.0%/   0.0%/       0(ms)
-	IO full:     0.0%/   0.0%/   0.0%/      65(ms)
-	IO some:     0.0%/   0.0%/   0.0%/      79(ms)
+	IO full:     0.0%/   0.0%/   0.0%/    2240(ms)
+	IO some:     0.0%/   0.0%/   0.0%/    2783(ms)
	IRQ full:    0.0%/   0.0%/   0.0%/       0(ms)
-	Top 20 processes (sorted by CPU delay):
-	PID     TGID    COMMAND         CPU(ms)  IO(ms)  SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms)  IRQ(ms)
-	----------------------------------------------------------------------------------------------
-	161     161     zombie_memcg_re    1.40    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	130     130     blkcg_punt_bio     1.37    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	444     444     scsi_tmf_0         0.73    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	1280    1280    rsyslogd           0.53    0.04    0.00    0.00    0.00    0.00    0.00    0.00
-	12      12      ksoftirqd/0        0.47    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	1277    1277    nbd-server         0.44    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	308     308     kworker/2:2-sys    0.41    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	55      55      netns              0.36    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	1187    1187    acpid              0.31    0.03    0.00    0.00    0.00    0.00    0.00    0.00
-	6184    6184    kworker/1:2-sys    0.24    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	186     186     kaluad             0.24    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	18      18      ksoftirqd/1        0.24    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	185     185     kmpath_rdacd       0.23    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	190     190     kstrp              0.23    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	2759    2759    agetty             0.20    0.03    0.00    0.00    0.00    0.00    0.00    0.00
-	1190    1190    kworker/0:3-sys    0.19    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	1272    1272    sshd               0.15    0.04    0.00    0.00    0.00    0.00    0.00    0.00
-	1156    1156    license            0.15    0.11    0.00    0.00    0.00    0.00    0.00    0.00
-	134     134     md                 0.13    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	6142    6142    kworker/3:2-xfs    0.13    0.00    0.00    0.00    0.00    0.00    0.00    0.00
+	Top 20 processes (sorted by cpu delay):
+	PID     TGID    COMMAND         CPU(ms)  IO(ms)  IRQ(ms) MEM(ms)
+	------------------------------------------------------------------------
+	110     110     kworker/15:0H-s   27.91    0.00    0.00    0.00
+	57      57      cpuhp/7            3.18    0.00    0.00    0.00
+	99      99      cpuhp/14           2.97    0.00    0.00    0.00
+	51      51      cpuhp/6            0.90    0.00    0.00    0.00
+	44      44      kworker/4:0H-sy    0.80    0.00    0.00    0.00
+	60      60      ksoftirqd/7        0.74    0.00    0.00    0.00
+	76      76      idle_inject/10     0.31    0.00    0.00    0.00
+	100     100     idle_inject/14     0.30    0.00    0.00    0.00
+	1309    1309    systemsettings     0.29    0.00    0.00    0.00
+	45      45      cpuhp/5            0.22    0.00    0.00    0.00
+	63      63      cpuhp/8            0.20    0.00    0.00    0.00
+	87      87      cpuhp/12           0.18    0.00    0.00    0.00
+	93      93      cpuhp/13           0.17    0.00    0.00    0.00
+	1265    1265    acpid              0.17    0.00    0.00    0.00
+	1552    1552    sshd               0.17    0.00    0.00    0.00
+	2584    2584    sddm-helper        0.16    0.00    0.00    0.00
+	1284    1284    rtkit-daemon       0.15    0.00    0.00    0.00
+	1326    1326    nde-netfilter      0.14    0.00    0.00    0.00
+	27      27      cpuhp/2            0.13    0.00    0.00    0.00
+	631     631     kworker/11:2-rc    0.11    0.00    0.00    0.00
+	[o]sort [M]memverbose [q]quit

-Dynamic interactive interface of delaytop::
+Interactive keyboard controls during runtime::

+	o - Select sort field (CPU, IO, IRQ, Memory, etc.)
+	M - Toggle display mode (Default/Memory Verbose)
+	q - Quit

+Available sort fields (use -s/--sort or an interactive command)::

+	cpu(c)       - CPU delay
+	blkio(i)     - I/O delay
+	irq(q)       - IRQ delay
+	mem(m)       - Total memory delay
+	swapin(s)    - Swapin delay (memory verbose mode only)
+	freepages(r) - Freepages reclaim delay (memory verbose mode only)
+	thrashing(t) - Thrashing delay (memory verbose mode only)
+	compact(p)   - Compaction delay (memory verbose mode only)
+	wpcopy(w)    - Write page copy delay (memory verbose mode only)

 Advanced usage examples::

	# ./delaytop -s blkio
		Sorted by IO delay

	# ./delaytop -s mem -M
		Sorted by memory delay in memory verbose mode

	# ./delaytop -p pid
		Print delayacct stats
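For readers unfamiliar with the PSI data that `delaytop` summarises above, here is a minimal userspace sketch (not part of the patch) that dumps the raw pressure file it reads. It assumes a kernel built with CONFIG_PSI=y; the file layout is documented in Documentation/accounting/psi.rst, and `total=` there is cumulative stall time in microseconds, which delaytop converts to milliseconds.

/* Minimal sketch: print the raw PSI data behind delaytop's CPU rows.
 * /proc/pressure/cpu contains lines such as:
 *   some avg10=0.00 avg60=0.00 avg300=0.00 total=106137
 */
#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/pressure/cpu", "r");

	if (!f) {
		perror("fopen /proc/pressure/cpu");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* "some ..." and "full ..." lines */
	fclose(f);
	return 0;
}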
@@ -4603,7 +4603,7 @@
 bit 2: print timer info
 bit 3: print locks info if CONFIG_LOCKDEP is on
 bit 4: print ftrace buffer
-bit 5: replay all messages on consoles at the end of panic
+bit 5: replay all kernel messages on consoles at the end of panic
 bit 6: print all CPUs backtrace (if available in the arch)
 bit 7: print only tasks in uninterruptible (blocked) state
 *Be aware* that this option may print a _lot_ of lines,
@@ -890,7 +890,7 @@ bit 1 print system memory info
 bit 2 print timer info
 bit 3 print locks info if ``CONFIG_LOCKDEP`` is on
 bit 4 print ftrace buffer
-bit 5 replay all messages on consoles at the end of panic
+bit 5 replay all kernel messages on consoles at the end of panic
 bit 6 print all CPUs backtrace (if available in the arch)
 bit 7 print only tasks in uninterruptible (blocked) state
 ===== ============================================
@@ -361,7 +361,12 @@ local tasks spawned by the process and the global task that handles USB bus #1:
	 */
	sleep(2);

-	n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED);
+	/*
+	 * The load of the coverage count should be an acquire to pair
+	 * with the corresponding write memory barrier (smp_wmb()) on the
+	 * kernel side in kcov_move_area().
+	 */
+	n = __atomic_load_n(&cover[0], __ATOMIC_ACQUIRE);
	for (i = 0; i < n; i++)
		printf("0x%lx\n", cover[i + 1]);
	if (ioctl(fd, KCOV_DISABLE, 0))
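For context, the acquire load above sits inside the usual kcov collection loop from this document. A condensed, self-contained sketch of that loop (error handling trimmed, ioctl constants as defined in Documentation/dev-tools/kcov.rst) is:

/* Condensed kcov usage sketch; the acquire load of cover[0] pairs with
 * the kernel-side smp_wmb() in kcov_move_area().
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

#define KCOV_INIT_TRACE	_IOR('c', 1, unsigned long)
#define KCOV_ENABLE	_IO('c', 100)
#define KCOV_DISABLE	_IO('c', 101)
#define COVER_SIZE	(64 << 10)
#define KCOV_TRACE_PC	0

int main(void)
{
	unsigned long *cover, n, i;
	int fd = open("/sys/kernel/debug/kcov", O_RDWR);

	ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE);
	cover = mmap(NULL, COVER_SIZE * sizeof(unsigned long),
		     PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	ioctl(fd, KCOV_ENABLE, KCOV_TRACE_PC);
	__atomic_store_n(&cover[0], 0, __ATOMIC_RELAXED);

	read(-1, NULL, 0);	/* the syscall whose coverage we collect */

	n = __atomic_load_n(&cover[0], __ATOMIC_ACQUIRE);
	for (i = 0; i < n; i++)
		printf("0x%lx\n", cover[i + 1]);
	ioctl(fd, KCOV_DISABLE, 0);
	return 0;
}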
@@ -20987,7 +20987,7 @@ F: Documentation/devicetree/bindings/power/avs/qcom,cpr.yaml
 F:	drivers/pmdomain/qcom/cpr.c

 QUALCOMM CPUCP MAILBOX DRIVER
-M:	Sibi Sankar <quic_sibis@quicinc.com>
+M:	Sibi Sankar <sibi.sankar@oss.qualcomm.com>
 L:	linux-arm-msm@vger.kernel.org
 S:	Supported
 F:	Documentation/devicetree/bindings/mailbox/qcom,cpucp-mbox.yaml
@@ -165,14 +165,23 @@ static struct crash_mem *fill_up_crash_elf_data(void)
	/*
	 * Exclusion of crash region, crashk_low_res and/or crashk_cma_ranges
	 * may cause range splits. So add extra slots here.
+	 *
+	 * Exclusion of the low 1M may not cause another range split, because
+	 * the excluded range is [0, 1M] and the condition for splitting a new
+	 * region is that the start and end parameters both lie inside an
+	 * existing region in cmem and equal neither that region's start nor
+	 * its end. Obviously, the start of [0, 1M] cannot meet this condition.
+	 *
+	 * But lest the low 1M range be changed in the future (e.g. to
+	 * [start, 1M]), add an extra slot.
	 */
-	nr_ranges += 2 + crashk_cma_cnt;
+	nr_ranges += 3 + crashk_cma_cnt;
	cmem = vzalloc(struct_size(cmem, ranges, nr_ranges));
	if (!cmem)
		return NULL;

	cmem->max_nr_ranges = nr_ranges;
	cmem->nr_ranges = 0;

	return cmem;
}
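To make the slot accounting above concrete: excluding a range that falls strictly inside an existing region turns one region into two, consuming an extra slot. A toy, userspace-style illustration (not kernel code; it only handles the interior case the comment describes):

/* Illustration only: excluding [120, 179] from the single region
 * [100, 199] leaves [100, 119] and [180, 199], so one exclusion can
 * require one extra slot in the range array.
 */
struct range_ex { unsigned long long start, end; };

static int toy_exclude(struct range_ex *r, int nr, int max,
		       unsigned long long s, unsigned long long e)
{
	for (int i = 0; i < nr; i++) {
		if (s > r[i].start && e < r[i].end) {	/* strict interior hit */
			if (nr + 1 > max)
				return -1;		/* no slot for the split */
			r[nr].start = e + 1;
			r[nr].end = r[i].end;
			r[i].end = s - 1;
			return nr + 1;
		}
	}
	return nr;	/* no split needed in this simplified model */
}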
@@ -323,16 +332,20 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
	struct crash_mem *cmem;

	/*
-	 * Using random kexec_buf for passing dm crypt keys may cause a range
-	 * split. So use two slots here.
+	 * In the current x86 architecture code, the elfheader is always
+	 * allocated at crashk_res.start. But it depends on the allocation
+	 * position of elfheader in crashk_res. To avoid potential out of
+	 * bounds in future, add an extra slot.
+	 *
+	 * And using random kexec_buf for passing dm crypt keys may cause a
+	 * range split too, so add another extra slot here.
	 */
-	nr_ranges = 2;
+	nr_ranges = 3;
	cmem = vzalloc(struct_size(cmem, ranges, nr_ranges));
	if (!cmem)
		return -ENOMEM;

	cmem->max_nr_ranges = nr_ranges;
	cmem->nr_ranges = 0;

	memset(&cmd, 0, sizeof(struct crash_memmap_data));
	cmd.params = params;
@@ -16,6 +16,8 @@
 #include <linux/kexec.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <linux/libfdt.h>
+#include <linux/of_fdt.h>
 #include <linux/efi.h>
 #include <linux/random.h>
@@ -212,6 +214,28 @@ setup_efi_state(struct boot_params *params, unsigned long params_load_addr,
 }
 #endif /* CONFIG_EFI */

+#ifdef CONFIG_OF_FLATTREE
+static void setup_dtb(struct boot_params *params,
+		      unsigned long params_load_addr,
+		      unsigned int dtb_setup_data_offset)
+{
+	struct setup_data *sd = (void *)params + dtb_setup_data_offset;
+	unsigned long setup_data_phys, dtb_len;
+
+	dtb_len = fdt_totalsize(initial_boot_params);
+	sd->type = SETUP_DTB;
+	sd->len = dtb_len;
+
+	/* Carry over current boot DTB with setup_data */
+	memcpy(sd->data, initial_boot_params, dtb_len);
+
+	/* Add setup data */
+	setup_data_phys = params_load_addr + dtb_setup_data_offset;
+	sd->next = params->hdr.setup_data;
+	params->hdr.setup_data = setup_data_phys;
+}
+#endif /* CONFIG_OF_FLATTREE */
+
 static void
 setup_ima_state(const struct kimage *image, struct boot_params *params,
		unsigned long params_load_addr,
@@ -336,6 +360,17 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params,
			sizeof(struct efi_setup_data);
 #endif

+#ifdef CONFIG_OF_FLATTREE
+	if (image->force_dtb && initial_boot_params) {
+		setup_dtb(params, params_load_addr, setup_data_offset);
+		setup_data_offset += sizeof(struct setup_data) +
+				     fdt_totalsize(initial_boot_params);
+	} else {
+		pr_debug("Not carrying over DTB, force_dtb = %d\n",
+			 image->force_dtb);
+	}
+#endif
+
	if (IS_ENABLED(CONFIG_IMA_KEXEC)) {
		/* Setup IMA log buffer state */
		setup_ima_state(image, params, params_load_addr,
@@ -529,6 +564,12 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
				sizeof(struct setup_data) +
				RNG_SEED_LENGTH;

+#ifdef CONFIG_OF_FLATTREE
+	if (image->force_dtb && initial_boot_params)
+		kbuf.bufsz += sizeof(struct setup_data) +
+			      fdt_totalsize(initial_boot_params);
+#endif
+
	if (IS_ENABLED(CONFIG_IMA_KEXEC))
		kbuf.bufsz += sizeof(struct setup_data) +
			      sizeof(struct ima_setup_data);
@@ -537,7 +578,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
		kbuf.bufsz += sizeof(struct setup_data) +
			      sizeof(struct kho_data);

-	params = kzalloc(kbuf.bufsz, GFP_KERNEL);
+	params = kvzalloc(kbuf.bufsz, GFP_KERNEL);
	if (!params)
		return ERR_PTR(-ENOMEM);
	efi_map_offset = params_cmdline_sz;
@@ -647,7 +688,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
	return ldata;

 out_free_params:
-	kfree(params);
+	kvfree(params);
	return ERR_PTR(ret);
 }
@@ -659,7 +700,7 @@ static int bzImage64_cleanup(void *loader_data)
	if (!ldata)
		return 0;

-	kfree(ldata->bootparams_buf);
+	kvfree(ldata->bootparams_buf);
	ldata->bootparams_buf = NULL;

	return 0;
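The kzalloc-to-kvzalloc switch in the three hunks above matters because the boot_params buffer can now also carry a whole DTB, which may exceed what the slab allocator comfortably serves. kvzalloc() falls back to vmalloc() for large sizes, and such buffers must be released with kvfree(), never kfree(). A minimal sketch of the pairing:

/* Sketch: kvzalloc()/kvfree() pairing for possibly-large buffers.
 * kvfree() handles both kmalloc-backed and vmalloc-backed memory.
 */
#include <linux/slab.h>

static int example_alloc(size_t bufsz)
{
	void *buf = kvzalloc(bufsz, GFP_KERNEL);	/* kmalloc, else vmalloc */

	if (!buf)
		return -ENOMEM;
	/* ... fill and consume buf ... */
	kvfree(buf);	/* safe for either backing store */
	return 0;
}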
@@ -12,6 +12,7 @@
 #include <linux/efi.h>
 #include <linux/fwnode.h>
 #include <linux/init.h>
+#include <linux/kexec_handover.h>
 #include <linux/memblock.h>
 #include <linux/mm_types.h>
 #include <linux/of.h>
@@ -164,12 +165,32 @@ static __init void reserve_regions(void)
	pr_info("Processing EFI memory map:\n");

	/*
-	 * Discard memblocks discovered so far: if there are any at this
-	 * point, they originate from memory nodes in the DT, and UEFI
-	 * uses its own memory map instead.
+	 * Discard memblocks discovered so far except for KHO scratch
+	 * regions. Most memblocks at this point originate from memory nodes
+	 * in the DT and UEFI uses its own memory map instead. However, if
+	 * KHO is enabled, the scratch regions, which are known good memory,
+	 * must be preserved.
	 */
	memblock_dump_all();
-	memblock_remove(0, PHYS_ADDR_MAX);
+
+	if (is_kho_boot()) {
+		struct memblock_region *r;
+
+		/* Remove all non-KHO regions */
+		for_each_mem_region(r) {
+			if (!memblock_is_kho_scratch(r)) {
+				memblock_remove(r->base, r->size);
+				r--;
+			}
+		}
+	} else {
+		/*
+		 * KHO is disabled. Discard memblocks discovered so far:
+		 * if there are any at this point, they originate from memory
+		 * nodes in the DT, and UEFI uses its own memory map instead.
+		 */
+		memblock_remove(0, PHYS_ADDR_MAX);
+	}

	for_each_efi_memory_desc(md) {
		paddr = md->phys_addr;
@@ -279,14 +279,7 @@ static int fbcon_get_rotate(struct fb_info *info)

 static bool fbcon_skip_panic(struct fb_info *info)
 {
-	/* panic_cpu is not exported, and can't be used if built as module. Use
-	 * oops_in_progress instead, but non-fatal oops won't be printed.
-	 */
-#if defined(MODULE)
-	return (info->skip_panic && unlikely(oops_in_progress));
-#else
-	return (info->skip_panic && unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID));
-#endif
+	return (info->skip_panic && unlikely(panic_in_progress()));
 }

 static inline bool fbcon_is_active(struct vc_data *vc, struct fb_info *info)
@@ -421,7 +421,7 @@ static int cramfs_physmem_mmap(struct file *file, struct vm_area_struct *vma)
			vm_fault_t vmf;
			unsigned long off = i * PAGE_SIZE;

			vmf = vmf_insert_mixed(vma, vma->vm_start + off,
-					       address + off);
+					       PHYS_PFN(address + off));
			if (vmf & VM_FAULT_ERROR)
				ret = vm_fault_to_errno(vmf, 0);
		}
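The bug fixed here is a units mismatch: vmf_insert_mixed() expects a page frame number, while the old code passed a raw physical address, mapping the wrong pages. PHYS_PFN() is simply the PAGE_SHIFT right shift, as defined in include/linux/pfn.h:

/* From include/linux/pfn.h: a physical address becomes a page frame
 * number by dropping the in-page offset bits.
 */
#define PHYS_PFN(x)	((unsigned long)((x) >> PAGE_SHIFT))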
@@ -1209,7 +1209,7 @@ EXPORT_SYMBOL_GPL(fat_alloc_new_dir);

 static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots,
			       int *nr_cluster, struct msdos_dir_entry **de,
-			       struct buffer_head **bh, loff_t *i_pos)
+			       struct buffer_head **bh)
 {
	struct super_block *sb = dir->i_sb;
	struct msdos_sb_info *sbi = MSDOS_SB(sb);
@@ -1269,7 +1269,6 @@ static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots,
	get_bh(bhs[n]);
	*bh = bhs[n];
	*de = (struct msdos_dir_entry *)((*bh)->b_data + offset);
-	*i_pos = fat_make_i_pos(sb, *bh, *de);

	/* Second stage: clear the rest of cluster, and write outs */
	err = fat_zeroed_cluster(dir, start_blknr, ++n, bhs, MAX_BUF_PER_PAGE);
@@ -1298,7 +1297,7 @@ int fat_add_entries(struct inode *dir, void *slots, int nr_slots,
	struct buffer_head *bh, *prev, *bhs[3]; /* 32*slots (672bytes) */
	struct msdos_dir_entry *de;
	int err, free_slots, i, nr_bhs;
-	loff_t pos, i_pos;
+	loff_t pos;

	sinfo->nr_slots = nr_slots;

@@ -1386,7 +1385,7 @@ found:
	 * add the cluster to dir.
	 */
	cluster = fat_add_new_entries(dir, slots, nr_slots, &nr_cluster,
-				      &de, &bh, &i_pos);
+				      &de, &bh);
	if (cluster < 0) {
		err = cluster;
		goto error_remove;
@@ -6928,8 +6928,7 @@ static int ocfs2_grab_folios(struct inode *inode, loff_t start, loff_t end,

 out:
	if (ret != 0) {
-		if (folios)
-			ocfs2_unlock_and_free_folios(folios, numfolios);
+		ocfs2_unlock_and_free_folios(folios, numfolios);
		numfolios = 0;
	}
@@ -1477,7 +1477,6 @@ way_up_top:
			goto send_response;
		} else if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) {
			spin_unlock(&res->spinlock);
-			// mlog(0, "node %u is the master\n", res->owner);
			response = DLM_MASTER_RESP_NO;
			if (mle)
				kmem_cache_free(dlm_mle_cache, mle);
@@ -1493,7 +1492,6 @@ way_up_top:
		BUG();
	}

-	// mlog(0, "lockres is in progress...\n");
	spin_lock(&dlm->master_lock);
	found = dlm_find_mle(dlm, &tmpmle, name, namelen);
	if (!found) {
@@ -1503,8 +1501,6 @@ way_up_top:
	set_maybe = 1;
	spin_lock(&tmpmle->spinlock);
	if (tmpmle->type == DLM_MLE_BLOCK) {
-		// mlog(0, "this node is waiting for "
-		// "lockres to be mastered\n");
		response = DLM_MASTER_RESP_NO;
	} else if (tmpmle->type == DLM_MLE_MIGRATION) {
		mlog(0, "node %u is master, but trying to migrate to "
@@ -1531,8 +1527,6 @@ way_up_top:
		} else
			response = DLM_MASTER_RESP_NO;
	} else {
-		// mlog(0, "this node is attempting to "
-		// "master lockres\n");
		response = DLM_MASTER_RESP_MAYBE;
	}
	if (set_maybe)
@@ -1559,7 +1553,6 @@ way_up_top:
	found = dlm_find_mle(dlm, &tmpmle, name, namelen);
	if (!found) {
		/* this lockid has never been seen on this node yet */
-		// mlog(0, "no mle found\n");
		if (!mle) {
			spin_unlock(&dlm->master_lock);
			spin_unlock(&dlm->spinlock);
@@ -1573,8 +1566,6 @@ way_up_top:
		goto way_up_top;
	}

-	// mlog(0, "this is second time thru, already allocated, "
-	// "add the block.\n");
	dlm_init_mle(mle, DLM_MLE_BLOCK, dlm, NULL, name, namelen);
	set_bit(request->node_idx, mle->maybe_map);
	__dlm_insert_mle(dlm, mle);
@@ -1897,8 +1888,6 @@ ok:
		spin_unlock(&res->spinlock);
	}

-	// mlog(0, "woo! got an assert_master from node %u!\n",
-	// assert->node_idx);
	if (mle) {
		int extra_ref = 0;
		int nn = -1;
@@ -464,7 +464,6 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
	}

	if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
-		// mlog(0, "nothing to recover! sleeping now!\n");
		spin_unlock(&dlm->spinlock);
		/* return to main thread loop and sleep. */
		return 0;
@@ -1495,6 +1495,14 @@ int ocfs2_validate_inode_block(struct super_block *sb,
		goto bail;
	}

+	if (le16_to_cpu(di->i_suballoc_slot) != (u16)OCFS2_INVALID_SLOT &&
+	    (u32)le16_to_cpu(di->i_suballoc_slot) > OCFS2_SB(sb)->max_slots - 1) {
+		rc = ocfs2_error(sb, "Invalid dinode %llu: suballoc slot %u\n",
+				 (unsigned long long)bh->b_blocknr,
+				 le16_to_cpu(di->i_suballoc_slot));
+		goto bail;
+	}
+
	rc = 0;

 bail:
@@ -358,13 +358,11 @@ static int ocfs2_info_handle_freeinode(struct inode *inode,
			goto bail;
		}
	} else {
-		ocfs2_sprintf_system_inode_name(namebuf,
-						sizeof(namebuf),
-						type, i);
+		int len = ocfs2_sprintf_system_inode_name(namebuf,
+							  sizeof(namebuf),
+							  type, i);
		status = ocfs2_lookup_ino_from_name(osb->sys_root_inode,
-						    namebuf,
-						    strlen(namebuf),
-						    &blkno);
+						    namebuf, len, &blkno);
		if (status < 0) {
			status = -ENOENT;
			goto bail;
@@ -651,12 +649,10 @@ static int ocfs2_info_handle_freefrag(struct inode *inode,
			goto bail;
		}
	} else {
-		ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type,
-						OCFS2_INVALID_SLOT);
+		int len = ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf),
+							  type, OCFS2_INVALID_SLOT);
		status = ocfs2_lookup_ino_from_name(osb->sys_root_inode,
-						    namebuf,
-						    strlen(namebuf),
-						    &blkno);
+						    namebuf, len, &blkno);
		if (status < 0) {
			status = -ENOENT;
			goto bail;
@@ -364,7 +364,7 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode,
					 int *vict_bit,
					 struct buffer_head **ret_bh)
 {
-	int ret, i, bits_per_unit = 0;
+	int ret, i, len, bits_per_unit = 0;
	u64 blkno;
	char namebuf[40];

@@ -375,9 +375,9 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode,
	struct ocfs2_dinode *ac_dinode;
	struct ocfs2_group_desc *bg;

-	ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type, slot);
-	ret = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf,
-					 strlen(namebuf), &blkno);
+	len = ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type, slot);
+	ret = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf, len, &blkno);

	if (ret) {
		ret = -ENOENT;
		goto out;
@@ -614,7 +614,7 @@ struct ocfs2_super_block {
	__le16 s_reserved0;
	__le32 s_dx_seed[3];		/* seed[0-2] for dx dir hash.
					 * s_uuid_hash serves as seed[3]. */
-/*C0*/	__le64 s_reserved2[15];		/* Fill out superblock */
+/*C8*/	__le64 s_reserved2[15];		/* Fill out superblock */
 /*140*/

 /*
@@ -1011,6 +1011,7 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
			printk(KERN_ERR "ocfs2: Could not determine"
			       " locking version\n");
			user_cluster_disconnect(conn);
+			lc = NULL;
			goto out;
		}
		wait_event(lc->oc_wait, (atomic_read(&lc->oc_this_node) > 0));
@@ -127,14 +127,14 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
	char namebuf[40];
	struct inode *inode = NULL;
	u64 blkno;
-	int status = 0;
+	int len, status = 0;

-	ocfs2_sprintf_system_inode_name(namebuf,
-					sizeof(namebuf),
-					type, slot);
+	len = ocfs2_sprintf_system_inode_name(namebuf,
+					      sizeof(namebuf),
+					      type, slot);

-	status = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf,
-					    strlen(namebuf), &blkno);
+	status = ocfs2_lookup_ino_from_name(osb->sys_root_inode,
+					    namebuf, len, &blkno);
	if (status < 0) {
		goto bail;
	}
@@ -3947,7 +3947,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
		tid = task_pid_nr_ns(task, ns);
		if (!tid)
			continue;	/* The task has just exited. */
-		len = snprintf(name, sizeof(name), "%u", tid);
+		len = snprintf(name, sizeof(name), "%d", tid);
		if (!proc_fill_cache(file, ctx, name, len,
				proc_task_instantiate, task, NULL)) {
			/* returning this tgid failed, save it as the first
@@ -307,7 +307,8 @@ static int fill_meta_index(struct inode *inode, int index,
 all_done:
	*index_block = cur_index_block;
	*index_offset = cur_offset;
-	*data_block = cur_data_block;
+	if (data_block)
+		*data_block = cur_data_block;

	/*
	 * Scale cache index (cache slot entry) to index
@@ -324,17 +325,15 @@ failed:
 * Get the on-disk location and compressed size of the datablock
 * specified by index. Fill_meta_index() does most of the work.
 */
-static int read_blocklist(struct inode *inode, int index, u64 *block)
+static int read_blocklist_ptrs(struct inode *inode, int index, u64 *start,
+			       int *offset, u64 *block)
 {
-	u64 start;
	long long blks;
-	int offset;
	__le32 size;
-	int res = fill_meta_index(inode, index, &start, &offset, block);
+	int res = fill_meta_index(inode, index, start, offset, block);

-	TRACE("read_blocklist: res %d, index %d, start 0x%llx, offset"
-	      " 0x%x, block 0x%llx\n", res, index, start, offset,
-	      *block);
+	TRACE("read_blocklist: res %d, index %d, start 0x%llx, offset 0x%x, block 0x%llx\n",
+	      res, index, *start, *offset, block ? *block : 0);

	if (res < 0)
		return res;
@@ -346,22 +345,31 @@ static int read_blocklist(struct inode *inode, int index, u64 *block)
	 * extra block indexes needed.
	 */
	if (res < index) {
-		blks = read_indexes(inode->i_sb, index - res, &start, &offset);
+		blks = read_indexes(inode->i_sb, index - res, start, offset);
		if (blks < 0)
			return (int) blks;
-		*block += blks;
+		if (block)
+			*block += blks;
	}

	/*
	 * Read length of block specified by index.
	 */
-	res = squashfs_read_metadata(inode->i_sb, &size, &start, &offset,
+	res = squashfs_read_metadata(inode->i_sb, &size, start, offset,
				     sizeof(size));
	if (res < 0)
		return res;
	return squashfs_block_size(size);
 }

+static inline int read_blocklist(struct inode *inode, int index, u64 *block)
+{
+	u64 start;
+	int offset;
+
+	return read_blocklist_ptrs(inode, index, &start, &offset, block);
+}
+
 static bool squashfs_fill_page(struct folio *folio,
		struct squashfs_cache_entry *buffer, size_t offset,
		size_t avail)
@@ -658,7 +666,114 @@ skip_pages:
	kfree(pages);
 }

+static loff_t seek_hole_data(struct file *file, loff_t offset, int whence)
+{
+	struct inode *inode = file->f_mapping->host;
+	struct super_block *sb = inode->i_sb;
+	struct squashfs_sb_info *msblk = sb->s_fs_info;
+	u64 start, index = offset >> msblk->block_log;
+	u64 file_end = (i_size_read(inode) + msblk->block_size - 1) >> msblk->block_log;
+	int s_offset, length;
+	__le32 *blist = NULL;
+
+	/* reject offset if negative or beyond file end */
+	if ((unsigned long long)offset >= i_size_read(inode))
+		return -ENXIO;
+
+	/* is offset within tailend and is tailend packed into a fragment? */
+	if (index + 1 == file_end &&
+	    squashfs_i(inode)->fragment_block != SQUASHFS_INVALID_BLK) {
+		if (whence == SEEK_DATA)
+			return offset;
+
+		/* there is an implicit hole at the end of any file */
+		return i_size_read(inode);
+	}
+
+	length = read_blocklist_ptrs(inode, index, &start, &s_offset, NULL);
+	if (length < 0)
+		return length;
+
+	/* nothing more to do if offset matches desired whence value */
+	if ((length == 0 && whence == SEEK_HOLE) ||
+	    (length && whence == SEEK_DATA))
+		return offset;
+
+	/* skip scanning forwards if we're at file end */
+	if (++index == file_end)
+		goto not_found;
+
+	blist = kmalloc(SQUASHFS_SCAN_INDEXES << 2, GFP_KERNEL);
+	if (blist == NULL) {
+		ERROR("%s: Failed to allocate block_list\n", __func__);
+		return -ENOMEM;
+	}
+
+	while (index < file_end) {
+		int i, indexes = min(file_end - index, SQUASHFS_SCAN_INDEXES);
+
+		offset = squashfs_read_metadata(sb, blist, &start, &s_offset, indexes << 2);
+		if (offset < 0)
+			goto finished;
+
+		for (i = 0; i < indexes; i++) {
+			length = squashfs_block_size(blist[i]);
+			if (length < 0) {
+				offset = length;
+				goto finished;
+			}
+
+			/* does this block match desired whence value? */
+			if ((length == 0 && whence == SEEK_HOLE) ||
+			    (length && whence == SEEK_DATA)) {
+				offset = (index + i) << msblk->block_log;
+				goto finished;
+			}
+		}
+
+		index += indexes;
+	}
+
+not_found:
+	/* whence value determines what happens */
+	if (whence == SEEK_DATA)
+		offset = -ENXIO;
+	else
+		/* there is an implicit hole at the end of any file */
+		offset = i_size_read(inode);
+
+finished:
+	kfree(blist);
+	return offset;
+}
+
+static loff_t squashfs_llseek(struct file *file, loff_t offset, int whence)
+{
+	struct inode *inode = file->f_mapping->host;
+
+	switch (whence) {
+	default:
+		return generic_file_llseek(file, offset, whence);
+	case SEEK_DATA:
+	case SEEK_HOLE:
+		offset = seek_hole_data(file, offset, whence);
+		break;
+	}
+
+	if (offset < 0)
+		return offset;
+
+	return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
+}
+
 const struct address_space_operations squashfs_aops = {
	.read_folio = squashfs_read_folio,
	.readahead = squashfs_readahead
 };

+const struct file_operations squashfs_file_operations = {
+	.llseek = squashfs_llseek,
+	.read_iter = generic_file_read_iter,
+	.mmap_prepare = generic_file_readonly_mmap_prepare,
+	.splice_read = filemap_splice_read
+};
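With squashfs_llseek() wired up, userspace can walk a squashfs file's data extents and holes the same way it does on other filesystems. A hedged sketch (the mount path is an arbitrary example, not from the patch):

/* Sketch: enumerate data extents with SEEK_DATA/SEEK_HOLE. On squashfs
 * this now consults the on-disk block list instead of treating the
 * whole file as data.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/mnt/squash/file", O_RDONLY);	/* example path */
	off_t data = 0, hole;

	if (fd < 0)
		return 1;
	while ((data = lseek(fd, data, SEEK_DATA)) >= 0) {
		hole = lseek(fd, data, SEEK_HOLE);
		if (hole < 0)
			break;
		printf("data: [%lld, %lld)\n", (long long)data, (long long)hole);
		data = hole;
	}
	close(fd);
	return 0;	/* lseek() fails with ENXIO past the last data */
}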
@@ -68,6 +68,10 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode,
	inode->i_mode = le16_to_cpu(sqsh_ino->mode);
	inode->i_size = 0;

+	/* The file type bits must not be set yet; the caller sets them later. */
+	if (inode->i_mode & S_IFMT)
+		err = -EIO;
+
	return err;
 }
@@ -140,8 +144,17 @@ int squashfs_read_inode(struct inode *inode, long long ino)
		if (err < 0)
			goto failed_read;

+		inode->i_size = le32_to_cpu(sqsh_ino->file_size);
		frag = le32_to_cpu(sqsh_ino->fragment);
		if (frag != SQUASHFS_INVALID_FRAG) {
+			/*
+			 * the file cannot have a fragment (tailend) and have a
+			 * file size a multiple of the block size
+			 */
+			if ((inode->i_size & (msblk->block_size - 1)) == 0) {
+				err = -EINVAL;
+				goto failed_read;
+			}
			frag_offset = le32_to_cpu(sqsh_ino->offset);
			frag_size = squashfs_frag_lookup(sb, frag, &frag_blk);
			if (frag_size < 0) {
@@ -155,8 +168,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
		}

		set_nlink(inode, 1);
-		inode->i_size = le32_to_cpu(sqsh_ino->file_size);
-		inode->i_fop = &generic_ro_fops;
+		inode->i_fop = &squashfs_file_operations;
		inode->i_mode |= S_IFREG;
		inode->i_blocks = ((inode->i_size - 1) >> 9) + 1;
		squashfs_i(inode)->fragment_block = frag_blk;
@@ -165,6 +177,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
		squashfs_i(inode)->start = le32_to_cpu(sqsh_ino->start_block);
		squashfs_i(inode)->block_list_start = block;
		squashfs_i(inode)->offset = offset;
+		squashfs_i(inode)->parent = 0;
		inode->i_data.a_ops = &squashfs_aops;

		TRACE("File inode %x:%x, start_block %llx, block_list_start "
@@ -183,8 +196,21 @@ int squashfs_read_inode(struct inode *inode, long long ino)
		if (err < 0)
			goto failed_read;

+		inode->i_size = le64_to_cpu(sqsh_ino->file_size);
+		if (inode->i_size < 0) {
+			err = -EINVAL;
+			goto failed_read;
+		}
		frag = le32_to_cpu(sqsh_ino->fragment);
		if (frag != SQUASHFS_INVALID_FRAG) {
+			/*
+			 * the file cannot have a fragment (tailend) and have a
+			 * file size a multiple of the block size
+			 */
+			if ((inode->i_size & (msblk->block_size - 1)) == 0) {
+				err = -EINVAL;
+				goto failed_read;
+			}
			frag_offset = le32_to_cpu(sqsh_ino->offset);
			frag_size = squashfs_frag_lookup(sb, frag, &frag_blk);
			if (frag_size < 0) {
@@ -199,9 +225,8 @@ int squashfs_read_inode(struct inode *inode, long long ino)

		xattr_id = le32_to_cpu(sqsh_ino->xattr);
		set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
-		inode->i_size = le64_to_cpu(sqsh_ino->file_size);
		inode->i_op = &squashfs_inode_ops;
-		inode->i_fop = &generic_ro_fops;
+		inode->i_fop = &squashfs_file_operations;
		inode->i_mode |= S_IFREG;
		inode->i_blocks = (inode->i_size -
				le64_to_cpu(sqsh_ino->sparse) + 511) >> 9;
@@ -212,6 +237,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
		squashfs_i(inode)->start = le64_to_cpu(sqsh_ino->start_block);
		squashfs_i(inode)->block_list_start = block;
		squashfs_i(inode)->offset = offset;
+		squashfs_i(inode)->parent = 0;
		inode->i_data.a_ops = &squashfs_aops;

		TRACE("File inode %x:%x, start_block %llx, block_list_start "
@@ -292,6 +318,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
		inode->i_mode |= S_IFLNK;
		squashfs_i(inode)->start = block;
		squashfs_i(inode)->offset = offset;
+		squashfs_i(inode)->parent = 0;

		if (type == SQUASHFS_LSYMLINK_TYPE) {
			__le32 xattr;
@@ -329,6 +356,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
		set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
		rdev = le32_to_cpu(sqsh_ino->rdev);
		init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
+		squashfs_i(inode)->parent = 0;

		TRACE("Device inode %x:%x, rdev %x\n",
				SQUASHFS_INODE_BLK(ino), offset, rdev);
@@ -353,6 +381,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
		set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
		rdev = le32_to_cpu(sqsh_ino->rdev);
		init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
+		squashfs_i(inode)->parent = 0;

		TRACE("Device inode %x:%x, rdev %x\n",
				SQUASHFS_INODE_BLK(ino), offset, rdev);
@@ -373,6 +402,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
		inode->i_mode |= S_IFSOCK;
		set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
		init_special_inode(inode, inode->i_mode, 0);
+		squashfs_i(inode)->parent = 0;
		break;
	}
	case SQUASHFS_LFIFO_TYPE:
@@ -392,6 +422,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
		inode->i_op = &squashfs_inode_ops;
		set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
		init_special_inode(inode, inode->i_mode, 0);
+		squashfs_i(inode)->parent = 0;
		break;
	}
	default:
@@ -107,6 +107,7 @@ extern const struct address_space_operations squashfs_aops;

 /* inode.c */
 extern const struct inode_operations squashfs_inode_ops;
+extern const struct file_operations squashfs_file_operations;

 /* namei.c */
 extern const struct inode_operations squashfs_dir_inode_ops;
@@ -208,6 +208,7 @@ static inline int squashfs_block_size(__le32 raw)
 #define SQUASHFS_META_INDEXES	(SQUASHFS_METADATA_SIZE / sizeof(unsigned int))
 #define SQUASHFS_META_ENTRIES	127
 #define SQUASHFS_META_SLOTS	8
+#define SQUASHFS_SCAN_INDEXES	1024

 struct meta_entry {
	u64			data_block;
@@ -16,6 +16,7 @@ struct squashfs_inode_info {
	u64		xattr;
	unsigned int	xattr_size;
	int		xattr_count;
+	int		parent;
	union {
		struct {
			u64	fragment_block;
@@ -27,7 +28,6 @@ struct squashfs_inode_info {
			u64	dir_idx_start;
			int	dir_idx_offset;
			int	dir_idx_cnt;
-			int	parent;
		};
	};
	struct inode	vfs_inode;
@@ -334,14 +334,6 @@ static inline void ida_init(struct ida *ida)
	xa_init_flags(&ida->xa, IDA_INIT_FLAGS);
 }

-/*
- * ida_simple_get() and ida_simple_remove() are deprecated. Use
- * ida_alloc() and ida_free() instead respectively.
- */
-#define ida_simple_get(ida, start, end, gfp) \
-			ida_alloc_range(ida, start, (end) - 1, gfp)
-#define ida_simple_remove(ida, id)	ida_free(ida, id)
-
 static inline bool ida_is_empty(const struct ida *ida)
 {
	return xa_empty(&ida->xa);
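Callers that still used the removed wrappers migrate mechanically. The one trap is visible in the deleted macro itself: ida_simple_get()'s `end` was exclusive, while ida_alloc_range()'s `max` is inclusive, which is why the wrapper subtracted one. A minimal migration sketch:

/* Before (removed):
 *	id = ida_simple_get(&my_ida, 0, 100, GFP_KERNEL);  // ids 0..99
 *	ida_simple_remove(&my_ida, id);
 * After:
 */
#include <linux/idr.h>

static DEFINE_IDA(my_ida);

static int example_get_id(void)
{
	int id = ida_alloc_range(&my_ida, 0, 99, GFP_KERNEL); /* inclusive max */

	if (id < 0)
		return id;
	/* ... use id ... */
	ida_free(&my_ida, id);
	return 0;
}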
@@ -164,11 +164,23 @@ extern int root_mountflags;

 extern bool early_boot_irqs_disabled;

-/*
- * Values used for system_state. Ordering of the states must not be changed
+/**
+ * enum system_states - Values used for system_state.
+ *
+ * @SYSTEM_BOOTING: %0, no init needed
+ * @SYSTEM_SCHEDULING: system is ready for scheduling; OK to use RCU
+ * @SYSTEM_FREEING_INITMEM: system is freeing all of initmem; almost running
+ * @SYSTEM_RUNNING: system is up and running
+ * @SYSTEM_HALT: system entered clean system halt state
+ * @SYSTEM_POWER_OFF: system entered shutdown/clean power off state
+ * @SYSTEM_RESTART: system entered emergency power off or normal restart
+ * @SYSTEM_SUSPEND: system entered suspend or hibernate state
+ *
+ * Note:
+ * Ordering of the states must not be changed
 * as code checks for <, <=, >, >= STATE.
 */
-extern enum system_states {
+enum system_states {
	SYSTEM_BOOTING,
	SYSTEM_SCHEDULING,
	SYSTEM_FREEING_INITMEM,
@@ -177,7 +189,8 @@ extern enum system_states {
	SYSTEM_POWER_OFF,
	SYSTEM_RESTART,
	SYSTEM_SUSPEND,
-} system_state;
+};
+extern enum system_states system_state;

 /*
 * General tracing related utility functions - trace_printk(),
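The kernel-doc note about ordering is load-bearing: code across the tree compares system_state with relational operators, so reordering the enum would silently change behavior. A typical pattern (illustrative, not from this patch):

#include <linux/kernel.h>

/* Typical ordered use: only take a sleeping path once init has finished
 * and before shutdown begins. Relies on the fixed enum order above.
 */
static bool can_sleep_now(void)
{
	return system_state >= SYSTEM_RUNNING && system_state < SYSTEM_HALT;
}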
@@ -395,6 +395,9 @@ struct kimage {

	/* Information for loading purgatory */
	struct purgatory_info purgatory_info;
+
+	/* Force carrying over the DTB from the current boot */
+	bool force_dtb;
 #endif

 #ifdef CONFIG_CRASH_HOTPLUG
@@ -461,7 +464,7 @@ bool kexec_load_permitted(int kexec_image_type);
 /* List of defined/legal kexec file flags */
 #define KEXEC_FILE_FLAGS	(KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \
				 KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_DEBUG | \
-				 KEXEC_FILE_NO_CMA)
+				 KEXEC_FILE_NO_CMA | KEXEC_FILE_FORCE_DTB)

 /* flag to track if kexec reboot is in progress */
 extern bool kexec_in_progress;
@@ -40,6 +40,7 @@ struct kho_serialization;

 #ifdef CONFIG_KEXEC_HANDOVER
 bool kho_is_enabled(void);
+bool is_kho_boot(void);

 int kho_preserve_folio(struct folio *folio);
 int kho_preserve_phys(phys_addr_t phys, size_t size);
@@ -60,6 +61,11 @@ static inline bool kho_is_enabled(void)
	return false;
 }

+static inline bool is_kho_boot(void)
+{
+	return false;
+}
+
 static inline int kho_preserve_folio(struct folio *folio)
 {
	return -EOPNOTSUPP;
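The `!CONFIG_KEXEC_HANDOVER` stub returning false is what lets callers, such as the EFI reserve_regions() hunk earlier, test is_kho_boot() unconditionally with no #ifdef at the call site. A hedged sketch of a caller built either way:

#include <linux/kexec_handover.h>

/* Illustrative caller: compiles with or without CONFIG_KEXEC_HANDOVER,
 * because the header provides a false-returning stub in the latter case
 * and the dead branch is optimized away.
 */
static void example_memory_setup(void)
{
	if (is_kho_boot()) {
		/* preserve scratch regions handed over by the old kernel */
	} else {
		/* normal boot: rebuild the memory map from firmware */
	}
}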
@@ -20,8 +20,16 @@
 * using the generic single-entry routines.
 */

+/**
+ * LIST_HEAD_INIT - initialize a &struct list_head's links to point to itself
+ * @name: name of the list_head
+ */
 #define LIST_HEAD_INIT(name) { &(name), &(name) }

+/**
+ * LIST_HEAD - definition of a &struct list_head with initialization values
+ * @name: name of the list_head
+ */
 #define LIST_HEAD(name) \
	struct list_head name = LIST_HEAD_INIT(name)
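The two macros just documented are the standard way to define a statically initialized list; a short usage sketch:

#include <linux/list.h>

/* A list head initialized at definition time via LIST_HEAD_INIT. */
static LIST_HEAD(pending);

struct item {
	struct list_head node;
	int val;
};

static void example_add(struct item *it)
{
	list_add_tail(&it->node, &pending);	/* append to the list */
}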
@@ -349,6 +349,19 @@ static inline void kernel_param_unlock(struct module *mod)
	__module_param_call("", name, &param_ops_##type, &var, perm, \
			    -1, KERNEL_PARAM_FL_UNSAFE)

+/**
+ * __core_param_cb - similar to core_param, but with set/get ops instead of a type.
+ * @name: the name of the cmdline and sysfs parameter (often the same as the variable)
+ * @ops: the set & get operations for this parameter.
+ * @arg: the argument passed to the ops.
+ * @perm: visibility in sysfs
+ *
+ * Ideally this would be called 'core_param_cb', but that name has already
+ * been used for module core parameters, so add the '__' prefix.
+ */
+#define __core_param_cb(name, ops, arg, perm)		\
+	__module_param_call("", name, ops, arg, perm, -1, 0)
+
 #endif /* !MODULE */

 /**
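A hedged usage sketch of the new macro, registering a built-in-only parameter backed by the stock integer ops; the parameter name and variable are illustrative, not from the patch:

#include <linux/moduleparam.h>

static int example_level;

static const struct kernel_param_ops example_ops = {
	.set = param_set_int,	/* standard integer setter */
	.get = param_get_int,	/* standard integer getter */
};

/* Registers "example_level" on the kernel command line and in sysfs. */
__core_param_cb(example_level, &example_ops, &example_level, 0644);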
@@ -103,7 +103,7 @@ struct nvmem_cell_info {
 *
 * Note: A default "nvmem<id>" name will be assigned to the device if
 * no name is specified in its configuration. In such case "<id>" is
- * generated with ida_simple_get() and provided id field is ignored.
+ * generated with ida_alloc() and provided id field is ignored.
 *
 * Note: Specifying name and setting id to -1 implies a unique device
 * whose name is provided as-is (kept unaltered).
@@ -43,6 +43,12 @@ void abort(void);
 extern atomic_t panic_cpu;
 #define PANIC_CPU_INVALID	-1

+bool panic_try_start(void);
+void panic_reset(void);
+bool panic_in_progress(void);
+bool panic_on_this_cpu(void);
+bool panic_on_other_cpu(void);
+
 /*
 * Only to be used by arch init code. If the user over-wrote the default
 * CONFIG_PANIC_TIMEOUT, honor it.
@@ -330,8 +330,6 @@ static inline bool pr_flush(int timeout_ms, bool reset_on_progress)

 #endif

-bool this_cpu_in_panic(void);
-
 #ifdef CONFIG_SMP
 extern int __printk_cpu_sync_try_get(void);
 extern void __printk_cpu_sync_wait(void);
@@ -210,9 +210,8 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
 * pins the final release of task.io_context. Also protects ->cpuset and
 * ->cgroup.subsys[]. And ->vfork_done. And ->sysvshm.shm_clist.
 *
- * Nests both inside and outside of read_lock(&tasklist_lock).
- * It must not be nested with write_lock_irq(&tasklist_lock),
- * neither inside nor outside.
+ * Nests inside of read_lock(&tasklist_lock). It must not be nested with
+ * write_lock_irq(&tasklist_lock), neither inside nor outside.
 */
 static inline void task_lock(struct task_struct *p)
 {
@@ -22,12 +22,16 @@
 * KEXEC_FILE_ON_CRASH : Load/unload operation belongs to kdump image.
 * KEXEC_FILE_NO_INITRAMFS : No initramfs is being loaded. Ignore the initrd
 *                           fd field.
+ * KEXEC_FILE_FORCE_DTB : Force carrying over the current boot's DTB to the new
+ *                        kernel on x86. This is already the default behavior on
+ *                        some other architectures, like ARM64 and PowerPC.
 */
 #define KEXEC_FILE_UNLOAD	0x00000001
 #define KEXEC_FILE_ON_CRASH	0x00000002
 #define KEXEC_FILE_NO_INITRAMFS	0x00000004
 #define KEXEC_FILE_DEBUG	0x00000008
 #define KEXEC_FILE_NO_CMA	0x00000010
+#define KEXEC_FILE_FORCE_DTB	0x00000020

 /* These values match the ELF architecture values.
 * Unless there is a good reason that should continue to be the case.
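From userspace, the new flag is passed straight to kexec_file_load(2). A hedged sketch (kernel path and cmdline are illustrative; the flag values match the uapi header above):

/* Sketch: load a kernel for kexec while forcing the current boot's DTB
 * to be carried over. Skips the initrd via KEXEC_FILE_NO_INITRAMFS.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

#define KEXEC_FILE_NO_INITRAMFS	0x00000004
#define KEXEC_FILE_FORCE_DTB	0x00000020

int main(void)
{
	int kernel_fd = open("/boot/vmlinuz", O_RDONLY);	/* example */
	const char *cmdline = "console=ttyS0";

	if (kernel_fd < 0)
		return 1;
	/* cmdline length must include the terminating NUL */
	return syscall(SYS_kexec_file_load, kernel_fd, -1,
		       strlen(cmdline) + 1, cmdline,
		       (unsigned long)(KEXEC_FILE_FORCE_DTB |
				       KEXEC_FILE_NO_INITRAMFS));
}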
init/main.c
@@ -545,6 +545,12 @@ static int __init unknown_bootoption(char *param, char *val,
				     const char *unused, void *arg)
 {
	size_t len = strlen(param);
+	/*
+	 * Well-known bootloader identifiers:
+	 * 1. LILO/Grub pass "BOOT_IMAGE=...";
+	 * 2. kexec/kdump (kexec-tools) pass "kexec".
+	 */
+	const char *bootloader[] = { "BOOT_IMAGE=", "kexec", NULL };

	/* Handle params aliased to sysctls */
	if (sysctl_is_alias(param))
@@ -552,6 +558,12 @@ static int __init unknown_bootoption(char *param, char *val,

	repair_env_string(param, val);

+	/* Handle bootloader identifier */
+	for (int i = 0; bootloader[i]; i++) {
+		if (strstarts(param, bootloader[i]))
+			return 0;
+	}
+
	/* Handle obsolete-style parameters */
	if (obsolete_checksetup(param))
		return 0;
@@ -148,6 +148,17 @@ config CRASH_DM_CRYPT_CONFIGS
	  CRASH_DM_CRYPT cannot directly select CONFIGFS_FS, because that
	  is required to be built-in.

+config CRASH_DUMP_KUNIT_TEST
+	tristate "Unit Tests for kernel crash dumps" if !KUNIT_ALL_TESTS
+	depends on CRASH_DUMP && KUNIT
+	default KUNIT_ALL_TESTS
+	help
+	  This option builds KUnit unit tests for kernel crash dumps. The unit
+	  tests will be used to verify the correctness of covered functions and
+	  also prevent any regression.
+
+	  If unsure, say N.
+
 config CRASH_HOTPLUG
	bool "Update the crash elfcorehdr on system configuration changes"
	default y
@@ -78,6 +78,7 @@ obj-$(CONFIG_CRASH_RESERVE) += crash_reserve.o
 obj-$(CONFIG_KEXEC_CORE) += kexec_core.o
 obj-$(CONFIG_CRASH_DUMP) += crash_core.o
 obj-$(CONFIG_CRASH_DM_CRYPT) += crash_dump_dm_crypt.o
+obj-$(CONFIG_CRASH_DUMP_KUNIT_TEST) += crash_core_test.o
 obj-$(CONFIG_KEXEC) += kexec.o
 obj-$(CONFIG_KEXEC_FILE) += kexec_file.o
 obj-$(CONFIG_KEXEC_ELF) += kexec_elf.o
@@ -22,6 +22,7 @@
 #include <linux/btf.h>
 #include <linux/objtool.h>
 #include <linux/delay.h>
+#include <linux/panic.h>

 #include <asm/page.h>
 #include <asm/sections.h>
@@ -143,17 +144,7 @@ STACK_FRAME_NON_STANDARD(__crash_kexec);

 __bpf_kfunc void crash_kexec(struct pt_regs *regs)
 {
-	int old_cpu, this_cpu;
-
-	/*
-	 * Only one CPU is allowed to execute the crash_kexec() code as with
-	 * panic(). Otherwise parallel calls of panic() and crash_kexec()
-	 * may stop each other. To exclude them, we use panic_cpu here too.
-	 */
-	old_cpu = PANIC_CPU_INVALID;
-	this_cpu = raw_smp_processor_id();
-
-	if (atomic_try_cmpxchg(&panic_cpu, &old_cpu, this_cpu)) {
+	if (panic_try_start()) {
		/* This is the 1st CPU which comes here, so go ahead. */
		__crash_kexec(regs);

@@ -161,7 +152,7 @@ __bpf_kfunc void crash_kexec(struct pt_regs *regs)
		 * Reset panic_cpu to allow another panic()/crash_kexec()
		 * call.
		 */
-		atomic_set(&panic_cpu, PANIC_CPU_INVALID);
+		panic_reset();
	}
 }

@@ -274,6 +265,20 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map,
	return 0;
 }

+/**
+ * crash_exclude_mem_range - exclude a memory range from existing ranges
+ * @mem: mem->ranges contains an array of ranges sorted in ascending order
+ * @mstart: the start of the to-be-excluded range
+ * @mend: the end of the to-be-excluded range
+ *
+ * If you are unsure whether a range split will happen, to avoid function
+ * call failure because of -ENOMEM, always make sure
+ * mem->max_nr_ranges == mem->nr_ranges + 1
+ * before calling the function each time.
+ *
+ * Returns 0 if a memory range is excluded successfully,
+ * -ENOMEM if mem->ranges doesn't have space to hold split ranges.
+ */
 int crash_exclude_mem_range(struct crash_mem *mem,
			    unsigned long long mstart, unsigned long long mend)
 {
@@ -333,6 +338,7 @@ int crash_exclude_mem_range(struct crash_mem *mem,

	return 0;
 }
+EXPORT_SYMBOL_GPL(crash_exclude_mem_range);

 ssize_t crash_get_memory_size(void)
 {
kernel/crash_core_test.c (new file)
@@ -0,0 +1,343 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <kunit/test.h>
#include <linux/crash_core.h> // For struct crash_mem and struct range if defined there

// Helper to create and initialize crash_mem
static struct crash_mem *create_crash_mem(struct kunit *test, unsigned int max_ranges,
					  unsigned int nr_initial_ranges,
					  const struct range *initial_ranges)
{
	struct crash_mem *mem;
	size_t alloc_size;

	// Check if max_ranges can even hold initial_ranges
	if (max_ranges < nr_initial_ranges) {
		kunit_err(test, "max_ranges (%u) < nr_initial_ranges (%u)\n",
			  max_ranges, nr_initial_ranges);
		return NULL;
	}

	alloc_size = sizeof(struct crash_mem) + (size_t)max_ranges * sizeof(struct range);
	mem = kunit_kzalloc(test, alloc_size, GFP_KERNEL);
	if (!mem) {
		kunit_err(test, "Failed to allocate crash_mem\n");
		return NULL;
	}

	mem->max_nr_ranges = max_ranges;
	mem->nr_ranges = nr_initial_ranges;
	if (initial_ranges && nr_initial_ranges > 0) {
		memcpy(mem->ranges, initial_ranges,
		       nr_initial_ranges * sizeof(struct range));
	}

	return mem;
}

// Helper to compare ranges for assertions
static void assert_ranges_equal(struct kunit *test,
				const struct range *actual_ranges,
				unsigned int actual_nr_ranges,
				const struct range *expected_ranges,
				unsigned int expected_nr_ranges,
				const char *case_name)
{
	unsigned int i;

	KUNIT_ASSERT_EQ_MSG(test, expected_nr_ranges, actual_nr_ranges,
			    "%s: Number of ranges mismatch.", case_name);

	for (i = 0; i < expected_nr_ranges; i++) {
		KUNIT_ASSERT_EQ_MSG(test, expected_ranges[i].start, actual_ranges[i].start,
				    "%s: Range %u start mismatch.", case_name, i);
		KUNIT_ASSERT_EQ_MSG(test, expected_ranges[i].end, actual_ranges[i].end,
				    "%s: Range %u end mismatch.", case_name, i);
	}
}

// Structure for test parameters
struct exclude_test_param {
	const char *description;
	unsigned long long exclude_start;
	unsigned long long exclude_end;
	unsigned int initial_max_ranges;
	const struct range *initial_ranges;
	unsigned int initial_nr_ranges;
	const struct range *expected_ranges;
	unsigned int expected_nr_ranges;
	int expected_ret;
};

static void run_exclude_test_case(struct kunit *test, const struct exclude_test_param *params)
{
	struct crash_mem *mem;
	int ret;

	kunit_info(test, "%s", params->description);

	mem = create_crash_mem(test, params->initial_max_ranges,
			       params->initial_nr_ranges, params->initial_ranges);
	if (!mem)
		return; // Error already logged by create_crash_mem or kunit_kzalloc

	ret = crash_exclude_mem_range(mem, params->exclude_start, params->exclude_end);

	KUNIT_ASSERT_EQ_MSG(test, params->expected_ret, ret,
			    "%s: Return value mismatch.", params->description);

	if (params->expected_ret == 0) {
		assert_ranges_equal(test, mem->ranges, mem->nr_ranges,
				    params->expected_ranges, params->expected_nr_ranges,
				    params->description);
	} else {
		// If an error is expected, nr_ranges might still be relevant to check
		// depending on the exact point of failure. For ENOMEM on split,
		// nr_ranges shouldn't have changed.
		KUNIT_ASSERT_EQ_MSG(test, params->initial_nr_ranges,
				    mem->nr_ranges,
				    "%s: Number of ranges mismatch on error.",
				    params->description);
	}
}

/*
 * Test Strategy 1: One to-be-excluded range A and one existing range B.
 *
 * Exhaust all possibilities of the position of A regarding B.
 */

static const struct range single_range_b = { .start = 100, .end = 199 };

static const struct exclude_test_param exclude_single_range_test_data[] = {
	{
		.description = "1.1: A is left of B, no overlap",
		.exclude_start = 10, .exclude_end = 50,
		.initial_max_ranges = 1,
		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
		.expected_ranges = &single_range_b, .expected_nr_ranges = 1,
		.expected_ret = 0,
	},
	{
		.description = "1.2: A's right boundary touches B's left boundary",
		.exclude_start = 10, .exclude_end = 99,
		.initial_max_ranges = 1,
		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
		.expected_ranges = &single_range_b, .expected_nr_ranges = 1,
		.expected_ret = 0,
	},
	{
		.description = "1.3: A overlaps B's left part",
		.exclude_start = 50, .exclude_end = 149,
		.initial_max_ranges = 1,
		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
		.expected_ranges = (const struct range[]){{ .start = 150, .end = 199 }},
		.expected_nr_ranges = 1,
		.expected_ret = 0,
	},
	{
		.description = "1.4: A is completely inside B",
		.exclude_start = 120, .exclude_end = 179,
		.initial_max_ranges = 2, // Needs space for split
		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
		.expected_ranges = (const struct range[]){
			{ .start = 100, .end = 119 },
			{ .start = 180, .end = 199 }
		},
		.expected_nr_ranges = 2,
		.expected_ret = 0,
	},
	{
		.description = "1.5: A overlaps B's right part",
		.exclude_start = 150, .exclude_end = 249,
		.initial_max_ranges = 1,
		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
		.expected_ranges = (const struct range[]){{ .start = 100, .end = 149 }},
		.expected_nr_ranges = 1,
		.expected_ret = 0,
	},
	{
		.description = "1.6: A's left boundary touches B's right boundary",
		.exclude_start = 200, .exclude_end = 250,
		.initial_max_ranges = 1,
		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
		.expected_ranges = &single_range_b, .expected_nr_ranges = 1,
		.expected_ret = 0,
	},
	{
		.description = "1.7: A is right of B, no overlap",
		.exclude_start = 250, .exclude_end = 300,
		.initial_max_ranges = 1,
		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
		.expected_ranges = &single_range_b, .expected_nr_ranges = 1,
		.expected_ret = 0,
	},
	{
		.description = "1.8: A completely covers B and extends beyond",
		.exclude_start = 50, .exclude_end = 250,
		.initial_max_ranges = 1,
		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
		.expected_ranges = NULL, .expected_nr_ranges = 0,
		.expected_ret = 0,
	},
	{
		.description = "1.9: A covers B and extends to the left",
		.exclude_start = 50, .exclude_end = 199, // A ends exactly where B ends
		.initial_max_ranges = 1,
		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
		.expected_ranges = NULL, .expected_nr_ranges = 0,
		.expected_ret = 0,
	},
	{
		.description = "1.10: A covers B and extends to the right",
		.exclude_start = 100, .exclude_end = 250, // A starts exactly where B starts
		.initial_max_ranges = 1,
		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
		.expected_ranges = NULL, .expected_nr_ranges = 0,
		.expected_ret = 0,
	},
	{
		.description = "1.11: A is identical to B",
		.exclude_start = 100, .exclude_end = 199,
		.initial_max_ranges = 1,
		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
		.expected_ranges = NULL, .expected_nr_ranges = 0,
		.expected_ret = 0,
	},
	{
		.description = "1.12: A is a point, left of B, no overlap",
		.exclude_start = 10, .exclude_end = 10,
		.initial_max_ranges = 1,
		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
		.expected_ranges = &single_range_b, .expected_nr_ranges = 1,
		.expected_ret = 0,
	},
	{
		.description = "1.13: A is a point, at start of B",
		.exclude_start = 100, .exclude_end = 100,
		.initial_max_ranges = 1,
		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
		.expected_ranges = (const struct range[]){{ .start = 101, .end = 199 }},
		.expected_nr_ranges = 1,
		.expected_ret = 0,
	},
	{
		.description = "1.14: A is a point, in middle of B (causes split)",
		.exclude_start = 150, .exclude_end = 150,
		.initial_max_ranges = 2, // Needs space for split
		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
		.expected_ranges = (const struct range[]){
			{ .start = 100, .end = 149 },
			{ .start = 151, .end = 199 }
		},
		.expected_nr_ranges = 2,
		.expected_ret = 0,
	},
	{
		.description = "1.15: A is a point, at end of B
|
||||
.exclude_start = 199, .exclude_end = 199,
|
||||
.initial_max_ranges = 1,
|
||||
.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
|
||||
.expected_ranges = (const struct range[]){{ .start = 100, .end = 198 }},
|
||||
.expected_nr_ranges = 1,
|
||||
.expected_ret = 0,
|
||||
},
|
||||
{
|
||||
.description = "1.16: A is a point, right of B, no overlap",
|
||||
.exclude_start = 250, .exclude_end = 250,
|
||||
.initial_max_ranges = 1,
|
||||
.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
|
||||
.expected_ranges = &single_range_b, .expected_nr_ranges = 1,
|
||||
.expected_ret = 0,
|
||||
},
|
||||
// ENOMEM case for single range split
|
||||
{
|
||||
.description = "1.17: A completely inside B (split), no space (ENOMEM)",
|
||||
.exclude_start = 120, .exclude_end = 179,
|
||||
.initial_max_ranges = 1, // Not enough for split
|
||||
.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
|
||||
.expected_ranges = NULL, // Not checked on error by assert_ranges_equal for content
|
||||
.expected_nr_ranges = 1, // Should remain unchanged
|
||||
.expected_ret = -ENOMEM,
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
static void exclude_single_range_test(struct kunit *test)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(exclude_single_range_test_data); i++) {
|
||||
kunit_log(KERN_INFO, test, "Running: %s", exclude_single_range_test_data[i].description);
|
||||
run_exclude_test_case(test, &exclude_single_range_test_data[i]);
|
||||
// KUnit will stop on first KUNIT_ASSERT failure within run_exclude_test_case
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Test Strategy 2: Regression test.
|
||||
*/
|
||||
|
||||
static const struct exclude_test_param exclude_range_regression_test_data[] = {
|
||||
// Test data from commit a2e9a95d2190
|
||||
{
|
||||
.description = "2.1: exclude low 1M",
|
||||
.exclude_start = 0, .exclude_end = (1 << 20) - 1,
|
||||
.initial_max_ranges = 3,
|
||||
.initial_ranges = (const struct range[]){
|
||||
{ .start = 0, .end = 0x3efff },
|
||||
{ .start = 0x3f000, .end = 0x3ffff },
|
||||
{ .start = 0x40000, .end = 0x9ffff }
|
||||
},
|
||||
.initial_nr_ranges = 3,
|
||||
.expected_nr_ranges = 0,
|
||||
.expected_ret = 0,
|
||||
},
|
||||
// Test data from https://lore.kernel.org/all/ZXrY7QbXAlxydsSC@MiWiFi-R3L-srv/T/#u
|
||||
{
|
||||
.description = "2.2: when range out of bound",
|
||||
.exclude_start = 100, .exclude_end = 200,
|
||||
.initial_max_ranges = 3,
|
||||
.initial_ranges = (const struct range[]){
|
||||
{ .start = 1, .end = 299 },
|
||||
{ .start = 401, .end = 1000 },
|
||||
{ .start = 1001, .end = 2000 }
|
||||
},
|
||||
.initial_nr_ranges = 3,
|
||||
.expected_ranges = NULL, // Not checked on error by assert_ranges_equal for content
|
||||
.expected_nr_ranges = 3, // Should remain unchanged
|
||||
.expected_ret = -ENOMEM
|
||||
},
|
||||
|
||||
};
|
||||
|
||||
|
||||
static void exclude_range_regression_test(struct kunit *test)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(exclude_range_regression_test_data); i++) {
|
||||
kunit_log(KERN_INFO, test, "Running: %s", exclude_range_regression_test_data[i].description);
|
||||
run_exclude_test_case(test, &exclude_range_regression_test_data[i]);
|
||||
// KUnit will stop on first KUNIT_ASSERT failure within run_exclude_test_case
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* KUnit Test Suite
|
||||
*/
|
||||
static struct kunit_case crash_exclude_mem_range_test_cases[] = {
|
||||
KUNIT_CASE(exclude_single_range_test),
|
||||
KUNIT_CASE(exclude_range_regression_test),
|
||||
{}
|
||||
};
|
||||
|
||||
static struct kunit_suite crash_exclude_mem_range_suite = {
|
||||
.name = "crash_exclude_mem_range_tests",
|
||||
.test_cases = crash_exclude_mem_range_test_cases,
|
||||
// .init and .exit can be NULL if not needed globally for the suite
|
||||
};
|
||||
|
||||
kunit_test_suite(crash_exclude_mem_range_suite);
|
||||
|
||||
MODULE_DESCRIPTION("crash dump KUnit test suite");
|
||||
MODULE_LICENSE("GPL");
|
||||
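As a worked example of the semantics the tables above encode (values taken from cases 1.4 and 1.17): excluding A = [120, 179] from B = [100, 199] splits B in two, so the range table needs room for one extra entry. A minimal sketch:

	/* [100, 199] minus [120, 179] -> [100, 119] and [180, 199] */
	mem = create_crash_mem(test, 2 /* max_nr_ranges */, 1, &single_range_b);
	ret = crash_exclude_mem_range(mem, 120, 179);
	/* ret == 0, mem->nr_ranges == 2 */

	/* With max_nr_ranges == 1 there is no room to store the second
	 * half of the split: the call returns -ENOMEM and the existing
	 * ranges are left unchanged. */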
@@ -2132,9 +2132,7 @@ __latent_entropy struct task_struct *copy_process(

	p->pagefault_disabled = 0;

#ifdef CONFIG_LOCKDEP
	lockdep_init_task(p);
#endif

	p->blocked_on = NULL; /* not blocked yet */

@@ -2547,11 +2545,9 @@ struct task_struct * __init fork_idle(int cpu)
struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node)
{
	unsigned long flags = CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
			      CLONE_IO;
			      CLONE_IO|CLONE_VM|CLONE_UNTRACED;
	struct kernel_clone_args args = {
		.flags = ((lower_32_bits(flags) | CLONE_VM |
			   CLONE_UNTRACED) & ~CSIGNAL),
		.exit_signal = (lower_32_bits(flags) & CSIGNAL),
		.flags = flags,
		.fn = fn,
		.fn_arg = arg,
		.io_thread = 1,
@@ -2663,9 +2659,8 @@ pid_t kernel_thread(int (*fn)(void *), void *arg, const char *name,
		    unsigned long flags)
{
	struct kernel_clone_args args = {
		.flags = ((lower_32_bits(flags) | CLONE_VM |
			   CLONE_UNTRACED) & ~CSIGNAL),
		.exit_signal = (lower_32_bits(flags) & CSIGNAL),
		.flags = ((flags | CLONE_VM | CLONE_UNTRACED) & ~CSIGNAL),
		.exit_signal = (flags & CSIGNAL),
		.fn = fn,
		.fn_arg = arg,
		.name = name,
@@ -2681,9 +2676,8 @@ pid_t kernel_thread(int (*fn)(void *), void *arg, const char *name,
pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags)
{
	struct kernel_clone_args args = {
		.flags = ((lower_32_bits(flags) | CLONE_VM |
			   CLONE_UNTRACED) & ~CSIGNAL),
		.exit_signal = (lower_32_bits(flags) & CSIGNAL),
		.flags = ((flags | CLONE_VM | CLONE_UNTRACED) & ~CSIGNAL),
		.exit_signal = (flags & CSIGNAL),
		.fn = fn,
		.fn_arg = arg,
	};

@@ -95,9 +95,41 @@ static struct notifier_block panic_block = {
	.notifier_call = hung_task_panic,
};

static bool task_is_hung(struct task_struct *t, unsigned long timeout)
{
	unsigned long switch_count = t->nvcsw + t->nivcsw;
	unsigned int state = READ_ONCE(t->__state);

	/*
	 * skip the TASK_KILLABLE tasks -- these can be killed
	 * skip the TASK_IDLE tasks -- those are genuinely idle
	 * skip the TASK_FROZEN tasks -- those were legitimately stopped by the freezer
	 */
	if (!(state & TASK_UNINTERRUPTIBLE) ||
	    (state & (TASK_WAKEKILL | TASK_NOLOAD | TASK_FROZEN)))
		return false;

	/*
	 * A freshly created task that has been scheduled once and changed its
	 * state to TASK_UNINTERRUPTIBLE without ever having been switched out
	 * mustn't be checked.
	 */
	if (unlikely(!switch_count))
		return false;

	if (switch_count != t->last_switch_count) {
		t->last_switch_count = switch_count;
		t->last_switch_time = jiffies;
		return false;
	}
	if (time_is_after_jiffies(t->last_switch_time + timeout * HZ))
		return false;

	return true;
}

#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
static void debug_show_blocker(struct task_struct *task)
static void debug_show_blocker(struct task_struct *task, unsigned long timeout)
{
	struct task_struct *g, *t;
	unsigned long owner, blocker, blocker_type;
@@ -174,41 +206,21 @@ static void debug_show_blocker(struct task_struct *task)
				 t->pid, rwsem_blocked_by);
			break;
		}
		sched_show_task(t);
		/* Avoid a duplicated task dump; skip if the task is also hung. */
		if (!task_is_hung(t, timeout))
			sched_show_task(t);
		return;
	}
}
#else
static inline void debug_show_blocker(struct task_struct *task)
static inline void debug_show_blocker(struct task_struct *task, unsigned long timeout)
{
}
#endif

static void check_hung_task(struct task_struct *t, unsigned long timeout)
{
	unsigned long switch_count = t->nvcsw + t->nivcsw;

	/*
	 * Ensure the task is not frozen.
	 * Also, skip vfork and any other user process that freezer should skip.
	 */
	if (unlikely(READ_ONCE(t->__state) & TASK_FROZEN))
		return;

	/*
	 * A freshly created task that has been scheduled once and changed its
	 * state to TASK_UNINTERRUPTIBLE without ever having been switched out
	 * mustn't be checked.
	 */
	if (unlikely(!switch_count))
		return;

	if (switch_count != t->last_switch_count) {
		t->last_switch_count = switch_count;
		t->last_switch_time = jiffies;
		return;
	}
	if (time_is_after_jiffies(t->last_switch_time + timeout * HZ))
	if (!task_is_hung(t, timeout))
		return;

	/*
@@ -243,7 +255,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
	pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
		" disables this message.\n");
	sched_show_task(t);
	debug_show_blocker(t);
	debug_show_blocker(t, timeout);
	hung_task_show_lock = true;

	if (sysctl_hung_task_all_cpu_backtrace)
@@ -299,7 +311,6 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
	hung_task_show_lock = false;
	rcu_read_lock();
	for_each_process_thread(g, t) {
		unsigned int state;

		if (!max_count--)
			goto unlock;
@@ -308,15 +319,8 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
				goto unlock;
			last_break = jiffies;
		}
		/*
		 * skip the TASK_KILLABLE tasks -- these can be killed
		 * skip the TASK_IDLE tasks -- those are genuinely idle
		 */
		state = READ_ONCE(t->__state);
		if ((state & TASK_UNINTERRUPTIBLE) &&
		    !(state & TASK_WAKEKILL) &&
		    !(state & TASK_NOLOAD))
			check_hung_task(t, timeout);

		check_hung_task(t, timeout);
	}
 unlock:
	rcu_read_unlock();

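For reference, the composite task states that task_is_hung() must filter out all share the TASK_UNINTERRUPTIBLE bit, which is why the extra bits are masked off explicitly. Abbreviated from <linux/sched.h>:

	#define TASK_KILLABLE	(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
	#define TASK_IDLE	(TASK_UNINTERRUPTIBLE | TASK_NOLOAD)

A killable or idle sleeper would therefore match a plain TASK_UNINTERRUPTIBLE test and has to be excluded via TASK_WAKEKILL and TASK_NOLOAD, exactly as the mask above does.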
@@ -264,7 +264,7 @@ static int test_kallsyms_basic_function(void)
	char namebuf[KSYM_NAME_LEN];
	struct test_stat *stat, *stat2;

	stat = kmalloc(sizeof(*stat) * 2, GFP_KERNEL);
	stat = kmalloc_array(2, sizeof(*stat), GFP_KERNEL);
	if (!stat)
		return -ENOMEM;
	stat2 = stat + 1;

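The switch to kmalloc_array() buys an overflow check on the element-count multiplication that the open-coded kmalloc(sizeof(*stat) * 2, ...) lacks. Roughly (a sketch, not the exact in-tree implementation):

	static inline void *kmalloc_array_sketch(size_t n, size_t size, gfp_t flags)
	{
		size_t bytes;

		if (unlikely(check_mul_overflow(n, size, &bytes)))
			return NULL;	/* n * size would wrap */
		return kmalloc(bytes, flags);
	}

For a constant n == 2 the two forms are equivalent; the array form is preferred because it stays safe if the count later becomes variable.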
@@ -978,6 +978,15 @@ static void kcov_move_area(enum kcov_mode mode, void *dst_area,
	memcpy(dst_entries, src_entries, bytes_to_move);
	entries_moved = bytes_to_move >> entry_size_log;

	/*
	 * A write memory barrier is required here, to ensure
	 * that the writes from the memcpy() are visible before
	 * the count is updated. Without this, it is possible for
	 * a user to observe a new count value but stale
	 * coverage data.
	 */
	smp_wmb();

	switch (mode) {
	case KCOV_MODE_TRACE_PC:
		WRITE_ONCE(*(unsigned long *)dst_area, dst_len + entries_moved);

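A write barrier only helps if the consumer orders its reads correspondingly. A hypothetical reader-side sketch of the pairing (the real kcov consumer is userspace and uses its own atomics; consume() is illustrative):

	n = READ_ONCE(*(unsigned long *)area);	/* read the count first */
	smp_rmb();	/* pairs with smp_wmb() in kcov_move_area() */
	for (i = 0; i < n; i++)
		consume(((unsigned long *)area)[i + 1]);	/* entries follow the count */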
@@ -233,7 +233,6 @@ struct kimage *do_kimage_alloc_init(void)
	if (!image)
		return NULL;

	image->head = 0;
	image->entry = &image->head;
	image->last_entry = &image->head;
	image->control_page = ~0; /* By default this does not apply */

@@ -255,6 +255,7 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
	}

	image->no_cma = !!(flags & KEXEC_FILE_NO_CMA);
	image->force_dtb = flags & KEXEC_FILE_FORCE_DTB;

	if (cmdline_len) {
		image->cmdline_buf = memdup_user(cmdline_ptr, cmdline_len);

@@ -987,6 +987,26 @@ static const void *kho_get_fdt(void)
	return kho_in.fdt_phys ? phys_to_virt(kho_in.fdt_phys) : NULL;
}

/**
 * is_kho_boot - check if current kernel was booted via KHO-enabled
 * kexec
 *
 * This function checks if the current kernel was loaded through a kexec
 * operation with KHO enabled, by verifying that a valid KHO FDT
 * was passed.
 *
 * Note: This function returns reliable results only after
 * kho_populate() has been called during early boot. Before that,
 * it may return false even if KHO data is present.
 *
 * Return: true if booted via KHO-enabled kexec, false otherwise
 */
bool is_kho_boot(void)
{
	return !!kho_get_fdt();
}
EXPORT_SYMBOL_GPL(is_kho_boot);

/**
 * kho_retrieve_subtree - retrieve a preserved sub FDT by its name.
 * @name: the name of the sub FDT passed to kho_add_subtree().
@@ -1269,7 +1289,7 @@ int kho_fill_kimage(struct kimage *image)
	int err = 0;
	struct kexec_buf scratch;

	if (!kho_enable)
	if (!kho_out.finalized)
		return 0;

	image->kho.fdt = page_to_phys(kho_out.ser.fdt);

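A hypothetical consumer of the new helper, assuming a kho_retrieve_subtree() that takes the subtree name and a phys_addr_t out-parameter (the name "my_state" and the surrounding initcall are illustrative):

	static int __init my_state_restore(void)
	{
		phys_addr_t phys;

		if (!is_kho_boot())
			return 0;	/* cold boot: nothing was preserved */

		if (kho_retrieve_subtree("my_state", &phys))
			return -ENOENT;

		/* ... map phys and rebuild the preserved state ... */
		return 0;
	}

Note the ordering caveat in the kerneldoc: the answer is only reliable once kho_populate() has run, so such checks belong after early boot.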
kernel/panic.c
@@ -53,7 +53,7 @@ static unsigned int __read_mostly sysctl_oops_all_cpu_backtrace;
#define sysctl_oops_all_cpu_backtrace 0
#endif /* CONFIG_SMP */

int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE;
int panic_on_oops = IS_ENABLED(CONFIG_PANIC_ON_OOPS);
static unsigned long tainted_mask =
	IS_ENABLED(CONFIG_RANDSTRUCT) ? (1 << TAINT_RANDSTRUCT) : 0;
static int pause_on_oops;
@@ -67,6 +67,7 @@ static unsigned int warn_limit __read_mostly;
static bool panic_console_replay;

bool panic_triggering_all_cpu_backtrace;
static bool panic_this_cpu_backtrace_printed;

int panic_timeout = CONFIG_PANIC_TIMEOUT;
EXPORT_SYMBOL_GPL(panic_timeout);
@@ -77,6 +78,11 @@ ATOMIC_NOTIFIER_HEAD(panic_notifier_list);

EXPORT_SYMBOL(panic_notifier_list);

static void panic_print_deprecated(void)
{
	pr_info_once("Kernel: The 'panic_print' parameter is now deprecated. Please use 'panic_sys_info' and 'panic_console_replay' instead.\n");
}

#ifdef CONFIG_SYSCTL

/*
@@ -125,7 +131,7 @@ static int proc_taint(const struct ctl_table *table, int write,
static int sysctl_panic_print_handler(const struct ctl_table *table, int write,
				      void *buffer, size_t *lenp, loff_t *ppos)
{
	pr_info_once("Kernel: 'panic_print' sysctl interface will be obsoleted by both 'panic_sys_info' and 'panic_console_replay'\n");
	panic_print_deprecated();
	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}

@@ -294,6 +300,59 @@ void __weak crash_smp_send_stop(void)

atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);

bool panic_try_start(void)
{
	int old_cpu, this_cpu;

	/*
	 * Only one CPU is allowed to execute the crash_kexec() code as with
	 * panic(). Otherwise parallel calls of panic() and crash_kexec()
	 * may stop each other. To exclude them, we use panic_cpu here too.
	 */
	old_cpu = PANIC_CPU_INVALID;
	this_cpu = raw_smp_processor_id();

	return atomic_try_cmpxchg(&panic_cpu, &old_cpu, this_cpu);
}
EXPORT_SYMBOL(panic_try_start);

void panic_reset(void)
{
	atomic_set(&panic_cpu, PANIC_CPU_INVALID);
}
EXPORT_SYMBOL(panic_reset);

bool panic_in_progress(void)
{
	return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID);
}
EXPORT_SYMBOL(panic_in_progress);

/* Return true if a panic is in progress on the current CPU. */
bool panic_on_this_cpu(void)
{
	/*
	 * We can use raw_smp_processor_id() here because it is impossible for
	 * the task to be migrated to the panic_cpu, or away from it. If
	 * panic_cpu has already been set, and we're not currently executing on
	 * that CPU, then we never will be.
	 */
	return unlikely(atomic_read(&panic_cpu) == raw_smp_processor_id());
}
EXPORT_SYMBOL(panic_on_this_cpu);

/*
 * Return true if a panic is in progress on a remote CPU.
 *
 * On true, the local CPU should immediately release any printing resources
 * that may be needed by the panic CPU.
 */
bool panic_on_other_cpu(void)
{
	return (panic_in_progress() && !panic_on_this_cpu());
}
EXPORT_SYMBOL(panic_on_other_cpu);

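Taken together, these helpers establish one calling convention for the panic_cpu protocol; nmi_panic() below is converted to it, and the printk/nbcon call sites later in this series follow the same shape. In sketch form:

	if (panic_try_start()) {
		/* this CPU now owns the panic */
	} else if (panic_on_other_cpu()) {
		/* a remote CPU is panicking: back off and release any
		 * shared (e.g. console) resources it may need */
	} else {
		/* re-entry on the CPU that already panicked */
	}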
/*
 * A variant of panic() called from NMI context. We return if we've already
 * panicked on this CPU. If another CPU already panicked, loop in
@@ -302,15 +361,9 @@ atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);
 */
void nmi_panic(struct pt_regs *regs, const char *msg)
{
	int old_cpu, this_cpu;

	old_cpu = PANIC_CPU_INVALID;
	this_cpu = raw_smp_processor_id();

	/* atomic_try_cmpxchg updates old_cpu on failure */
	if (atomic_try_cmpxchg(&panic_cpu, &old_cpu, this_cpu))
	if (panic_try_start())
		panic("%s", msg);
	else if (old_cpu != this_cpu)
	else if (panic_on_other_cpu())
		nmi_panic_self_stop(regs);
}
EXPORT_SYMBOL(nmi_panic);
@@ -328,6 +381,19 @@ void check_panic_on_warn(const char *origin)
		origin, limit);
}

static void panic_trigger_all_cpu_backtrace(void)
{
	/* Temporarily allow non-panic CPUs to write their backtraces. */
	panic_triggering_all_cpu_backtrace = true;

	if (panic_this_cpu_backtrace_printed)
		trigger_allbutcpu_cpu_backtrace(raw_smp_processor_id());
	else
		trigger_all_cpu_backtrace();

	panic_triggering_all_cpu_backtrace = false;
}

/*
 * Helper that triggers the NMI backtrace (if set in panic_print)
 * and then performs the secondary CPUs shutdown - we cannot have
@@ -335,12 +401,8 @@ void check_panic_on_warn(const char *origin)
 */
static void panic_other_cpus_shutdown(bool crash_kexec)
{
	if (panic_print & SYS_INFO_ALL_CPU_BT) {
		/* Temporarily allow non-panic CPUs to write their backtraces. */
		panic_triggering_all_cpu_backtrace = true;
		trigger_all_cpu_backtrace();
		panic_triggering_all_cpu_backtrace = false;
	}
	if (panic_print & SYS_INFO_ALL_CPU_BT)
		panic_trigger_all_cpu_backtrace();

	/*
	 * Note that smp_send_stop() is the usual SMP shutdown function,
@@ -368,7 +430,6 @@ void vpanic(const char *fmt, va_list args)
	static char buf[1024];
	long i, i_next = 0, len;
	int state = 0;
	int old_cpu, this_cpu;
	bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers;

	if (panic_on_warn) {
@@ -405,13 +466,10 @@ void vpanic(const char *fmt, va_list args)
	 * `old_cpu == this_cpu' means we came from nmi_panic() which sets
	 * panic_cpu to this CPU. In this case, this is also the 1st CPU.
	 */
	old_cpu = PANIC_CPU_INVALID;
	this_cpu = raw_smp_processor_id();

	/* atomic_try_cmpxchg updates old_cpu on failure */
	if (atomic_try_cmpxchg(&panic_cpu, &old_cpu, this_cpu)) {
	if (panic_try_start()) {
		/* go ahead */
	} else if (old_cpu != this_cpu)
	} else if (panic_on_other_cpu())
		panic_smp_self_stop();

	console_verbose();
@@ -422,13 +480,15 @@ void vpanic(const char *fmt, va_list args)
	buf[len - 1] = '\0';

	pr_emerg("Kernel panic - not syncing: %s\n", buf);
#ifdef CONFIG_DEBUG_BUGVERBOSE
	/*
	 * Avoid nested stack-dumping if a panic occurs during oops processing
	 */
	if (!test_taint(TAINT_DIE) && oops_in_progress <= 1)
	if (test_taint(TAINT_DIE) || oops_in_progress > 1) {
		panic_this_cpu_backtrace_printed = true;
	} else if (IS_ENABLED(CONFIG_DEBUG_BUGVERBOSE)) {
		dump_stack();
#endif
		panic_this_cpu_backtrace_printed = true;
	}

	/*
	 * If kgdb is enabled, give it a chance to run before we stop all
@@ -937,12 +997,29 @@ EXPORT_SYMBOL(__stack_chk_fail);
#endif

core_param(panic, panic_timeout, int, 0644);
core_param(panic_print, panic_print, ulong, 0644);
core_param(pause_on_oops, pause_on_oops, int, 0644);
core_param(panic_on_warn, panic_on_warn, int, 0644);
core_param(crash_kexec_post_notifiers, crash_kexec_post_notifiers, bool, 0644);
core_param(panic_console_replay, panic_console_replay, bool, 0644);

static int panic_print_set(const char *val, const struct kernel_param *kp)
{
	panic_print_deprecated();
	return param_set_ulong(val, kp);
}

static int panic_print_get(char *val, const struct kernel_param *kp)
{
	panic_print_deprecated();
	return param_get_ulong(val, kp);
}

static const struct kernel_param_ops panic_print_ops = {
	.set = panic_print_set,
	.get = panic_print_get,
};
__core_param_cb(panic_print, &panic_print_ops, &panic_print, 0644);

static int __init oops_setup(char *s)
{
	if (!s)

@@ -332,7 +332,6 @@ struct printk_message {
	unsigned long dropped;
};

bool other_cpu_in_panic(void);
bool printk_get_next_message(struct printk_message *pmsg, u64 seq,
			     bool is_extended, bool may_supress);


@@ -12,6 +12,7 @@
#include <linux/irqflags.h>
#include <linux/kthread.h>
#include <linux/minmax.h>
#include <linux/panic.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/slab.h>
@@ -254,7 +255,7 @@ static int nbcon_context_try_acquire_direct(struct nbcon_context *ctxt,
	 * opportunity to perform any necessary cleanup if they were
	 * interrupted by the panic CPU while printing.
	 */
	if (other_cpu_in_panic() &&
	if (panic_on_other_cpu() &&
	    (!is_reacquire || cur->unsafe_takeover)) {
		return -EPERM;
	}
@@ -309,7 +310,7 @@ static bool nbcon_waiter_matches(struct nbcon_state *cur, int expected_prio)
	 * Event #2 implies the new context is PANIC.
	 * Event #3 occurs when panic() has flushed the console.
	 * Event #4 occurs when a non-panic CPU reacquires.
	 * Event #5 is not possible due to the other_cpu_in_panic() check
	 * Event #5 is not possible due to the panic_on_other_cpu() check
	 * in nbcon_context_try_acquire_handover().
	 */

@@ -348,7 +349,7 @@ static int nbcon_context_try_acquire_requested(struct nbcon_context *ctxt,
	struct nbcon_state new;

	/* Note that the caller must still remove the request! */
	if (other_cpu_in_panic())
	if (panic_on_other_cpu())
		return -EPERM;

	/*
@@ -446,7 +447,7 @@ static int nbcon_context_try_acquire_handover(struct nbcon_context *ctxt,
	 * nbcon_waiter_matches(). In particular, the assumption that
	 * lower priorities are ignored during panic.
	 */
	if (other_cpu_in_panic())
	if (panic_on_other_cpu())
		return -EPERM;

	/* Handover is not possible on the same CPU. */
@@ -589,7 +590,6 @@ static struct printk_buffers panic_nbcon_pbufs;
 */
static bool nbcon_context_try_acquire(struct nbcon_context *ctxt, bool is_reacquire)
{
	unsigned int cpu = smp_processor_id();
	struct console *con = ctxt->console;
	struct nbcon_state cur;
	int err;
@@ -614,7 +614,7 @@ out:
	/* Acquire succeeded. */

	/* Assign the appropriate buffer for this context. */
	if (atomic_read(&panic_cpu) == cpu)
	if (panic_on_this_cpu())
		ctxt->pbufs = &panic_nbcon_pbufs;
	else
		ctxt->pbufs = con->pbufs;
@@ -1394,7 +1394,7 @@ enum nbcon_prio nbcon_get_default_prio(void)
{
	unsigned int *cpu_emergency_nesting;

	if (this_cpu_in_panic())
	if (panic_on_this_cpu())
		return NBCON_PRIO_PANIC;

	cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting();

@@ -48,6 +48,7 @@
#include <linux/sched/clock.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
#include <linux/panic.h>

#include <linux/uaccess.h>
#include <asm/sections.h>
@@ -345,34 +346,6 @@ static void __up_console_sem(unsigned long ip)
}
#define up_console_sem() __up_console_sem(_RET_IP_)

static bool panic_in_progress(void)
{
	return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID);
}

/* Return true if a panic is in progress on the current CPU. */
bool this_cpu_in_panic(void)
{
	/*
	 * We can use raw_smp_processor_id() here because it is impossible for
	 * the task to be migrated to the panic_cpu, or away from it. If
	 * panic_cpu has already been set, and we're not currently executing on
	 * that CPU, then we never will be.
	 */
	return unlikely(atomic_read(&panic_cpu) == raw_smp_processor_id());
}

/*
 * Return true if a panic is in progress on a remote CPU.
 *
 * On true, the local CPU should immediately release any printing resources
 * that may be needed by the panic CPU.
 */
bool other_cpu_in_panic(void)
{
	return (panic_in_progress() && !this_cpu_in_panic());
}

/*
 * This is used for debugging the mess that is the VT code by
 * keeping track if we have the console semaphore held. It's
@@ -2407,7 +2380,7 @@ asmlinkage int vprintk_emit(int facility, int level,
	 * non-panic CPUs are generating any messages, they will be
	 * silently dropped.
	 */
	if (other_cpu_in_panic() &&
	if (panic_on_other_cpu() &&
	    !debug_non_panic_cpus &&
	    !panic_triggering_all_cpu_backtrace)
		return 0;
@@ -2843,7 +2816,7 @@ void console_lock(void)
	might_sleep();

	/* On panic, the console_lock must be left to the panic cpu. */
	while (other_cpu_in_panic())
	while (panic_on_other_cpu())
		msleep(1000);

	down_console_sem();
@@ -2863,7 +2836,7 @@ EXPORT_SYMBOL(console_lock);
int console_trylock(void)
{
	/* On panic, the console_lock must be left to the panic cpu. */
	if (other_cpu_in_panic())
	if (panic_on_other_cpu())
		return 0;
	if (down_trylock_console_sem())
		return 0;
@@ -3243,7 +3216,7 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove
		any_progress = true;

		/* Allow panic_cpu to take over the consoles safely. */
		if (other_cpu_in_panic())
		if (panic_on_other_cpu())
			goto abandon;

		if (do_cond_resched)

@@ -2143,7 +2143,7 @@ static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq,
		 * But it would have the sequence number returned
		 * by "prb_next_reserve_seq() - 1".
		 */
		if (this_cpu_in_panic() &&
		if (panic_on_this_cpu() &&
		    (!debug_non_panic_cpus || legacy_allow_panic_sync) &&
		    ((*seq + 1) < prb_next_reserve_seq(rb))) {
			(*seq)++;

kernel/sys.c
@@ -1734,6 +1734,7 @@ SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
	struct rlimit old, new;
	struct task_struct *tsk;
	unsigned int checkflags = 0;
	bool need_tasklist;
	int ret;

	if (old_rlim)
@@ -1760,8 +1761,25 @@ SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
	get_task_struct(tsk);
	rcu_read_unlock();

	ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL,
			 old_rlim ? &old : NULL);
	need_tasklist = !same_thread_group(tsk, current);
	if (need_tasklist) {
		/*
		 * Ensure we can't race with group exit or de_thread(),
		 * so tsk->group_leader can't be freed or changed until
		 * read_unlock(tasklist_lock) below.
		 */
		read_lock(&tasklist_lock);
		if (!pid_alive(tsk))
			ret = -ESRCH;
	}

	if (!ret) {
		ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL,
				 old_rlim ? &old : NULL);
	}

	if (need_tasklist)
		read_unlock(&tasklist_lock);

	if (!ret && old_rlim) {
		rlim_to_rlim64(&old, &old64);
@@ -2515,7 +2533,17 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
			error = -EINVAL;
			break;
		}
		/*
		 * Ensure that either:
		 *
		 * 1. Subsequent getppid() calls reflect the parent process having died.
		 * 2. forget_original_parent() will send the new me->pdeath_signal.
		 *
		 * Also prevent the read of me->pdeath_signal from being a data race.
		 */
		read_lock(&tasklist_lock);
		me->pdeath_signal = arg2;
		read_unlock(&tasklist_lock);
		break;
	case PR_GET_PDEATHSIG:
		error = put_user(me->pdeath_signal, (int __user *)arg2);

@@ -425,7 +425,11 @@ static DEFINE_PER_CPU(u8, cpustat_tail);
 */
static u16 get_16bit_precision(u64 data_ns)
{
	return data_ns >> 24LL; /* 2^24ns ~= 16.8ms */
	/*
	 * 2^24ns ~= 16.8ms
	 * Round to the nearest multiple of 16.8 milliseconds.
	 */
	return (data_ns + (1 << 23)) >> 24LL;
}

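Concretely, one unit here is 2^24 ns ≈ 16.78 ms, and a plain right shift always rounds down; adding half a unit (2^23 ns) first turns that into round-to-nearest. For example, 26 ms is 1.55 units:

	26000000 >> 24			/* == 1: truncates 1.55 down */
	(26000000 + (1 << 23)) >> 24	/* == 2: rounds 1.55 to nearest */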
static void update_cpustat(void)
@@ -444,6 +448,14 @@ static void update_cpustat(void)
		old_stat = __this_cpu_read(cpustat_old[i]);
		new_stat = get_16bit_precision(cpustat[tracked_stats[i]]);
		util = DIV_ROUND_UP(100 * (new_stat - old_stat), sample_period_16);
		/*
		 * Because of the 16-bit precision, the intermediate integer
		 * division can lose information and the computed result may
		 * then exceed 100%. To avoid confusion, cap the displayed
		 * utilization at 100%.
		 */
		if (util > 100)
			util = 100;
		__this_cpu_write(cpustat_util[tail][i], util);
		__this_cpu_write(cpustat_old[i], new_stat);
	}
@@ -455,17 +467,17 @@ static void print_cpustat(void)
{
	int i, group;
	u8 tail = __this_cpu_read(cpustat_tail);
	u64 sample_period_second = sample_period;
	u64 sample_period_msecond = sample_period;

	do_div(sample_period_second, NSEC_PER_SEC);
	do_div(sample_period_msecond, NSEC_PER_MSEC);

	/*
	 * Printing the "watchdog" prefix on every line would be redundant;
	 * the original alarm message is enough to locate these lines in the
	 * logs, hence printk() is used here instead of pr_crit().
	 */
	printk(KERN_CRIT "CPU#%d Utilization every %llus during lockup:\n",
	       smp_processor_id(), sample_period_second);
	printk(KERN_CRIT "CPU#%d Utilization every %llums during lockup:\n",
	       smp_processor_id(), sample_period_msecond);

	for (i = 0; i < NUM_SAMPLE_PERIODS; i++) {
		group = (tail + i) % NUM_SAMPLE_PERIODS;
@@ -740,6 +752,12 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
	if (!watchdog_enabled)
		return HRTIMER_NORESTART;

	/*
	 * Pass the buddy check if a panic is in progress.
	 */
	if (panic_in_progress())
		return HRTIMER_NORESTART;

	watchdog_hardlockup_kick();

	/* kick the softlockup detector */

@@ -12,6 +12,7 @@

#define pr_fmt(fmt) "NMI watchdog: " fmt

#include <linux/panic.h>
#include <linux/nmi.h>
#include <linux/atomic.h>
#include <linux/module.h>
@@ -108,6 +109,9 @@ static void watchdog_overflow_callback(struct perf_event *event,
	/* Ensure the watchdog never gets throttled */
	event->hw.interrupts = 0;

	if (panic_in_progress())
		return;

	if (!watchdog_check_timestamp())
		return;


@@ -1067,12 +1067,6 @@ config PANIC_ON_OOPS

	  Say N if unsure.

config PANIC_ON_OOPS_VALUE
	int
	range 0 1
	default 0 if !PANIC_ON_OOPS
	default 1 if PANIC_ON_OOPS

config PANIC_TIMEOUT
	int "panic timeout"
	default 0

@@ -9,6 +9,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_buf.h>
#include <linux/seq_file.h>
#include <linux/string_choices.h>
#include <linux/vmalloc.h>
#include <linux/kmemleak.h>

@@ -728,7 +729,7 @@ static int __init setup_early_mem_profiling(char *str)
		}
		mem_profiling_support = true;
		pr_info("Memory allocation profiling is enabled %s compression and is turned %s!\n",
			compressed ? "with" : "without", enable ? "on" : "off");
			compressed ? "with" : "without", str_on_off(enable));
	}

	if (enable != mem_alloc_profiling_enabled()) {

@@ -653,9 +653,9 @@ int btree_merge(struct btree_head *target, struct btree_head *victim,
	 * walks to remove a single object from the victim.
	 */
	for (;;) {
		if (!btree_last(victim, geo, key))
		val = btree_last(victim, geo, key);
		if (!val)
			break;
		val = btree_lookup(victim, geo, key);
		err = btree_insert(target, geo, key, val, gfp);
		if (err)
			return err;

@@ -49,15 +49,15 @@ struct compress_format {
};

static const struct compress_format compressed_formats[] __initconst = {
	{ {0x1f, 0x8b}, "gzip", gunzip },
	{ {0x1f, 0x9e}, "gzip", gunzip },
	{ {0x42, 0x5a}, "bzip2", bunzip2 },
	{ {0x5d, 0x00}, "lzma", unlzma },
	{ {0xfd, 0x37}, "xz", unxz },
	{ {0x89, 0x4c}, "lzo", unlzo },
	{ {0x02, 0x21}, "lz4", unlz4 },
	{ {0x28, 0xb5}, "zstd", unzstd },
	{ {0, 0}, NULL, NULL }
	{ .magic = {0x1f, 0x8b}, .name = "gzip", .decompressor = gunzip },
	{ .magic = {0x1f, 0x9e}, .name = "gzip", .decompressor = gunzip },
	{ .magic = {0x42, 0x5a}, .name = "bzip2", .decompressor = bunzip2 },
	{ .magic = {0x5d, 0x00}, .name = "lzma", .decompressor = unlzma },
	{ .magic = {0xfd, 0x37}, .name = "xz", .decompressor = unxz },
	{ .magic = {0x89, 0x4c}, .name = "lzo", .decompressor = unlzo },
	{ .magic = {0x02, 0x21}, .name = "lz4", .decompressor = unlz4 },
	{ .magic = {0x28, 0xb5}, .name = "zstd", .decompressor = unzstd },
	{ /* sentinel */ }
};

decompress_fn __init decompress_method(const unsigned char *inbuf, long len,
@@ -73,11 +73,10 @@ decompress_fn __init decompress_method(const unsigned char *inbuf, long len,

	pr_debug("Compressed data magic: %#.2x %#.2x\n", inbuf[0], inbuf[1]);

	for (cf = compressed_formats; cf->name; cf++) {
	for (cf = compressed_formats; cf->name; cf++)
		if (!memcmp(inbuf, cf->magic, 2))
			break;

	}
	if (name)
		*name = cf->name;
	return cf->decompressor;

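Usage is unchanged by the cleanup; a minimal caller sketch:

	const char *name;
	decompress_fn fn = decompress_method(inbuf, len, &name);

	if (!fn)
		pr_err("unknown compression format\n");	/* sentinel hit: name is NULL */
	else
		pr_debug("detected %s\n", name);

The designated-initializer form also makes the empty sentinel entry { } self-documenting: every field is zero/NULL, which is exactly the loop's termination condition (cf->name == NULL).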
@@ -159,7 +159,6 @@ static int digsig_verify_rsa(struct key *key,

	len = mlen;
	head = len - l;
	memset(out1, 0, head);
	memcpy(out1 + head, p, l);

	kfree(p);

@@ -102,7 +102,7 @@ static void __dump_stack(const char *log_lvl)
 */
asmlinkage __visible void dump_stack_lvl(const char *log_lvl)
{
	bool in_panic = this_cpu_in_panic();
	bool in_panic = panic_on_this_cpu();
	unsigned long flags;

	/*

@@ -22,10 +22,8 @@ static int __init fail_usercopy_debugfs(void)

	dir = fault_create_debugfs_attr("fail_usercopy", NULL,
					&fail_usercopy.attr);
	if (IS_ERR(dir))
		return PTR_ERR(dir);

	return 0;
	return PTR_ERR_OR_ZERO(dir);
}

late_initcall(fail_usercopy_debugfs);

@@ -899,8 +899,11 @@ struct gen_pool *of_gen_pool_get(struct device_node *np,
		if (!name)
			name = of_node_full_name(np_pool);
	}
	if (pdev)
	if (pdev) {
		pool = gen_pool_get(&pdev->dev, name);
		put_device(&pdev->dev);
	}

	of_node_put(np_pool);

	return pool;

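The leak being fixed: of_find_device_by_node() (used earlier in this function to obtain pdev) returns the device with a reference held, so every successful lookup must eventually be balanced by put_device(). The general pattern, in sketch form:

	pdev = of_find_device_by_node(np_pool);		/* takes a reference */
	if (pdev) {
		pool = gen_pool_get(&pdev->dev, name);
		put_device(&pdev->dev);			/* drop it once done */
	}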
@@ -75,7 +75,7 @@ ref_tracker_get_stats(struct ref_tracker_dir *dir, unsigned int limit)
	struct ref_tracker *tracker;

	stats = kmalloc(struct_size(stats, stacks, limit),
			GFP_NOWAIT | __GFP_NOWARN);
			GFP_NOWAIT);
	if (!stats)
		return ERR_PTR(-ENOMEM);
	stats->total = 0;
@@ -159,7 +159,7 @@ __ref_tracker_dir_pr_ostream(struct ref_tracker_dir *dir,
		return;
	}

	sbuf = kmalloc(STACK_BUF_SIZE, GFP_NOWAIT | __GFP_NOWARN);
	sbuf = kmalloc(STACK_BUF_SIZE, GFP_NOWAIT);

	for (i = 0, skipped = stats->total; i < stats->count; ++i) {
		stack = stats->stacks[i].stack_handle;
@@ -306,7 +306,7 @@ int ref_tracker_free(struct ref_tracker_dir *dir,
	}
	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
	stack_handle = stack_depot_save(entries, nr_entries,
					GFP_NOWAIT | __GFP_NOWARN);
					GFP_NOWAIT);

	spin_lock_irqsave(&dir->lock, flags);
	if (tracker->dead) {

@@ -55,7 +55,7 @@ int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write,
			    void *buffer, size_t *lenp,
			    loff_t *ppos)
{
	char names[sizeof(sys_info_avail) + 1];
	char names[sizeof(sys_info_avail)];
	struct ctl_table table;
	unsigned long *si_bits_global;

@@ -81,6 +81,7 @@ int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write,
		char *delim = "";
		int i, len = 0;

		names[0] = '\0';
		for (i = 0; i < ARRAY_SIZE(si_names); i++) {
			if (*si_bits_global & si_names[i].bit) {
				len += scnprintf(names + len, sizeof(names) - len,

@@ -26,6 +26,7 @@
#include <linux/kthread.h>
#include <linux/vmalloc.h>
#include <linux/efi_embedded_fw.h>
#include <linux/string_choices.h>

MODULE_IMPORT_NS("TEST_FIRMWARE");

@@ -304,17 +305,17 @@ static ssize_t config_show(struct device *dev,
			"FW_ACTION_NOUEVENT");
	len += scnprintf(buf + len, PAGE_SIZE - len,
			"into_buf:\t\t%s\n",
			test_fw_config->into_buf ? "true" : "false");
			str_true_false(test_fw_config->into_buf));
	len += scnprintf(buf + len, PAGE_SIZE - len,
			"buf_size:\t%zu\n", test_fw_config->buf_size);
	len += scnprintf(buf + len, PAGE_SIZE - len,
			"file_offset:\t%zu\n", test_fw_config->file_offset);
	len += scnprintf(buf + len, PAGE_SIZE - len,
			"partial:\t\t%s\n",
			test_fw_config->partial ? "true" : "false");
			str_true_false(test_fw_config->partial));
	len += scnprintf(buf + len, PAGE_SIZE - len,
			"sync_direct:\t\t%s\n",
			test_fw_config->sync_direct ? "true" : "false");
			str_true_false(test_fw_config->sync_direct));
	len += scnprintf(buf + len, PAGE_SIZE - len,
			"read_fw_idx:\t%u\n", test_fw_config->read_fw_idx);
	if (test_fw_config->upload_name)

@@ -2636,6 +2636,11 @@ sub exclude_global_initialisers {
		$realfile =~ m@/bpf/.*\.bpf\.c$@;
}

sub is_userspace {
	my ($realfile) = @_;
	return ($realfile =~ m@^tools/@ || $realfile =~ m@^scripts/@);
}

sub process {
	my $filename = shift;

@@ -3294,7 +3299,7 @@ sub process {
					# file delta changes
		    $line =~ /^\s*(?:[\w\.\-\+]*\/)++[\w\.\-\+]+:/ ||
					# filename then :
		    $line =~ /^\s*(?:Fixes:|$link_tags_search|$signature_tags)/i ||
		    $line =~ /^\s*(?:Fixes:|https?:|$link_tags_search|$signature_tags)/i ||
					# A Fixes:, link or signature tag line
		    $commit_log_possible_stack_dump)) {
			WARN("COMMIT_LOG_LONG_LINE",
@@ -7018,21 +7023,20 @@ sub process {
# }
# }
# }

# strcpy uses that should likely be strscpy
		if ($line =~ /\bstrcpy\s*\(/) {
		if ($line =~ /\bstrcpy\s*\(/ && !is_userspace($realfile)) {
			WARN("STRCPY",
			     "Prefer strscpy over strcpy - see: https://github.com/KSPP/linux/issues/88\n" . $herecurr);
		}

# strlcpy uses that should likely be strscpy
		if ($line =~ /\bstrlcpy\s*\(/) {
		if ($line =~ /\bstrlcpy\s*\(/ && !is_userspace($realfile)) {
			WARN("STRLCPY",
			     "Prefer strscpy over strlcpy - see: https://github.com/KSPP/linux/issues/89\n" . $herecurr);
		}

# strncpy uses that should likely be strscpy or strscpy_pad
		if ($line =~ /\bstrncpy\s*\(/) {
		if ($line =~ /\bstrncpy\s*\(/ && !is_userspace($realfile)) {
			WARN("STRNCPY",
			     "Prefer strscpy, strscpy_pad, or __nonstring over strncpy - see: https://github.com/KSPP/linux/issues/90\n" . $herecurr);
		}

@@ -10,12 +10,21 @@ virtual org
virtual report

@match1@
declarer name builtin_i2c_driver;
declarer name builtin_platform_driver;
declarer name builtin_platform_driver_probe;
declarer name module_i2c_driver;
declarer name module_platform_driver;
declarer name module_platform_driver_probe;
identifier __driver;
@@
(
builtin_i2c_driver(__driver);
|
builtin_platform_driver(__driver);
|
builtin_platform_driver_probe(__driver, ...);
|
module_i2c_driver(__driver);
|
module_platform_driver(__driver);

@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
/// Make sure (of/i2c/platform)_device_id tables are NULL terminated
/// Make sure (of/i2c/platform/spi)_device_id tables are NULL terminated
//
// Keywords: of_table i2c_table platform_table
// Confidence: Medium
@@ -15,14 +15,14 @@ identifier var, arr;
expression E;
@@
(
struct \(of_device_id \| i2c_device_id \| platform_device_id\) arr[] = {
struct \(of_device_id \| i2c_device_id \| platform_device_id \| spi_device_id\) arr[] = {
	...,
	{
	.var = E,
*	}
};
|
struct \(of_device_id \| i2c_device_id \| platform_device_id\) arr[] = {
struct \(of_device_id \| i2c_device_id \| platform_device_id \| spi_device_id\) arr[] = {
	...,
*	{ ..., E, ... },
};
@@ -33,7 +33,7 @@ identifier var, arr;
expression E;
@@
(
struct \(of_device_id \| i2c_device_id \| platform_device_id\) arr[] = {
struct \(of_device_id \| i2c_device_id \| platform_device_id \| spi_device_id\) arr[] = {
	...,
	{
	.var = E,
@@ -42,7 +42,7 @@ struct \(of_device_id \| i2c_device_id \| platform_device_id\) arr[] = {
+	{ }
};
|
struct \(of_device_id \| i2c_device_id \| platform_device_id\) arr[] = {
struct \(of_device_id \| i2c_device_id \| platform_device_id \| spi_device_id\) arr[] = {
	...,
	{ ..., E, ... },
+	{ },
@@ -55,7 +55,7 @@ identifier var, arr;
expression E;
@@
(
struct \(of_device_id \| i2c_device_id \| platform_device_id\) arr[] = {
struct \(of_device_id \| i2c_device_id \| platform_device_id \| spi_device_id\) arr[] = {
	...,
	{
	.var = E,
@@ -63,7 +63,7 @@ struct \(of_device_id \| i2c_device_id \| platform_device_id\) arr[] = {
@p1
};
|
struct \(of_device_id \| i2c_device_id \| platform_device_id\) arr[] = {
struct \(of_device_id \| i2c_device_id \| platform_device_id \| spi_device_id\) arr[] = {
	...,
	{ ..., E, ... }
@p1

@@ -42,14 +42,13 @@
#include <linux/genetlink.h>
#include <linux/taskstats.h>
#include <linux/cgroupstats.h>
#include <stddef.h>

#define PSI_CPU_SOME "/proc/pressure/cpu"
#define PSI_CPU_FULL "/proc/pressure/cpu"
#define PSI_MEMORY_SOME "/proc/pressure/memory"
#define PSI_MEMORY_FULL "/proc/pressure/memory"
#define PSI_IO_SOME "/proc/pressure/io"
#define PSI_IO_FULL "/proc/pressure/io"
#define PSI_IRQ_FULL "/proc/pressure/irq"
#define PSI_PATH "/proc/pressure"
#define PSI_CPU_PATH "/proc/pressure/cpu"
#define PSI_MEMORY_PATH "/proc/pressure/memory"
#define PSI_IO_PATH "/proc/pressure/io"
#define PSI_IRQ_PATH "/proc/pressure/irq"

#define NLA_NEXT(na) ((struct nlattr *)((char *)(na) + NLA_ALIGN((na)->nla_len)))
#define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN))
@@ -61,24 +60,28 @@
#define TASK_COMM_LEN 16
#define MAX_MSG_SIZE 1024
#define MAX_TASKS 1000
#define MAX_BUF_LEN 256
#define SET_TASK_STAT(task_count, field) tasks[task_count].field = stats.field
#define BOOL_FPRINT(stream, fmt, ...) \
	({ \
		int ret = fprintf(stream, fmt, ##__VA_ARGS__); \
		ret >= 0; \
	})
#define TASK_AVG(task, field) average_ms((task).field##_delay_total, (task).field##_count)
#define PSI_LINE_FORMAT "%-12s %6.1f%%/%6.1f%%/%6.1f%%/%8llu(ms)\n"
#define DELAY_FMT_DEFAULT "%8.2f %8.2f %8.2f %8.2f\n"
#define DELAY_FMT_MEMVERBOSE "%8.2f %8.2f %8.2f %8.2f %8.2f %8.2f\n"
#define SORT_FIELD(name, cmd, modes) \
	{#name, #cmd, \
	 offsetof(struct task_info, name##_delay_total), \
	 offsetof(struct task_info, name##_count), \
	 modes}
#define END_FIELD {NULL, 0, 0}

/* Program settings structure */
struct config {
	int delay;		/* Update interval in seconds */
	int iterations;		/* Number of iterations, 0 == infinite */
	int max_processes;	/* Maximum number of processes to show */
	char sort_field;	/* Field to sort by */
	int output_one_time;	/* Output once and exit */
	int monitor_pid;	/* Monitor specific PID */
	char *container_path;	/* Path to container cgroup */
};
/* Display mode types */
#define MODE_TYPE_ALL (0xFFFFFFFF)
#define MODE_DEFAULT (1 << 0)
#define MODE_MEMVERBOSE (1 << 1)

/* PSI statistics structure */
struct psi_stats {
@@ -119,6 +122,8 @@ struct task_info {
	unsigned long long wpcopy_delay_total;
	unsigned long long irq_count;
	unsigned long long irq_delay_total;
	unsigned long long mem_count;
	unsigned long long mem_delay_total;
};

/* Container statistics structure */
@@ -130,6 +135,27 @@ struct container_stats {
	int nr_io_wait;		/* Number of processes in IO wait */
};

/* Delay field structure */
struct field_desc {
	const char *name;		/* Field name for cmdline argument */
	const char *cmd_char;		/* Interactive command */
	unsigned long total_offset;	/* Offset of total delay in task_info */
	unsigned long count_offset;	/* Offset of count in task_info */
	size_t supported_modes;		/* Supported display modes */
};

/* Program settings structure */
struct config {
	int delay;		/* Update interval in seconds */
	int iterations;		/* Number of iterations, 0 == infinite */
	int max_processes;	/* Maximum number of processes to show */
	int output_one_time;	/* Output once and exit */
	int monitor_pid;	/* Monitor specific PID */
	char *container_path;	/* Path to container cgroup */
	const struct field_desc *sort_field;	/* Current sort field */
	size_t display_mode;	/* Current display mode */
};

/* Global variables */
static struct config cfg;
static struct psi_stats psi;
@@ -137,6 +163,19 @@ static struct task_info tasks[MAX_TASKS];
static int task_count;
static int running = 1;
static struct container_stats container_stats;
static const struct field_desc sort_fields[] = {
	SORT_FIELD(cpu, c, MODE_DEFAULT),
	SORT_FIELD(blkio, i, MODE_DEFAULT),
	SORT_FIELD(irq, q, MODE_DEFAULT),
	SORT_FIELD(mem, m, MODE_DEFAULT | MODE_MEMVERBOSE),
	SORT_FIELD(swapin, s, MODE_MEMVERBOSE),
	SORT_FIELD(freepages, r, MODE_MEMVERBOSE),
	SORT_FIELD(thrashing, t, MODE_MEMVERBOSE),
	SORT_FIELD(compact, p, MODE_MEMVERBOSE),
	SORT_FIELD(wpcopy, w, MODE_MEMVERBOSE),
	END_FIELD
};
static int sort_selected;

/* Netlink socket variables */
static int nl_sd = -1;
@@ -158,18 +197,75 @@ static void disable_raw_mode(void)
	tcsetattr(STDIN_FILENO, TCSAFLUSH, &orig_termios);
}

/* Find field descriptor by interactive command character */
static const struct field_desc *get_field_by_cmd_char(char ch)
{
	const struct field_desc *field;

	for (field = sort_fields; field->name != NULL; field++) {
		if (field->cmd_char[0] == ch)
			return field;
	}

	return NULL;
}

/* Find field descriptor by name with string comparison */
static const struct field_desc *get_field_by_name(const char *name)
{
	const struct field_desc *field;
	size_t field_len;

	for (field = sort_fields; field->name != NULL; field++) {
		field_len = strlen(field->name);
		if (field_len != strlen(name))
			continue;
		if (strncmp(field->name, name, field_len) == 0)
			return field;
	}

	return NULL;
}

/* Find display name for a field descriptor */
static const char *get_name_by_field(const struct field_desc *field)
{
	return field ? field->name : "UNKNOWN";
}

/* Generate string of available field names */
static void display_available_fields(size_t mode)
{
	const struct field_desc *field;
	char buf[MAX_BUF_LEN];

	buf[0] = '\0';

	for (field = sort_fields; field->name != NULL; field++) {
		if (!(field->supported_modes & mode))
			continue;
		strncat(buf, "|", MAX_BUF_LEN - strlen(buf) - 1);
		strncat(buf, field->name, MAX_BUF_LEN - strlen(buf) - 1);
		buf[MAX_BUF_LEN - 1] = '\0';
	}

	fprintf(stderr, "Available fields: %s\n", buf);
}

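Presumably the point of storing byte offsets in SORT_FIELD() is that a single comparator can sort by any field generically; a sketch of how that would read a task's value (helper names hypothetical):

	static unsigned long long field_total(const struct task_info *t,
					      const struct field_desc *f)
	{
		return *(const unsigned long long *)((const char *)t + f->total_offset);
	}

	/* descending qsort() comparator for the currently selected field */
	static int cmp_tasks(const void *a, const void *b)
	{
		unsigned long long ta = field_total(a, cfg.sort_field);
		unsigned long long tb = field_total(b, cfg.sort_field);

		return (ta < tb) - (ta > tb);
	}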
/* Display usage information and command line options */
|
||||
static void usage(void)
|
||||
{
|
||||
printf("Usage: delaytop [Options]\n"
|
||||
"Options:\n"
|
||||
" -h, --help Show this help message and exit\n"
|
||||
" -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n"
|
||||
" -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n"
|
||||
" -P, --processes=NUMBER Set maximum number of processes to show (default: 20, max: 1000)\n"
|
||||
" -o, --once Display once and exit\n"
|
||||
" -p, --pid=PID Monitor only the specified PID\n"
|
||||
" -C, --container=PATH Monitor the container at specified cgroup path\n");
|
||||
" -h, --help Show this help message and exit\n"
|
||||
" -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n"
|
||||
" -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n"
|
||||
" -P, --processes=NUMBER Set maximum number of processes to show (default: 20, max: 1000)\n"
|
||||
" -o, --once Display once and exit\n"
|
||||
" -p, --pid=PID Monitor only the specified PID\n"
|
||||
" -C, --container=PATH Monitor the container at specified cgroup path\n"
|
||||
" -s, --sort=FIELD Sort by delay field (default: cpu)\n"
|
||||
" -M, --memverbose Display memory detailed information\n");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
@@ -177,6 +273,7 @@ static void usage(void)
static void parse_args(int argc, char **argv)
{
	int c;
	const struct field_desc *field;
	struct option long_options[] = {
		{"help", no_argument, 0, 'h'},
		{"delay", required_argument, 0, 'd'},
@@ -184,7 +281,9 @@ static void parse_args(int argc, char **argv)
		{"pid", required_argument, 0, 'p'},
		{"once", no_argument, 0, 'o'},
		{"processes", required_argument, 0, 'P'},
		{"sort", required_argument, 0, 's'},
		{"container", required_argument, 0, 'C'},
		{"memverbose", no_argument, 0, 'M'},
		{0, 0, 0, 0}
	};

@@ -192,15 +291,16 @@ static void parse_args(int argc, char **argv)
	cfg.delay = 2;
	cfg.iterations = 0;
	cfg.max_processes = 20;
	cfg.sort_field = 'c'; /* Default sort by CPU delay */
	cfg.sort_field = &sort_fields[0]; /* Default sorted by CPU delay */
	cfg.output_one_time = 0;
	cfg.monitor_pid = 0; /* 0 means monitor all PIDs */
	cfg.container_path = NULL;
	cfg.display_mode = MODE_DEFAULT;

	while (1) {
		int option_index = 0;

		c = getopt_long(argc, argv, "hd:n:p:oP:C:", long_options, &option_index);
		c = getopt_long(argc, argv, "hd:n:p:oP:C:s:M", long_options, &option_index);
		if (c == -1)
			break;

@@ -247,6 +347,26 @@ static void parse_args(int argc, char **argv)
		case 'C':
			cfg.container_path = strdup(optarg);
			break;
		case 's':
			if (strlen(optarg) == 0) {
				fprintf(stderr, "Error: empty sort field\n");
				exit(1);
			}

			field = get_field_by_name(optarg);
			/* Show available fields if invalid option provided */
			if (!field) {
				fprintf(stderr, "Error: invalid sort field '%s'\n", optarg);
				display_available_fields(MODE_TYPE_ALL);
				exit(1);
			}

			cfg.sort_field = field;
			break;
		case 'M':
			cfg.display_mode = MODE_MEMVERBOSE;
			cfg.sort_field = get_field_by_name("mem");
			break;
		default:
			fprintf(stderr, "Try 'delaytop --help' for more information.\n");
			exit(1);
@@ -254,6 +374,25 @@ static void parse_args(int argc, char **argv)
	}
}

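In the optstring change above, the appended "s:M" declares -s as taking a required argument (the trailing colon) while -M takes none; getopt_long() hands the argument back through optarg. A minimal sketch of those standard getopt semantics:

#include <getopt.h>
#include <stdio.h>

int main(int argc, char **argv)
{
	static const struct option long_options[] = {
		{"sort",       required_argument, 0, 's'},
		{"memverbose", no_argument,       0, 'M'},
		{0, 0, 0, 0}
	};
	int c;

	while ((c = getopt_long(argc, argv, "s:M", long_options, NULL)) != -1) {
		if (c == 's')
			printf("sort field: %s\n", optarg);	/* optarg set only for "s:" */
		else if (c == 'M')
			printf("memverbose on\n");
	}
	return 0;
}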
/* Sum the memory-related delay totals into one overall memory delay */
static void set_mem_delay_total(struct task_info *t)
{
	t->mem_delay_total = t->swapin_delay_total +
			     t->freepages_delay_total +
			     t->thrashing_delay_total +
			     t->compact_delay_total +
			     t->wpcopy_delay_total;
}

static void set_mem_count(struct task_info *t)
{
	t->mem_count = t->swapin_count +
		       t->freepages_count +
		       t->thrashing_count +
		       t->compact_count +
		       t->wpcopy_count;
}

/* Create a raw netlink socket and bind */
static int create_nl_socket(void)
{
@@ -358,87 +497,134 @@ static int get_family_id(int sd)
	return id;
}

static void read_psi_stats(void)
static int read_psi_stats(void)
{
	FILE *fp;
	char line[256];
	int ret = 0;
	int error_count = 0;

	/* Check if PSI path exists */
	if (access(PSI_PATH, F_OK) != 0) {
		fprintf(stderr, "Error: PSI interface not found at %s\n", PSI_PATH);
		fprintf(stderr, "Please ensure your kernel supports PSI (Pressure Stall Information)\n");
		return -1;
	}

	/* Zero all fields */
	memset(&psi, 0, sizeof(psi));

	/* CPU pressure */
	fp = fopen(PSI_CPU_SOME, "r");
	fp = fopen(PSI_CPU_PATH, "r");
	if (fp) {
		while (fgets(line, sizeof(line), fp)) {
			if (strncmp(line, "some", 4) == 0) {
				ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
					     &psi.cpu_some_avg10, &psi.cpu_some_avg60,
					     &psi.cpu_some_avg300, &psi.cpu_some_total);
				if (ret != 4)
				if (ret != 4) {
					fprintf(stderr, "Failed to parse CPU some PSI data\n");
					error_count++;
				}
			} else if (strncmp(line, "full", 4) == 0) {
				ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
					     &psi.cpu_full_avg10, &psi.cpu_full_avg60,
					     &psi.cpu_full_avg300, &psi.cpu_full_total);
				if (ret != 4)
				if (ret != 4) {
					fprintf(stderr, "Failed to parse CPU full PSI data\n");
					error_count++;
				}
			}
		}
		fclose(fp);
	} else {
		fprintf(stderr, "Warning: Failed to open %s\n", PSI_CPU_PATH);
		error_count++;
	}

	/* Memory pressure */
	fp = fopen(PSI_MEMORY_SOME, "r");
	fp = fopen(PSI_MEMORY_PATH, "r");
	if (fp) {
		while (fgets(line, sizeof(line), fp)) {
			if (strncmp(line, "some", 4) == 0) {
				ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
					     &psi.memory_some_avg10, &psi.memory_some_avg60,
					     &psi.memory_some_avg300, &psi.memory_some_total);
				if (ret != 4)
				if (ret != 4) {
					fprintf(stderr, "Failed to parse Memory some PSI data\n");
					error_count++;
				}
			} else if (strncmp(line, "full", 4) == 0) {
				ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
					     &psi.memory_full_avg10, &psi.memory_full_avg60,
					     &psi.memory_full_avg300, &psi.memory_full_total);
			}
			if (ret != 4)
				if (ret != 4) {
					fprintf(stderr, "Failed to parse Memory full PSI data\n");
					error_count++;
				}
			}
		}
		fclose(fp);
	} else {
		fprintf(stderr, "Warning: Failed to open %s\n", PSI_MEMORY_PATH);
		error_count++;
	}

	/* IO pressure */
	fp = fopen(PSI_IO_SOME, "r");
	fp = fopen(PSI_IO_PATH, "r");
	if (fp) {
		while (fgets(line, sizeof(line), fp)) {
			if (strncmp(line, "some", 4) == 0) {
				ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
					     &psi.io_some_avg10, &psi.io_some_avg60,
					     &psi.io_some_avg300, &psi.io_some_total);
				if (ret != 4)
				if (ret != 4) {
					fprintf(stderr, "Failed to parse IO some PSI data\n");
					error_count++;
				}
			} else if (strncmp(line, "full", 4) == 0) {
				ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
					     &psi.io_full_avg10, &psi.io_full_avg60,
					     &psi.io_full_avg300, &psi.io_full_total);
				if (ret != 4)
				if (ret != 4) {
					fprintf(stderr, "Failed to parse IO full PSI data\n");
					error_count++;
				}
			}
		}
		fclose(fp);
	} else {
		fprintf(stderr, "Warning: Failed to open %s\n", PSI_IO_PATH);
		error_count++;
	}

	/* IRQ pressure (only full) */
	fp = fopen(PSI_IRQ_FULL, "r");
	fp = fopen(PSI_IRQ_PATH, "r");
	if (fp) {
		while (fgets(line, sizeof(line), fp)) {
			if (strncmp(line, "full", 4) == 0) {
				ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
					     &psi.irq_full_avg10, &psi.irq_full_avg60,
					     &psi.irq_full_avg300, &psi.irq_full_total);
				if (ret != 4)
				if (ret != 4) {
					fprintf(stderr, "Failed to parse IRQ full PSI data\n");
					error_count++;
				}
			}
		}
		fclose(fp);
	} else {
		fprintf(stderr, "Warning: Failed to open %s\n", PSI_IRQ_PATH);
		error_count++;
	}

	/* Return error count: 0 means success, >0 means warnings, -1 means fatal error */
	if (error_count > 0) {
		fprintf(stderr, "PSI stats reading completed with %d warnings\n", error_count);
		return error_count;
	}

	return 0;
}

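Each /proc/pressure file is parsed line by line with the fixed sscanf() formats above, and any conversion count other than 4 is treated as a parse failure. A standalone sketch against a made-up sample line (PSI totals are reported in microseconds, which is why the display code later divides by 1000):

#include <stdio.h>

int main(void)
{
	const char *line = "some avg10=1.23 avg60=0.45 avg300=0.07 total=987654";
	double avg10, avg60, avg300;
	unsigned long long total;

	/* Mirrors the patch's format string; all four fields must convert. */
	if (sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
		   &avg10, &avg60, &avg300, &total) != 4) {
		fprintf(stderr, "parse failure\n");
		return 1;
	}
	printf("avg10=%.2f total=%llu us\n", avg10, total);
	return 0;
}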
static int read_comm(int pid, char *comm_buf, size_t buf_size)
@@ -527,6 +713,8 @@ static void fetch_and_fill_task_info(int pid, const char *comm)
			SET_TASK_STAT(task_count, wpcopy_delay_total);
			SET_TASK_STAT(task_count, irq_count);
			SET_TASK_STAT(task_count, irq_delay_total);
			set_mem_count(&tasks[task_count]);
			set_mem_delay_total(&tasks[task_count]);
			task_count++;
		}
		break;
@@ -587,19 +775,23 @@ static int compare_tasks(const void *a, const void *b)
{
	const struct task_info *t1 = (const struct task_info *)a;
	const struct task_info *t2 = (const struct task_info *)b;
	unsigned long long total1;
	unsigned long long total2;
	unsigned long count1;
	unsigned long count2;
	double avg1, avg2;

	switch (cfg.sort_field) {
	case 'c': /* CPU */
		avg1 = average_ms(t1->cpu_delay_total, t1->cpu_count);
		avg2 = average_ms(t2->cpu_delay_total, t2->cpu_count);
		if (avg1 != avg2)
			return avg2 > avg1 ? 1 : -1;
		return t2->cpu_delay_total > t1->cpu_delay_total ? 1 : -1;
	total1 = *(unsigned long long *)((char *)t1 + cfg.sort_field->total_offset);
	total2 = *(unsigned long long *)((char *)t2 + cfg.sort_field->total_offset);
	count1 = *(unsigned long *)((char *)t1 + cfg.sort_field->count_offset);
	count2 = *(unsigned long *)((char *)t2 + cfg.sort_field->count_offset);

	default:
		return t2->cpu_delay_total > t1->cpu_delay_total ? 1 : -1;
	}
	avg1 = average_ms(total1, count1);
	avg2 = average_ms(total2, count2);
	if (avg1 != avg2)
		return avg2 > avg1 ? 1 : -1;

	return 0;
}

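The rewritten comparator no longer switches on a sort character; it reads the two counters through byte offsets recorded in the field descriptor. A self-contained sketch of that offsetof() technique, with illustrative struct layout and values:

#include <stddef.h>
#include <stdio.h>

struct task_info {
	unsigned long long cpu_delay_total;	/* nanoseconds */
	unsigned long cpu_count;
};

struct field_desc {
	size_t total_offset;
	size_t count_offset;
};

int main(void)
{
	struct task_info t = { .cpu_delay_total = 4000000, .cpu_count = 2 };
	struct field_desc cpu = {
		offsetof(struct task_info, cpu_delay_total),
		offsetof(struct task_info, cpu_count),
	};
	/* Generic read: base pointer + recorded offset, as in compare_tasks(). */
	unsigned long long total = *(unsigned long long *)((char *)&t + cpu.total_offset);
	unsigned long count = *(unsigned long *)((char *)&t + cpu.count_offset);

	printf("avg delay = %.2f ns\n", (double)total / count);
	return 0;
}

This is what lets one generic compare_tasks() serve every sortable column instead of one switch arm per field.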
/* Sort tasks by selected field */
@@ -673,7 +865,7 @@ static void get_container_stats(void)
}

/* Display results to stdout or log file */
static void display_results(void)
static void display_results(int psi_ret)
{
	time_t now = time(NULL);
	struct tm *tm_now = localtime(&now);
@@ -686,49 +878,53 @@
	suc &= BOOL_FPRINT(out, "\033[H\033[J");

	/* PSI output (one-line, no cat style) */
	suc &= BOOL_FPRINT(out, "System Pressure Information: (avg10/avg60/avg300/total)\n");
	suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
			   "CPU some:",
			   psi.cpu_some_avg10,
			   psi.cpu_some_avg60,
			   psi.cpu_some_avg300,
			   psi.cpu_some_total / 1000);
	suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
			   "CPU full:",
			   psi.cpu_full_avg10,
			   psi.cpu_full_avg60,
			   psi.cpu_full_avg300,
			   psi.cpu_full_total / 1000);
	suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
			   "Memory full:",
			   psi.memory_full_avg10,
			   psi.memory_full_avg60,
			   psi.memory_full_avg300,
			   psi.memory_full_total / 1000);
	suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
			   "Memory some:",
			   psi.memory_some_avg10,
			   psi.memory_some_avg60,
			   psi.memory_some_avg300,
			   psi.memory_some_total / 1000);
	suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
			   "IO full:",
			   psi.io_full_avg10,
			   psi.io_full_avg60,
			   psi.io_full_avg300,
			   psi.io_full_total / 1000);
	suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
			   "IO some:",
			   psi.io_some_avg10,
			   psi.io_some_avg60,
			   psi.io_some_avg300,
			   psi.io_some_total / 1000);
	suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
			   "IRQ full:",
			   psi.irq_full_avg10,
			   psi.irq_full_avg60,
			   psi.irq_full_avg300,
			   psi.irq_full_total / 1000);
suc &= BOOL_FPRINT(out, "System Pressure Information: (avg10/avg60vg300/total)\n");
|
||||
	if (psi_ret) {
		suc &= BOOL_FPRINT(out, "  PSI not found: check if psi=1 enabled in cmdline\n");
	} else {
		suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
				   "CPU some:",
				   psi.cpu_some_avg10,
				   psi.cpu_some_avg60,
				   psi.cpu_some_avg300,
				   psi.cpu_some_total / 1000);
		suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
				   "CPU full:",
				   psi.cpu_full_avg10,
				   psi.cpu_full_avg60,
				   psi.cpu_full_avg300,
				   psi.cpu_full_total / 1000);
		suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
				   "Memory full:",
				   psi.memory_full_avg10,
				   psi.memory_full_avg60,
				   psi.memory_full_avg300,
				   psi.memory_full_total / 1000);
		suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
				   "Memory some:",
				   psi.memory_some_avg10,
				   psi.memory_some_avg60,
				   psi.memory_some_avg300,
				   psi.memory_some_total / 1000);
		suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
				   "IO full:",
				   psi.io_full_avg10,
				   psi.io_full_avg60,
				   psi.io_full_avg300,
				   psi.io_full_total / 1000);
		suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
				   "IO some:",
				   psi.io_some_avg10,
				   psi.io_some_avg60,
				   psi.io_some_avg300,
				   psi.io_some_total / 1000);
		suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
				   "IRQ full:",
				   psi.irq_full_avg10,
				   psi.irq_full_avg60,
				   psi.irq_full_avg300,
				   psi.irq_full_total / 1000);
	}

	if (cfg.container_path) {
		suc &= BOOL_FPRINT(out, "Container Information (%s):\n", cfg.container_path);
@@ -738,29 +934,59 @@
				   container_stats.nr_stopped, container_stats.nr_uninterruptible,
				   container_stats.nr_io_wait);
	}
	suc &= BOOL_FPRINT(out, "Top %d processes (sorted by CPU delay):\n",
			   cfg.max_processes);
	suc &= BOOL_FPRINT(out, "%5s %5s %-17s", "PID", "TGID", "COMMAND");
	suc &= BOOL_FPRINT(out, "%7s %7s %7s %7s %7s %7s %7s %7s\n",
			   "CPU(ms)", "IO(ms)", "SWAP(ms)", "RCL(ms)",
			   "THR(ms)", "CMP(ms)", "WP(ms)", "IRQ(ms)");

	suc &= BOOL_FPRINT(out, "-----------------------------------------------");
	suc &= BOOL_FPRINT(out, "----------------------------------------------\n");
	/* Interactive command */
suc &= BOOL_FPRINT(out, "[o]sort [M]memverbose [q]quit\n");
|
||||
if (sort_selected) {
|
||||
if (cfg.display_mode == MODE_MEMVERBOSE)
|
||||
suc &= BOOL_FPRINT(out,
|
||||
"sort selection: [m]MEM [r]RCL [t]THR [p]CMP [w]WP\n");
|
||||
else
|
||||
suc &= BOOL_FPRINT(out,
|
||||
"sort selection: [c]CPU [i]IO [m]MEM [q]IRQ\n");
|
||||
}
|
||||
|
||||
/* Task delay output */
|
||||
suc &= BOOL_FPRINT(out, "Top %d processes (sorted by %s delay):\n",
|
||||
cfg.max_processes, get_name_by_field(cfg.sort_field));
|
||||
|
||||
suc &= BOOL_FPRINT(out, "%8s %8s %-17s", "PID", "TGID", "COMMAND");
|
||||
if (cfg.display_mode == MODE_MEMVERBOSE) {
|
||||
suc &= BOOL_FPRINT(out, "%8s %8s %8s %8s %8s %8s\n",
|
||||
"MEM(ms)", "SWAP(ms)", "RCL(ms)",
|
||||
"THR(ms)", "CMP(ms)", "WP(ms)");
|
||||
suc &= BOOL_FPRINT(out, "-----------------------");
|
||||
suc &= BOOL_FPRINT(out, "-----------------------");
|
||||
suc &= BOOL_FPRINT(out, "-----------------------");
|
||||
suc &= BOOL_FPRINT(out, "---------------------\n");
|
||||
} else {
|
||||
suc &= BOOL_FPRINT(out, "%8s %8s %8s %8s\n",
|
||||
"CPU(ms)", "IO(ms)", "IRQ(ms)", "MEM(ms)");
|
||||
suc &= BOOL_FPRINT(out, "-----------------------");
|
||||
suc &= BOOL_FPRINT(out, "-----------------------");
|
||||
suc &= BOOL_FPRINT(out, "--------------------------\n");
|
||||
}
|
||||
|
||||
count = task_count < cfg.max_processes ? task_count : cfg.max_processes;
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
suc &= BOOL_FPRINT(out, "%5d %5d %-15s",
|
||||
suc &= BOOL_FPRINT(out, "%8d %8d %-15s",
|
||||
tasks[i].pid, tasks[i].tgid, tasks[i].command);
|
||||
suc &= BOOL_FPRINT(out, "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f\n",
|
||||
average_ms(tasks[i].cpu_delay_total, tasks[i].cpu_count),
|
||||
average_ms(tasks[i].blkio_delay_total, tasks[i].blkio_count),
|
||||
average_ms(tasks[i].swapin_delay_total, tasks[i].swapin_count),
|
||||
average_ms(tasks[i].freepages_delay_total, tasks[i].freepages_count),
|
||||
average_ms(tasks[i].thrashing_delay_total, tasks[i].thrashing_count),
|
||||
average_ms(tasks[i].compact_delay_total, tasks[i].compact_count),
|
||||
average_ms(tasks[i].wpcopy_delay_total, tasks[i].wpcopy_count),
|
||||
average_ms(tasks[i].irq_delay_total, tasks[i].irq_count));
|
||||
if (cfg.display_mode == MODE_MEMVERBOSE) {
|
||||
suc &= BOOL_FPRINT(out, DELAY_FMT_MEMVERBOSE,
|
||||
TASK_AVG(tasks[i], mem),
|
||||
TASK_AVG(tasks[i], swapin),
|
||||
TASK_AVG(tasks[i], freepages),
|
||||
TASK_AVG(tasks[i], thrashing),
|
||||
TASK_AVG(tasks[i], compact),
|
||||
TASK_AVG(tasks[i], wpcopy));
|
||||
} else {
|
||||
suc &= BOOL_FPRINT(out, DELAY_FMT_DEFAULT,
|
||||
TASK_AVG(tasks[i], cpu),
|
||||
TASK_AVG(tasks[i], blkio),
|
||||
TASK_AVG(tasks[i], irq),
|
||||
TASK_AVG(tasks[i], mem));
|
||||
}
|
||||
}
|
||||
|
||||
suc &= BOOL_FPRINT(out, "\n");
|
||||
@@ -769,11 +995,79 @@
		perror("Error writing to output");
}

/* Check for keyboard input with timeout based on cfg.delay */
static char check_for_keypress(void)
{
	struct timeval tv = {cfg.delay, 0};
	fd_set readfds;
	char ch = 0;

	FD_ZERO(&readfds);
	FD_SET(STDIN_FILENO, &readfds);
	int r = select(STDIN_FILENO + 1, &readfds, NULL, NULL, &tv);

	if (r > 0 && FD_ISSET(STDIN_FILENO, &readfds)) {
		read(STDIN_FILENO, &ch, 1);
		return ch;
	}

	return 0;
}

#define MAX_MODE_SIZE 2
static void toggle_display_mode(void)
{
	static const size_t modes[MAX_MODE_SIZE] = {MODE_DEFAULT, MODE_MEMVERBOSE};
	static size_t cur_index;

	cur_index = (cur_index + 1) % MAX_MODE_SIZE;
	cfg.display_mode = modes[cur_index];
}

/* Handle keyboard input: sorting selection, mode toggle, or quit */
static void handle_keypress(char ch, int *running)
{
	const struct field_desc *field;

	/* Change sort field */
	if (sort_selected) {
		field = get_field_by_cmd_char(ch);
		if (field && (field->supported_modes & cfg.display_mode))
			cfg.sort_field = field;

		sort_selected = 0;
	/* Handle mode changes or quit */
	} else {
		switch (ch) {
		case 'o':
			sort_selected = 1;
			break;
		case 'M':
			toggle_display_mode();
			for (field = sort_fields; field->name != NULL; field++) {
				if (field->supported_modes & cfg.display_mode) {
					cfg.sort_field = field;
					break;
				}
			}
			break;
		case 'q':
		case 'Q':
			*running = 0;
			break;
		default:
			break;
		}
	}
}

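enable_raw_mode() and disable_raw_mode() fall outside this excerpt; the usual way to get unbuffered, echo-free keystrokes for a select()-driven loop like check_for_keypress() is to clear ICANON and ECHO via termios, as in this sketch (a common approach, not necessarily the patch's exact implementation):

#include <stdio.h>
#include <termios.h>
#include <unistd.h>

static struct termios saved;

static void enable_raw_mode(void)
{
	struct termios raw;

	tcgetattr(STDIN_FILENO, &saved);	/* remember original settings */
	raw = saved;
	raw.c_lflag &= ~(ICANON | ECHO);	/* byte-at-a-time input, no echo */
	tcsetattr(STDIN_FILENO, TCSANOW, &raw);
}

static void disable_raw_mode(void)
{
	tcsetattr(STDIN_FILENO, TCSANOW, &saved);	/* restore on exit */
}

int main(void)
{
	char ch;

	enable_raw_mode();
	printf("press any key: ");
	fflush(stdout);
	if (read(STDIN_FILENO, &ch, 1) == 1)
		printf("\ngot '%c'\n", ch);
	disable_raw_mode();
	return 0;
}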
/* Main function */
int main(int argc, char **argv)
{
	const struct field_desc *field;
	int iterations = 0;
	int use_q_quit = 0;
	int psi_ret = 0;
	char keypress;

	/* Parse command line arguments */
	parse_args(argc, argv);
@@ -793,17 +1087,24 @@ int main(int argc, char **argv)
		exit(1);
	}

	if (!cfg.output_one_time) {
		use_q_quit = 1;
		enable_raw_mode();
		printf("Press 'q' to quit.\n");
		fflush(stdout);
	}
	/* Set terminal to non-canonical mode for interaction */
	enable_raw_mode();

	/* Main loop */
	while (running) {
		/* Auto-switch sort field when not matching display mode */
		if (!(cfg.sort_field->supported_modes & cfg.display_mode)) {
			for (field = sort_fields; field->name != NULL; field++) {
				if (field->supported_modes & cfg.display_mode) {
					cfg.sort_field = field;
					printf("Auto-switched sort field to: %s\n", field->name);
					break;
				}
			}
		}

		/* Read PSI statistics */
		read_psi_stats();
		psi_ret = read_psi_stats();

		/* Get container stats if container path provided */
		if (cfg.container_path)
@@ -816,7 +1117,7 @@ int main(int argc, char **argv)
		sort_tasks();

		/* Display results to stdout or log file */
		display_results();
		display_results(psi_ret);

		/* Check for iterations */
		if (cfg.iterations > 0 && ++iterations >= cfg.iterations)
@@ -826,32 +1127,14 @@ int main(int argc, char **argv)
		if (cfg.output_one_time)
			break;

		/* Check for 'q' key to quit */
		if (use_q_quit) {
			struct timeval tv = {cfg.delay, 0};
			fd_set readfds;

			FD_ZERO(&readfds);
			FD_SET(STDIN_FILENO, &readfds);
			int r = select(STDIN_FILENO+1, &readfds, NULL, NULL, &tv);

			if (r > 0 && FD_ISSET(STDIN_FILENO, &readfds)) {
				char ch = 0;

				read(STDIN_FILENO, &ch, 1);
				if (ch == 'q' || ch == 'Q') {
					running = 0;
					break;
				}
			}
		} else {
			sleep(cfg.delay);
		}
		/* Keypress for interactive usage */
		keypress = check_for_keypress();
		if (keypress)
			handle_keypress(keypress, &running);
	}

	/* Restore terminal mode */
	if (use_q_quit)
		disable_raw_mode();
	disable_raw_mode();

	/* Cleanup */
	close(nl_sd);

tools/testing/radix-tree/idr-test.c
@@ -499,19 +499,17 @@ void ida_check_random(void)
		goto repeat;
}

void ida_simple_get_remove_test(void)
void ida_alloc_free_test(void)
{
	DEFINE_IDA(ida);
	unsigned long i;

	for (i = 0; i < 10000; i++) {
		assert(ida_simple_get(&ida, 0, 20000, GFP_KERNEL) == i);
	}
	assert(ida_simple_get(&ida, 5, 30, GFP_KERNEL) < 0);
	for (i = 0; i < 10000; i++)
		assert(ida_alloc_max(&ida, 20000, GFP_KERNEL) == i);
	assert(ida_alloc_range(&ida, 5, 30, GFP_KERNEL) < 0);

	for (i = 0; i < 10000; i++) {
		ida_simple_remove(&ida, i);
	}
	for (i = 0; i < 10000; i++)
		ida_free(&ida, i);
	assert(ida_is_empty(&ida));

	ida_destroy(&ida);
@@ -524,7 +522,7 @@ void user_ida_checks(void)
	ida_check_nomem();
	ida_check_conv_user();
	ida_check_random();
	ida_simple_get_remove_test();
	ida_alloc_free_test();

	radix_tree_cpu_dead(1);
}

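The conversion above follows the tree-wide ida_simple_*() removal from this merge. One subtlety: ida_simple_get()'s end bound is exclusive while the ida_alloc_*() max is inclusive, so the rewritten assert with ida_alloc_max(&ida, 20000, ...) permits one extra id (20000 itself); that is harmless here because only 10000 ids are ever allocated. The mapping in brief:

/*
 * Removed API                             Replacement
 * ida_simple_get(ida, 0, 0, gfp)       -> ida_alloc(ida, gfp)
 * ida_simple_get(ida, 0, end, gfp)     -> ida_alloc_max(ida, end - 1, gfp)
 * ida_simple_get(ida, start, end, gfp) -> ida_alloc_range(ida, start, end - 1, gfp)
 * ida_simple_remove(ida, id)           -> ida_free(ida, id)
 */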
tools/testing/selftests/proc/.gitignore
@@ -7,6 +7,7 @@
/proc-loadavg-001
/proc-maps-race
/proc-multiple-procfs
/proc-net-dev-lseek
/proc-empty-vm
/proc-pid-vm
/proc-self-map-files-001

tools/testing/selftests/proc/Makefile
@@ -10,6 +10,7 @@ TEST_GEN_PROGS += fd-003-kthread
TEST_GEN_PROGS += proc-2-is-kthread
TEST_GEN_PROGS += proc-loadavg-001
TEST_GEN_PROGS += proc-maps-race
TEST_GEN_PROGS += proc-net-dev-lseek
TEST_GEN_PROGS += proc-empty-vm
TEST_GEN_PROGS += proc-pid-vm
TEST_GEN_PROGS += proc-self-map-files-001

tools/testing/selftests/proc/proc-net-dev-lseek.c (new file)
@@ -0,0 +1,68 @@
/*
 * Copyright (c) 2025 Alexey Dobriyan <adobriyan@gmail.com>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#undef _GNU_SOURCE
#define _GNU_SOURCE
#undef NDEBUG
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sched.h>
/*
 * Test that lseek("/proc/net/dev", 0, SEEK_SET)
 * a) works,
 * b) does what you think it does.
 */
int main(void)
{
	/* /proc/net/dev output is deterministic in fresh netns only. */
	if (unshare(CLONE_NEWNET) == -1) {
		if (errno == ENOSYS || errno == EPERM) {
			return 4;
		}
		return 1;
	}

	const int fd = open("/proc/net/dev", O_RDONLY);
	assert(fd >= 0);

	char buf1[4096];
	const ssize_t rv1 = read(fd, buf1, sizeof(buf1));
	/*
	 * Not "<=", this file can't be empty:
	 * there is header, "lo" interface with some zeroes.
	 */
	assert(0 < rv1);
	assert(rv1 <= sizeof(buf1));

	/* Believe it or not, this line broke one day. */
	assert(lseek(fd, 0, SEEK_SET) == 0);

	char buf2[4096];
	const ssize_t rv2 = read(fd, buf2, sizeof(buf2));
	/* Not "<=", see above. */
	assert(0 < rv2);
	assert(rv2 <= sizeof(buf2));

	/* Test that lseek rewinds to the beginning of the file. */
	assert(rv1 == rv2);
	assert(memcmp(buf1, buf2, rv1) == 0);

	/* Contents of the file is not validated: this test is about lseek(). */

	return 0;
}
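When unshare(CLONE_NEWNET) is unavailable the test returns 4, which the kselftest harness treats as a skip (KSFT_SKIP); with the Makefile and .gitignore entries above in place it runs alongside the rest of the proc suite, for example via make -C tools/testing/selftests TARGETS=proc run_tests.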
tools/testing/selftests/proc/proc-pid-vm.c
@@ -47,6 +47,10 @@
#include <sys/resource.h>
#include <linux/fs.h>

#ifndef __maybe_unused
#define __maybe_unused __attribute__((__unused__))
#endif

#include "../kselftest.h"

static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags)
@@ -218,12 +222,12 @@ static int make_exe(const uint8_t *payload, size_t len)
 * 2: vsyscall VMA is r-xp vsyscall=emulate
 */
static volatile int g_vsyscall;
static const char *str_vsyscall;
static const char *str_vsyscall __maybe_unused;

static const char str_vsyscall_0[] = "";
static const char str_vsyscall_1[] =
static const char str_vsyscall_0[] __maybe_unused = "";
static const char str_vsyscall_1[] __maybe_unused =
	"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n";
static const char str_vsyscall_2[] =
static const char str_vsyscall_2[] __maybe_unused =
	"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n";

#ifdef __x86_64__
