mirror of
https://github.com/torvalds/linux.git
synced 2025-12-07 20:06:24 +00:00
Microbenchmark protected by a config FIND_BIT_BENCHMARK_RUST,
following `find_bit_benchmark.c` but testing the Rust Bitmap API.
We add a fill_random() method protected by the config in order to
maintain the abstraction.
Sample output from the benchmark, for both the C and the Rust versions:
find_bit_benchmark.c output:
```
Start testing find_bit() with random-filled bitmap
[ 438.101937] find_next_bit: 860188 ns, 163419 iterations
[ 438.109471] find_next_zero_bit: 912342 ns, 164262 iterations
[ 438.116820] find_last_bit: 726003 ns, 163419 iterations
[ 438.130509] find_nth_bit: 7056993 ns, 16269 iterations
[ 438.139099] find_first_bit: 1963272 ns, 16270 iterations
[ 438.173043] find_first_and_bit: 27314224 ns, 32654 iterations
[ 438.180065] find_next_and_bit: 398752 ns, 73705 iterations
[ 438.186689]
Start testing find_bit() with sparse bitmap
[ 438.193375] find_next_bit: 9675 ns, 656 iterations
[ 438.201765] find_next_zero_bit: 1766136 ns, 327025 iterations
[ 438.208429] find_last_bit: 9017 ns, 656 iterations
[ 438.217816] find_nth_bit: 2749742 ns, 655 iterations
[ 438.225168] find_first_bit: 721799 ns, 656 iterations
[ 438.231797] find_first_and_bit: 2819 ns, 1 iterations
[ 438.238441] find_next_and_bit: 3159 ns, 1 iterations
```
find_bit_benchmark_rust.rs output:
```
[ 451.182459] find_bit_benchmark_rust:
[ 451.186688] Start testing find_bit() Rust with random-filled bitmap
[ 451.194450] next_bit: 777950 ns, 163644 iterations
[ 451.201997] next_zero_bit: 918889 ns, 164036 iterations
[ 451.208642] Start testing find_bit() Rust with sparse bitmap
[ 451.214300] next_bit: 9181 ns, 654 iterations
[ 451.222806] next_zero_bit: 1855504 ns, 327026 iterations
```
Here are the results from 32 samples, with 95% confidence interval.
The microbenchmark was built with RUST_BITMAP_HARDENED=n and run on a
machine that did not execute other processes.
Random-filled bitmap:
+-----------+-------+-----------+--------------+-----------+-----------+
| Benchmark | Lang | Mean (us) | Std Dev (us) | 95% CI Lo | 95% CI Hi |
+-----------+-------+-----------+--------------+-----------+-----------+
| find_bit/ | C | 825.07 | 53.89 | 806.40 | 843.74 |
| next_bit | Rust | 870.91 | 46.29 | 854.88 | 886.95 |
+-----------+-------+-----------+--------------+-----------+-----------+
| find_zero/| C | 933.56 | 56.34 | 914.04 | 953.08 |
| next_zero | Rust | 945.85 | 60.44 | 924.91 | 966.79 |
+-----------+-------+-----------+--------------+-----------+-----------+
Rust appears 5.5% slower for next_bit, 1.3% slower for next_zero.
Sparse bitmap:
+-----------+-------+-----------+--------------+-----------+-----------+
| Benchmark | Lang | Mean (us) | Std Dev (us) | 95% CI Lo | 95% CI Hi |
+-----------+-------+-----------+--------------+-----------+-----------+
| find_bit/ | C | 13.17 | 6.21 | 11.01 | 15.32 |
| next_bit | Rust | 14.30 | 8.27 | 11.43 | 17.17 |
+-----------+-------+-----------+--------------+-----------+-----------+
| find_zero/| C | 1859.31 | 82.30 | 1830.80 | 1887.83 |
| next_zero | Rust | 1908.09 | 139.82 | 1859.65 | 1956.54 |
+-----------+-------+-----------+--------------+-----------+-----------+
Rust appears 8.5% slower for next_bit, 2.6% slower for next_zero.
In summary, taking the arithmetic mean of all slow-downs, we can say
the Rust API has a 4.5% slowdown.
Suggested-by: Alice Ryhl <aliceryhl@google.com>
Suggested-by: Yury Norov (NVIDIA) <yury.norov@gmail.com>
Reviewed-by: Yury Norov (NVIDIA) <yury.norov@gmail.com>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Signed-off-by: Burak Emir <bqe@google.com>
Signed-off-by: Yury Norov (NVIDIA) <yury.norov@gmail.com>
104 lines
3.4 KiB
C
104 lines
3.4 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
* Header that contains the code (mostly headers) for which Rust bindings
|
|
* will be automatically generated by `bindgen`.
|
|
*
|
|
* Sorted alphabetically.
|
|
*/
|
|
|
|
/*
|
|
* First, avoid forward references to `enum` types.
|
|
*
|
|
* This works around a `bindgen` issue with them:
|
|
* <https://github.com/rust-lang/rust-bindgen/issues/3179>.
|
|
*
|
|
* Without this, the generated Rust type may be the wrong one (`i32`) or
|
|
* the proper one (typically `c_uint`) depending on how the headers are
|
|
* included, which in turn may depend on the particular kernel configuration
|
|
* or the architecture.
|
|
*
|
|
* The alternative would be to use casts and likely an
|
|
* `#[allow(clippy::unnecessary_cast)]` in the Rust source files. Instead,
|
|
* this approach allows us to keep the correct code in the source files and
|
|
* simply remove this section when the issue is fixed upstream and we bump
|
|
* the minimum `bindgen` version.
|
|
*
|
|
* This workaround may not be possible in some cases, depending on how the C
|
|
* headers are set up.
|
|
*/
|
|
#include <linux/hrtimer_types.h>
|
|
|
|
#include <linux/acpi.h>
|
|
#include <drm/drm_device.h>
|
|
#include <drm/drm_drv.h>
|
|
#include <drm/drm_file.h>
|
|
#include <drm/drm_gem.h>
|
|
#include <drm/drm_ioctl.h>
|
|
#include <kunit/test.h>
|
|
#include <linux/auxiliary_bus.h>
|
|
#include <linux/bitmap.h>
|
|
#include <linux/blk-mq.h>
|
|
#include <linux/blk_types.h>
|
|
#include <linux/blkdev.h>
|
|
#include <linux/clk.h>
|
|
#include <linux/completion.h>
|
|
#include <linux/configfs.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/cpufreq.h>
|
|
#include <linux/cpumask.h>
|
|
#include <linux/cred.h>
|
|
#include <linux/device/faux.h>
|
|
#include <linux/dma-mapping.h>
|
|
#include <linux/errname.h>
|
|
#include <linux/ethtool.h>
|
|
#include <linux/file.h>
|
|
#include <linux/firmware.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/ioport.h>
|
|
#include <linux/jiffies.h>
|
|
#include <linux/jump_label.h>
|
|
#include <linux/mdio.h>
|
|
#include <linux/miscdevice.h>
|
|
#include <linux/of_device.h>
|
|
#include <linux/pci.h>
|
|
#include <linux/phy.h>
|
|
#include <linux/pid_namespace.h>
|
|
#include <linux/platform_device.h>
|
|
#include <linux/pm_opp.h>
|
|
#include <linux/poll.h>
|
|
#include <linux/property.h>
|
|
#include <linux/random.h>
|
|
#include <linux/refcount.h>
|
|
#include <linux/regulator/consumer.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/security.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/tracepoint.h>
|
|
#include <linux/wait.h>
|
|
#include <linux/workqueue.h>
|
|
#include <linux/xarray.h>
|
|
#include <trace/events/rust_sample.h>
|
|
|
|
#if defined(CONFIG_DRM_PANIC_SCREEN_QR_CODE)
|
|
// Used by `#[export]` in `drivers/gpu/drm/drm_panic_qr.rs`.
|
|
#include <drm/drm_panic.h>
|
|
#endif
|
|
|
|
/*
 * `bindgen` gets confused at certain things.
 *
 * Each definition below gives one such C symbol an explicit C type and a
 * `RUST_CONST_HELPER_`-prefixed name, so that it is a plain typed constant.
 *
 * NOTE(review): presumably the `RUST_CONST_HELPER_` prefix is stripped again
 * when the Rust bindings are generated -- confirm against the build rules.
 */
|
|
/* Memory-layout constants, typed as `size_t`. */
const size_t RUST_CONST_HELPER_ARCH_SLAB_MINALIGN = ARCH_SLAB_MINALIGN;
|
|
const size_t RUST_CONST_HELPER_PAGE_SIZE = PAGE_SIZE;
|
|
/* Allocation flags, typed as `gfp_t`. */
const gfp_t RUST_CONST_HELPER_GFP_ATOMIC = GFP_ATOMIC;
|
|
const gfp_t RUST_CONST_HELPER_GFP_KERNEL = GFP_KERNEL;
|
|
const gfp_t RUST_CONST_HELPER_GFP_KERNEL_ACCOUNT = GFP_KERNEL_ACCOUNT;
|
|
const gfp_t RUST_CONST_HELPER_GFP_NOWAIT = GFP_NOWAIT;
|
|
const gfp_t RUST_CONST_HELPER___GFP_ZERO = __GFP_ZERO;
|
|
/*
 * NOTE(review): unlike `__GFP_ZERO` above, the next two initialisers use the
 * triple-underscore `___GFP_*` spellings while the helper names keep two
 * underscores -- confirm this difference is intentional.
 */
const gfp_t RUST_CONST_HELPER___GFP_HIGHMEM = ___GFP_HIGHMEM;
|
|
const gfp_t RUST_CONST_HELPER___GFP_NOWARN = ___GFP_NOWARN;
|
|
/* Single flags that carry their own dedicated flag types. */
const blk_features_t RUST_CONST_HELPER_BLK_FEAT_ROTATIONAL = BLK_FEAT_ROTATIONAL;
|
|
const fop_flags_t RUST_CONST_HELPER_FOP_UNSIGNED_OFFSET = FOP_UNSIGNED_OFFSET;
|
|
|
|
/* XArray mark, typed as `xa_mark_t`. */
const xa_mark_t RUST_CONST_HELPER_XA_PRESENT = XA_PRESENT;
|
|
|
|
/* XArray allocation flags; note that these are typed as `gfp_t`. */
const gfp_t RUST_CONST_HELPER_XA_FLAGS_ALLOC = XA_FLAGS_ALLOC;
|
|
const gfp_t RUST_CONST_HELPER_XA_FLAGS_ALLOC1 = XA_FLAGS_ALLOC1;
|