mirror of
https://github.com/torvalds/linux.git
synced 2025-12-07 20:06:24 +00:00
Pull directory delegations update from Christian Brauner:
"This contains the work for recall-only directory delegations for
knfsd.
Add support for simple, recallable-only directory delegations. This
was decided at the fall NFS Bakeathon where the NFS client and server
maintainers discussed how to merge directory delegation support.
The approach starts with recallable-only delegations for several reasons:
1. RFC8881 has gaps that are being addressed in RFC8881bis. In
particular, it requires directory position information for
CB_NOTIFY callbacks, which is difficult to implement properly
under Linux. The spec is being extended to allow that information
to be omitted.
2. Client-side support for CB_NOTIFY still lags. The client side
involves heuristics about when to request a delegation.
3. Early indication shows simple, recallable-only delegations can
help performance. Anna Schumaker mentioned seeing a multi-minute
speedup in xfstests runs with them enabled.
With these changes, userspace can also request a read lease on a
directory that will be recalled on conflicting accesses. This may be
useful for applications like Samba. Users can disable leases
altogether via the fs.leases-enable sysctl if needed.
VFS changes:
- Dedicated Type for Delegations
Introduce struct delegated_inode to track inodes that may have
delegations that need to be broken. This replaces the previous
approach of passing raw inode pointers through the delegation
breaking code paths, providing better type safety and clearer
semantics for the delegation machinery.
- Break parent directory delegations in open(..., O_CREAT) codepath
- Allow mkdir to wait for delegation break on parent
- Allow rmdir to wait for delegation break on parent
- Add try_break_deleg calls for parents to vfs_link(), vfs_rename(),
and vfs_unlink()
- Make vfs_create(), vfs_mknod(), and vfs_symlink() break delegations
on parent directory
- Clean up argument list for vfs_create()
- Expose delegation support to userland
Filelock changes:
- Make lease_alloc() take a flags argument
- Rework the __break_lease API to use flags
- Add struct delegated_inode
- Push the S_ISREG check down to ->setlease handlers
- Lift the ban on directory leases in generic_setlease
NFSD changes:
- Allow filecache to hold S_IFDIR files
- Allow DELEGRETURN on directories
- Wire up GET_DIR_DELEGATION handling
Fixes:
- Fix kernel-doc warnings in __fcntl_getlease
- Add needed headers for new struct delegation definition"
* tag 'vfs-6.19-rc1.directory.delegations' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
vfs: add needed headers for new struct delegation definition
filelock: __fcntl_getlease: fix kernel-doc warnings
vfs: expose delegation support to userland
nfsd: wire up GET_DIR_DELEGATION handling
nfsd: allow DELEGRETURN on directories
nfsd: allow filecache to hold S_IFDIR files
filelock: lift the ban on directory leases in generic_setlease
vfs: make vfs_symlink break delegations on parent dir
vfs: make vfs_mknod break delegations on parent directory
vfs: make vfs_create break delegations on parent directory
vfs: clean up argument list for vfs_create()
vfs: break parent dir delegations in open(..., O_CREAT) codepath
vfs: allow rmdir to wait for delegation break on parent
vfs: allow mkdir to wait for delegation break on parent
vfs: add try_break_deleg calls for parents to vfs_{link,rename,unlink}
filelock: push the S_ISREG check down to ->setlease handlers
filelock: add struct delegated_inode
filelock: rework the __break_lease API to use flags
filelock: make lease_alloc() take a flags argument
297 lines
7.7 KiB
C
297 lines
7.7 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include <linux/file.h>
|
|
#include <linux/mount.h>
|
|
#include <linux/namei.h>
|
|
#include <linux/utime.h>
|
|
#include <linux/syscalls.h>
|
|
#include <linux/uaccess.h>
|
|
#include <linux/compat.h>
|
|
#include <asm/unistd.h>
|
|
#include <linux/filelock.h>
|
|
|
|
/*
 * nsec_valid - check the tv_nsec field of a timestamp handed to vfs_utimes()
 * @nsec: nanosecond value to check
 *
 * The two special markers UTIME_OMIT and UTIME_NOW are accepted as-is;
 * anything else has to be a real nanosecond count in [0, 999999999].
 */
static bool nsec_valid(long nsec)
{
	switch (nsec) {
	case UTIME_OMIT:
	case UTIME_NOW:
		return true;
	default:
		return nsec >= 0 && nsec <= 999999999;
	}
}
|
|
|
|
int vfs_utimes(const struct path *path, struct timespec64 *times)
|
|
{
|
|
int error;
|
|
struct iattr newattrs;
|
|
struct inode *inode = path->dentry->d_inode;
|
|
struct delegated_inode delegated_inode = { };
|
|
|
|
if (times) {
|
|
if (!nsec_valid(times[0].tv_nsec) ||
|
|
!nsec_valid(times[1].tv_nsec))
|
|
return -EINVAL;
|
|
if (times[0].tv_nsec == UTIME_NOW &&
|
|
times[1].tv_nsec == UTIME_NOW)
|
|
times = NULL;
|
|
}
|
|
|
|
error = mnt_want_write(path->mnt);
|
|
if (error)
|
|
goto out;
|
|
|
|
newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
|
|
if (times) {
|
|
if (times[0].tv_nsec == UTIME_OMIT)
|
|
newattrs.ia_valid &= ~ATTR_ATIME;
|
|
else if (times[0].tv_nsec != UTIME_NOW) {
|
|
newattrs.ia_atime = times[0];
|
|
newattrs.ia_valid |= ATTR_ATIME_SET;
|
|
}
|
|
|
|
if (times[1].tv_nsec == UTIME_OMIT)
|
|
newattrs.ia_valid &= ~ATTR_MTIME;
|
|
else if (times[1].tv_nsec != UTIME_NOW) {
|
|
newattrs.ia_mtime = times[1];
|
|
newattrs.ia_valid |= ATTR_MTIME_SET;
|
|
}
|
|
/*
|
|
* Tell setattr_prepare(), that this is an explicit time
|
|
* update, even if neither ATTR_ATIME_SET nor ATTR_MTIME_SET
|
|
* were used.
|
|
*/
|
|
newattrs.ia_valid |= ATTR_TIMES_SET;
|
|
} else {
|
|
newattrs.ia_valid |= ATTR_TOUCH;
|
|
}
|
|
retry_deleg:
|
|
inode_lock(inode);
|
|
error = notify_change(mnt_idmap(path->mnt), path->dentry, &newattrs,
|
|
&delegated_inode);
|
|
inode_unlock(inode);
|
|
if (is_delegated(&delegated_inode)) {
|
|
error = break_deleg_wait(&delegated_inode);
|
|
if (!error)
|
|
goto retry_deleg;
|
|
}
|
|
|
|
mnt_drop_write(path->mnt);
|
|
out:
|
|
return error;
|
|
}
|
|
EXPORT_SYMBOL_GPL(vfs_utimes);
|
|
|
|
/*
 * do_utimes_path - look up a user-supplied path and update its timestamps
 * @dfd: directory file descriptor handed to user_path_at()
 * @filename: user-space path name
 * @times: new times for vfs_utimes(), or NULL
 * @flags: AT_SYMLINK_NOFOLLOW and/or AT_EMPTY_PATH; anything else is -EINVAL
 *
 * Symlinks are followed unless AT_SYMLINK_NOFOLLOW is given, and
 * AT_EMPTY_PATH is translated to LOOKUP_EMPTY.  If retry_estale() asks for
 * it, the lookup and the update are repeated with LOOKUP_REVAL added.
 */
static int do_utimes_path(int dfd, const char __user *filename,
			  struct timespec64 *times, int flags)
{
	int lookup_flags = 0;
	struct path path;
	int error;

	if (flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH))
		return -EINVAL;

	if (flags & AT_EMPTY_PATH)
		lookup_flags |= LOOKUP_EMPTY;
	if (!(flags & AT_SYMLINK_NOFOLLOW))
		lookup_flags |= LOOKUP_FOLLOW;

	for (;;) {
		error = user_path_at(dfd, filename, lookup_flags, &path);
		if (error)
			return error;

		error = vfs_utimes(&path, times);
		path_put(&path);

		if (!retry_estale(error, lookup_flags))
			return error;

		lookup_flags |= LOOKUP_REVAL;
	}
}
|
|
|
|
static int do_utimes_fd(int fd, struct timespec64 *times, int flags)
|
|
{
|
|
if (flags)
|
|
return -EINVAL;
|
|
|
|
CLASS(fd, f)(fd);
|
|
if (fd_empty(f))
|
|
return -EBADF;
|
|
return vfs_utimes(&fd_file(f)->f_path, times);
|
|
}
|
|
|
|
/*
|
|
* do_utimes - change times on filename or file descriptor
|
|
* @dfd: open file descriptor, -1 or AT_FDCWD
|
|
* @filename: path name or NULL
|
|
* @times: new times or NULL
|
|
* @flags: zero or more flags (only AT_SYMLINK_NOFOLLOW for the moment)
|
|
*
|
|
* If filename is NULL and dfd refers to an open file, then operate on
|
|
* the file. Otherwise look up filename, possibly using dfd as a
|
|
* starting point.
|
|
*
|
|
* If times==NULL, set access and modification to current time,
|
|
* must be owner or have write permission.
|
|
* Else, update from *times, must be owner or super user.
|
|
*/
|
|
long do_utimes(int dfd, const char __user *filename, struct timespec64 *times,
|
|
int flags)
|
|
{
|
|
if (filename == NULL && dfd != AT_FDCWD)
|
|
return do_utimes_fd(dfd, times, flags);
|
|
return do_utimes_path(dfd, filename, times, flags);
|
|
}
|
|
|
|
/*
 * utimensat() entry point: copy the optional pair of user timespecs into
 * kernel timespec64 values and pass the request on to do_utimes().
 * A NULL @utimes is forwarded as NULL times.
 */
SYSCALL_DEFINE4(utimensat, int, dfd, const char __user *, filename,
		struct __kernel_timespec __user *, utimes, int, flags)
{
	struct timespec64 tstimes[2];

	if (!utimes)
		return do_utimes(dfd, filename, NULL, flags);

	if (get_timespec64(&tstimes[0], &utimes[0]) ||
	    get_timespec64(&tstimes[1], &utimes[1]))
		return -EFAULT;

	/* Nothing to do, we must not even check the path. */
	if (tstimes[0].tv_nsec == UTIME_OMIT &&
	    tstimes[1].tv_nsec == UTIME_OMIT)
		return 0;

	return do_utimes(dfd, filename, tstimes, flags);
}
|
|
|
|
#ifdef __ARCH_WANT_SYS_UTIME
|
|
/*
|
|
* futimesat(), utimes() and utime() are older versions of utimensat()
|
|
* that are provided for compatibility with traditional C libraries.
|
|
* On modern architectures, we always use libc wrappers around
|
|
* utimensat() instead.
|
|
*/
|
|
/*
 * do_futimesat - common implementation of futimesat() and utimes()
 * @dfd: directory file descriptor passed through to do_utimes()
 * @filename: user-space path name
 * @utimes: user pointer to two __kernel_old_timeval entries
 *          (index 0 = access time, index 1 = modification time), or NULL
 *
 * Copies the two timevals from user space, rejects microsecond fields
 * outside [0, 1000000) with -EINVAL, converts them to timespec64 and calls
 * do_utimes() with flags 0.  A NULL @utimes is forwarded as NULL times.
 * Returns -EFAULT when the user copy fails.
 */
static long do_futimesat(int dfd, const char __user *filename,
			 struct __kernel_old_timeval __user *utimes)
{
	struct __kernel_old_timeval times[2];
	struct timespec64 tstimes[2];

	if (utimes) {
		/* Destination is the local two-entry array, copied in one go. */
		if (copy_from_user(times, utimes, sizeof(times)))
			return -EFAULT;

		/*
		 * This test is needed to catch all invalid values.  If we
		 * would test only in do_utimes we would miss those invalid
		 * values truncated by the multiplication with 1000.  Note
		 * that we also catch UTIME_{NOW,OMIT} here which are only
		 * valid for utimensat.
		 */
		if (times[0].tv_usec >= 1000000 || times[0].tv_usec < 0 ||
		    times[1].tv_usec >= 1000000 || times[1].tv_usec < 0)
			return -EINVAL;

		tstimes[0].tv_sec = times[0].tv_sec;
		tstimes[0].tv_nsec = 1000 * times[0].tv_usec;
		tstimes[1].tv_sec = times[1].tv_sec;
		tstimes[1].tv_nsec = 1000 * times[1].tv_usec;
	}

	return do_utimes(dfd, filename, utimes ? tstimes : NULL, 0);
}
|
|
|
|
|
|
/* futimesat() entry point: hands all three arguments to do_futimesat(). */
SYSCALL_DEFINE3(futimesat, int, dfd, const char __user *, filename,
		struct __kernel_old_timeval __user *, utimes)
{
	return do_futimesat(dfd, filename, utimes);
}
|
|
|
|
/* utimes() entry point: do_futimesat() with AT_FDCWD as the directory fd. */
SYSCALL_DEFINE2(utimes, char __user *, filename,
		struct __kernel_old_timeval __user *, utimes)
{
	return do_futimesat(AT_FDCWD, filename, utimes);
}
|
|
|
|
/*
 * utime() entry point.
 *
 * When @times is non-NULL, its actime and modtime members are fetched from
 * user space as the seconds of the access and modification time; the
 * nanosecond parts are set to zero.  A NULL @times is forwarded as NULL
 * times.  The request is resolved relative to AT_FDCWD with flags 0.
 * Returns -EFAULT when either user read fails.
 */
SYSCALL_DEFINE2(utime, char __user *, filename, struct utimbuf __user *, times)
{
	struct timespec64 tv[2];

	if (times) {
		if (get_user(tv[0].tv_sec, &(times->actime)) ||
		    get_user(tv[1].tv_sec, &(times->modtime)))
			return -EFAULT;
		tv[0].tv_nsec = 0;
		tv[1].tv_nsec = 0;
	}

	return do_utimes(AT_FDCWD, filename, times ? tv : NULL, 0);
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_COMPAT_32BIT_TIME
|
|
/*
|
|
* Not all architectures have sys_utime, so implement this in terms
|
|
* of sys_utimes.
|
|
*/
|
|
#ifdef __ARCH_WANT_SYS_UTIME32
|
|
/*
 * utime32() entry point: same job as utime(), but reading the seconds from
 * a struct old_utimbuf32.  Nanoseconds are set to zero and a NULL @t is
 * forwarded to do_utimes() as NULL times.
 */
SYSCALL_DEFINE2(utime32, const char __user *, filename,
		struct old_utimbuf32 __user *, t)
{
	struct timespec64 tv[2];

	if (!t)
		return do_utimes(AT_FDCWD, filename, NULL, 0);

	if (get_user(tv[0].tv_sec, &t->actime) ||
	    get_user(tv[1].tv_sec, &t->modtime))
		return -EFAULT;

	tv[0].tv_nsec = 0;
	tv[1].tv_nsec = 0;

	return do_utimes(AT_FDCWD, filename, tv, 0);
}
|
|
#endif
|
|
|
|
/*
 * utimensat_time32() entry point: converts the optional pair of
 * old_timespec32 values with get_old_timespec32() and calls do_utimes().
 * If both tv_nsec fields are UTIME_OMIT it returns 0 without calling
 * do_utimes() at all.
 */
SYSCALL_DEFINE4(utimensat_time32, unsigned int, dfd,
		const char __user *, filename,
		struct old_timespec32 __user *, t, int, flags)
{
	struct timespec64 tv[2];

	if (!t)
		return do_utimes(dfd, filename, NULL, flags);

	if (get_old_timespec32(&tv[0], &t[0]) ||
	    get_old_timespec32(&tv[1], &t[1]))
		return -EFAULT;

	if (tv[0].tv_nsec == UTIME_OMIT && tv[1].tv_nsec == UTIME_OMIT)
		return 0;

	return do_utimes(dfd, filename, tv, flags);
}
|
|
|
|
#ifdef __ARCH_WANT_SYS_UTIME32
|
|
/*
 * do_compat_futimesat - common code for futimesat_time32()/utimes_time32()
 * @dfd: directory file descriptor passed through to do_utimes()
 * @filename: user-space path name
 * @t: user pointer to two old_timeval32 entries (atime, mtime), or NULL
 *
 * The microsecond fields are read into tv_nsec, checked to lie in
 * [0, 1000000) and then multiplied by 1000.  All four user reads happen
 * before any range check, so -EFAULT takes precedence over -EINVAL.
 * A NULL @t is forwarded to do_utimes() as NULL times.
 */
static long do_compat_futimesat(unsigned int dfd, const char __user *filename,
				struct old_timeval32 __user *t)
{
	struct timespec64 tv[2];
	int i;

	if (!t)
		return do_utimes(dfd, filename, NULL, 0);

	for (i = 0; i < 2; i++) {
		if (get_user(tv[i].tv_sec, &t[i].tv_sec) ||
		    get_user(tv[i].tv_nsec, &t[i].tv_usec))
			return -EFAULT;
	}

	for (i = 0; i < 2; i++) {
		if (tv[i].tv_nsec >= 1000000 || tv[i].tv_nsec < 0)
			return -EINVAL;
	}

	/* Still microseconds at this point; scale to nanoseconds. */
	tv[0].tv_nsec *= 1000;
	tv[1].tv_nsec *= 1000;

	return do_utimes(dfd, filename, tv, 0);
}
|
|
|
|
/* futimesat_time32() entry point: hands its arguments to do_compat_futimesat(). */
SYSCALL_DEFINE3(futimesat_time32, unsigned int, dfd,
		const char __user *, filename,
		struct old_timeval32 __user *, t)
{
	return do_compat_futimesat(dfd, filename, t);
}
|
|
|
|
/* utimes_time32() entry point: do_compat_futimesat() with AT_FDCWD as the directory fd. */
SYSCALL_DEFINE2(utimes_time32, const char __user *, filename,
		struct old_timeval32 __user *, t)
{
	return do_compat_futimesat(AT_FDCWD, filename, t);
}
|
|
#endif
|
|
#endif
|