diff --git a/Documentation/filesystems/nfs/index.rst b/Documentation/filesystems/nfs/index.rst
index 95c2c009874c..a29a212b5b4d 100644
--- a/Documentation/filesystems/nfs/index.rst
+++ b/Documentation/filesystems/nfs/index.rst
@@ -13,5 +13,6 @@ NFS
    rpc-cache
    rpc-server-gss
    nfs41-server
+   nfsd-io-modes
    knfsd-stats
    reexport
diff --git a/Documentation/filesystems/nfs/nfsd-io-modes.rst b/Documentation/filesystems/nfs/nfsd-io-modes.rst
new file mode 100644
index 000000000000..0fd6e82478fe
--- /dev/null
+++ b/Documentation/filesystems/nfs/nfsd-io-modes.rst
@@ -0,0 +1,153 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
+NFSD IO MODES
+=============
+
+Overview
+========
+
+NFSD has historically always used buffered IO when servicing READ and
+WRITE operations. BUFFERED is NFSD's default IO mode, but it is possible
+to override that default to use either DONTCACHE or DIRECT IO modes.
+
+Experimental NFSD debugfs interfaces are available to allow the NFSD IO
+mode used for READ and WRITE to be configured independently. See both:
+
+- /sys/kernel/debug/nfsd/io_cache_read
+- /sys/kernel/debug/nfsd/io_cache_write
+
+The default value for both io_cache_read and io_cache_write reflects
+NFSD's default IO mode (which is NFSD_IO_BUFFERED=0).
+
+Based on the configured settings, NFSD's IO will either be:
+
+- cached using page cache (NFSD_IO_BUFFERED=0)
+- cached but removed from page cache on completion (NFSD_IO_DONTCACHE=1)
+- not cached, with stable_how=NFS_UNSTABLE (NFSD_IO_DIRECT=2)
+
+To set an NFSD IO mode, write a supported value (0 - 2) to the
+corresponding IO operation's debugfs interface, e.g.::
+
+  echo 2 > /sys/kernel/debug/nfsd/io_cache_read
+  echo 2 > /sys/kernel/debug/nfsd/io_cache_write
+
+To check which IO mode NFSD is using for READ or WRITE, simply read the
+corresponding IO operation's debugfs interface, e.g.::
+
+  cat /sys/kernel/debug/nfsd/io_cache_read
+  cat /sys/kernel/debug/nfsd/io_cache_write
+
+If you experiment with NFSD's IO modes on a recent kernel and have
+interesting results, please report them to linux-nfs@vger.kernel.org
+
+NFSD DONTCACHE
+==============
+
+DONTCACHE offers a hybrid approach to servicing IO that aims to offer
+the benefits of using DIRECT IO without any of the strict alignment
+requirements that DIRECT IO imposes. To achieve this, buffered IO is used
+but the IO is flagged to "drop behind" (meaning associated pages are
+dropped from the page cache) when IO completes.
+
+DONTCACHE aims to avoid what has proven to be a fairly significant
+limitation of Linux's memory management subsystem if/when large amounts of
+data are infrequently accessed (e.g. read once _or_ written once but not
+read until much later). Such use-cases are particularly problematic
+because the page cache will eventually become a bottleneck to servicing
+new IO requests.
+
+For more context on DONTCACHE, please see these Linux commit headers:
+
+- Overview:  9ad6344568cc3 ("mm/filemap: change filemap_create_folio()
+  to take a struct kiocb")
+- for READ:  8026e49bff9b1 ("mm/filemap: add read support for
+  RWF_DONTCACHE")
+- for WRITE: 974c5e6139db3 ("xfs: flag as supporting FOP_DONTCACHE")
+
+NFSD_IO_DONTCACHE will fall back to NFSD_IO_BUFFERED if the underlying
+filesystem doesn't indicate support by setting FOP_DONTCACHE.
+
+NFSD DIRECT
+===========
+
+DIRECT IO doesn't make use of the page cache; as such, it is able to
+avoid the Linux memory management's page reclaim scalability problems
+without resorting to the hybrid use of page cache that DONTCACHE does.
+
+Some workloads benefit from NFSD avoiding the page cache, particularly
+those with a working set that is significantly larger than available
+system memory. The pathological worst-case workload that NFSD DIRECT has
+proven to help most is: NFS client issuing large sequential IO to a file
+that is 2-3 times larger than the NFS server's available system memory.
+The reason for such improvement is NFSD DIRECT eliminates a lot of work
+that the memory management subsystem would otherwise be required to
+perform (e.g. page allocation, dirty writeback, page reclaim). When
+using NFSD DIRECT, kswapd and kcompactd are no longer commanding CPU
+time trying to find adequate free pages so that forward IO progress can
+be made.
+
+The performance win associated with using NFSD DIRECT was previously
+discussed on linux-nfs, see:
+https://lore.kernel.org/linux-nfs/aEslwqa9iMeZjjlV@kernel.org/
+
+But in summary:
+
+- NFSD DIRECT can significantly reduce memory requirements
+- NFSD DIRECT can reduce CPU load by avoiding costly page reclaim work
+- NFSD DIRECT can offer more deterministic IO performance
+
+As always, your mileage may vary and so it is important to carefully
+consider if/when it is beneficial to make use of NFSD DIRECT. When
+assessing comparative performance of your workload please be sure to log
+relevant performance metrics during testing (e.g. memory usage, cpu
+usage, IO performance). Using perf to collect perf data that may be used
+to generate a "flamegraph" for work Linux must perform on behalf of your
+test is a really meaningful way to compare the relative health of the
+system and how switching NFSD's IO mode changes what is observed.
+
+If NFSD_IO_DIRECT is specified by writing 2 (or 3 and 4 for WRITE) to
+NFSD's debugfs interfaces, ideally the IO will be aligned relative to
+the underlying block device's logical_block_size. Also the memory buffer
+used to store the READ or WRITE payload must be aligned relative to the
+underlying block device's dma_alignment.
+
+But NFSD DIRECT does handle misaligned IO in terms of O_DIRECT as best
+it can:
+
+Misaligned READ:
+    If NFSD_IO_DIRECT is used, expand any misaligned READ to the next
+    DIO-aligned block (on either end of the READ). The expanded READ is
+    verified to have proper offset/len (logical_block_size) and
+    dma_alignment checking.
+
+Misaligned WRITE:
+    If NFSD_IO_DIRECT is used, split any misaligned WRITE into a start,
+    middle and end as needed. The large middle segment is DIO-aligned
+    and the start and/or end are misaligned. Buffered IO is used for the
+    misaligned segments and O_DIRECT is used for the middle DIO-aligned
+    segment. DONTCACHE buffered IO is _not_ used for the misaligned
+    segments because using normal buffered IO offers significant RMW
+    performance benefit when handling streaming misaligned WRITEs.
+
+Tracing:
+    The nfsd_read_direct trace event shows how NFSD expands any
+    misaligned READ to the next DIO-aligned block (on either end of the
+    original READ, as needed).
+
+    This combination of trace events is useful for READs::
+
+      echo 1 > /sys/kernel/tracing/events/nfsd/nfsd_read_vector/enable
+      echo 1 > /sys/kernel/tracing/events/nfsd/nfsd_read_direct/enable
+      echo 1 > /sys/kernel/tracing/events/nfsd/nfsd_read_io_done/enable
+      echo 1 > /sys/kernel/tracing/events/xfs/xfs_file_direct_read/enable
+
+    The nfsd_write_direct trace event shows how NFSD splits a given
+    misaligned WRITE into a DIO-aligned middle segment.
+
+    This combination of trace events is useful for WRITEs::
+
+      echo 1 > /sys/kernel/tracing/events/nfsd/nfsd_write_opened/enable
+      echo 1 > /sys/kernel/tracing/events/nfsd/nfsd_write_direct/enable
+      echo 1 > /sys/kernel/tracing/events/nfsd/nfsd_write_io_done/enable
+      echo 1 > /sys/kernel/tracing/events/xfs/xfs_file_direct_write/enable
diff --git a/Documentation/filesystems/nfs/nfsd-maintainer-entry-profile.rst b/Documentation/filesystems/nfs/nfsd-maintainer-entry-profile.rst
new file mode 100644
index 000000000000..4d6b57dbab2a
--- /dev/null
+++ b/Documentation/filesystems/nfs/nfsd-maintainer-entry-profile.rst
@@ -0,0 +1,547 @@
+NFSD Maintainer Entry Profile
+=============================
+
+A Maintainer Entry Profile supplements the top-level process
+documents (found in Documentation/process/) with customs that are
+specific to a subsystem and its maintainers. A contributor may use
+this document to set their expectations and avoid common mistakes.
+A maintainer may use these profiles to look across subsystems for
+opportunities to converge on best common practices.
+
+Overview
+--------
+The Network File System (NFS) is a standardized family of network
+protocols that enable access to files across a set of
+network-connected peer hosts. Applications on NFS clients access
+files that reside on file systems that are shared by NFS servers. A
+single network peer can act as both an NFS client and an NFS server.
+
+NFSD refers to the NFS server implementation included in the Linux
+kernel. An in-kernel NFS server has fast access to files stored
+in file systems local to that server. NFSD can share files stored
+on most of the file system types native to Linux, including xfs,
+ext4, btrfs, and tmpfs.
+
+Mailing list
+------------
+The linux-nfs@vger.kernel.org mailing list is a public list. Its
+purpose is to enable collaboration among developers working on the
+Linux NFS stack, both client and server. It is not a place for
+conversations that are not related directly to the Linux NFS stack.
+
+The linux-nfs mailing list is archived on `lore.kernel.org <https://lore.kernel.org/linux-nfs/>`_.
+
+The Linux NFS community does not have any chat room.
+
+Reporting bugs
+--------------
+If you experience an NFSD-related bug on a distribution-built
+kernel, please start by working with your Linux distributor.
+
+Bug reports against upstream Linux code bases are welcome on the
+linux-nfs@vger.kernel.org mailing list, where some active triage
+can be done. NFSD bugs may also be reported in the Linux kernel
+community's bugzilla at:
+
+  https://bugzilla.kernel.org
+
+Please file NFSD-related bugs under the "Filesystems/NFSD"
+component. In general, including as much detail as possible is a
+good start, including pertinent system log messages from both
+the client and server.
+
+User space software related to NFSD, such as mountd or the exportfs
+command, is contained in the nfs-utils package. Report problems
+with those components to linux-nfs@vger.kernel.org. You might be
+directed to move the report to a specific bug tracker.
+
+Contributor's Guide
+-------------------
+
+Standards compliance
+~~~~~~~~~~~~~~~~~~~~
+The priority is for NFSD to interoperate fully with the Linux NFS
+client. We also test against other popular NFS client
+implementations regularly at NFS bake-a-thon events (also known as
+plug-fests). Non-Linux NFS clients are not part of upstream NFSD CI/CD.
+
+The NFSD community strives to provide an NFS server implementation
+that interoperates with all standards-compliant NFS client
+implementations. This is done by staying as close as is sensible to
+the normative mandates in the IETF's published NFS, RPC, and GSS-API
+standards.
+
+It is always useful to reference an RFC and section number in a code
+comment where behavior deviates from the standard (and even when the
+behavior is compliant but the implementation is obfuscatory).
+
+On the rare occasion when a deviation from standard-mandated
+behavior is needed, brief documentation of the use case or
+deficiencies in the standard is a required part of in-code
+documentation.
+
+Care must always be taken to avoid leaking local error codes (ie,
+errnos) to clients of NFSD. A proper NFS status code is always
+required in NFS protocol replies.
+
+NFSD administrative interfaces
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+NFSD administrative interfaces include:
+
+- an NFSD or SUNRPC module parameter
+
+- export options in /etc/exports
+
+- files under /proc/fs/nfsd/ or /proc/sys/sunrpc/
+
+- the NFSD netlink protocol
+
+Frequently, a request is made to introduce or modify one of NFSD's
+traditional administrative interfaces. Certainly it is technically
+easy to introduce a new administrative setting. However, there are
+good reasons why the NFSD maintainers prefer to leave that as a last
+resort:
+
+- As with any API, administrative interfaces are difficult to get
+  right.
+
+- Once they are documented and have a legacy of use, administrative
+  interfaces become difficult to modify or remove.
+
+- Every new administrative setting multiplies the NFSD test matrix.
+
+- The cost of one administrative interface is incremental, but costs
+  add up across all of the existing interfaces.
+
+It is often better for everyone if effort is made up front to
+understand the underlying requirement of the new setting, and
+then trying to make it tune itself (or to become otherwise
+unnecessary).
+
+If a new setting is indeed necessary, first consider adding it to
+the NFSD netlink protocol. Or if it doesn't need to be a reliable
+long term user space feature, it can be added to NFSD's menagerie of
+experimental settings which reside under /sys/kernel/debug/nfsd/ .
+
+Field observability
+~~~~~~~~~~~~~~~~~~~
+NFSD employs several different mechanisms for observing operation,
+including counters, printks, WARNings, and static trace points. Each
+has its strengths and weaknesses. Contributors should select the
+most appropriate tool for their task.
+
+- BUG must be avoided if at all possible, as it will frequently
+  result in a full system crash.
+
+- WARN is appropriate only when a full stack trace is useful.
+
+- printk can show detailed information. These must not be used
+  in code paths where they can be triggered repeatedly by remote
+  users.
+
+- dprintk can show detailed information, but can be enabled only
+  in pre-set groups. The overhead of emitting output makes dprintk
+  inappropriate for frequent operations like I/O.
+
+- Counters are always on, but provide little information about
+  individual events other than how frequently they occur.
+
+- static trace points can be enabled individually or in groups
+  (via a glob). These are generally low overhead, and thus are
+  favored for use in hot paths.
+
+- dynamic tracing, such as kprobes or eBPF, is quite flexible but
+  cannot be used in certain environments (e.g., full kernel
+  lockdown).
+
+Testing
+~~~~~~~
+The kdevops project
+
+  https://github.com/linux-kdevops/kdevops
+
+contains several NFS-specific workflows, as well as the community
+standard fstests suite. These workflows are based on open source
+testing tools such as ltp and fio. Contributors are encouraged to
+use these tools directly, or to install and use kdevops themselves,
+to verify their patches before submission.
+
+Coding style
+~~~~~~~~~~~~
+Follow the coding style preferences described in
+
+  Documentation/process/coding-style.rst
+
+with the following exceptions:
+
+- Add new local variables to a function in reverse Christmas tree
+  order
+
+- Use the kdoc comment style for:
+  non-static functions;
+  static inline functions; and
+  static functions that are callbacks/virtual functions.
+
+- All new function names start with ``nfsd_`` for functions that
+  are not specific to an NFS version.
+
+- New function names that are specific to NFSv2 or NFSv3, or are
+  used by all minor versions of NFSv4, use ``nfsdN_`` where N is
+  the version.
+
+- New function names specific to an NFSv4 minor version can be
+  named with ``nfsd4M_`` where M is the minor version.
+
+Patch preparation
+~~~~~~~~~~~~~~~~~
+Read and follow all guidelines in
+
+  Documentation/process/submitting-patches.rst
+
+Use tagging to identify all patch authors. However, reviewers and
+testers should be added by replying to the email patch submission.
+Email is extensively used in order to publicly archive review and
+testing attributions. These tags are automatically inserted into
+your patches when they are applied.
+
+The code in the body of the diff already shows /what/ is being
+changed. Thus it is not necessary to repeat that in the patch
+description. Instead, the description should contain one or more
+of:
+
+- A brief problem statement ("what is this patch trying to fix?")
+  with a root-cause analysis.
+
+- End-user visible symptoms or items that a support engineer might
+  use to search for the patch, like stack traces.
+
+- A brief explanation of why the patch is the best way to address
+  the problem.
+
+- Any context that reviewers might need to understand the changes
+  made by the patch.
+
+- Any relevant benchmarking results, and/or functional test results.
+
+As detailed in Documentation/process/submitting-patches.rst,
+identify the point in history that the issue being addressed was
+introduced by using a Fixes: tag.
+
+Mention in the patch description if that point in history cannot be
+determined -- that is, no Fixes: tag can be provided. In this case,
+please make it clear to maintainers whether an LTS backport is
+needed even though there is no Fixes: tag.
+
+The NFSD maintainers prefer to add stable tagging themselves, after
+public discussion in response to the patch submission. Contributors
+may suggest stable tagging, but be aware that many version
+management tools add such stable Cc's when you post your patches.
+Don't add "Cc: stable" unless you are absolutely sure the patch
+needs to go to stable during the initial submission process.
+
+Patch submission
+~~~~~~~~~~~~~~~~
+Patches to NFSD are submitted via the kernel's email-based review
+process that is common to most other kernel subsystems.
+
+Just before each submission, rebase your patch or series on the
+nfsd-testing branch at
+
+  https://git.kernel.org/pub/scm/linux/kernel/git/cel/linux.git
+
+The NFSD subsystem is maintained separately from the Linux in-kernel
+NFS client. The NFSD maintainers do not normally take submissions
+for client changes, nor can they respond authoritatively to bug
+reports or feature requests for NFS client code.
+
+This means that contributors might be asked to resubmit patches if
+they were emailed to the incorrect set of maintainers and reviewers.
+This is not a rejection, but simply a correction of the submission
+process.
+
+When in doubt, consult the NFSD entry in the MAINTAINERS file to
+see which files and directories fall under the NFSD subsystem.
+
+The proper set of email addresses for NFSD patches is:
+
+| To: the NFSD maintainers and reviewers listed in MAINTAINERS
+| Cc: linux-nfs@vger.kernel.org and optionally linux-kernel@
+
+If there are other subsystems involved in the patches (for example
+MM or RDMA) their primary mailing list address can be included in
+the Cc: field. Other contributors and interested parties may be
+included there as well.
+
+In general we prefer that contributors use common patch email tools
+such as "git send-email" or "stg email format/send", which tend to
+get the details right without a lot of fuss.
+
+A series consisting of a single patch is not required to have a
+cover letter. However, a cover letter can be included if there is
+substantial context that is not appropriate to include in the
+patch description.
+
+Please note that, with an e-mail based submission process, series
+cover letters are not part of the work that is committed to the
+kernel source code base or its commit history. Therefore always try
+to keep pertinent information in the patch descriptions.
+
+Design documentation is welcome, but as cover letters are not
+preserved, a perhaps better option is to include a patch that adds
+such documentation under Documentation/filesystems/nfs/.
+
+Reviewers will ask about test coverage and what use cases the
+patches are expected to address. Please be prepared to answer these
+questions.
+
+Review comments from maintainers might be politely stated, but in
+general, these are not optional to address when they are actionable.
+If necessary, the maintainers retain the right to not apply patches
+when contributors refuse to address reasonable requests.
+
+Post changes to kernel source code and user space source code as
+separate series. You can connect the two series with comments in
+your cover letters.
+
+Generally the NFSD maintainers ask for a repost even for simple
+modifications in order to publicly archive the request and the
+resulting repost before it is pulled into the NFSD trees. This
+also enables us to rebuild a patch series quickly without missing
+changes that might have been discussed via email.
+
+Avoid frequently reposting large series with only small changes. As
+a rule of thumb, posting substantial changes more than once a week
+will result in reviewer overload.
+
+Remember, there are only a handful of subsystem maintainers and
+reviewers, but potentially many sources of contributions. The
+maintainers and reviewers, therefore, are always the less scalable
+resource. Be kind to your friendly neighborhood maintainer.
+
+Patch Acceptance
+~~~~~~~~~~~~~~~~
+There isn't a formal review process for NFSD, but we like to see
+at least two Reviewed-by: notices for patches that are more than
+simple clean-ups. Reviews are done in public on
+linux-nfs@vger.kernel.org and are archived on lore.kernel.org.
+
+Currently the NFSD patch queues are maintained in branches here:
+
+  https://git.kernel.org/pub/scm/linux/kernel/git/cel/linux.git
+
+The NFSD maintainers apply patches initially to the nfsd-testing
+branch, which is always open to new submissions. Patches can be
+applied while review is ongoing. nfsd-testing is a topic branch,
+so it can change frequently, it will be rebased, and your patch
+might get dropped if there is a problem with it.
+
+Generally a script-generated "thank you" email will indicate when
+your patch has been added to the nfsd-testing branch. You can track
+the progress of your patch using the linux-nfs patchwork instance:
+
+  https://patchwork.kernel.org/project/linux-nfs/list/
+
+While your patch is in nfsd-testing, it is exposed to a variety of
+test environments, including community zero-day bots, static
+analysis tools, and NFSD continuous integration testing. The soak
+period is three to four weeks.
+
+Each patch that survives in nfsd-testing for the soak period without
+changes is moved to the nfsd-next branch.
+
+The nfsd-next branch is automatically merged into linux-next and
+fs-next on a nightly basis.
+
+Patches that survive in nfsd-next are included in the next NFSD
+merge window pull request. These windows typically occur once every
+63 days (nine weeks).
+
+When the upstream merge window closes, the nfsd-next branch is
+renamed nfsd-fixes, and a new nfsd-next branch is created, based on
+the upstream -rc1 tag.
+
+Fixes that are destined for an upstream -rc release also run the
+nfsd-testing gauntlet, but are then applied to the nfsd-fixes
+branch. That branch is made available for Linus to pull after a
+short time. In order to limit the risk of introducing regressions,
+we limit such fixes to emergency situations or fixes to breakage
+that occurred during the most recent upstream merge.
+
+Please make it clear when submitting an emergency patch that
+immediate action (either application to -rc or LTS backport) is
+needed.
+
+Sensitive patch submissions and bug reports
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+CVEs are generated by specific members of the Linux kernel community
+and several external entities. The Linux NFS community does not emit
+or assign CVEs. CVEs are assigned after an issue and its fix are
+known.
+
+However, the NFSD maintainers sometimes receive sensitive security
+reports, and at times these are significant enough to need to be
+embargoed. In such rare cases, fixes can be developed and reviewed
+out of the public eye.
+
+Please be aware that many version management tools add the stable
+Cc's when you post your patches. This is generally a nuisance, but
+it can result in outing an embargoed security issue accidentally.
+Don't add "Cc: stable" unless you are absolutely sure the patch
+needs to go to stable@ during the initial submission process.
+
+Patches that are merged without ever appearing on any list, and
+which carry a Reported-by: or Fixes: tag are detected as suspicious
+by security-focused people. We encourage that, after any private
+review, security-sensitive patches should be posted to linux-nfs@
+for the usual public review, archiving, and test period.
+
+LLM-generated submissions
+~~~~~~~~~~~~~~~~~~~~~~~~~
+The Linux kernel community as a whole is still exploring the new
+world of LLM-generated code. The NFSD maintainers will entertain
+submission of patches that are partially or wholly generated by
+LLM-based development tools. Such submissions are held to the
+same standards as submissions created entirely by human authors:
+
+- The human contributor identifies themselves via a Signed-off-by:
+  tag. This tag counts as a Developer's Certificate of Origin.
+
+- The human contributor is solely responsible for code provenance
+  and any contamination by inadvertently-included code with a
+  conflicting license, as usual.
+
+- The human contributor must be able to answer and address review
+  questions. A patch description such as "This fixed my problem
+  but I don't know why" is not acceptable.
+
+- The contribution is subjected to the same test regimen as all
+  other submissions.
+
+- An indication (via a Generated-by: tag or otherwise) that the
+  contribution is LLM-generated is not required.
+
+It is easy to address review comments and fix requests in
+LLM-generated code. So easy, in fact, that it becomes tempting to
+repost refreshed code immediately. Please resist that temptation.
+
+As always, please avoid reposting series revisions more than once
+every 24 hours.
+
+Clean-up patches
+~~~~~~~~~~~~~~~~
+The NFSD maintainers discourage patches which perform simple
+clean-ups, which are not in the context of other work. For example:
+
+* Addressing ``checkpatch.pl`` warnings after merge
+* Addressing :ref:`Local variable ordering<rcs>` issues
+* Addressing long-standing whitespace damage
+
+This is because it is felt that the churn that such changes produce
+comes at a greater cost than the value of such clean-ups.
+
+Conversely, spelling and grammar fixes are encouraged.
+
+Stable and LTS support
+----------------------
+Upstream NFSD continuous integration testing runs against LTS trees
+whenever they are updated.
+
+Please indicate when a patch containing a fix needs to be considered
+for LTS kernels, either via a Fixes: tag or explicit mention.
+
+Feature requests
+----------------
+There is no one way to make an official feature request, but
+discussion about the request should eventually make its way to
+the linux-nfs@vger.kernel.org mailing list for public review by
+the community.
+
+Subsystem boundaries
+~~~~~~~~~~~~~~~~~~~~
+NFSD itself is not much more than a protocol engine. This means its
+primary responsibility is to translate the NFS protocol into API
+calls in the Linux kernel. For example, NFSD is not responsible for
+knowing exactly how bytes or file attributes are managed on a block
+device. It relies on other kernel subsystems for that.
+
+If the subsystems on which NFSD relies do not implement a particular
+feature, even if the standard NFS protocols do support that feature,
+that usually means NFSD cannot provide that feature without
+substantial development work in other areas of the kernel.
+
+Specificity
+~~~~~~~~~~~
+Feature requests can come from anywhere, and thus can often be
+nebulous. A requester might not understand what a "use case" or
+"user story" is. These descriptive paradigms are often used by
+developers and architects to understand what is required of a
+design, but are terms of art in the software trade, not used in
+the everyday world.
+
+In order to prevent contributors and maintainers from becoming
+overwhelmed, we won't be afraid of saying "no" politely to
+underspecified requests.
+
+Community roles and their authority
+-----------------------------------
+The purpose of Linux subsystem communities is to provide expertise
+and active stewardship of a narrow set of source files in the Linux
+kernel. This can include managing user space tooling as well.
+
+To contextualize the structure of the Linux NFS community that
+is responsible for stewardship of the NFS server code base, we
+define the community roles here.
+
+- **Contributor** : Anyone who submits a code change, bug fix,
+  recommendation, documentation fix, and so on. A contributor can
+  submit regularly or infrequently.
+
+- **Outside Contributor** : A contributor who is not a regular actor
+  in the Linux NFS community. This can mean someone who contributes
+  to other parts of the kernel, or someone who just noticed a
+  misspelling in a comment and sent a patch.
+
+- **Reviewer** : Someone who is named in the MAINTAINERS file as a
+  reviewer is an area expert who can request changes to contributed
+  code, and expects that contributors will address the request.
+
+- **External Reviewer** : Someone who is not named in the
+  MAINTAINERS file as a reviewer, but who is an area expert.
+  Examples include Linux kernel contributors with networking,
+  security, or persistent storage expertise, or developers who
+  contribute primarily to other NFS implementations.
+
+One or more people will take on the following roles. These people
+are often generically referred to as "maintainers", and are
+identified in the MAINTAINERS file with the "M:" tag under the NFSD
+subsystem.
+
+- **Upstream Release Manager** : This role is responsible for
+  curating contributions into a branch, reviewing test results, and
+  then sending a pull request during merge windows. There is a
+  trust relationship between the release manager and Linus.
+
+- **Bug Triager** : Someone who is a first responder to bug reports
+  submitted to the linux-nfs mailing list or bug trackers, and helps
+  troubleshoot and identify next steps.
+
+- **Security Lead** : The security lead handles contacts from the
+  security community to resolve immediate issues, as well as dealing
+  with long-term security issues such as supply chain concerns. For
+  upstream, that's usually whether contributions violate licensing
+  or other intellectual property agreements.
+
+- **Testing Lead** : The testing lead builds and runs the test
+  infrastructure for the subsystem. The testing lead may ask for
+  patches to be dropped because of ongoing high defect rates.
+
+- **LTS Maintainer** : The LTS maintainer is responsible for managing
+  the Fixes: and Cc: stable annotations on patches, and seeing that
+  patches that cannot be automatically applied to LTS kernels get
+  proper manual backports as necessary.
+
+- **Community Manager** : This umpire role can be asked to call balls
+  and strikes during conflicts, but is also responsible for ensuring
+  the health of the relationships within the community and for
+  facilitating discussions on long-term topics such as how to manage
+  growing technical debt.
diff --git a/Documentation/maintainer/maintainer-entry-profile.rst b/Documentation/maintainer/maintainer-entry-profile.rst
index d36dd892a78a..6020d188e13d 100644
--- a/Documentation/maintainer/maintainer-entry-profile.rst
+++ b/Documentation/maintainer/maintainer-entry-profile.rst
@@ -110,5 +110,6 @@ to do something different in the near future.
    ../process/maintainer-netdev
    ../driver-api/vfio-pci-device-specific-driver-acceptance
    ../nvme/feature-and-quirk-policy
+   ../filesystems/nfs/nfsd-maintainer-entry-profile
    ../filesystems/xfs/xfs-maintainer-entry-profile
    ../mm/damon/maintainer-profile
diff --git a/MAINTAINERS b/MAINTAINERS
index 6a28883348ca..78b32a60849a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13654,6 +13654,7 @@ R:	Dai Ngo <Dai.Ngo@oracle.com>
 R:	Tom Talpey <tom@talpey.com>
 L:	linux-nfs@vger.kernel.org
 S:	Supported
+P:	Documentation/filesystems/nfs/nfsd-maintainer-entry-profile.rst
 B:	https://bugzilla.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux.git
 F:	Documentation/filesystems/nfs/
@@ -13673,6 +13674,10 @@ F:	include/uapi/linux/sunrpc/
 F:	net/sunrpc/
 F:	tools/net/sunrpc/
 
+KERNEL NFSD BLOCK and SCSI LAYOUT DRIVER
+R:	Christoph Hellwig <hch@lst.de>
+F:	fs/nfsd/blocklayout*
+
 KERNEL PACMAN PACKAGING (in addition to generic KERNEL BUILD)
 M:	Thomas Weißschuh <linux@weissschuh.net>
 R:	Christian Heusel <christian@heusel.eu>
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index a31dc9588eb8..3a3d05cfe09a 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -495,6 +495,9 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 				(long long)lock->fl.fl_end,
 				wait);
 
+	if (nlmsvc_file_cannot_lock(file))
+		return nlm_lck_denied_nolocks;
+
 	if (!locks_can_async_lock(nlmsvc_file_file(file)->f_op)) {
 		async_block = wait;
 		wait = 0;
@@ -621,6 +624,9 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end);
 
+	if (nlmsvc_file_cannot_lock(file))
+		return nlm_lck_denied_nolocks;
+
 	if (locks_in_grace(SVC_NET(rqstp))) {
 		ret = nlm_lck_denied_grace_period;
 		goto out;
@@ -678,6 +684,9 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock)
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end);
 
+	if (nlmsvc_file_cannot_lock(file))
+		return nlm_lck_denied_nolocks;
+
 	/* First, cancel any lock that might be there */
 	nlmsvc_cancel_blocked(net, file, lock);
 
@@ -715,6 +724,9 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end);
 
+	if (nlmsvc_file_cannot_lock(file))
+		return nlm_lck_denied_nolocks;
+
 	if (locks_in_grace(net))
 		return nlm_lck_denied_grace_period;
 
diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c
index ade4931b2da2..88c81ce1148d 100644
--- a/fs/lockd/svcshare.c
+++ b/fs/lockd/svcshare.c
@@ -32,6 +32,9 @@ nlmsvc_share_file(struct nlm_host *host, struct nlm_file *file,
 	struct xdr_netobj	*oh = &argp->lock.oh;
 	u8			*ohdata;
 
+	if (nlmsvc_file_cannot_lock(file))
+		return nlm_lck_denied_nolocks;
+
 	for (share = file->f_shares; share; share = share->s_next) {
 		if (share->s_host == host && nlm_cmp_owner(share, oh))
 			goto update;
@@ -72,6 +75,9 @@ nlmsvc_unshare_file(struct nlm_host *host, struct nlm_file *file,
 	struct nlm_share	*share, **shpp;
 	struct xdr_netobj	*oh = &argp->lock.oh;
 
+	if (nlmsvc_file_cannot_lock(file))
+		return nlm_lck_denied_nolocks;
+
 	for (shpp = &file->f_shares; (share = *shpp) != NULL;
 					shpp = &share->s_next) {
 		if (share->s_host == host && nlm_cmp_owner(share, oh)) {
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index e134dce45e35..0b5c1a0bf1cf 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -5,6 +5,7 @@ config NFSD
 	depends on FILE_LOCKING
 	depends on FSNOTIFY
 	select CRC32
+	select CRYPTO_LIB_MD5 if NFSD_LEGACY_CLIENT_TRACKING
 	select CRYPTO_LIB_SHA256 if NFSD_V4
 	select LOCKD
 	select SUNRPC
@@ -77,8 +78,7 @@ config NFSD_V4
 	depends on NFSD && PROC_FS
 	select FS_POSIX_ACL
 	select RPCSEC_GSS_KRB5
-	select CRYPTO
-	select CRYPTO_MD5
+	select CRYPTO # required by RPCSEC_GSS_KRB5
 	select GRACE_PERIOD
 	select NFS_V4_2_SSC_HELPER if NFS_V4_2
 	help
@@ -164,7 +164,7 @@ config NFSD_V4_SECURITY_LABEL
 config NFSD_LEGACY_CLIENT_TRACKING
 	bool "Support legacy NFSv4 client tracking methods (DEPRECATED)"
 	depends on NFSD_V4
-	default y
+	default n
 	help
 	  The NFSv4 server needs to store a small amount of information on
 	  stable storage in order to handle state recovery after reboot. Most
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index fde5539cf6a6..afa16d7a8013 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -13,70 +13,49 @@
 #include "pnfs.h"
 #include "filecache.h"
 #include "vfs.h"
+#include "trace.h"
 
 #define NFSDDBG_FACILITY	NFSDDBG_PNFS
 
 
+/*
+ * Get an extent from the file system that starts at offset or below
+ * and may be shorter than the requested length.
+ */
 static __be32
-nfsd4_block_proc_layoutget(struct svc_rqst *rqstp, struct inode *inode,
-		const struct svc_fh *fhp, struct nfsd4_layoutget *args)
+nfsd4_block_map_extent(struct inode *inode, const struct svc_fh *fhp,
+		u64 offset, u64 length, u32 iomode, u64 minlength,
+		struct pnfs_block_extent *bex)
 {
-	struct nfsd4_layout_seg *seg = &args->lg_seg;
 	struct super_block *sb = inode->i_sb;
-	u32 block_size = i_blocksize(inode);
-	struct pnfs_block_extent *bex;
 	struct iomap iomap;
 	u32 device_generation = 0;
 	int error;
 
-	if (locks_in_grace(SVC_NET(rqstp)))
-		return nfserr_grace;
-
-	if (seg->offset & (block_size - 1)) {
-		dprintk("pnfsd: I/O misaligned\n");
-		goto out_layoutunavailable;
-	}
-
-	/*
-	 * Some clients barf on non-zero block numbers for NONE or INVALID
-	 * layouts, so make sure to zero the whole structure.
-	 */
-	error = -ENOMEM;
-	bex = kzalloc(sizeof(*bex), GFP_KERNEL);
-	if (!bex)
-		goto out_error;
-	args->lg_content = bex;
-
-	error = sb->s_export_op->map_blocks(inode, seg->offset, seg->length,
-					    &iomap, seg->iomode != IOMODE_READ,
-					    &device_generation);
+	error = sb->s_export_op->map_blocks(inode, offset, length, &iomap,
+			iomode != IOMODE_READ, &device_generation);
 	if (error) {
 		if (error == -ENXIO)
-			goto out_layoutunavailable;
-		goto out_error;
-	}
-
-	if (iomap.length < args->lg_minlength) {
-		dprintk("pnfsd: extent smaller than minlength\n");
-		goto out_layoutunavailable;
+			return nfserr_layoutunavailable;
+		return nfserrno(error);
 	}
 
 	switch (iomap.type) {
 	case IOMAP_MAPPED:
-		if (seg->iomode == IOMODE_READ)
+		if (iomode == IOMODE_READ)
 			bex->es = PNFS_BLOCK_READ_DATA;
 		else
 			bex->es = PNFS_BLOCK_READWRITE_DATA;
 		bex->soff = iomap.addr;
 		break;
 	case IOMAP_UNWRITTEN:
-		if (seg->iomode & IOMODE_RW) {
+		if (iomode & IOMODE_RW) {
 			/*
 			 * Crack monkey special case from section 2.3.1.
 			 */
-			if (args->lg_minlength == 0) {
+			if (minlength == 0) {
 				dprintk("pnfsd: no soup for you!\n");
-				goto out_layoutunavailable;
+				return nfserr_layoutunavailable;
 			}
 
 			bex->es = PNFS_BLOCK_INVALID_DATA;
@@ -85,7 +64,7 @@ nfsd4_block_proc_layoutget(struct svc_rqst *rqstp, struct inode *inode,
 		}
 		fallthrough;
 	case IOMAP_HOLE:
-		if (seg->iomode == IOMODE_READ) {
+		if (iomode == IOMODE_READ) {
 			bex->es = PNFS_BLOCK_NONE_DATA;
 			break;
 		}
@@ -93,27 +72,107 @@ nfsd4_block_proc_layoutget(struct svc_rqst *rqstp, struct inode *inode,
 	case IOMAP_DELALLOC:
 	default:
 		WARN(1, "pnfsd: filesystem returned %d extent\n", iomap.type);
-		goto out_layoutunavailable;
+		return nfserr_layoutunavailable;
 	}
 
 	error = nfsd4_set_deviceid(&bex->vol_id, fhp, device_generation);
 	if (error)
-		goto out_error;
+		return nfserrno(error);
+
 	bex->foff = iomap.offset;
 	bex->len = iomap.length;
+	return nfs_ok;
+}
 
-	seg->offset = iomap.offset;
-	seg->length = iomap.length;
+static __be32
+nfsd4_block_proc_layoutget(struct svc_rqst *rqstp, struct inode *inode,
+		const struct svc_fh *fhp, struct nfsd4_layoutget *args)
+{
+	struct nfsd4_layout_seg *seg = &args->lg_seg;
+	struct pnfs_block_layout *bl;
+	struct pnfs_block_extent *first_bex, *last_bex;
+	u64 offset = seg->offset, length = seg->length;
+	u32 i, nr_extents_max, block_size = i_blocksize(inode);
+	__be32 nfserr;
 
-	dprintk("GET: 0x%llx:0x%llx %d\n", bex->foff, bex->len, bex->es);
-	return 0;
+	if (locks_in_grace(SVC_NET(rqstp)))
+		return nfserr_grace;
+
+	nfserr = nfserr_layoutunavailable;
+	if (seg->offset & (block_size - 1)) {
+		dprintk("pnfsd: I/O misaligned\n");
+		goto out_error;
+	}
+
+	/*
+	 * RFC 8881, section 3.3.17:
+	 *   The layout4 data type defines a layout for a file.
+	 *
+	 * RFC 8881, section 18.43.3:
+	 *   The loga_maxcount field specifies the maximum layout size
+	 *   (in bytes) that the client can handle. If the size of the
+	 *   layout structure exceeds the size specified by maxcount,
+	 *   the metadata server will return the NFS4ERR_TOOSMALL error.
+	 */
+	nfserr = nfserr_toosmall;
+	if (args->lg_maxcount < PNFS_BLOCK_LAYOUT4_SIZE +
+				PNFS_BLOCK_EXTENT_SIZE)
+		goto out_error;
+
+	/*
+	 * Limit the maximum layout size to avoid allocating
+	 * a large buffer on the server for each layout request.
+	 */
+	nr_extents_max = (min(args->lg_maxcount, PAGE_SIZE) -
+			  PNFS_BLOCK_LAYOUT4_SIZE) / PNFS_BLOCK_EXTENT_SIZE;
+
+	/*
+	 * Some clients barf on non-zero block numbers for NONE or INVALID
+	 * layouts, so make sure to zero the whole structure.
+	 */
+	nfserr = nfserrno(-ENOMEM);
+	bl = kzalloc(struct_size(bl, extents, nr_extents_max), GFP_KERNEL);
+	if (!bl)
+		goto out_error;
+	bl->nr_extents = nr_extents_max;
+	args->lg_content = bl;
+
+	for (i = 0; i < bl->nr_extents; i++) {
+		struct pnfs_block_extent *bex = bl->extents + i;
+		u64 bex_length;
+
+		nfserr = nfsd4_block_map_extent(inode, fhp, offset, length,
+				seg->iomode, args->lg_minlength, bex);
+		if (nfserr != nfs_ok)
+			goto out_error;
+
+		bex_length = bex->len - (offset - bex->foff);
+		if (bex_length >= length) {
+			bl->nr_extents = i + 1;
+			break;
+		}
+
+		offset = bex->foff + bex->len;
+		length -= bex_length;
+	}
+
+	first_bex = bl->extents;
+	last_bex = bl->extents + bl->nr_extents - 1;
+
+	nfserr = nfserr_layoutunavailable;
+	length = last_bex->foff + last_bex->len - seg->offset;
+	if (length < args->lg_minlength) {
+		dprintk("pnfsd: extent smaller than minlength\n");
+		goto out_error;
+	}
+
+	seg->offset = first_bex->foff;
+	seg->length = last_bex->foff - first_bex->foff + last_bex->len;
+	return nfs_ok;
 
 out_error:
 	seg->length = 0;
-	return nfserrno(error);
-out_layoutunavailable:
-	seg->length = 0;
-	return nfserr_layoutunavailable;
+	return nfserr;
 }
 
 static __be32
@@ -340,9 +399,12 @@ nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
 {
 	struct nfs4_client *clp = ls->ls_stid.sc_client;
 	struct block_device *bdev = file->nf_file->f_path.mnt->mnt_sb->s_bdev;
+	int status;
 
-	bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
-			nfsd4_scsi_pr_key(clp), 0, true);
+	status = bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
+			nfsd4_scsi_pr_key(clp),
+			PR_EXCLUSIVE_ACCESS_REG_ONLY, true);
+	trace_nfsd_pnfs_fence(clp, bdev->bd_disk->disk_name, status);
 }
 
 const struct nfsd4_layout_ops scsi_layout_ops = {
diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c
index e50afe340737..196ef4245604 100644
--- a/fs/nfsd/blocklayoutxdr.c
+++ b/fs/nfsd/blocklayoutxdr.c
@@ -14,12 +14,25 @@
 #define NFSDDBG_FACILITY	NFSDDBG_PNFS
 
 
+/**
+ * nfsd4_block_encode_layoutget - encode block/scsi layout extent array
+ * @xdr: stream for data encoding
+ * @lgp: layoutget content, actually an array of extents to encode
+ *
+ * Encode the opaque loc_body field in the layoutget response. Since the
+ * pnfs_block_layout4 and pnfs_scsi_layout4 structures on the wire are
+ * the same, this function is used by both layout drivers.
+ *
+ * Return values:
+ *   %nfs_ok: Success, all extents encoded into @xdr
+ *   %nfserr_toosmall: Not enough space in @xdr to encode all the data
+ */
 __be32
 nfsd4_block_encode_layoutget(struct xdr_stream *xdr,
 		const struct nfsd4_layoutget *lgp)
 {
-	const struct pnfs_block_extent *b = lgp->lg_content;
-	int len = sizeof(__be32) + 5 * sizeof(__be64) + sizeof(__be32);
+	const struct pnfs_block_layout *bl = lgp->lg_content;
+	u32 i, len = sizeof(__be32) + bl->nr_extents * PNFS_BLOCK_EXTENT_SIZE;
 	__be32 *p;
 
 	p = xdr_reserve_space(xdr, sizeof(__be32) + len);
@@ -27,14 +40,19 @@ nfsd4_block_encode_layoutget(struct xdr_stream *xdr,
 		return nfserr_toosmall;
 
 	*p++ = cpu_to_be32(len);
-	*p++ = cpu_to_be32(1);		/* we always return a single extent */
+	*p++ = cpu_to_be32(bl->nr_extents);
 
-	p = svcxdr_encode_deviceid4(p, &b->vol_id);
-	p = xdr_encode_hyper(p, b->foff);
-	p = xdr_encode_hyper(p, b->len);
-	p = xdr_encode_hyper(p, b->soff);
-	*p++ = cpu_to_be32(b->es);
-	return 0;
+	for (i = 0; i < bl->nr_extents; i++) {
+		const struct pnfs_block_extent *bex = bl->extents + i;
+
+		p = svcxdr_encode_deviceid4(p, &bex->vol_id);
+		p = xdr_encode_hyper(p, bex->foff);
+		p = xdr_encode_hyper(p, bex->len);
+		p = xdr_encode_hyper(p, bex->soff);
+		*p++ = cpu_to_be32(bex->es);
+	}
+
+	return nfs_ok;
 }
 
 static int
diff --git a/fs/nfsd/blocklayoutxdr.h b/fs/nfsd/blocklayoutxdr.h
index 7d25ef689671..2e0c6c7d2b42 100644
--- a/fs/nfsd/blocklayoutxdr.h
+++ b/fs/nfsd/blocklayoutxdr.h
@@ -8,6 +8,15 @@
 struct iomap;
 struct xdr_stream;
 
+/* On-the-wire size of the layout4 struct when it carries zero extents */
+#define PNFS_BLOCK_LAYOUT4_SIZE \
+	(sizeof(__be32) * 2 +	/* offset4 */ \
+	 sizeof(__be32) * 2 +	/* length4 */ \
+	 sizeof(__be32) +	/* layoutiomode4 */ \
+	 sizeof(__be32) +	/* layouttype4 */ \
+	 sizeof(__be32) +	/* number of bytes */ \
+	 sizeof(__be32))	/* number of extents */
+
 struct pnfs_block_extent {
 	struct nfsd4_deviceid		vol_id;
 	u64				foff;
@@ -21,6 +30,11 @@ struct pnfs_block_range {
 	u64				len;
 };
 
+struct pnfs_block_layout {
+	u32				nr_extents;
+	struct pnfs_block_extent	extents[] __counted_by(nr_extents);
+};
+
 /*
  * Random upper cap for the uuid length to avoid unbounded allocation.
  * Not actually limited by the protocol.
diff --git a/fs/nfsd/debugfs.c b/fs/nfsd/debugfs.c
index ed2b9e066206..7f44689e0a53 100644
--- a/fs/nfsd/debugfs.c
+++ b/fs/nfsd/debugfs.c
@@ -44,6 +44,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(nfsd_dsr_fops, nfsd_dsr_get, nfsd_dsr_set, "%llu\n");
  * Contents:
  *   %0: NFS READ will use buffered IO
  *   %1: NFS READ will use dontcache (buffered IO w/ dropbehind)
+ *   %2: NFS READ will use direct IO
  *
  * This setting takes immediate effect for all NFS versions,
  * all exports, and in all NFSD net namespaces.
@@ -64,6 +65,7 @@ static int nfsd_io_cache_read_set(void *data, u64 val)
 		nfsd_io_cache_read = NFSD_IO_BUFFERED;
 		break;
 	case NFSD_IO_DONTCACHE:
+	case NFSD_IO_DIRECT:
 		/*
-		 * Must disable splice_read when enabling
+		 * Must disable splice_read when enabling NFSD_IO_DIRECT or
 		 * NFSD_IO_DONTCACHE.
@@ -106,6 +108,7 @@ static int nfsd_io_cache_write_set(void *data, u64 val)
 	switch (val) {
 	case NFSD_IO_BUFFERED:
 	case NFSD_IO_DONTCACHE:
+	case NFSD_IO_DIRECT:
 		nfsd_io_cache_write = val;
 		break;
 	default:
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index b39d4cbdfd35..441dfbfe2d2b 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -32,7 +32,7 @@
 *
 */
 
-#include <crypto/hash.h>
+#include <crypto/md5.h>
 #include <crypto/sha2.h>
 #include <linux/file.h>
 #include <linux/slab.h>
@@ -92,79 +92,38 @@ nfs4_reset_creds(const struct cred *original)
 	put_cred(revert_creds(original));
 }
 
-static int
+static void
 nfs4_make_rec_clidname(char dname[HEXDIR_LEN], const struct xdr_netobj *clname)
 {
 	u8 digest[MD5_DIGEST_SIZE];
-	struct crypto_shash *tfm;
-	int status;
 
 	dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
 			clname->len, clname->data);
-	tfm = crypto_alloc_shash("md5", 0, 0);
-	if (IS_ERR(tfm)) {
-		status = PTR_ERR(tfm);
-		goto out_no_tfm;
-	}
 
-	status = crypto_shash_tfm_digest(tfm, clname->data, clname->len,
-					 digest);
-	if (status)
-		goto out;
+	md5(clname->data, clname->len, digest);
 
 	static_assert(HEXDIR_LEN == 2 * MD5_DIGEST_SIZE + 1);
 	sprintf(dname, "%*phN", MD5_DIGEST_SIZE, digest);
-
-	status = 0;
-out:
-	crypto_free_shash(tfm);
-out_no_tfm:
-	return status;
-}
-
-/*
- * If we had an error generating the recdir name for the legacy tracker
- * then warn the admin. If the error doesn't appear to be transient,
- * then disable recovery tracking.
- */
-static void
-legacy_recdir_name_error(struct nfs4_client *clp, int error)
-{
-	printk(KERN_ERR "NFSD: unable to generate recoverydir "
-			"name (%d).\n", error);
-
-	/*
-	 * if the algorithm just doesn't exist, then disable the recovery
-	 * tracker altogether. The crypto libs will generally return this if
-	 * FIPS is enabled as well.
-	 */
-	if (error == -ENOENT) {
-		printk(KERN_ERR "NFSD: disabling legacy clientid tracking. "
-			"Reboot recovery will not function correctly!\n");
-		nfsd4_client_tracking_exit(clp->net);
-	}
 }
 
+/*
+ * Record @clp in the reclaim table while the server is still in grace.
+ * nfs4_client_to_reclaim() duplicates the name, so @dname stays owned by
+ * the caller.  The name length is HEXDIR_LEN (terminating NUL included),
+ * the same length load_recdir() uses for these records.
+ */
 static void
 __nfsd4_create_reclaim_record_grace(struct nfs4_client *clp,
-		const char *dname, int len, struct nfsd_net *nn)
+				    char *dname, struct nfsd_net *nn)
 {
-	struct xdr_netobj name;
+	struct xdr_netobj name = { .len = HEXDIR_LEN, .data = dname };
 	struct xdr_netobj princhash = { .len = 0, .data = NULL };
 	struct nfs4_client_reclaim *crp;
 
-	name.data = kmemdup(dname, len, GFP_KERNEL);
-	if (!name.data) {
-		dprintk("%s: failed to allocate memory for name.data!\n",
-			__func__);
-		return;
-	}
-	name.len = len;
 	crp = nfs4_client_to_reclaim(name, princhash, nn);
-	if (!crp) {
-		kfree(name.data);
+	/* Allocation can fail; there is no record to attach the client to. */
+	if (!crp)
 		return;
-	}
 	crp->cr_clp = clp;
 }
 
@@ -182,9 +132,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 	if (!nn->rec_file)
 		return;
 
-	status = nfs4_make_rec_clidname(dname, &clp->cl_name);
-	if (status)
-		return legacy_recdir_name_error(clp, status);
+	nfs4_make_rec_clidname(dname, &clp->cl_name);
 
 	status = nfs4_save_creds(&original_cred);
 	if (status < 0)
@@ -219,8 +167,7 @@ out_end:
 out:
 	if (status == 0) {
 		if (nn->in_grace)
-			__nfsd4_create_reclaim_record_grace(clp, dname,
-					HEXDIR_LEN, nn);
+			__nfsd4_create_reclaim_record_grace(clp, dname, nn);
 		vfs_fsync(nn->rec_file, 0);
 	} else {
 		printk(KERN_ERR "NFSD: failed to write recovery record"
@@ -233,7 +180,7 @@ out_creds:
 	nfs4_reset_creds(original_cred);
 }
 
-typedef int (recdir_func)(struct dentry *, struct dentry *, struct nfsd_net *);
+typedef int (recdir_func)(struct dentry *, char *, struct nfsd_net *);
 
 struct name_list {
 	char name[HEXDIR_LEN];
@@ -287,24 +234,14 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
 	}
 
 	status = iterate_dir(nn->rec_file, &ctx.ctx);
-	inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
 
 	list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
-		if (!status) {
-			struct dentry *dentry;
-			dentry = lookup_one(&nop_mnt_idmap,
-					    &QSTR(entry->name), dir);
-			if (IS_ERR(dentry)) {
-				status = PTR_ERR(dentry);
-				break;
-			}
-			status = f(dir, dentry, nn);
-			dput(dentry);
-		}
+		if (!status)
+			status = f(dir, entry->name, nn);
+
 		list_del(&entry->list);
 		kfree(entry);
 	}
-	inode_unlock(d_inode(dir));
 	nfs4_reset_creds(original_cred);
 
 	list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
@@ -364,9 +301,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
 	if (!nn->rec_file || !test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
 		return;
 
-	status = nfs4_make_rec_clidname(dname, &clp->cl_name);
-	if (status)
-		return legacy_recdir_name_error(clp, status);
+	nfs4_make_rec_clidname(dname, &clp->cl_name);
 
 	status = mnt_want_write_file(nn->rec_file);
 	if (status)
@@ -394,18 +329,19 @@ out:
 }
 
 static int
-purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
+purge_old(struct dentry *parent, char *cname, struct nfsd_net *nn)
 {
 	int status;
+	struct dentry *child;
 	struct xdr_netobj name;
 
-	if (child->d_name.len != HEXDIR_LEN - 1) {
-		printk("%s: illegal name %pd in recovery directory\n",
-				__func__, child);
+	if (strlen(cname) != HEXDIR_LEN - 1) {
+		printk("%s: illegal name %s in recovery directory\n",
+				__func__, cname);
 		/* Keep trying; maybe the others are OK: */
 		return 0;
 	}
-	name.data = kmemdup_nul(child->d_name.name, child->d_name.len, GFP_KERNEL);
+	name.data = kstrdup(cname, GFP_KERNEL);
 	if (!name.data) {
 		dprintk("%s: failed to allocate memory for name.data!\n",
 			__func__);
@@ -415,10 +351,17 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
 	if (nfs4_has_reclaimed_state(name, nn))
 		goto out_free;
 
-	status = vfs_rmdir(&nop_mnt_idmap, d_inode(parent), child, NULL);
-	if (status)
-		printk("failed to remove client recovery directory %pd\n",
-				child);
+	inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
+	child = lookup_one(&nop_mnt_idmap, &QSTR(cname), parent);
+	if (!IS_ERR(child)) {
+		status = vfs_rmdir(&nop_mnt_idmap, d_inode(parent), child, NULL);
+		if (status)
+			printk("failed to remove client recovery directory %pd\n",
+			       child);
+		dput(child);
+	}
+	inode_unlock(d_inode(parent));
+
 out_free:
 	kfree(name.data);
 out:
@@ -449,27 +392,18 @@ out:
 }
 
 static int
-load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
+load_recdir(struct dentry *parent, char *cname, struct nfsd_net *nn)
 {
-	struct xdr_netobj name;
+	struct xdr_netobj name = { .len = HEXDIR_LEN, .data = cname };
 	struct xdr_netobj princhash = { .len = 0, .data = NULL };
 
-	if (child->d_name.len != HEXDIR_LEN - 1) {
-		printk("%s: illegal name %pd in recovery directory\n",
-				__func__, child);
+	if (strlen(cname) != HEXDIR_LEN - 1) {
+		printk("%s: illegal name %s in recovery directory\n",
+				__func__, cname);
 		/* Keep trying; maybe the others are OK: */
 		return 0;
 	}
-	name.data = kmemdup_nul(child->d_name.name, child->d_name.len, GFP_KERNEL);
-	if (!name.data) {
-		dprintk("%s: failed to allocate memory for name.data!\n",
-			__func__);
-		goto out;
-	}
-	name.len = HEXDIR_LEN;
-	if (!nfs4_client_to_reclaim(name, princhash, nn))
-		kfree(name.data);
-out:
+	nfs4_client_to_reclaim(name, princhash, nn);
 	return 0;
 }
 
@@ -647,7 +581,6 @@ nfs4_recoverydir(void)
 static int
 nfsd4_check_legacy_client(struct nfs4_client *clp)
 {
-	int status;
 	char dname[HEXDIR_LEN];
 	struct nfs4_client_reclaim *crp;
 	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
@@ -657,11 +590,7 @@ nfsd4_check_legacy_client(struct nfs4_client *clp)
 	if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
 		return 0;
 
-	status = nfs4_make_rec_clidname(dname, &clp->cl_name);
-	if (status) {
-		legacy_recdir_name_error(clp, status);
-		return status;
-	}
+	nfs4_make_rec_clidname(dname, &clp->cl_name);
 
 	/* look for it in the reclaim hashtable otherwise */
 	name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL);
@@ -767,6 +696,8 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
 {
 	uint8_t cmd, princhashlen;
 	struct xdr_netobj name, princhash = { .len = 0, .data = NULL };
+	char *namecopy __free(kfree) = NULL;
+	char *princhashcopy __free(kfree) = NULL;
 	uint16_t namelen;
 
 	if (get_user(cmd, &cmsg->cm_cmd)) {
@@ -784,19 +715,19 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
 				dprintk("%s: invalid namelen (%u)", __func__, namelen);
 				return -EINVAL;
 			}
-			name.data = memdup_user(&ci->cc_name.cn_id, namelen);
-			if (IS_ERR(name.data))
-				return PTR_ERR(name.data);
+			namecopy = memdup_user(&ci->cc_name.cn_id, namelen);
+			if (IS_ERR(namecopy))
+				return PTR_ERR(namecopy);
+			name.data = namecopy;
 			name.len = namelen;
 			get_user(princhashlen, &ci->cc_princhash.cp_len);
 			if (princhashlen > 0) {
-				princhash.data = memdup_user(
-						&ci->cc_princhash.cp_data,
-						princhashlen);
-				if (IS_ERR(princhash.data)) {
-					kfree(name.data);
-					return PTR_ERR(princhash.data);
-				}
+				princhashcopy = memdup_user(
+					&ci->cc_princhash.cp_data,
+					princhashlen);
+				if (IS_ERR(princhashcopy))
+					return PTR_ERR(princhashcopy);
+				princhash.data = princhashcopy;
 				princhash.len = princhashlen;
 			} else
 				princhash.len = 0;
@@ -810,9 +741,10 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
 				dprintk("%s: invalid namelen (%u)", __func__, namelen);
 				return -EINVAL;
 			}
-			name.data = memdup_user(&cnm->cn_id, namelen);
-			if (IS_ERR(name.data))
-				return PTR_ERR(name.data);
+			namecopy = memdup_user(&cnm->cn_id, namelen);
+			if (IS_ERR(namecopy))
+				return PTR_ERR(namecopy);
+			name.data = namecopy;
 			name.len = namelen;
 		}
 #ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING
@@ -820,15 +752,12 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
 			struct cld_net *cn = nn->cld_net;
 
 			name.len = name.len - 5;
-			memmove(name.data, name.data + 5, name.len);
+			name.data = name.data + 5;
 			cn->cn_has_legacy = true;
 		}
 #endif
-		if (!nfs4_client_to_reclaim(name, princhash, nn)) {
-			kfree(name.data);
-			kfree(princhash.data);
+		if (!nfs4_client_to_reclaim(name, princhash, nn))
 			return -EFAULT;
-		}
 		return nn->client_tracking_ops->msglen;
 	}
 	return -EFAULT;
@@ -1254,13 +1183,10 @@ nfsd4_cld_check(struct nfs4_client *clp)
 
 #ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING
 	if (nn->cld_net->cn_has_legacy) {
-		int status;
 		char dname[HEXDIR_LEN];
 		struct xdr_netobj name;
 
-		status = nfs4_make_rec_clidname(dname, &clp->cl_name);
-		if (status)
-			return -ENOENT;
+		nfs4_make_rec_clidname(dname, &clp->cl_name);
 
 		name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL);
 		if (!name.data) {
@@ -1305,11 +1231,8 @@ nfsd4_cld_check_v2(struct nfs4_client *clp)
 	if (cn->cn_has_legacy) {
 		struct xdr_netobj name;
 		char dname[HEXDIR_LEN];
-		int status;
 
-		status = nfs4_make_rec_clidname(dname, &clp->cl_name);
-		if (status)
-			return -ENOENT;
+		nfs4_make_rec_clidname(dname, &clp->cl_name);
 
 		name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL);
 		if (!name.data) {
@@ -1682,11 +1605,7 @@ nfsd4_cltrack_legacy_recdir(const struct xdr_netobj *name)
 		return NULL;
 	}
 
-	copied = nfs4_make_rec_clidname(result + copied, name);
-	if (copied) {
-		kfree(result);
-		return NULL;
-	}
+	nfs4_make_rec_clidname(result + copied, name);
 
 	return result;
 }
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6791fc239dbd..808c24fb5c9a 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3508,7 +3508,7 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
 	free_svc_cred(&slot->sl_cred);
 	copy_cred(&slot->sl_cred, &resp->rqstp->rq_cred);
 
-	if (!nfsd4_cache_this(resp)) {
+	if (!(resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS)) {
 		slot->sl_flags &= ~NFSD4_SLOT_CACHED;
 		return;
 	}
@@ -3522,41 +3522,6 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
 	return;
 }
 
-/*
- * Encode the replay sequence operation from the slot values.
- * If cachethis is FALSE encode the uncached rep error on the next
- * operation which sets resp->p and increments resp->opcnt for
- * nfs4svc_encode_compoundres.
- *
- */
-static __be32
-nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
-			  struct nfsd4_compoundres *resp)
-{
-	struct nfsd4_op *op;
-	struct nfsd4_slot *slot = resp->cstate.slot;
-
-	/* Encode the replayed sequence operation */
-	op = &args->ops[resp->opcnt - 1];
-	nfsd4_encode_operation(resp, op);
-
-	if (slot->sl_flags & NFSD4_SLOT_CACHED)
-		return op->status;
-	if (args->opcnt == 1) {
-		/*
-		 * The original operation wasn't a solo sequence--we
-		 * always cache those--so this retry must not match the
-		 * original:
-		 */
-		op->status = nfserr_seq_false_retry;
-	} else {
-		op = &args->ops[resp->opcnt++];
-		op->status = nfserr_retry_uncached_rep;
-		nfsd4_encode_operation(resp, op);
-	}
-	return op->status;
-}
-
 /*
  * The sequence operation is not cached because we can use the slot and
  * session values.
@@ -3565,17 +3530,30 @@ static __be32
 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
 			 struct nfsd4_sequence *seq)
 {
+	struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
 	struct nfsd4_slot *slot = resp->cstate.slot;
 	struct xdr_stream *xdr = resp->xdr;
 	__be32 *p;
-	__be32 status;
 
 	dprintk("--> %s slot %p\n", __func__, slot);
 
-	status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp);
-	if (status)
-		return status;
+	/* Always encode the SEQUENCE response. */
+	nfsd4_encode_operation(resp, &args->ops[0]);
+	if (args->opcnt == 1)
+		/* A solo SEQUENCE - nothing was cached */
+		return args->ops[0].status;
 
+	if (!(slot->sl_flags & NFSD4_SLOT_CACHED)) {
+		/* We weren't asked to cache this. */
+		struct nfsd4_op *op;
+
+		op = &args->ops[resp->opcnt++];
+		op->status = nfserr_retry_uncached_rep;
+		nfsd4_encode_operation(resp, op);
+		return op->status;
+	}
+
+	/* return reply from cache */
 	p = xdr_reserve_space(xdr, slot->sl_datalen);
 	if (!p) {
 		WARN_ON_ONCE(1);
@@ -6362,11 +6340,6 @@ nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open *open,
 	return;
 out_no_deleg:
 	open->op_delegate_type = OPEN_DELEGATE_NONE;
-	if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS &&
-	    open->op_delegate_type != OPEN_DELEGATE_NONE) {
-		dprintk("NFSD: WARNING: refusing delegation reclaim\n");
-		open->op_recall = true;
-	}
 
 	/* 4.1 client asking for a delegation? */
 	if (open->op_deleg_want)
@@ -8802,9 +8775,6 @@ nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn)
 
 /*
  * failure => all reset bets are off, nfserr_no_grace...
- *
- * The caller is responsible for freeing name.data if NULL is returned (it
- * will be freed in nfs4_remove_reclaim_record in the normal case).
  */
 struct nfs4_client_reclaim *
 nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash,
@@ -8813,6 +8783,22 @@ nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash,
 	unsigned int strhashval;
 	struct nfs4_client_reclaim *crp;
 
+	name.data = kmemdup(name.data, name.len, GFP_KERNEL);
+	if (!name.data) {
+		dprintk("%s: failed to allocate memory for name.data!\n",
+			__func__);
+		return NULL;
+	}
+	if (princhash.len) {
+		princhash.data = kmemdup(princhash.data, princhash.len, GFP_KERNEL);
+		if (!princhash.data) {
+			dprintk("%s: failed to allocate memory for princhash.data!\n",
+				__func__);
+			kfree(name.data);
+			return NULL;
+		}
+	} else
+		princhash.data = NULL;
 	crp = alloc_reclaim();
 	if (crp) {
 		strhashval = clientstr_hashval(name);
@@ -8824,6 +8810,9 @@ nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash,
 		crp->cr_princhash.len = princhash.len;
 		crp->cr_clp = NULL;
 		nn->reclaim_str_hashtbl_size++;
+	} else {
+		kfree(name.data);
+		kfree(princhash.data);
 	}
 	return crp;
 }
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 67bb9c0b9fcb..30ce5851fe4c 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4472,7 +4472,7 @@ out_err:
 
 static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
 				 struct nfsd4_read *read,
-				 struct file *file, unsigned long maxcount)
+				 unsigned long maxcount)
 {
 	struct xdr_stream *xdr = resp->xdr;
 	unsigned int base = xdr->buf->page_len & ~PAGE_MASK;
@@ -4480,18 +4480,30 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
 	__be32 zero = xdr_zero;
 	__be32 nfserr;
 
-	if (xdr_reserve_space_vec(xdr, maxcount) < 0)
-		return nfserr_resource;
-
-	nfserr = nfsd_iter_read(resp->rqstp, read->rd_fhp, file,
+	nfserr = nfsd_iter_read(resp->rqstp, read->rd_fhp, read->rd_nf,
 				read->rd_offset, &maxcount, base,
 				&read->rd_eof);
 	read->rd_length = maxcount;
 	if (nfserr)
 		return nfserr;
+
+	/*
+	 * svcxdr_encode_opaque_pages() is not used here because
+	 * we don't want to encode subsequent results in this
+	 * COMPOUND into the xdr->buf's tail, but rather those
+	 * results should follow the NFS READ payload in the
+	 * buf's pages.
+	 */
+	if (xdr_reserve_space_vec(xdr, maxcount) < 0)
+		return nfserr_resource;
+
+	/*
+	 * Mark the buffer location of the NFS READ payload so that
+	 * direct placement-capable transports send only the
+	 * payload bytes out-of-band.
+	 */
 	if (svc_encode_result_payload(resp->rqstp, starting_len, maxcount))
 		return nfserr_io;
-	xdr_truncate_encode(xdr, starting_len + xdr_align_size(maxcount));
 
 	write_bytes_to_xdr_buf(xdr->buf, starting_len + maxcount, &zero,
 			       xdr_pad_size(maxcount));
@@ -4530,7 +4542,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 	if (file->f_op->splice_read && splice_ok)
 		nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
 	else
-		nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
+		nfserr = nfsd4_encode_readv(resp, read, maxcount);
 	if (nfserr) {
 		xdr_truncate_encode(xdr, eof_offset);
 		return nfserr;
@@ -5426,7 +5438,7 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
 	if (file->f_op->splice_read && splice_ok)
 		nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
 	else
-		nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
+		nfserr = nfsd4_encode_readv(resp, read, maxcount);
 	if (nfserr)
 		return nfserr;
 
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index b752433c3c2c..e4263326ca4a 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -160,6 +160,7 @@ enum {
 	/* Any new NFSD_IO enum value must be added at the end */
 	NFSD_IO_BUFFERED,
 	NFSD_IO_DONTCACHE,
+	NFSD_IO_DIRECT,
 };
 
 extern u64 nfsd_io_cache_read __read_mostly;
@@ -397,14 +398,13 @@ enum {
 #define	NFSD_CB_GETATTR_TIMEOUT		NFSD_DELEGRETURN_TIMEOUT
 
 /*
- * The following attributes are currently not supported by the NFSv4 server:
+ * The following attributes are not implemented by NFSD:
  *    ARCHIVE       (deprecated anyway)
  *    HIDDEN        (unlikely to be supported any time soon)
  *    MIMETYPE      (unlikely to be supported any time soon)
  *    QUOTA_*       (will be supported in a forthcoming patch)
  *    SYSTEM        (unlikely to be supported any time soon)
  *    TIME_BACKUP   (unlikely to be supported any time soon)
- *    TIME_CREATE   (unlikely to be supported any time soon)
  */
 #define NFSD4_SUPPORTED_ATTRS_WORD0                                                         \
 (FATTR4_WORD0_SUPPORTED_ATTRS   | FATTR4_WORD0_TYPE         | FATTR4_WORD0_FH_EXPIRE_TYPE   \
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 7057ddd7a0a8..b08ae85d53ef 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -249,27 +249,6 @@ int nfsd_nrthreads(struct net *net)
 	return rv;
 }
 
-static int nfsd_init_socks(struct net *net, const struct cred *cred)
-{
-	int error;
-	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-
-	if (!list_empty(&nn->nfsd_serv->sv_permsocks))
-		return 0;
-
-	error = svc_xprt_create(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT,
-				SVC_SOCK_DEFAULTS, cred);
-	if (error < 0)
-		return error;
-
-	error = svc_xprt_create(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT,
-				SVC_SOCK_DEFAULTS, cred);
-	if (error < 0)
-		return error;
-
-	return 0;
-}
-
 static int nfsd_users = 0;
 
 static int nfsd_startup_generic(void)
@@ -377,9 +356,12 @@ static int nfsd_startup_net(struct net *net, const struct cred *cred)
 	ret = nfsd_startup_generic();
 	if (ret)
 		return ret;
-	ret = nfsd_init_socks(net, cred);
-	if (ret)
+
+	if (list_empty(&nn->nfsd_serv->sv_permsocks)) {
+		pr_warn("NFSD: Failed to start, no listeners configured.\n");
+		ret = -EIO;
 		goto out_socks;
+	}
 
 	if (nfsd_needs_lockd(nn) && !nn->lockd_up) {
 		ret = lockd_up(net, cred);
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 6e2c8e2aab10..5ae2a611e57f 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -464,10 +464,13 @@ DEFINE_EVENT(nfsd_io_class, nfsd_##name,	\
 DEFINE_NFSD_IO_EVENT(read_start);
 DEFINE_NFSD_IO_EVENT(read_splice);
 DEFINE_NFSD_IO_EVENT(read_vector);
+DEFINE_NFSD_IO_EVENT(read_direct);
 DEFINE_NFSD_IO_EVENT(read_io_done);
 DEFINE_NFSD_IO_EVENT(read_done);
 DEFINE_NFSD_IO_EVENT(write_start);
 DEFINE_NFSD_IO_EVENT(write_opened);
+DEFINE_NFSD_IO_EVENT(write_direct);
+DEFINE_NFSD_IO_EVENT(write_vector);
 DEFINE_NFSD_IO_EVENT(write_io_done);
 DEFINE_NFSD_IO_EVENT(write_done);
 DEFINE_NFSD_IO_EVENT(commit_start);
@@ -2613,6 +2616,44 @@ DEFINE_EVENT(nfsd_vfs_getattr_class, __name,		\
 DEFINE_NFSD_VFS_GETATTR_EVENT(nfsd_vfs_getattr);
 DEFINE_NFSD_VFS_GETATTR_EVENT(nfsd_vfs_statfs);
 
+DECLARE_EVENT_CLASS(nfsd_pnfs_class,
+	TP_PROTO(
+		const struct nfs4_client *clp,
+		const char *dev,
+		int error
+	),
+	TP_ARGS(clp, dev, error),
+	TP_STRUCT__entry(
+		__sockaddr(addr, sizeof(struct sockaddr_in6))
+		__field(unsigned int, netns_ino)
+		__string(dev, dev)
+		__field(int, error)
+	),
+	TP_fast_assign(
+		__assign_sockaddr(addr, &clp->cl_addr,
+				sizeof(struct sockaddr_in6));
+		__entry->netns_ino = clp->net->ns.inum;
+		__assign_str(dev);
+		__entry->error = error;
+	),
+	TP_printk("client=%pISpc nn=%u dev=%s error=%d",
+		__get_sockaddr(addr),
+		__entry->netns_ino,
+		__get_str(dev),
+		__entry->error
+	)
+);
+
+#define DEFINE_NFSD_PNFS_ERR_EVENT(name)		\
+DEFINE_EVENT(nfsd_pnfs_class, nfsd_pnfs_##name,	\
+	TP_PROTO(					\
+		const struct nfs4_client *clp,		\
+		const char *dev,				\
+		int error				\
+	),						\
+	TP_ARGS(clp, dev, error))
+
+DEFINE_NFSD_PNFS_ERR_EVENT(fence);
 #endif /* _NFSD_TRACE_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 31cbf46b47b1..964cf922ad83 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1075,11 +1075,88 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
 }
 
+/*
+ * The byte range of the client's READ request is expanded on both ends
+ * until it meets the underlying file system's direct I/O alignment
+ * requirements. After the internal read is complete, the byte range of
+ * the NFS READ payload is reduced to the byte range that was originally
+ * requested.
+ *
+ * Note that a direct read can be done only when the xdr_buf containing
+ * the NFS READ reply does not already have contents in its .pages array.
+ * This is due to potentially restrictive alignment requirements on the
+ * read buffer. When .page_len and @base are zero, the .pages array is
+ * guaranteed to be page-aligned.
+ */
+static noinline_for_stack __be32
+nfsd_direct_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+		 struct nfsd_file *nf, loff_t offset, unsigned long *count,
+		 u32 *eof)
+{
+	u64 dio_start, dio_end;
+	unsigned long v, total;
+	struct iov_iter iter;
+	struct kiocb kiocb;
+	ssize_t host_err;
+	size_t len;
+
+	init_sync_kiocb(&kiocb, nf->nf_file);
+	kiocb.ki_flags |= IOCB_DIRECT;
+
+	/* Read a properly-aligned region of bytes into rq_bvec */
+	dio_start = round_down(offset, nf->nf_dio_read_offset_align);
+	dio_end = round_up((u64)offset + *count, nf->nf_dio_read_offset_align);
+
+	kiocb.ki_pos = dio_start;
+
+	v = 0;
+	total = dio_end - dio_start;
+	while (total && v < rqstp->rq_maxpages &&
+	       rqstp->rq_next_page < rqstp->rq_page_end) {
+		len = min_t(size_t, total, PAGE_SIZE);
+		bvec_set_page(&rqstp->rq_bvec[v], *rqstp->rq_next_page,
+			      len, 0);
+
+		total -= len;
+		++rqstp->rq_next_page;
+		++v;
+	}
+
+	trace_nfsd_read_direct(rqstp, fhp, offset, *count - total);
+	iov_iter_bvec(&iter, ITER_DEST, rqstp->rq_bvec, v,
+		      dio_end - dio_start - total);
+
+	host_err = vfs_iocb_iter_read(nf->nf_file, &kiocb, &iter);
+	if (host_err >= 0) {
+		unsigned int pad = offset - dio_start;
+
+		/* The returned payload starts after the pad */
+		rqstp->rq_res.page_base = pad;
+
+		/* Compute the count of bytes to be returned */
+		if (host_err > pad + *count)
+			host_err = *count;
+		else if (host_err > pad)
+			host_err -= pad;
+		else
+			host_err = 0;
+	} else if (unlikely(host_err == -EINVAL)) {
+		struct inode *inode = d_inode(fhp->fh_dentry);
+
+		pr_info_ratelimited("nfsd: Direct I/O alignment failure on %s/%lu\n",
+				    inode->i_sb->s_id, inode->i_ino);
+		host_err = -ESERVERFAULT;
+	}
+
+	return nfsd_finish_read(rqstp, fhp, nf->nf_file, offset, count,
+				eof, host_err);
+}
+
 /**
  * nfsd_iter_read - Perform a VFS read using an iterator
  * @rqstp: RPC transaction context
  * @fhp: file handle of file to be read
- * @file: opened struct file of file to be read
+ * @nf: opened struct nfsd_file of file to be read
  * @offset: starting byte offset
  * @count: IN: requested number of bytes; OUT: number of bytes read
  * @base: offset in first page of read buffer
@@ -1092,9 +1169,10 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
  * returned.
  */
 __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
-		      struct file *file, loff_t offset, unsigned long *count,
+		      struct nfsd_file *nf, loff_t offset, unsigned long *count,
 		      unsigned int base, u32 *eof)
 {
+	struct file *file = nf->nf_file;
 	unsigned long v, total;
 	struct iov_iter iter;
 	struct kiocb kiocb;
@@ -1106,6 +1184,12 @@ __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	switch (nfsd_io_cache_read) {
 	case NFSD_IO_BUFFERED:
 		break;
+	case NFSD_IO_DIRECT:
+		/* When dio_read_offset_align is zero, dio is not supported */
+		if (nf->nf_dio_read_offset_align && !rqstp->rq_res.page_len)
+			return nfsd_direct_read(rqstp, fhp, nf, offset,
+						count, eof);
+		fallthrough;
 	case NFSD_IO_DONTCACHE:
 		if (file->f_op->fop_flags & FOP_DONTCACHE)
 			kiocb.ki_flags = IOCB_DONTCACHE;
@@ -1116,18 +1200,20 @@ __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 
 	v = 0;
 	total = *count;
-	while (total) {
+	while (total && v < rqstp->rq_maxpages &&
+	       rqstp->rq_next_page < rqstp->rq_page_end) {
 		len = min_t(size_t, total, PAGE_SIZE - base);
-		bvec_set_page(&rqstp->rq_bvec[v], *(rqstp->rq_next_page++),
+		bvec_set_page(&rqstp->rq_bvec[v], *rqstp->rq_next_page,
 			      len, base);
+
 		total -= len;
+		++rqstp->rq_next_page;
 		++v;
 		base = 0;
 	}
-	WARN_ON_ONCE(v > rqstp->rq_maxpages);
 
-	trace_nfsd_read_vector(rqstp, fhp, offset, *count);
-	iov_iter_bvec(&iter, ITER_DEST, rqstp->rq_bvec, v, *count);
+	trace_nfsd_read_vector(rqstp, fhp, offset, *count - total);
+	iov_iter_bvec(&iter, ITER_DEST, rqstp->rq_bvec, v, *count - total);
 	host_err = vfs_iocb_iter_read(file, &kiocb, &iter);
 	return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
 }
@@ -1169,6 +1255,136 @@ static int wait_for_concurrent_writes(struct file *file)
 	return err;
 }
 
+struct nfsd_write_dio_seg {
+	struct iov_iter			iter;
+	int				flags;
+};
+
+static unsigned long
+iov_iter_bvec_offset(const struct iov_iter *iter)
+{
+	return (unsigned long)(iter->bvec->bv_offset + iter->iov_offset);
+}
+
+static void
+nfsd_write_dio_seg_init(struct nfsd_write_dio_seg *segment,
+			struct bio_vec *bvec, unsigned int nvecs,
+			unsigned long total, size_t start, size_t len,
+			struct kiocb *iocb)
+{
+	iov_iter_bvec(&segment->iter, ITER_SOURCE, bvec, nvecs, total);
+	if (start)
+		iov_iter_advance(&segment->iter, start);
+	iov_iter_truncate(&segment->iter, len);
+	segment->flags = iocb->ki_flags;
+}
+
+static unsigned int
+nfsd_write_dio_iters_init(struct nfsd_file *nf, struct bio_vec *bvec,
+			  unsigned int nvecs, struct kiocb *iocb,
+			  unsigned long total,
+			  struct nfsd_write_dio_seg segments[3])
+{
+	u32 offset_align = nf->nf_dio_offset_align;
+	loff_t prefix_end, orig_end, middle_end;
+	u32 mem_align = nf->nf_dio_mem_align;
+	size_t prefix, middle, suffix;
+	loff_t offset = iocb->ki_pos;
+	unsigned int nsegs = 0;
+
+	/*
+	 * Check if direct I/O is feasible for this write request.
+	 * If alignments are not available, the write is too small,
+	 * or no alignment can be found, fall back to buffered I/O.
+	 */
+	if (unlikely(!mem_align || !offset_align) ||
+	    unlikely(total < max(offset_align, mem_align)))
+		goto no_dio;
+
+	prefix_end = round_up(offset, offset_align);
+	orig_end = offset + total;
+	middle_end = round_down(orig_end, offset_align);
+
+	prefix = prefix_end - offset;
+	middle = middle_end - prefix_end;
+	suffix = orig_end - middle_end;
+
+	if (!middle)
+		goto no_dio;
+
+	if (prefix)
+		nfsd_write_dio_seg_init(&segments[nsegs++], bvec,
+					nvecs, total, 0, prefix, iocb);
+
+	nfsd_write_dio_seg_init(&segments[nsegs], bvec, nvecs,
+				total, prefix, middle, iocb);
+
+	/*
+	 * Check if the bvec iterator is aligned for direct I/O.
+	 *
+	 * bvecs generated from RPC receive buffers are contiguous: After
+	 * the first bvec, all subsequent bvecs start at bv_offset zero
+	 * (page-aligned). Therefore, only the first bvec is checked.
+	 */
+	if (iov_iter_bvec_offset(&segments[nsegs].iter) & (mem_align - 1))
+		goto no_dio;
+	segments[nsegs].flags |= IOCB_DIRECT;
+	nsegs++;
+
+	if (suffix)
+		nfsd_write_dio_seg_init(&segments[nsegs++], bvec, nvecs, total,
+					prefix + middle, suffix, iocb);
+
+	return nsegs;
+
+no_dio:
+	/* No DIO alignment possible - pack into single non-DIO segment. */
+	nfsd_write_dio_seg_init(&segments[0], bvec, nvecs, total, 0,
+				total, iocb);
+	return 1;
+}
+
+static noinline_for_stack int
+nfsd_direct_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
+		  struct nfsd_file *nf, unsigned int nvecs,
+		  unsigned long *cnt, struct kiocb *kiocb)
+{
+	struct nfsd_write_dio_seg segments[3];
+	struct file *file = nf->nf_file;
+	unsigned int nsegs, i;
+	ssize_t host_err;
+	size_t len;	/* segment size, saved before the write uses the iter */
+
+	nsegs = nfsd_write_dio_iters_init(nf, rqstp->rq_bvec, nvecs,
+					  kiocb, *cnt, segments);
+
+	*cnt = 0;
+	for (i = 0; i < nsegs; i++) {
+		len = iov_iter_count(&segments[i].iter);
+		kiocb->ki_flags = segments[i].flags;
+		if (kiocb->ki_flags & IOCB_DIRECT)
+			trace_nfsd_write_direct(rqstp, fhp, kiocb->ki_pos, len);
+		else {
+			trace_nfsd_write_vector(rqstp, fhp, kiocb->ki_pos, len);
+			/*
+			 * Mark the I/O buffer as evict-able to reduce
+			 * memory contention.
+			 */
+			if (nf->nf_file->f_op->fop_flags & FOP_DONTCACHE)
+				kiocb->ki_flags |= IOCB_DONTCACHE;
+		}
+
+		host_err = vfs_iocb_iter_write(file, kiocb, &segments[i].iter);
+		if (host_err < 0)
+			return host_err;
+		*cnt += host_err;
+		if (host_err < len)
+			break;	/* partial write */
+	}
+
+	return 0;
+}
+
 /**
  * nfsd_vfs_write - write data to an already-open file
  * @rqstp: RPC execution context
@@ -1229,29 +1445,46 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		stable = NFS_UNSTABLE;
 	init_sync_kiocb(&kiocb, file);
 	kiocb.ki_pos = offset;
-	if (stable && !fhp->fh_use_wgather)
-		kiocb.ki_flags |= IOCB_DSYNC;
+	if (likely(!fhp->fh_use_wgather)) {
+		switch (stable) {
+		case NFS_FILE_SYNC:
+			/* persist data and timestamps */
+			kiocb.ki_flags |= IOCB_DSYNC | IOCB_SYNC;
+			break;
+		case NFS_DATA_SYNC:
+			/* persist data only */
+			kiocb.ki_flags |= IOCB_DSYNC;
+			break;
+		}
+	}
 
 	nvecs = xdr_buf_to_bvec(rqstp->rq_bvec, rqstp->rq_maxpages, payload);
-	iov_iter_bvec(&iter, ITER_SOURCE, rqstp->rq_bvec, nvecs, *cnt);
+
 	since = READ_ONCE(file->f_wb_err);
 	if (verf)
 		nfsd_copy_write_verifier(verf, nn);
 
 	switch (nfsd_io_cache_write) {
-	case NFSD_IO_BUFFERED:
+	case NFSD_IO_DIRECT:
+		host_err = nfsd_direct_write(rqstp, fhp, nf, nvecs,
+					     cnt, &kiocb);
 		break;
 	case NFSD_IO_DONTCACHE:
 		if (file->f_op->fop_flags & FOP_DONTCACHE)
 			kiocb.ki_flags |= IOCB_DONTCACHE;
+		fallthrough;
+	case NFSD_IO_BUFFERED:
+		iov_iter_bvec(&iter, ITER_SOURCE, rqstp->rq_bvec, nvecs, *cnt);
+		host_err = vfs_iocb_iter_write(file, &kiocb, &iter);
+		if (host_err < 0)
+			break;
+		*cnt = host_err;
 		break;
 	}
-	host_err = vfs_iocb_iter_write(file, &kiocb, &iter);
 	if (host_err < 0) {
 		commit_reset_write_verifier(nn, rqstp, host_err);
 		goto out_nfserr;
 	}
-	*cnt = host_err;
 	nfsd_stats_io_write_add(nn, exp, *cnt);
 	fsnotify_modify(file);
 	host_err = filemap_check_wb_err(file->f_mapping, since);
@@ -1335,7 +1568,7 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	if (file->f_op->splice_read && nfsd_read_splice_ok(rqstp))
 		err = nfsd_splice_read(rqstp, fhp, file, offset, count, eof);
 	else
-		err = nfsd_iter_read(rqstp, fhp, file, offset, count, 0, eof);
+		err = nfsd_iter_read(rqstp, fhp, nf, offset, count, 0, eof);
 
 	nfsd_file_put(nf);
 	trace_nfsd_read_done(rqstp, fhp, offset, *count);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 09de48c50cbe..ded2900d423f 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -121,7 +121,7 @@ __be32		nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 				unsigned long *count,
 				u32 *eof);
 __be32		nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
-				struct file *file, loff_t offset,
+				struct nfsd_file *nf, loff_t offset,
 				unsigned long *count, unsigned int base,
 				u32 *eof);
 bool		nfsd_read_splice_ok(struct svc_rqst *rqstp);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 1ce8e12ae335..ae75846b3cd7 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -924,27 +924,6 @@ struct nfsd4_compoundres {
 	struct nfsd4_compound_state	cstate;
 };
 
-static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
-{
-	struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
-	return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE;
-}
-
-/*
- * The session reply cache only needs to cache replies that the client
- * actually asked us to.  But it's almost free for us to cache compounds
- * consisting of only a SEQUENCE op, so we may as well cache those too.
- * Also, the protocol doesn't give us a convenient response in the case
- * of a replay of a solo SEQUENCE op that wasn't cached
- * (RETRY_UNCACHED_REP can only be returned in the second op of a
- * compound).
- */
-static inline bool nfsd4_cache_this(struct nfsd4_compoundres *resp)
-{
-	return (resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS)
-		|| nfsd4_is_solo_sequence(resp);
-}
-
 static inline bool nfsd4_last_compound_op(struct svc_rqst *rqstp)
 {
 	struct nfsd4_compoundres *resp = rqstp->rq_resp;
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index c8f0f9458f2c..330e38776bb2 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -12,6 +12,7 @@
 
 /* XXX: a lot of this should really be under fs/lockd. */
 
+#include <linux/exportfs.h>
 #include <linux/in.h>
 #include <linux/in6.h>
 #include <net/ipv6.h>
@@ -307,7 +308,7 @@ void		  nlmsvc_invalidate_all(void);
 int           nlmsvc_unlock_all_by_sb(struct super_block *sb);
 int           nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr);
 
-static inline struct file *nlmsvc_file_file(struct nlm_file *file)
+static inline struct file *nlmsvc_file_file(const struct nlm_file *file)
 {
 	return file->f_file[O_RDONLY] ?
 	       file->f_file[O_RDONLY] : file->f_file[O_WRONLY];
@@ -318,6 +319,12 @@ static inline struct inode *nlmsvc_file_inode(struct nlm_file *file)
 	return file_inode(nlmsvc_file_file(file));
 }
 
+static inline bool
+nlmsvc_file_cannot_lock(const struct nlm_file *file)
+{
+	return exportfs_cannot_lock(nlmsvc_file_file(file)->f_path.dentry->d_sb->s_export_op);
+}
+
 static inline int __nlm_privileged_request4(const struct sockaddr *sap)
 {
 	const struct sockaddr_in *sin = (struct sockaddr_in *)sap;
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 22704c2e5b9b..57f4fd94166a 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -131,7 +131,7 @@ static inline struct svcxprt_rdma *svc_rdma_rqst_rdma(struct svc_rqst *rqstp)
  */
 enum {
 	RPCRDMA_LISTEN_BACKLOG	= 10,
-	RPCRDMA_MAX_REQUESTS	= 64,
+	RPCRDMA_MAX_REQUESTS	= 128,
 	RPCRDMA_MAX_BC_REQUESTS	= 2,
 };
 
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 963bbe251e52..de37069aba90 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -26,6 +26,9 @@ struct svc_sock {
 	void			(*sk_odata)(struct sock *);
 	void			(*sk_owspace)(struct sock *);
 
+	/* For sends (protected by xpt_mutex) */
+	struct bio_vec		*sk_bvec;
+
 	/* private TCP part */
 	/* On-the-wire fragment header: */
 	__be32			sk_marker;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 16ff6c100821..d61cd9b40491 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -68,6 +68,17 @@
 
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
+/*
+ * For UDP:
+ * 1 for header page
+ * enough pages for RPCSVC_MAXPAYLOAD_UDP
+ * 1 in case payload is not aligned
+ * 1 for tail page
+ */
+enum {
+	SUNRPC_MAX_UDP_SENDPAGES = 1 + RPCSVC_MAXPAYLOAD_UDP / PAGE_SIZE + 1 + 1
+};
+
 /* To-do: to avoid tying up an nfsd thread while waiting for a
  * handshake request, the request could instead be deferred.
  */
@@ -740,14 +751,14 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
 	if (svc_xprt_is_dead(xprt))
 		goto out_notconn;
 
-	count = xdr_buf_to_bvec(rqstp->rq_bvec, rqstp->rq_maxpages, xdr);
+	count = xdr_buf_to_bvec(svsk->sk_bvec, SUNRPC_MAX_UDP_SENDPAGES, xdr);
 
-	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
+	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, svsk->sk_bvec,
 		      count, rqstp->rq_res.len);
 	err = sock_sendmsg(svsk->sk_sock, &msg);
 	if (err == -ECONNREFUSED) {
 		/* ICMP error on earlier request. */
-		iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
+		iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, svsk->sk_bvec,
 			      count, rqstp->rq_res.len);
 		err = sock_sendmsg(svsk->sk_sock, &msg);
 	}
@@ -1062,9 +1073,10 @@ static ssize_t svc_tcp_read_marker(struct svc_sock *svsk,
 	return svc_sock_reclen(svsk);
 
 err_too_large:
-	net_notice_ratelimited("svc: %s %s RPC fragment too large: %d\n",
-			       __func__, svsk->sk_xprt.xpt_server->sv_name,
-			       svc_sock_reclen(svsk));
+	net_notice_ratelimited("svc: %s oversized RPC fragment (%u octets) from %pISpc\n",
+			       svsk->sk_xprt.xpt_server->sv_name,
+			       svc_sock_reclen(svsk),
+			       (struct sockaddr *)&svsk->sk_xprt.xpt_remote);
 	svc_xprt_deferred_close(&svsk->sk_xprt);
 err_short:
 	return -EAGAIN;
@@ -1235,19 +1247,19 @@ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
 	int ret;
 
 	/* The stream record marker is copied into a temporary page
-	 * fragment buffer so that it can be included in rq_bvec.
+	 * fragment buffer so that it can be included in sk_bvec.
 	 */
 	buf = page_frag_alloc(&svsk->sk_frag_cache, sizeof(marker),
 			      GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
 	memcpy(buf, &marker, sizeof(marker));
-	bvec_set_virt(rqstp->rq_bvec, buf, sizeof(marker));
+	bvec_set_virt(svsk->sk_bvec, buf, sizeof(marker));
 
-	count = xdr_buf_to_bvec(rqstp->rq_bvec + 1, rqstp->rq_maxpages,
+	count = xdr_buf_to_bvec(svsk->sk_bvec + 1, rqstp->rq_maxpages,
 				&rqstp->rq_res);
 
-	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
+	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, svsk->sk_bvec,
 		      1 + count, sizeof(marker) + rqstp->rq_res.len);
 	ret = sock_sendmsg(svsk->sk_sock, &msg);
 	page_frag_free(buf);
@@ -1392,6 +1404,20 @@ void svc_sock_update_bufs(struct svc_serv *serv)
 	spin_unlock_bh(&serv->sv_lock);
 }
 
+static int svc_sock_sendpages(struct svc_serv *serv, struct socket *sock, int flags)
+{
+	switch (sock->type) {
+	case SOCK_STREAM:
+		/* +1 for TCP record marker */
+		if (flags & SVC_SOCK_TEMPORARY)
+			return svc_serv_maxpages(serv) + 1;
+		return 0;
+	case SOCK_DGRAM:
+		return SUNRPC_MAX_UDP_SENDPAGES;
+	}
+	return -EINVAL;
+}
+
 /*
  * Initialize socket for RPC use and create svc_sock struct
  */
@@ -1402,12 +1428,26 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	struct svc_sock	*svsk;
 	struct sock	*inet;
 	int		pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
+	int		sendpages;
 	unsigned long	pages;
 
+	sendpages = svc_sock_sendpages(serv, sock, flags);
+	if (sendpages < 0)
+		return ERR_PTR(sendpages);
+
 	pages = svc_serv_maxpages(serv);
 	svsk = kzalloc(struct_size(svsk, sk_pages, pages), GFP_KERNEL);
 	if (!svsk)
 		return ERR_PTR(-ENOMEM);
+
+	if (sendpages) {
+		svsk->sk_bvec = kcalloc(sendpages, sizeof(*svsk->sk_bvec), GFP_KERNEL);
+		if (!svsk->sk_bvec) {
+			kfree(svsk);
+			return ERR_PTR(-ENOMEM);
+		}
+	}
+
 	svsk->sk_maxpages = pages;
 
 	inet = sock->sk;
@@ -1419,6 +1459,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 				     inet->sk_protocol,
 				     ntohs(inet_sk(inet)->inet_sport));
 		if (err < 0) {
+			kfree(svsk->sk_bvec);
 			kfree(svsk);
 			return ERR_PTR(err);
 		}
@@ -1636,5 +1677,6 @@ static void svc_sock_free(struct svc_xprt *xprt)
 		sock_release(sock);
 
 	page_frag_cache_drain(&svsk->sk_frag_cache);
+	kfree(svsk->sk_bvec);
 	kfree(svsk);
 }
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 3d7f1413df02..b7b318ad25c4 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -591,12 +591,18 @@ static void svc_rdma_detach(struct svc_xprt *xprt)
 	rdma_disconnect(rdma->sc_cm_id);
 }
 
-static void __svc_rdma_free(struct work_struct *work)
+/**
+ * svc_rdma_free - Release class-specific transport resources
+ * @xprt: Generic svc transport object
+ */
+static void svc_rdma_free(struct svc_xprt *xprt)
 {
 	struct svcxprt_rdma *rdma =
-		container_of(work, struct svcxprt_rdma, sc_work);
+		container_of(xprt, struct svcxprt_rdma, sc_xprt);
 	struct ib_device *device = rdma->sc_cm_id->device;
 
+	might_sleep();
+
 	/* This blocks until the Completion Queues are empty */
 	if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
 		ib_drain_qp(rdma->sc_qp);
@@ -629,15 +635,6 @@ static void __svc_rdma_free(struct work_struct *work)
 	kfree(rdma);
 }
 
-static void svc_rdma_free(struct svc_xprt *xprt)
-{
-	struct svcxprt_rdma *rdma =
-		container_of(xprt, struct svcxprt_rdma, sc_xprt);
-
-	INIT_WORK(&rdma->sc_work, __svc_rdma_free);
-	schedule_work(&rdma->sc_work);
-}
-
 static int svc_rdma_has_wspace(struct svc_xprt *xprt)
 {
 	struct svcxprt_rdma *rdma =
diff --git a/tools/net/sunrpc/xdrgen/generators/__init__.py b/tools/net/sunrpc/xdrgen/generators/__init__.py
index b98574a36a4a..e22632cf38fb 100644
--- a/tools/net/sunrpc/xdrgen/generators/__init__.py
+++ b/tools/net/sunrpc/xdrgen/generators/__init__.py
@@ -2,7 +2,7 @@
 
 """Define a base code generator class"""
 
-import sys
+from pathlib import Path
 from jinja2 import Environment, FileSystemLoader, Template
 
 from xdr_ast import _XdrAst, Specification, _RpcProgram, _XdrTypeSpecifier
@@ -14,8 +14,11 @@ def create_jinja2_environment(language: str, xdr_type: str) -> Environment:
     """Open a set of templates based on output language"""
     match language:
         case "C":
+            templates_dir = (
+                Path(__file__).parent.parent / "templates" / language / xdr_type
+            )
             environment = Environment(
-                loader=FileSystemLoader(sys.path[0] + "/templates/C/" + xdr_type + "/"),
+                loader=FileSystemLoader(templates_dir),
                 trim_blocks=True,
                 lstrip_blocks=True,
             )
@@ -48,9 +51,7 @@ def find_xdr_program_name(root: Specification) -> str:
 
 def header_guard_infix(filename: str) -> str:
     """Extract the header guard infix from the specification filename"""
-    basename = filename.split("/")[-1]
-    program = basename.replace(".x", "")
-    return program.upper()
+    return Path(filename).stem.upper()
 
 
 def kernel_c_type(spec: _XdrTypeSpecifier) -> str:
diff --git a/tools/net/sunrpc/xdrgen/generators/union.py b/tools/net/sunrpc/xdrgen/generators/union.py
index 2cca00e279cd..ad1f214ef22a 100644
--- a/tools/net/sunrpc/xdrgen/generators/union.py
+++ b/tools/net/sunrpc/xdrgen/generators/union.py
@@ -8,7 +8,7 @@ from jinja2 import Environment
 from generators import SourceGenerator
 from generators import create_jinja2_environment, get_jinja2_template
 
-from xdr_ast import _XdrBasic, _XdrUnion, _XdrVoid, get_header_name
+from xdr_ast import _XdrBasic, _XdrUnion, _XdrVoid, _XdrString, get_header_name
 from xdr_ast import _XdrDeclaration, _XdrCaseSpec, public_apis, big_endian
 
 
@@ -40,13 +40,20 @@ def emit_union_case_spec_definition(
     """Emit a definition for an XDR union's case arm"""
     if isinstance(node.arm, _XdrVoid):
         return
-    assert isinstance(node.arm, _XdrBasic)
+    if isinstance(node.arm, _XdrString):
+        type_name = "char *"
+        classifier = ""
+    else:
+        type_name = node.arm.spec.type_name
+        classifier = node.arm.spec.c_classifier
+
+    assert isinstance(node.arm, (_XdrBasic, _XdrString))
     template = get_jinja2_template(environment, "definition", "case_spec")
     print(
         template.render(
             name=node.arm.name,
-            type=node.arm.spec.type_name,
-            classifier=node.arm.spec.c_classifier,
+            type=type_name,
+            classifier=classifier,
         )
     )
 
@@ -84,6 +91,12 @@ def emit_union_case_spec_decoder(
 
     if isinstance(node.arm, _XdrVoid):
         return
+    if isinstance(node.arm, _XdrString):
+        type_name = "char *"
+        classifier = ""
+    else:
+        type_name = node.arm.spec.type_name
+        classifier = node.arm.spec.c_classifier
 
     if big_endian_discriminant:
         template = get_jinja2_template(environment, "decoder", "case_spec_be")
@@ -92,13 +105,13 @@ def emit_union_case_spec_decoder(
     for case in node.values:
         print(template.render(case=case))
 
-    assert isinstance(node.arm, _XdrBasic)
+    assert isinstance(node.arm, (_XdrBasic, _XdrString))
     template = get_jinja2_template(environment, "decoder", node.arm.template)
     print(
         template.render(
             name=node.arm.name,
-            type=node.arm.spec.type_name,
-            classifier=node.arm.spec.c_classifier,
+            type=type_name,
+            classifier=classifier,
         )
     )
 
@@ -169,7 +182,10 @@ def emit_union_case_spec_encoder(
 
     if isinstance(node.arm, _XdrVoid):
         return
-
+    if isinstance(node.arm, _XdrString):
+        type_name = "char *"
+    else:
+        type_name = node.arm.spec.type_name
     if big_endian_discriminant:
         template = get_jinja2_template(environment, "encoder", "case_spec_be")
     else:
@@ -181,7 +197,7 @@ def emit_union_case_spec_encoder(
     print(
         template.render(
             name=node.arm.name,
-            type=node.arm.spec.type_name,
+            type=type_name,
         )
     )
 
diff --git a/tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/close.j2
index 5bf010665f84..3dbd724d7f17 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/close.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/close.j2
@@ -1,3 +1,3 @@
 {# SPDX-License-Identifier: GPL-2.0 #}
 	return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/close.j2
index 5bf010665f84..3dbd724d7f17 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/close.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/close.j2
@@ -1,3 +1,3 @@
 {# SPDX-License-Identifier: GPL-2.0 #}
 	return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/close.j2
index 5bf010665f84..3dbd724d7f17 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/close.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/close.j2
@@ -1,3 +1,3 @@
 {# SPDX-License-Identifier: GPL-2.0 #}
 	return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/variable_length_opaque.j2 b/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/variable_length_opaque.j2
index 9a814de54ae8..65698e20d8cd 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/variable_length_opaque.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/variable_length_opaque.j2
@@ -2,5 +2,5 @@
 {% if annotate %}
 	/* member {{ name }} (variable-length opaque) */
 {% endif %}
-	if (!xdrgen_decode_opaque(xdr, (opaque *)ptr, {{ maxsize }}))
+	if (!xdrgen_decode_opaque(xdr, &ptr->{{ name }}, {{ maxsize }}))
 		return false;
diff --git a/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/close.j2
index 5bf010665f84..3dbd724d7f17 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/close.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/close.j2
@@ -1,3 +1,3 @@
 {# SPDX-License-Identifier: GPL-2.0 #}
 	return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/basic.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/basic.j2
index da4709403dc9..b215e157dfa7 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/basic.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/basic.j2
@@ -14,4 +14,4 @@ xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ name }} *ptr)
 	/* (basic) */
 {% endif %}
 	return xdrgen_decode_{{ type }}(xdr, ptr);
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_array.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_array.j2
index d7c80e472fe3..c8953719e626 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_array.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_array.j2
@@ -22,4 +22,4 @@ xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ classifier }}{{ name }} *ptr
 			return false;
 	}
 	return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_opaque.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_opaque.j2
index bdc7bd24ffb1..c854fc8c74e3 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_opaque.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_opaque.j2
@@ -14,4 +14,4 @@ xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ classifier }}{{ name }} *ptr
 	/* (fixed-length opaque) */
 {% endif %}
 	return xdr_stream_decode_opaque_fixed(xdr, ptr, {{ size }}) == 0;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/string.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/string.j2
index 56c5a17d6a70..bcbc1758aae9 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/string.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/string.j2
@@ -14,4 +14,4 @@ xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ classifier }}{{ name }} *ptr
 	/* (variable-length string) */
 {% endif %}
 	return xdrgen_decode_string(xdr, ptr, {{ maxsize }});
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_array.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_array.j2
index e74ffdd98463..a59cc1f38eed 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_array.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_array.j2
@@ -23,4 +23,4 @@ xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ classifier }}{{ name }} *ptr
 		if (!xdrgen_decode_{{ type }}(xdr, &ptr->element[i]))
 			return false;
 	return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_opaque.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_opaque.j2
index f28f8b228ad5..eb05f53e1041 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_opaque.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_opaque.j2
@@ -14,4 +14,4 @@ xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ classifier }}{{ name }} *ptr
 	/* (variable-length opaque) */
 {% endif %}
 	return xdrgen_decode_opaque(xdr, ptr, {{ maxsize }});
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/basic.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/basic.j2
index 35effe67e4ef..0d21dd0b723a 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/basic.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/basic.j2
@@ -18,4 +18,4 @@ xdrgen_encode_{{ name }}(struct xdr_stream *xdr, const {{ classifier }}{{ name }
 	/* (basic) */
 {% endif %}
 	return xdrgen_encode_{{ type }}(xdr, value);
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_array.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_array.j2
index 95202ad5ad2d..ec8cd6509514 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_array.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_array.j2
@@ -22,4 +22,4 @@ xdrgen_encode_{{ name }}(struct xdr_stream *xdr, const {{ classifier }}{{ name }
 			return false;
 	}
 	return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_opaque.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_opaque.j2
index 9c66a11b9912..b53fa87e1858 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_opaque.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_opaque.j2
@@ -14,4 +14,4 @@ xdrgen_encode_{{ name }}(struct xdr_stream *xdr, const {{ classifier }}{{ name }
 	/* (fixed-length opaque) */
 {% endif %}
 	return xdr_stream_encode_opaque_fixed(xdr, value, {{ size }}) >= 0;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/string.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/string.j2
index 3d490ff180d0..28b81f1d0bd6 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/string.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/string.j2
@@ -14,4 +14,4 @@ xdrgen_encode_{{ name }}(struct xdr_stream *xdr, const {{ classifier }}{{ name }
 	/* (variable-length string) */
 {% endif %}
 	return xdr_stream_encode_opaque(xdr, value.data, value.len) >= 0;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_array.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_array.j2
index 2d2384f64918..ff093c281d51 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_array.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_array.j2
@@ -27,4 +27,4 @@ xdrgen_encode_{{ name }}(struct xdr_stream *xdr, const {{ classifier }}{{ name }
 {% endif %}
 			return false;
 	return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_opaque.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_opaque.j2
index 8508f13c95b9..2e89592fa702 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_opaque.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_opaque.j2
@@ -14,4 +14,4 @@ xdrgen_encode_{{ name }}(struct xdr_stream *xdr, const {{ classifier }}{{ name }
 	/* (variable-length opaque) */
 {% endif %}
 	return xdr_stream_encode_opaque(xdr, value.data, value.len) >= 0;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/declaration/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/declaration/close.j2
new file mode 100644
index 000000000000..816291184e8c
--- /dev/null
+++ b/tools/net/sunrpc/xdrgen/templates/C/union/declaration/close.j2
@@ -0,0 +1,4 @@
+{# SPDX-License-Identifier: GPL-2.0 #}
+
+bool xdrgen_decode_{{ name }}(struct xdr_stream *xdr, struct {{ name }} *ptr);
+bool xdrgen_encode_{{ name }}(struct xdr_stream *xdr, const struct {{ name }} *value);
diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/decoder/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/decoder/close.j2
index fdc2dfd1843b..39d8d6c5094d 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/union/decoder/close.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/union/decoder/close.j2
@@ -1,4 +1,4 @@
 {# SPDX-License-Identifier: GPL-2.0 #}
 	}
 	return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/encoder/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/encoder/close.j2
index fdc2dfd1843b..39d8d6c5094d 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/union/encoder/close.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/union/encoder/close.j2
@@ -1,4 +1,4 @@
 {# SPDX-License-Identifier: GPL-2.0 #}
 	}
 	return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/encoder/string.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/encoder/string.j2
new file mode 100644
index 000000000000..2f035a64f1f4
--- /dev/null
+++ b/tools/net/sunrpc/xdrgen/templates/C/union/encoder/string.j2
@@ -0,0 +1,6 @@
+{# SPDX-License-Identifier: GPL-2.0 #}
+{% if annotate %}
+		/* member {{ name }} (variable-length string) */
+{% endif %}
+		if (!xdrgen_encode_string(xdr, ptr->u.{{ name }}, {{ maxsize }}))
+			return false;
diff --git a/tools/net/sunrpc/xdrgen/xdrgen b/tools/net/sunrpc/xdrgen/xdrgen
index 43762be39252..3afd0547d67c 100755
--- a/tools/net/sunrpc/xdrgen/xdrgen
+++ b/tools/net/sunrpc/xdrgen/xdrgen
@@ -10,8 +10,13 @@ __license__ = "GPL-2.0 only"
 __version__ = "0.2"
 
 import sys
+from pathlib import Path
 import argparse
 
+_XDRGEN_DIR = Path(__file__).resolve().parent
+if str(_XDRGEN_DIR) not in sys.path:
+    sys.path.insert(0, str(_XDRGEN_DIR))
+
 from subcmds import definitions
 from subcmds import declarations
 from subcmds import lint