Linux 5.19

Merge tag 'clk-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/clk/linux
Pull clk fix from Stephen Boyd: "One-liner fix of a NULL pointer deref in the Allwinner clk driver" * tag 'clk-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/clk/linux: clk: sunxi-ng: Fix H6 RTC clock definition
2025-12-07 20:06:24 +00:00 · 2022-07-31 14:03:01 -07:00 · 2022-07-31 09:52:20 -07:00 · 2022-07-31 09:26:53 -07:00 · 2022-07-31 09:21:13 -07:00 · 2022-07-31 09:12:58 -07:00
314 changed files with 3473 additions and 2289 deletions
--- a/.mailmap
+++ b/.mailmap
@@ -60,6 +60,10 @@ Arnd Bergmann <arnd@arndb.de>
 Atish Patra <atishp@atishpatra.org> <atish.patra@wdc.com>
 Axel Dyks <xl@xlsigned.net>
 Axel Lin <axel.lin@gmail.com>
+Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang@linaro.org>
+Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang@spreadtrum.com>
+Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang@unisoc.com>
+Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang7@gmail.com>
 Bart Van Assche <bvanassche@acm.org> <bart.vanassche@sandisk.com>
 Bart Van Assche <bvanassche@acm.org> <bart.vanassche@wdc.com>
 Ben Gardner <bgardner@wabtec.com>
@@ -135,6 +139,8 @@ Frank Rowand <frowand.list@gmail.com> <frowand@mvista.com>
 Frank Zago <fzago@systemfabricworks.com>
 Gao Xiang <xiang@kernel.org> <gaoxiang25@huawei.com>
 Gao Xiang <xiang@kernel.org> <hsiangkao@aol.com>
+Gao Xiang <xiang@kernel.org> <hsiangkao@linux.alibaba.com>
+Gao Xiang <xiang@kernel.org> <hsiangkao@redhat.com>
 Gerald Schaefer <gerald.schaefer@linux.ibm.com> <geraldsc@de.ibm.com>
 Gerald Schaefer <gerald.schaefer@linux.ibm.com> <gerald.schaefer@de.ibm.com>
 Gerald Schaefer <gerald.schaefer@linux.ibm.com> <geraldsc@linux.vnet.ibm.com>
@@ -371,6 +377,7 @@ Sean Nyekjaer <sean@geanix.com> <sean.nyekjaer@prevas.dk>
 Sebastian Reichel <sre@kernel.org> <sebastian.reichel@collabora.co.uk>
 Sebastian Reichel <sre@kernel.org> <sre@debian.org>
 Sedat Dilek <sedat.dilek@gmail.com> <sedat.dilek@credativ.de>
+Seth Forshee <sforshee@kernel.org> <seth.forshee@canonical.com>
 Shiraz Hashim <shiraz.linux.kernel@gmail.com> <shiraz.hashim@st.com>
 Shuah Khan <shuah@kernel.org> <shuahkhan@gmail.com>
 Shuah Khan <shuah@kernel.org> <shuah.khan@hp.com>
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3176,6 +3176,7 @@
 					       no_entry_flush [PPC]
 					       no_uaccess_flush [PPC]
 					       mmio_stale_data=off [X86]
+					       retbleed=off [X86]

 				Exceptions:
 					       This does not have any effect on
@@ -3198,6 +3199,7 @@
 					       mds=full,nosmt [X86]
 					       tsx_async_abort=full,nosmt [X86]
 					       mmio_stale_data=full,nosmt [X86]
+					       retbleed=auto,nosmt [X86]

 	mminit_loglevel=
 			[KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
@@ -5796,6 +5798,24 @@
 			expediting.  Set to zero to disable automatic
 			expediting.

+	srcutree.srcu_max_nodelay [KNL]
+			Specifies the number of no-delay instances
+			per jiffy for which the SRCU grace period
+			worker thread will be rescheduled with zero
+			delay. Beyond this limit, worker thread will
+			be rescheduled with a sleep delay of one jiffy.
+
+	srcutree.srcu_max_nodelay_phase [KNL]
+			Specifies the number of non-sleeping polls of
+			readers per grace-period phase. Beyond this limit,
+			grace period worker thread will be rescheduled
+			with a sleep delay of one jiffy, between each
+			rescan of the readers, for a grace period phase.
+
+	srcutree.srcu_retry_check_delay [KNL]
+			Specifies number of microseconds of non-sleeping
+			delay between each non-sleeping poll of readers.
+
 	srcutree.small_contention_lim [KNL]
 			Specifies the number of update-side contention
 			events per jiffy will be tolerated before
--- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
@@ -167,70 +167,65 @@ properties:
      - in-band-status

  fixed-link:
-    allOf:
-      - if:
-          type: array
-        then:
-          deprecated: true
-          items:
-            - minimum: 0
-              maximum: 31
-              description:
-                Emulated PHY ID, choose any but unique to the all
-                specified fixed-links
+    oneOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32-array
+        deprecated: true
+        items:
+          - minimum: 0
+            maximum: 31
+            description:
+              Emulated PHY ID; choose any value, but it must be
+              unique among all specified fixed-links

-            - enum: [0, 1]
-              description:
-                Duplex configuration. 0 for half duplex or 1 for
-                full duplex
+          - enum: [0, 1]
+            description:
+              Duplex configuration. 0 for half duplex or 1 for
+              full duplex

-            - enum: [10, 100, 1000, 2500, 10000]
-              description:
-                Link speed in Mbits/sec.
+          - enum: [10, 100, 1000, 2500, 10000]
+            description:
+              Link speed in Mbits/sec.

-            - enum: [0, 1]
-              description:
-                Pause configuration. 0 for no pause, 1 for pause
+          - enum: [0, 1]
+            description:
+              Pause configuration. 0 for no pause, 1 for pause

-            - enum: [0, 1]
-              description:
-                Asymmetric pause configuration. 0 for no asymmetric
-                pause, 1 for asymmetric pause
+          - enum: [0, 1]
+            description:
+              Asymmetric pause configuration. 0 for no asymmetric
+              pause, 1 for asymmetric pause
+      - type: object
+        additionalProperties: false
+        properties:
+          speed:
+            description:
+              Link speed.
+            $ref: /schemas/types.yaml#/definitions/uint32
+            enum: [10, 100, 1000, 2500, 10000]

+          full-duplex:
+            $ref: /schemas/types.yaml#/definitions/flag
+            description:
+              Indicates that full-duplex is used. When absent, half
+              duplex is assumed.

-      - if:
-          type: object
-        then:
-          properties:
-            speed:
-              description:
-                Link speed.
-              $ref: /schemas/types.yaml#/definitions/uint32
-              enum: [10, 100, 1000, 2500, 10000]
+          pause:
+            $ref: /schemas/types.yaml#/definitions/flag
+            description:
+              Indicates that pause should be enabled.

-            full-duplex:
-              $ref: /schemas/types.yaml#/definitions/flag
-              description:
-                Indicates that full-duplex is used. When absent, half
-                duplex is assumed.
+          asym-pause:
+            $ref: /schemas/types.yaml#/definitions/flag
+            description:
+              Indicates that asym_pause should be enabled.

-            pause:
-              $ref: /schemas/types.yaml#definitions/flag
-              description:
-                Indicates that pause should be enabled.
+          link-gpios:
+            maxItems: 1
+            description:
+              GPIO to determine if the link is up

-            asym-pause:
-              $ref: /schemas/types.yaml#/definitions/flag
-              description:
-                Indicates that asym_pause should be enabled.
-
-            link-gpios:
-              maxItems: 1
-              description:
-                GPIO to determine if the link is up
-
-          required:
-            - speed
+        required:
+          - speed

 additionalProperties: true

--- a/Documentation/devicetree/bindings/net/fsl,fec.yaml
+++ b/Documentation/devicetree/bindings/net/fsl,fec.yaml
@@ -183,6 +183,7 @@ properties:
      Should specify the gpio for phy reset.

  phy-reset-duration:
+    $ref: /schemas/types.yaml#/definitions/uint32
    deprecated: true
    description:
      Reset duration in milliseconds.  Should present only if property
@@ -191,12 +192,14 @@ properties:
      and 1 millisecond will be used instead.

  phy-reset-active-high:
+    type: boolean
    deprecated: true
    description:
      If present then the reset sequence using the GPIO specified in the
      "phy-reset-gpios" property is reversed (H=reset state, L=operation state).

  phy-reset-post-delay:
+    $ref: /schemas/types.yaml#/definitions/uint32
    deprecated: true
    description:
      Post reset delay in milliseconds. If present then a delay of phy-reset-post-delay
--- a/Documentation/networking/dsa/dsa.rst
+++ b/Documentation/networking/dsa/dsa.rst
@@ -503,26 +503,108 @@ per-port PHY specific details: interface connection, MDIO bus location, etc.
 Driver development
 ==================

-DSA switch drivers need to implement a dsa_switch_ops structure which will
+DSA switch drivers need to implement a ``dsa_switch_ops`` structure which will
 contain the various members described below.

-``register_switch_driver()`` registers this dsa_switch_ops in its internal list
-of drivers to probe for. ``unregister_switch_driver()`` does the exact opposite.
+Probing, registration and device lifetime
+-----------------------------------------

-Unless requested differently by setting the priv_size member accordingly, DSA
-does not allocate any driver private context space.
+DSA switches are regular ``device`` structures on buses (be they platform, SPI,
+I2C, MDIO or otherwise). The DSA framework is not involved in their probing
+with the device core.
+
+Switch registration from the perspective of a driver means passing a valid
+``struct dsa_switch`` pointer to ``dsa_register_switch()``, usually from the
+switch driver's probing function. The following members must be valid in the
+provided structure:
+
+- ``ds->dev``: will be used to parse the switch's OF node or platform data.
+
+- ``ds->num_ports``: will be used to create the port list for this switch, and
+  to validate the port indices provided in the OF node.
+
+- ``ds->ops``: a pointer to the ``dsa_switch_ops`` structure holding the DSA
+  method implementations.
+
+- ``ds->priv``: backpointer to a driver-private data structure which can be
+  retrieved in all further DSA method callbacks.
+
+In addition, the following flags in the ``dsa_switch`` structure may optionally
+be configured to obtain driver-specific behavior from the DSA core. Their
+behavior when set is documented through comments in ``include/net/dsa.h``.
+
+- ``ds->vlan_filtering_is_global``
+
+- ``ds->needs_standalone_vlan_filtering``
+
+- ``ds->configure_vlan_while_not_filtering``
+
+- ``ds->untag_bridge_pvid``
+
+- ``ds->assisted_learning_on_cpu_port``
+
+- ``ds->mtu_enforcement_ingress``
+
+- ``ds->fdb_isolation``
+
+Internally, DSA keeps an array of switch trees (group of switches) global to
+the kernel, and attaches a ``dsa_switch`` structure to a tree on registration.
+The tree ID to which the switch is attached is determined by the first u32
+number of the ``dsa,member`` property of the switch's OF node (0 if missing).
+The switch ID within the tree is determined by the second u32 number of the
+same OF property (0 if missing). Registering multiple switches with the same
+switch ID and tree ID is illegal and will cause an error. Using platform data,
+a single switch and a single switch tree is permitted.
+
+In case of a tree with multiple switches, probing takes place asymmetrically.
+The first N-1 callers of ``dsa_register_switch()`` only add their ports to the
+port list of the tree (``dst->ports``), each port having a backpointer to its
+associated switch (``dp->ds``). Then, these switches exit their
+``dsa_register_switch()`` call early, because ``dsa_tree_setup_routing_table()``
+has determined that the tree is not yet complete (not all ports referenced by
+DSA links are present in the tree's port list). The tree becomes complete when
+the last switch calls ``dsa_register_switch()``, and this triggers the effective
+continuation of initialization (including the call to ``ds->ops->setup()``) for
+all switches within that tree, all as part of the calling context of the last
+switch's probe function.
+
+The opposite of registration takes place when calling ``dsa_unregister_switch()``,
+which removes a switch's ports from the port list of the tree. The entire tree
+is torn down when the first switch unregisters.
+
+It is mandatory for DSA switch drivers to implement the ``shutdown()`` callback
+of their respective bus, and call ``dsa_switch_shutdown()`` from it (a minimal
+version of the full teardown performed by ``dsa_unregister_switch()``).
+The reason is that DSA keeps a reference on the master net device, and if the
+driver for the master device decides to unbind on shutdown, DSA's reference
+will block that operation from finalizing.
+
+Either ``dsa_switch_shutdown()`` or ``dsa_unregister_switch()`` must be called,
+but not both, and the device driver model permits the bus' ``remove()`` method
+to be called even if ``shutdown()`` was already called. Therefore, drivers are
+expected to implement a mutual exclusion method between ``remove()`` and
+``shutdown()`` by setting their drvdata to NULL after any of these has run, and
+checking whether the drvdata is NULL before proceeding to take any action.
+
+After ``dsa_switch_shutdown()`` or ``dsa_unregister_switch()`` was called, no
+further callbacks via the provided ``dsa_switch_ops`` may take place, and the
+driver may free the data structures associated with the ``dsa_switch``.

 Switch configuration
 --------------------

- ``tag_protocol``: this is to indicate what kind of tagging protocol is supported,
-  should be a valid value from the ``dsa_tag_protocol`` enum
+- ``get_tag_protocol``: this is to indicate what kind of tagging protocol is
+  supported, should be a valid value from the ``dsa_tag_protocol`` enum.
+  The returned information does not have to be static; the driver is passed the
+  CPU port number, as well as the tagging protocol of a possibly stacked
+  upstream switch, in case there are hardware limitations in terms of supported
+  tag formats.

- ``probe``: probe routine which will be invoked by the DSA platform device upon
-  registration to test for the presence/absence of a switch device. For MDIO
-  devices, it is recommended to issue a read towards internal registers using
-  the switch pseudo-PHY and return whether this is a supported device. For other
-  buses, return a non-NULL string
+- ``change_tag_protocol``: when the default tagging protocol has compatibility
+  problems with the master or other issues, the driver may support changing it
+  at runtime, either through a device tree property or through sysfs. In that
+  case, further calls to ``get_tag_protocol`` should report the protocol in
+  current use.

 - ``setup``: setup function for the switch, this function is responsible for setting
  up the ``dsa_switch_ops`` private structure with all it needs: register maps,
@@ -535,7 +617,17 @@ Switch configuration
  fully configured and ready to serve any kind of request. It is recommended
  to issue a software reset of the switch during this setup function in order to
  avoid relying on what a previous software agent such as a bootloader/firmware
-  may have previously configured.
+  may have previously configured. The method responsible for undoing any
+  applicable allocations or operations done here is ``teardown``.
+
+- ``port_setup`` and ``port_teardown``: methods for initialization and
+  destruction of per-port data structures. It is mandatory for some operations
+  such as registering and unregistering devlink port regions to be done from
+  these methods, otherwise they are optional. A port will be torn down only if
+  it has been previously set up. It is possible for a port to be set up during
+  probing only to be torn down immediately afterwards, for example in case its
+  PHY cannot be found. In this case, probing of the DSA switch continues
+  without that particular port.

 PHY devices and link management
 -------------------------------
@@ -635,26 +727,198 @@ Power management
  ``BR_STATE_DISABLED`` and propagating changes to the hardware if this port is
  disabled while being a bridge member

+Address databases
+-----------------
+
+Switching hardware is expected to have a table for FDB entries, however not all
+of them are active at the same time. An address database is the subset (partition)
+of FDB entries that is active (can be matched by address learning on RX, or FDB
+lookup on TX) depending on the state of the port. An address database may
+occasionally be called "FID" (Filtering ID) in this document, although the
+underlying implementation may choose whatever is available to the hardware.
+
+For example, all ports that belong to a VLAN-unaware bridge (which is
+*currently* VLAN-unaware) are expected to learn source addresses in the
+database associated by the driver with that bridge (and not with other
+VLAN-unaware bridges). During forwarding and FDB lookup, a packet received on a
+VLAN-unaware bridge port should be able to find a VLAN-unaware FDB entry having
+the same MAC DA as the packet, which is present on another port member of the
+same bridge. At the same time, the FDB lookup process must be able to not find
+an FDB entry having the same MAC DA as the packet, if that entry points towards
+a port which is a member of a different VLAN-unaware bridge (and is therefore
+associated with a different address database).
+
+Similarly, each VLAN of each offloaded VLAN-aware bridge should have an
+associated address database, which is shared by all ports which are members of
+that VLAN, but not shared by ports belonging to different bridges that are
+members of the same VID.
+
+In this context, a VLAN-unaware database means that all packets are expected to
+match on it irrespective of VLAN ID (only MAC address lookup), whereas a
+VLAN-aware database means that packets are supposed to match based on the VLAN
+ID from the classified 802.1Q header (or the pvid if untagged).
+
+At the bridge layer, VLAN-unaware FDB entries have the special VID value of 0,
+whereas VLAN-aware FDB entries have non-zero VID values. Note that a
+VLAN-unaware bridge may have VLAN-aware (non-zero VID) FDB entries, and a
+VLAN-aware bridge may have VLAN-unaware FDB entries. As in hardware, the
+software bridge keeps separate address databases, and offloads to hardware the
+FDB entries belonging to these databases, through switchdev, asynchronously
+relative to the moment when the databases become active or inactive.
+
+When a user port operates in standalone mode, its driver should configure it to
+use a separate database called a port private database. This is different from
+the databases described above, and should impede operation as standalone port
+(packet in, packet out to the CPU port) as little as possible. For example,
+on ingress, it should not attempt to learn the MAC SA of ingress traffic, since
+learning is a bridging layer service and this is a standalone port, therefore
+it would consume useless space. With no address learning, the port private
+database should be empty in a naive implementation, and in this case, all
+received packets should be trivially flooded to the CPU port.
+
+DSA (cascade) and CPU ports are also called "shared" ports because they service
+multiple address databases, and the database that a packet should be associated
+to is usually embedded in the DSA tag. This means that the CPU port may
+simultaneously transport packets coming from a standalone port (which were
+classified by hardware in one address database), and from a bridge port (which
+were classified to a different address database).
+
+Switch drivers which satisfy certain criteria are able to optimize the naive
+configuration by removing the CPU port from the flooding domain of the switch,
+and just program the hardware with FDB entries pointing towards the CPU port
+for which it is known that software is interested in those MAC addresses.
+Packets which do not match a known FDB entry will not be delivered to the CPU,
+which will save CPU cycles required for creating an skb just to drop it.
+
+DSA is able to perform host address filtering for the following kinds of
+addresses:
+
+- Primary unicast MAC addresses of ports (``dev->dev_addr``). These are
+  associated with the port private database of the respective user port,
+  and the driver is notified to install them through ``port_fdb_add`` towards
+  the CPU port.
+
+- Secondary unicast and multicast MAC addresses of ports (addresses added
+  through ``dev_uc_add()`` and ``dev_mc_add()``). These are also associated
+  with the port private database of the respective user port.
+
+- Local/permanent bridge FDB entries (``BR_FDB_LOCAL``). These are the MAC
+  addresses of the bridge ports, for which packets must be terminated locally
+  and not forwarded. They are associated with the address database for that
+  bridge.
+
+- Static bridge FDB entries installed towards foreign (non-DSA) interfaces
+  present in the same bridge as some DSA switch ports. These are also
+  associated with the address database for that bridge.
+
+- Dynamically learned FDB entries on foreign interfaces present in the same
+  bridge as some DSA switch ports, only if ``ds->assisted_learning_on_cpu_port``
+  is set to true by the driver. These are associated with the address database
+  for that bridge.
+
+For various operations detailed below, DSA provides a ``dsa_db`` structure
+which can be of the following types:
+
+- ``DSA_DB_PORT``: the FDB (or MDB) entry to be installed or deleted belongs to
+  the port private database of user port ``db->dp``.
+- ``DSA_DB_BRIDGE``: the entry belongs to one of the address databases of bridge
+  ``db->bridge``. Separation between the VLAN-unaware database and the per-VID
+  databases of this bridge is expected to be done by the driver.
+- ``DSA_DB_LAG``: the entry belongs to the address database of LAG ``db->lag``.
+  Note: ``DSA_DB_LAG`` is currently unused and may be removed in the future.
+
+The drivers which act upon the ``dsa_db`` argument in ``port_fdb_add``,
+``port_mdb_add`` etc should declare ``ds->fdb_isolation`` as true.
+
+DSA associates each offloaded bridge and each offloaded LAG with a one-based ID
+(``struct dsa_bridge :: num``, ``struct dsa_lag :: id``) for the purposes of
+refcounting addresses on shared ports. Drivers may piggyback on DSA's numbering
+scheme (the ID is readable through ``db->bridge.num`` and ``db->lag.id``) or may
+implement their own.
+
+Only the drivers which declare support for FDB isolation are notified of FDB
+entries on the CPU port belonging to ``DSA_DB_PORT`` databases.
+For compatibility/legacy reasons, ``DSA_DB_BRIDGE`` addresses are notified to
+drivers even if they do not support FDB isolation. However, ``db->bridge.num``
+and ``db->lag.id`` are always set to 0 in that case (to denote the lack of
+isolation, for refcounting purposes).
+
+Note that it is not mandatory for a switch driver to implement physically
+separate address databases for each standalone user port. Since FDB entries in
+the port private databases will always point to the CPU port, there is no risk
+for incorrect forwarding decisions. In this case, all standalone ports may
+share the same database, but the reference counting of host-filtered addresses
+(not deleting the FDB entry for a port's MAC address if it's still in use by
+another port) becomes the responsibility of the driver, because DSA is unaware
+that the port databases are in fact shared. This can be achieved by calling
+``dsa_fdb_present_in_other_db()`` and ``dsa_mdb_present_in_other_db()``.
+The downside is that the RX filtering lists of each user port are in fact
+shared, which means that user port A may accept a packet with a MAC DA it
+shouldn't have, only because that MAC address was in the RX filtering list of
+user port B. These packets will still be dropped in software, however.
+
 Bridge layer
 ------------

+Offloading the bridge forwarding plane is optional and handled by the methods
+below. They may be absent, return -EOPNOTSUPP, or ``ds->max_num_bridges`` may
+be non-zero and exceeded, and in this case, joining a bridge port is still
+possible, but the packet forwarding will take place in software, and the ports
+under a software bridge must remain configured in the same way as for
+standalone operation, i.e. have all bridging service functions (address
+learning etc) disabled, and send all received packets to the CPU port only.
+
+Concretely, a port starts offloading the forwarding plane of a bridge once it
+returns success to the ``port_bridge_join`` method, and stops doing so after
+``port_bridge_leave`` has been called. Offloading the bridge means autonomously
+learning FDB entries in accordance with the software bridge port's state, and
+autonomously forwarding (or flooding) received packets without CPU intervention.
+This is optional even when offloading a bridge port. Tagging protocol drivers
+are expected to call ``dsa_default_offload_fwd_mark(skb)`` for packets which
+have already been autonomously forwarded in the forwarding domain of the
+ingress switch port. DSA, through ``dsa_port_devlink_setup()``, considers all
+switch ports part of the same tree ID to be part of the same bridge forwarding
+domain (capable of autonomous forwarding to each other).
+
+Offloading the TX forwarding process of a bridge is a distinct concept from
+simply offloading its forwarding plane, and refers to the ability of certain
+driver and tag protocol combinations to transmit a single skb coming from the
+bridge device's transmit function to potentially multiple egress ports (and
+thereby avoid its cloning in software).
+
+Packets for which the bridge requests this behavior are called data plane
+packets and have ``skb->offload_fwd_mark`` set to true in the tag protocol
+driver's ``xmit`` function. Data plane packets are subject to FDB lookup,
+hardware learning on the CPU port, and do not override the port STP state.
+Additionally, replication of data plane packets (multicast, flooding) is
+handled in hardware and the bridge driver will transmit a single skb for each
+packet that may or may not need replication.
+
+When the TX forwarding offload is enabled, the tag protocol driver is
+responsible to inject packets into the data plane of the hardware towards the
+correct bridging domain (FID) that the port is a part of. The port may be
+VLAN-unaware, and in this case the FID must be equal to the FID used by the
+driver for its VLAN-unaware address database associated with that bridge.
+Alternatively, the bridge may be VLAN-aware, and in that case, it is guaranteed
+that the packet is also VLAN-tagged with the VLAN ID that the bridge processed
+this packet in. It is the responsibility of the hardware to untag the VID on
+the egress-untagged ports, or keep the tag on the egress-tagged ones.
+
 - ``port_bridge_join``: bridge layer function invoked when a given switch port is
  added to a bridge, this function should do what's necessary at the switch
  level to permit the joining port to be added to the relevant logical
  domain for it to ingress/egress traffic with other members of the bridge.
+  By setting the ``tx_fwd_offload`` argument to true, the TX forwarding process
+  of this bridge is also offloaded.

 - ``port_bridge_leave``: bridge layer function invoked when a given switch port is
  removed from a bridge, this function should do what's necessary at the
  switch level to deny the leaving port from ingress/egress traffic from the
-  remaining bridge members. When the port leaves the bridge, it should be aged
-  out at the switch hardware for the switch to (re) learn MAC addresses behind
-  this port.
+  remaining bridge members.

 - ``port_stp_state_set``: bridge layer function invoked when a given switch port STP
  state is computed by the bridge layer and should be propagated to switch
-  hardware to forward/block/learn traffic. The switch driver is responsible for
-  computing a STP state change based on current and asked parameters and perform
-  the relevant ageing based on the intersection results
+  hardware to forward/block/learn traffic.

 - ``port_bridge_flags``: bridge layer function invoked when a port must
  configure its settings for e.g. flooding of unknown traffic or source address
@@ -667,21 +931,11 @@ Bridge layer
  CPU port, and flooding towards the CPU port should also be enabled, due to a
  lack of an explicit address filtering mechanism in the DSA core.

- ``port_bridge_tx_fwd_offload``: bridge layer function invoked after
-  ``port_bridge_join`` when a driver sets ``ds->num_fwd_offloading_bridges`` to
-  a non-zero value. Returning success in this function activates the TX
-  forwarding offload bridge feature for this port, which enables the tagging
-  protocol driver to inject data plane packets towards the bridging domain that
-  the port is a part of. Data plane packets are subject to FDB lookup, hardware
-  learning on the CPU port, and do not override the port STP state.
-  Additionally, replication of data plane packets (multicast, flooding) is
-  handled in hardware and the bridge driver will transmit a single skb for each
-  packet that needs replication. The method is provided as a configuration
-  point for drivers that need to configure the hardware for enabling this
-  feature.
-
- ``port_bridge_tx_fwd_unoffload``: bridge layer function invoked when a driver
-  leaves a bridge port which had the TX forwarding offload feature enabled.
+- ``port_fast_age``: bridge layer function invoked when flushing the
+  dynamically learned FDB entries on the port is necessary. This is called when
+  transitioning from an STP state where learning should take place to an STP
+  state where it shouldn't, or when leaving a bridge, or when address learning
+  is turned off via ``port_bridge_flags``.

 Bridge VLAN filtering
 ---------------------
@@ -697,55 +951,44 @@ Bridge VLAN filtering
  allowed.

 - ``port_vlan_add``: bridge layer function invoked when a VLAN is configured
-  (tagged or untagged) for the given switch port. If the operation is not
-  supported by the hardware, this function should return ``-EOPNOTSUPP`` to
-  inform the bridge code to fallback to a software implementation.
+  (tagged or untagged) for the given switch port. The CPU port becomes a member
+  of a VLAN only if a foreign bridge port is also a member of it (and
+  forwarding needs to take place in software), or the VLAN is installed to the
+  VLAN group of the bridge device itself, for termination purposes
+  (``bridge vlan add dev br0 vid 100 self``). VLANs on shared ports are
+  reference counted and removed when there is no user left. Drivers do not need
+  to manually install a VLAN on the CPU port.

 - ``port_vlan_del``: bridge layer function invoked when a VLAN is removed from the
  given switch port

- ``port_vlan_dump``: bridge layer function invoked with a switchdev callback
-  function that the driver has to call for each VLAN the given port is a member
-  of. A switchdev object is used to carry the VID and bridge flags.
-
 - ``port_fdb_add``: bridge layer function invoked when the bridge wants to install a
  Forwarding Database entry, the switch hardware should be programmed with the
  specified address in the specified VLAN Id in the forwarding database
-  associated with this VLAN ID. If the operation is not supported, this
-  function should return ``-EOPNOTSUPP`` to inform the bridge code to fallback to
-  a software implementation.
-
-.. note:: VLAN ID 0 corresponds to the port private database, which, in the context
-        of DSA, would be its port-based VLAN, used by the associated bridge device.
+  associated with this VLAN ID.

 - ``port_fdb_del``: bridge layer function invoked when the bridge wants to remove a
  Forwarding Database entry, the switch hardware should be programmed to delete
  the specified MAC address from the specified VLAN ID if it was mapped into
  this port forwarding database

- ``port_fdb_dump``: bridge layer function invoked with a switchdev callback
-  function that the driver has to call for each MAC address known to be behind
-  the given port. A switchdev object is used to carry the VID and FDB info.
+- ``port_fdb_dump``: bridge bypass function invoked by ``ndo_fdb_dump`` on the
+  physical DSA port interfaces. Since DSA does not attempt to keep its
+  hardware FDB entries in sync with the software bridge, this method is
+  implemented as a means to view the entries visible on user ports in the
+  hardware database. The entries reported by this function have the ``self``
+  flag in the output of the ``bridge fdb show`` command.

 - ``port_mdb_add``: bridge layer function invoked when the bridge wants to install
-  a multicast database entry. If the operation is not supported, this function
-  should return ``-EOPNOTSUPP`` to inform the bridge code to fallback to a
-  software implementation. The switch hardware should be programmed with the
+  a multicast database entry. The switch hardware should be programmed with the
  specified address in the specified VLAN ID in the forwarding database
  associated with this VLAN ID.

-.. note:: VLAN ID 0 corresponds to the port private database, which, in the context
-        of DSA, would be its port-based VLAN, used by the associated bridge device.
-
 - ``port_mdb_del``: bridge layer function invoked when the bridge wants to remove a
  multicast database entry, the switch hardware should be programmed to delete
  the specified MAC address from the specified VLAN ID if it was mapped into
  this port forwarding database.

- ``port_mdb_dump``: bridge layer function invoked with a switchdev callback
-  function that the driver has to call for each MAC address known to be behind
-  the given port. A switchdev object is used to carry the VID and MDB info.
-
 Link aggregation
 ----------------

--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -1052,11 +1052,7 @@ udp_rmem_min - INTEGER
 	Default: 4K

 udp_wmem_min - INTEGER
-	Minimal size of send buffer used by UDP sockets in moderation.
-	Each UDP socket is able to use the size for sending data, even if
-	total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
-
-	Default: 4K
+	UDP does not have tx memory accounting and this tunable has no effect.

 RAW variables
 =============
@@ -2870,7 +2866,14 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max
 	Default: 4K

 sctp_wmem  - vector of 3 INTEGERs: min, default, max
-	Currently this tunable has no effect.
+	Only the first value ("min") is used, "default" and "max" are
+	ignored.
+
+	min: Minimum size of send buffer that can be used by SCTP sockets.
+	It is guaranteed to each SCTP socket (but not association) even
+	under moderate memory pressure.
+
+	Default: 4K

 addr_scope_policy - INTEGER
 	Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -5658,7 +5658,7 @@ by a string of size ``name_size``.
 	#define KVM_STATS_UNIT_SECONDS		(0x2 << KVM_STATS_UNIT_SHIFT)
 	#define KVM_STATS_UNIT_CYCLES		(0x3 << KVM_STATS_UNIT_SHIFT)
 	#define KVM_STATS_UNIT_BOOLEAN		(0x4 << KVM_STATS_UNIT_SHIFT)
-	#define KVM_STATS_UNIT_MAX		KVM_STATS_UNIT_CYCLES
+	#define KVM_STATS_UNIT_MAX		KVM_STATS_UNIT_BOOLEAN

 	#define KVM_STATS_BASE_SHIFT		8
 	#define KVM_STATS_BASE_MASK		(0xF << KVM_STATS_BASE_SHIFT)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15849,7 +15849,7 @@ PIN CONTROLLER - FREESCALE
 M:	Dong Aisheng <aisheng.dong@nxp.com>
 M:	Fabio Estevam <festevam@gmail.com>
 M:	Shawn Guo <shawnguo@kernel.org>
-M:	Stefan Agner <stefan@agner.ch>
+M:	Jacky Bai <ping.bai@nxp.com>
 R:	Pengutronix Kernel Team <kernel@pengutronix.de>
 L:	linux-gpio@vger.kernel.org
 S:	Maintained
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 19
 SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION =
 NAME = Superb Owl

 # *DOCUMENTATION*
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -438,6 +438,13 @@ config MMU_GATHER_PAGE_SIZE

 config MMU_GATHER_NO_RANGE
 	bool
+	select MMU_GATHER_MERGE_VMAS
+
+config MMU_GATHER_NO_FLUSH_CACHE
+	bool
+
+config MMU_GATHER_MERGE_VMAS
+	bool

 config MMU_GATHER_NO_GATHER
 	bool
--- a/arch/arm/boot/dts/lan966x.dtsi
+++ b/arch/arm/boot/dts/lan966x.dtsi
@@ -38,7 +38,7 @@
 		sys_clk: sys_clk {
 			compatible = "fixed-clock";
 			#clock-cells = <0>;
-			clock-frequency = <162500000>;
+			clock-frequency = <165625000>;
 		};

 		cpu_clk: cpu_clk {
--- a/arch/arm/include/asm/dma.h
+++ b/arch/arm/include/asm/dma.h
@@ -10,7 +10,7 @@
 #else
 #define MAX_DMA_ADDRESS	({ \
 	extern phys_addr_t arm_dma_zone_size; \
-	arm_dma_zone_size && arm_dma_zone_size < (0x10000000 - PAGE_OFFSET) ? \
+	arm_dma_zone_size && arm_dma_zone_size < (0x100000000ULL - PAGE_OFFSET) ? \
 		(PAGE_OFFSET + arm_dma_zone_size) : 0xffffffffUL; })
 #endif

--- a/arch/arm/lib/findbit.S
+++ b/arch/arm/lib/findbit.S
@@ -40,8 +40,8 @@ ENDPROC(_find_first_zero_bit_le)
 * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
 */
 ENTRY(_find_next_zero_bit_le)
-		teq	r1, #0
-		beq	3b
+		cmp	r2, r1
+		bhs	3b
 		ands	ip, r2, #7
 		beq	1b			@ If new byte, goto old routine
 ARM(		ldrb	r3, [r0, r2, lsr #3]	)
@@ -81,8 +81,8 @@ ENDPROC(_find_first_bit_le)
 * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
 */
 ENTRY(_find_next_bit_le)
-		teq	r1, #0
-		beq	3b
+		cmp	r2, r1
+		bhs	3b
 		ands	ip, r2, #7
 		beq	1b			@ If new byte, goto old routine
 ARM(		ldrb	r3, [r0, r2, lsr #3]	)
@@ -115,8 +115,8 @@ ENTRY(_find_first_zero_bit_be)
 ENDPROC(_find_first_zero_bit_be)

 ENTRY(_find_next_zero_bit_be)
-		teq	r1, #0
-		beq	3b
+		cmp	r2, r1
+		bhs	3b
 		ands	ip, r2, #7
 		beq	1b			@ If new byte, goto old routine
 		eor	r3, r2, #0x18		@ big endian byte ordering
@@ -149,8 +149,8 @@ ENTRY(_find_first_bit_be)
 ENDPROC(_find_first_bit_be)

 ENTRY(_find_next_bit_be)
-		teq	r1, #0
-		beq	3b
+		cmp	r2, r1
+		bhs	3b
 		ands	ip, r2, #7
 		beq	1b			@ If new byte, goto old routine
 		eor	r3, r2, #0x18		@ big endian byte ordering
--- a/arch/arm/mach-pxa/corgi.c
+++ b/arch/arm/mach-pxa/corgi.c
@@ -549,7 +549,7 @@ static struct pxa2xx_spi_controller corgi_spi_info = {
 };

 static struct gpiod_lookup_table corgi_spi_gpio_table = {
-	.dev_id = "pxa2xx-spi.1",
+	.dev_id = "spi1",
 	.table = {
 		GPIO_LOOKUP_IDX("gpio-pxa", CORGI_GPIO_ADS7846_CS, "cs", 0, GPIO_ACTIVE_LOW),
 		GPIO_LOOKUP_IDX("gpio-pxa", CORGI_GPIO_LCDCON_CS, "cs", 1, GPIO_ACTIVE_LOW),
--- a/arch/arm/mach-pxa/hx4700.c
+++ b/arch/arm/mach-pxa/hx4700.c
@@ -635,7 +635,7 @@ static struct pxa2xx_spi_controller pxa_ssp2_master_info = {
 };

 static struct gpiod_lookup_table pxa_ssp2_gpio_table = {
-	.dev_id = "pxa2xx-spi.2",
+	.dev_id = "spi2",
 	.table = {
 		GPIO_LOOKUP_IDX("gpio-pxa", GPIO88_HX4700_TSC2046_CS, "cs", 0, GPIO_ACTIVE_LOW),
 		{ },
--- a/arch/arm/mach-pxa/icontrol.c
+++ b/arch/arm/mach-pxa/icontrol.c
@@ -140,7 +140,7 @@ struct platform_device pxa_spi_ssp4 = {
 };

 static struct gpiod_lookup_table pxa_ssp3_gpio_table = {
-	.dev_id = "pxa2xx-spi.3",
+	.dev_id = "spi3",
 	.table = {
 		GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS1, "cs", 0, GPIO_ACTIVE_LOW),
 		GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS2, "cs", 1, GPIO_ACTIVE_LOW),
@@ -149,7 +149,7 @@ static struct gpiod_lookup_table pxa_ssp3_gpio_table = {
 };

 static struct gpiod_lookup_table pxa_ssp4_gpio_table = {
-	.dev_id = "pxa2xx-spi.4",
+	.dev_id = "spi4",
 	.table = {
 		GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS3, "cs", 0, GPIO_ACTIVE_LOW),
 		GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS4, "cs", 1, GPIO_ACTIVE_LOW),
--- a/arch/arm/mach-pxa/littleton.c
+++ b/arch/arm/mach-pxa/littleton.c
@@ -207,7 +207,7 @@ static struct spi_board_info littleton_spi_devices[] __initdata = {
 };

 static struct gpiod_lookup_table littleton_spi_gpio_table = {
-	.dev_id = "pxa2xx-spi.2",
+	.dev_id = "spi2",
 	.table = {
 		GPIO_LOOKUP_IDX("gpio-pxa", LITTLETON_GPIO_LCD_CS, "cs", 0, GPIO_ACTIVE_LOW),
 		{ },
--- a/arch/arm/mach-pxa/magician.c
+++ b/arch/arm/mach-pxa/magician.c
@@ -994,7 +994,7 @@ static struct pxa2xx_spi_controller magician_spi_info = {
 };

 static struct gpiod_lookup_table magician_spi_gpio_table = {
-	.dev_id = "pxa2xx-spi.2",
+	.dev_id = "spi2",
 	.table = {
 		/* NOTICE must be GPIO, incompatibility with hw PXA SPI framing */
 		GPIO_LOOKUP_IDX("gpio-pxa", GPIO14_MAGICIAN_TSC2046_CS, "cs", 0, GPIO_ACTIVE_LOW),
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -578,7 +578,7 @@ static struct pxa2xx_spi_controller spitz_spi_info = {
 };

 static struct gpiod_lookup_table spitz_spi_gpio_table = {
-	.dev_id = "pxa2xx-spi.2",
+	.dev_id = "spi2",
 	.table = {
 		GPIO_LOOKUP_IDX("gpio-pxa", SPITZ_GPIO_ADS7846_CS, "cs", 0, GPIO_ACTIVE_LOW),
 		GPIO_LOOKUP_IDX("gpio-pxa", SPITZ_GPIO_LCDCON_CS, "cs", 1, GPIO_ACTIVE_LOW),
--- a/arch/arm/mach-pxa/z2.c
+++ b/arch/arm/mach-pxa/z2.c
@@ -623,7 +623,7 @@ static struct pxa2xx_spi_controller pxa_ssp2_master_info = {
 };

 static struct gpiod_lookup_table pxa_ssp1_gpio_table = {
-	.dev_id = "pxa2xx-spi.1",
+	.dev_id = "spi1",
 	.table = {
 		GPIO_LOOKUP_IDX("gpio-pxa", GPIO24_ZIPITZ2_WIFI_CS, "cs", 0, GPIO_ACTIVE_LOW),
 		{ },
@@ -631,7 +631,7 @@ static struct gpiod_lookup_table pxa_ssp1_gpio_table = {
 };

 static struct gpiod_lookup_table pxa_ssp2_gpio_table = {
-	.dev_id = "pxa2xx-spi.2",
+	.dev_id = "spi2",
 	.table = {
 		GPIO_LOOKUP_IDX("gpio-pxa", GPIO88_ZIPITZ2_LCD_CS, "cs", 0, GPIO_ACTIVE_LOW),
 		{ },
--- a/arch/csky/include/asm/tlb.h
+++ b/arch/csky/include/asm/tlb.h
@@ -4,21 +4,6 @@
 #define __ASM_CSKY_TLB_H

 #include <asm/cacheflush.h>
-
-#define tlb_start_vma(tlb, vma) \
-	do { \
-		if (!(tlb)->fullmm) \
-			flush_cache_range(vma, (vma)->vm_start, (vma)->vm_end); \
-	}  while (0)
-
-#define tlb_end_vma(tlb, vma) \
-	do { \
-		if (!(tlb)->fullmm) \
-			flush_tlb_range(vma, (vma)->vm_start, (vma)->vm_end); \
-	}  while (0)
-
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
 #include <asm-generic/tlb.h>

 #endif /* __ASM_CSKY_TLB_H */
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -69,7 +69,6 @@ config LOONGARCH
 	select GENERIC_TIME_VSYSCALL
 	select GPIOLIB
 	select HAVE_ARCH_AUDITSYSCALL
-	select HAVE_ARCH_COMPILER_H
 	select HAVE_ARCH_MMAP_RND_BITS if MMU
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
@@ -108,6 +107,7 @@ config LOONGARCH
 	select TRACE_IRQFLAGS_SUPPORT
 	select USE_PERCPU_NUMA_NODE_ID
 	select ZONE_DMA32
+	select MMU_GATHER_MERGE_VMAS if MMU

 config 32BIT
 	bool
--- a/arch/loongarch/include/asm/asmmacro.h
+++ b/arch/loongarch/include/asm/asmmacro.h
@@ -274,16 +274,4 @@
 	nor	\dst, \src, zero
 .endm

-.macro bgt r0 r1 label
-	blt	\r1, \r0, \label
-.endm
-
-.macro bltz r0 label
-	blt	\r0, zero, \label
-.endm
-
-.macro bgez r0 label
-	bge	\r0, zero, \label
-.endm
-
 #endif /* _ASM_ASMMACRO_H */
--- a/arch/loongarch/include/asm/atomic.h
+++ b/arch/loongarch/include/asm/atomic.h
@@ -10,7 +10,6 @@
 #include <linux/types.h>
 #include <asm/barrier.h>
 #include <asm/cmpxchg.h>
-#include <asm/compiler.h>

 #if __SIZEOF_LONG__ == 4
 #define __LL		"ll.w	"
@@ -157,27 +156,25 @@ static inline int arch_atomic_sub_if_positive(int i, atomic_t *v)
 		__asm__ __volatile__(
 		"1:	ll.w	%1, %2		# atomic_sub_if_positive\n"
 		"	addi.w	%0, %1, %3				\n"
-		"	or	%1, %0, $zero				\n"
-		"	blt	%0, $zero, 2f				\n"
+		"	move	%1, %0					\n"
+		"	bltz	%0, 2f					\n"
 		"	sc.w	%1, %2					\n"
-		"	beq	$zero, %1, 1b				\n"
+		"	beqz	%1, 1b					\n"
 		"2:							\n"
 		__WEAK_LLSC_MB
-		: "=&r" (result), "=&r" (temp),
-		  "+" GCC_OFF_SMALL_ASM() (v->counter)
+		: "=&r" (result), "=&r" (temp), "+ZC" (v->counter)
 		: "I" (-i));
 	} else {
 		__asm__ __volatile__(
 		"1:	ll.w	%1, %2		# atomic_sub_if_positive\n"
 		"	sub.w	%0, %1, %3				\n"
-		"	or	%1, %0, $zero				\n"
-		"	blt	%0, $zero, 2f				\n"
+		"	move	%1, %0					\n"
+		"	bltz	%0, 2f					\n"
 		"	sc.w	%1, %2					\n"
-		"	beq	$zero, %1, 1b				\n"
+		"	beqz	%1, 1b					\n"
 		"2:							\n"
 		__WEAK_LLSC_MB
-		: "=&r" (result), "=&r" (temp),
-		  "+" GCC_OFF_SMALL_ASM() (v->counter)
+		: "=&r" (result), "=&r" (temp), "+ZC" (v->counter)
 		: "r" (i));
 	}

@@ -320,27 +317,25 @@ static inline long arch_atomic64_sub_if_positive(long i, atomic64_t *v)
 		__asm__ __volatile__(
 		"1:	ll.d	%1, %2	# atomic64_sub_if_positive	\n"
 		"	addi.d	%0, %1, %3				\n"
-		"	or	%1, %0, $zero				\n"
-		"	blt	%0, $zero, 2f				\n"
+		"	move	%1, %0					\n"
+		"	bltz	%0, 2f					\n"
 		"	sc.d	%1, %2					\n"
-		"	beq	%1, $zero, 1b				\n"
+		"	beqz	%1, 1b					\n"
 		"2:							\n"
 		__WEAK_LLSC_MB
-		: "=&r" (result), "=&r" (temp),
-		  "+" GCC_OFF_SMALL_ASM() (v->counter)
+		: "=&r" (result), "=&r" (temp), "+ZC" (v->counter)
 		: "I" (-i));
 	} else {
 		__asm__ __volatile__(
 		"1:	ll.d	%1, %2	# atomic64_sub_if_positive	\n"
 		"	sub.d	%0, %1, %3				\n"
-		"	or	%1, %0, $zero				\n"
-		"	blt	%0, $zero, 2f				\n"
+		"	move	%1, %0					\n"
+		"	bltz	%0, 2f					\n"
 		"	sc.d	%1, %2					\n"
-		"	beq	%1, $zero, 1b				\n"
+		"	beqz	%1, 1b					\n"
 		"2:							\n"
 		__WEAK_LLSC_MB
-		: "=&r" (result), "=&r" (temp),
-		  "+" GCC_OFF_SMALL_ASM() (v->counter)
+		: "=&r" (result), "=&r" (temp), "+ZC" (v->counter)
 		: "r" (i));
 	}

--- a/arch/loongarch/include/asm/barrier.h
+++ b/arch/loongarch/include/asm/barrier.h
@@ -48,9 +48,9 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 	__asm__ __volatile__(
 		"sltu	%0, %1, %2\n\t"
 #if (__SIZEOF_LONG__ == 4)
-		"sub.w	%0, $r0, %0\n\t"
+		"sub.w	%0, $zero, %0\n\t"
 #elif (__SIZEOF_LONG__ == 8)
-		"sub.d	%0, $r0, %0\n\t"
+		"sub.d	%0, $zero, %0\n\t"
 #endif
 		: "=r" (mask)
 		: "r" (index), "r" (size)
--- a/arch/loongarch/include/asm/cmpxchg.h
+++ b/arch/loongarch/include/asm/cmpxchg.h
@@ -55,9 +55,9 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x,
 	__asm__ __volatile__(						\
 	"1:	" ld "	%0, %2		# __cmpxchg_asm \n"		\
 	"	bne	%0, %z3, 2f			\n"		\
-	"	or	$t0, %z4, $zero			\n"		\
+	"	move	$t0, %z4			\n"		\
 	"	" st "	$t0, %1				\n"		\
-	"	beq	$zero, $t0, 1b			\n"		\
+	"	beqz	$t0, 1b				\n"		\
 	"2:						\n"		\
 	__WEAK_LLSC_MB							\
 	: "=&r" (__ret), "=ZB"(*m)					\
--- a/arch/loongarch/include/asm/compiler.h
+++ b/arch/loongarch/include/asm/compiler.h
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
- */
-#ifndef _ASM_COMPILER_H
-#define _ASM_COMPILER_H
-
-#define GCC_OFF_SMALL_ASM() "ZC"
-
-#define LOONGARCH_ISA_LEVEL "loongarch"
-#define LOONGARCH_ISA_ARCH_LEVEL "arch=loongarch"
-#define LOONGARCH_ISA_LEVEL_RAW loongarch
-#define LOONGARCH_ISA_ARCH_LEVEL_RAW LOONGARCH_ISA_LEVEL_RAW
-
-#endif /* _ASM_COMPILER_H */
--- a/arch/loongarch/include/asm/elf.h
+++ b/arch/loongarch/include/asm/elf.h
@@ -288,8 +288,6 @@ struct arch_elf_state {
 	.interp_fp_abi = LOONGARCH_ABI_FP_ANY,	\
 }

-#define elf_read_implies_exec(ex, exec_stk) (exec_stk == EXSTACK_DEFAULT)
-
 extern int arch_elf_pt_proc(void *ehdr, void *phdr, struct file *elf,
 			    bool is_interp, struct arch_elf_state *state);

--- a/arch/loongarch/include/asm/futex.h
+++ b/arch/loongarch/include/asm/futex.h
@@ -8,7 +8,6 @@
 #include <linux/futex.h>
 #include <linux/uaccess.h>
 #include <asm/barrier.h>
-#include <asm/compiler.h>
 #include <asm/errno.h>

 #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)		\
@@ -17,7 +16,7 @@
 	"1:	ll.w	%1, %4 # __futex_atomic_op\n"		\
 	"	" insn	"				\n"	\
 	"2:	sc.w	$t0, %2				\n"	\
-	"	beq	$t0, $zero, 1b			\n"	\
+	"	beqz	$t0, 1b				\n"	\
 	"3:						\n"	\
 	"	.section .fixup,\"ax\"			\n"	\
 	"4:	li.w	%0, %6				\n"	\
@@ -82,9 +81,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newv
 	"# futex_atomic_cmpxchg_inatomic			\n"
 	"1:	ll.w	%1, %3					\n"
 	"	bne	%1, %z4, 3f				\n"
-	"	or	$t0, %z5, $zero				\n"
+	"	move	$t0, %z5				\n"
 	"2:	sc.w	$t0, %2					\n"
-	"	beq	$zero, $t0, 1b				\n"
+	"	beqz	$t0, 1b					\n"
 	"3:							\n"
 	__WEAK_LLSC_MB
 	"	.section .fixup,\"ax\"				\n"
@@ -95,8 +94,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newv
 	"	"__UA_ADDR "\t1b, 4b				\n"
 	"	"__UA_ADDR "\t2b, 4b				\n"
 	"	.previous					\n"
-	: "+r" (ret), "=&r" (val), "=" GCC_OFF_SMALL_ASM() (*uaddr)
-	: GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval),
+	: "+r" (ret), "=&r" (val), "=ZC" (*uaddr)
+	: "ZC" (*uaddr), "Jr" (oldval), "Jr" (newval),
 	  "i" (-EFAULT)
 	: "memory", "t0");

--- a/arch/loongarch/include/asm/irqflags.h
+++ b/arch/loongarch/include/asm/irqflags.h
@@ -9,7 +9,6 @@

 #include <linux/compiler.h>
 #include <linux/stringify.h>
-#include <asm/compiler.h>
 #include <asm/loongarch.h>

 static inline void arch_local_irq_enable(void)
--- a/arch/loongarch/include/asm/local.h
+++ b/arch/loongarch/include/asm/local.h
@@ -9,7 +9,6 @@
 #include <linux/bitops.h>
 #include <linux/atomic.h>
 #include <asm/cmpxchg.h>
-#include <asm/compiler.h>

 typedef struct {
 	atomic_long_t a;
--- a/arch/loongarch/include/asm/loongson.h
+++ b/arch/loongarch/include/asm/loongson.h
@@ -39,18 +39,6 @@ extern const struct plat_smp_ops loongson3_smp_ops;

 #define MAX_PACKAGES 16

-/* Chip Config register of each physical cpu package */
-extern u64 loongson_chipcfg[MAX_PACKAGES];
-#define LOONGSON_CHIPCFG(id) (*(volatile u32 *)(loongson_chipcfg[id]))
-
-/* Chip Temperature register of each physical cpu package */
-extern u64 loongson_chiptemp[MAX_PACKAGES];
-#define LOONGSON_CHIPTEMP(id) (*(volatile u32 *)(loongson_chiptemp[id]))
-
-/* Freq Control register of each physical cpu package */
-extern u64 loongson_freqctrl[MAX_PACKAGES];
-#define LOONGSON_FREQCTRL(id) (*(volatile u32 *)(loongson_freqctrl[id]))
-
 #define xconf_readl(addr) readl(addr)
 #define xconf_readq(addr) readq(addr)

@@ -58,7 +46,7 @@ static inline void xconf_writel(u32 val, volatile void __iomem *addr)
 {
 	asm volatile (
 	"	st.w	%[v], %[hw], 0	\n"
-	"	ld.b	$r0, %[hw], 0	\n"
+	"	ld.b	$zero, %[hw], 0	\n"
 	:
 	: [hw] "r" (addr), [v] "r" (val)
 	);
@@ -68,7 +56,7 @@ static inline void xconf_writeq(u64 val64, volatile void __iomem *addr)
 {
 	asm volatile (
 	"	st.d	%[v], %[hw], 0	\n"
-	"	ld.b	$r0, %[hw], 0	\n"
+	"	ld.b	$zero, %[hw], 0	\n"
 	:
 	: [hw] "r" (addr),  [v] "r" (val64)
 	);
--- a/arch/loongarch/include/asm/stacktrace.h
+++ b/arch/loongarch/include/asm/stacktrace.h
@@ -23,13 +23,13 @@
 static __always_inline void prepare_frametrace(struct pt_regs *regs)
 {
 	__asm__ __volatile__(
-		/* Save $r1 */
+		/* Save $ra */
 		STORE_ONE_REG(1)
-		/* Use $r1 to save PC */
-		"pcaddi	$r1, 0\n\t"
-		STR_LONG_S " $r1, %0\n\t"
-		/* Restore $r1 */
-		STR_LONG_L " $r1, %1, "STR_LONGSIZE"\n\t"
+		/* Use $ra to save PC */
+		"pcaddi	$ra, 0\n\t"
+		STR_LONG_S " $ra, %0\n\t"
+		/* Restore $ra */
+		STR_LONG_L " $ra, %1, "STR_LONGSIZE"\n\t"
 		STORE_ONE_REG(2)
 		STORE_ONE_REG(3)
 		STORE_ONE_REG(4)
--- a/arch/loongarch/include/asm/thread_info.h
+++ b/arch/loongarch/include/asm/thread_info.h
@@ -44,14 +44,14 @@ struct thread_info {
 }

 /* How to get the thread information struct from C. */
-register struct thread_info *__current_thread_info __asm__("$r2");
+register struct thread_info *__current_thread_info __asm__("$tp");

 static inline struct thread_info *current_thread_info(void)
 {
 	return __current_thread_info;
 }

-register unsigned long current_stack_pointer __asm__("$r3");
+register unsigned long current_stack_pointer __asm__("$sp");

 #endif /* !__ASSEMBLY__ */

--- a/arch/loongarch/include/asm/tlb.h
+++ b/arch/loongarch/include/asm/tlb.h
@@ -137,16 +137,6 @@ static inline void invtlb_all(u32 op, u32 info, u64 addr)
 		);
 }

-/*
- * LoongArch doesn't need any special per-pte or per-vma handling, except
- * we need to flush cache for area to be unmapped.
- */
-#define tlb_start_vma(tlb, vma)					\
-	do {							\
-		if (!(tlb)->fullmm)				\
-			flush_cache_range(vma, vma->vm_start, vma->vm_end); \
-	}  while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
 #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)

 static void tlb_flush(struct mmu_gather *tlb);
--- a/arch/loongarch/include/asm/uaccess.h
+++ b/arch/loongarch/include/asm/uaccess.h
@@ -162,7 +162,7 @@ do {									\
 	"2:							\n"	\
 	"	.section .fixup,\"ax\"				\n"	\
 	"3:	li.w	%0, %3					\n"	\
-	"	or	%1, $r0, $r0				\n"	\
+	"	move	%1, $zero				\n"	\
 	"	b	2b					\n"	\
 	"	.previous					\n"	\
 	"	.section __ex_table,\"a\"			\n"	\
--- a/arch/loongarch/kernel/cacheinfo.c
+++ b/arch/loongarch/kernel/cacheinfo.c
@@ -4,8 +4,9 @@
 *
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */
-#include <asm/cpu-info.h>
 #include <linux/cacheinfo.h>
+#include <asm/bootinfo.h>
+#include <asm/cpu-info.h>

 /* Populates leaf and increments to next leaf */
 #define populate_cache(cache, leaf, c_level, c_type)		\
@@ -17,6 +18,8 @@ do {								\
 	leaf->ways_of_associativity = c->cache.ways;		\
 	leaf->size = c->cache.linesz * c->cache.sets *		\
 		c->cache.ways;					\
+	if (leaf->level > 2)					\
+		leaf->size *= nodes_per_package;		\
 	leaf++;							\
 } while (0)

@@ -95,11 +98,15 @@ static void cache_cpumap_setup(unsigned int cpu)

 int populate_cache_leaves(unsigned int cpu)
 {
-	int level = 1;
+	int level = 1, nodes_per_package = 1;
 	struct cpuinfo_loongarch *c = &current_cpu_data;
 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 	struct cacheinfo *this_leaf = this_cpu_ci->info_list;

+	if (loongson_sysconf.nr_nodes > 1)
+		nodes_per_package = loongson_sysconf.cores_per_package
+					/ loongson_sysconf.cores_per_node;
+
 	if (c->icache.waysize) {
 		populate_cache(dcache, this_leaf, level, CACHE_TYPE_DATA);
 		populate_cache(icache, this_leaf, level++, CACHE_TYPE_INST);
--- a/arch/loongarch/kernel/entry.S
+++ b/arch/loongarch/kernel/entry.S
@@ -27,7 +27,7 @@ SYM_FUNC_START(handle_syscall)

 	addi.d	sp, sp, -PT_SIZE
 	cfi_st	t2, PT_R3
-	cfi_rel_offset  sp, PT_R3
+	cfi_rel_offset	sp, PT_R3
 	st.d	zero, sp, PT_R0
 	csrrd	t2, LOONGARCH_CSR_PRMD
 	st.d	t2, sp, PT_PRMD
@@ -50,7 +50,7 @@ SYM_FUNC_START(handle_syscall)
 	cfi_st	a7, PT_R11
 	csrrd	ra, LOONGARCH_CSR_ERA
 	st.d	ra, sp, PT_ERA
-	cfi_rel_offset ra, PT_ERA
+	cfi_rel_offset	ra, PT_ERA

 	cfi_st	tp, PT_R2
 	cfi_st	u0, PT_R21
--- a/arch/loongarch/kernel/env.c
+++ b/arch/loongarch/kernel/env.c
@@ -17,21 +17,6 @@ u64 efi_system_table;
 struct loongson_system_configuration loongson_sysconf;
 EXPORT_SYMBOL(loongson_sysconf);

-u64 loongson_chipcfg[MAX_PACKAGES];
-u64 loongson_chiptemp[MAX_PACKAGES];
-u64 loongson_freqctrl[MAX_PACKAGES];
-unsigned long long smp_group[MAX_PACKAGES];
-
-static void __init register_addrs_set(u64 *registers, const u64 addr, int num)
-{
-	u64 i;
-
-	for (i = 0; i < num; i++) {
-		*registers = (i << 44) | addr;
-		registers++;
-	}
-}
-
 void __init init_environ(void)
 {
 	int efi_boot = fw_arg0;
@@ -50,11 +35,6 @@ void __init init_environ(void)
 	efi_memmap_init_early(&data);
 	memblock_reserve(data.phys_map & PAGE_MASK,
 			 PAGE_ALIGN(data.size + (data.phys_map & ~PAGE_MASK)));
-
-	register_addrs_set(smp_group, TO_UNCACHE(0x1fe01000), 16);
-	register_addrs_set(loongson_chipcfg, TO_UNCACHE(0x1fe00180), 16);
-	register_addrs_set(loongson_chiptemp, TO_UNCACHE(0x1fe0019c), 16);
-	register_addrs_set(loongson_freqctrl, TO_UNCACHE(0x1fe001d0), 16);
 }

 static int __init init_cpu_fullname(void)
--- a/arch/loongarch/kernel/fpu.S
+++ b/arch/loongarch/kernel/fpu.S
@@ -27,78 +27,78 @@
 	.endm

 	.macro sc_save_fp base
-	EX	fst.d $f0,  \base, (0 * FPU_REG_WIDTH)
-	EX	fst.d $f1,  \base, (1 * FPU_REG_WIDTH)
-	EX	fst.d $f2,  \base, (2 * FPU_REG_WIDTH)
-	EX	fst.d $f3,  \base, (3 * FPU_REG_WIDTH)
-	EX	fst.d $f4,  \base, (4 * FPU_REG_WIDTH)
-	EX	fst.d $f5,  \base, (5 * FPU_REG_WIDTH)
-	EX	fst.d $f6,  \base, (6 * FPU_REG_WIDTH)
-	EX	fst.d $f7,  \base, (7 * FPU_REG_WIDTH)
-	EX	fst.d $f8,  \base, (8 * FPU_REG_WIDTH)
-	EX	fst.d $f9,  \base, (9 * FPU_REG_WIDTH)
-	EX	fst.d $f10, \base, (10 * FPU_REG_WIDTH)
-	EX	fst.d $f11, \base, (11 * FPU_REG_WIDTH)
-	EX	fst.d $f12, \base, (12 * FPU_REG_WIDTH)
-	EX	fst.d $f13, \base, (13 * FPU_REG_WIDTH)
-	EX	fst.d $f14, \base, (14 * FPU_REG_WIDTH)
-	EX	fst.d $f15, \base, (15 * FPU_REG_WIDTH)
-	EX	fst.d $f16, \base, (16 * FPU_REG_WIDTH)
-	EX	fst.d $f17, \base, (17 * FPU_REG_WIDTH)
-	EX	fst.d $f18, \base, (18 * FPU_REG_WIDTH)
-	EX	fst.d $f19, \base, (19 * FPU_REG_WIDTH)
-	EX	fst.d $f20, \base, (20 * FPU_REG_WIDTH)
-	EX	fst.d $f21, \base, (21 * FPU_REG_WIDTH)
-	EX	fst.d $f22, \base, (22 * FPU_REG_WIDTH)
-	EX	fst.d $f23, \base, (23 * FPU_REG_WIDTH)
-	EX	fst.d $f24, \base, (24 * FPU_REG_WIDTH)
-	EX	fst.d $f25, \base, (25 * FPU_REG_WIDTH)
-	EX	fst.d $f26, \base, (26 * FPU_REG_WIDTH)
-	EX	fst.d $f27, \base, (27 * FPU_REG_WIDTH)
-	EX	fst.d $f28, \base, (28 * FPU_REG_WIDTH)
-	EX	fst.d $f29, \base, (29 * FPU_REG_WIDTH)
-	EX	fst.d $f30, \base, (30 * FPU_REG_WIDTH)
-	EX	fst.d $f31, \base, (31 * FPU_REG_WIDTH)
+	EX	fst.d	$f0,  \base, (0 * FPU_REG_WIDTH)
+	EX	fst.d	$f1,  \base, (1 * FPU_REG_WIDTH)
+	EX	fst.d	$f2,  \base, (2 * FPU_REG_WIDTH)
+	EX	fst.d	$f3,  \base, (3 * FPU_REG_WIDTH)
+	EX	fst.d	$f4,  \base, (4 * FPU_REG_WIDTH)
+	EX	fst.d	$f5,  \base, (5 * FPU_REG_WIDTH)
+	EX	fst.d	$f6,  \base, (6 * FPU_REG_WIDTH)
+	EX	fst.d	$f7,  \base, (7 * FPU_REG_WIDTH)
+	EX	fst.d	$f8,  \base, (8 * FPU_REG_WIDTH)
+	EX	fst.d	$f9,  \base, (9 * FPU_REG_WIDTH)
+	EX	fst.d	$f10, \base, (10 * FPU_REG_WIDTH)
+	EX	fst.d	$f11, \base, (11 * FPU_REG_WIDTH)
+	EX	fst.d	$f12, \base, (12 * FPU_REG_WIDTH)
+	EX	fst.d	$f13, \base, (13 * FPU_REG_WIDTH)
+	EX	fst.d	$f14, \base, (14 * FPU_REG_WIDTH)
+	EX	fst.d	$f15, \base, (15 * FPU_REG_WIDTH)
+	EX	fst.d	$f16, \base, (16 * FPU_REG_WIDTH)
+	EX	fst.d	$f17, \base, (17 * FPU_REG_WIDTH)
+	EX	fst.d	$f18, \base, (18 * FPU_REG_WIDTH)
+	EX	fst.d	$f19, \base, (19 * FPU_REG_WIDTH)
+	EX	fst.d	$f20, \base, (20 * FPU_REG_WIDTH)
+	EX	fst.d	$f21, \base, (21 * FPU_REG_WIDTH)
+	EX	fst.d	$f22, \base, (22 * FPU_REG_WIDTH)
+	EX	fst.d	$f23, \base, (23 * FPU_REG_WIDTH)
+	EX	fst.d	$f24, \base, (24 * FPU_REG_WIDTH)
+	EX	fst.d	$f25, \base, (25 * FPU_REG_WIDTH)
+	EX	fst.d	$f26, \base, (26 * FPU_REG_WIDTH)
+	EX	fst.d	$f27, \base, (27 * FPU_REG_WIDTH)
+	EX	fst.d	$f28, \base, (28 * FPU_REG_WIDTH)
+	EX	fst.d	$f29, \base, (29 * FPU_REG_WIDTH)
+	EX	fst.d	$f30, \base, (30 * FPU_REG_WIDTH)
+	EX	fst.d	$f31, \base, (31 * FPU_REG_WIDTH)
 	.endm

 	.macro sc_restore_fp base
-	EX	fld.d $f0,  \base, (0 * FPU_REG_WIDTH)
-	EX	fld.d $f1,  \base, (1 * FPU_REG_WIDTH)
-	EX	fld.d $f2,  \base, (2 * FPU_REG_WIDTH)
-	EX	fld.d $f3,  \base, (3 * FPU_REG_WIDTH)
-	EX	fld.d $f4,  \base, (4 * FPU_REG_WIDTH)
-	EX	fld.d $f5,  \base, (5 * FPU_REG_WIDTH)
-	EX	fld.d $f6,  \base, (6 * FPU_REG_WIDTH)
-	EX	fld.d $f7,  \base, (7 * FPU_REG_WIDTH)
-	EX	fld.d $f8,  \base, (8 * FPU_REG_WIDTH)
-	EX	fld.d $f9,  \base, (9 * FPU_REG_WIDTH)
-	EX	fld.d $f10, \base, (10 * FPU_REG_WIDTH)
-	EX	fld.d $f11, \base, (11 * FPU_REG_WIDTH)
-	EX	fld.d $f12, \base, (12 * FPU_REG_WIDTH)
-	EX	fld.d $f13, \base, (13 * FPU_REG_WIDTH)
-	EX	fld.d $f14, \base, (14 * FPU_REG_WIDTH)
-	EX	fld.d $f15, \base, (15 * FPU_REG_WIDTH)
-	EX	fld.d $f16, \base, (16 * FPU_REG_WIDTH)
-	EX	fld.d $f17, \base, (17 * FPU_REG_WIDTH)
-	EX	fld.d $f18, \base, (18 * FPU_REG_WIDTH)
-	EX	fld.d $f19, \base, (19 * FPU_REG_WIDTH)
-	EX	fld.d $f20, \base, (20 * FPU_REG_WIDTH)
-	EX	fld.d $f21, \base, (21 * FPU_REG_WIDTH)
-	EX	fld.d $f22, \base, (22 * FPU_REG_WIDTH)
-	EX	fld.d $f23, \base, (23 * FPU_REG_WIDTH)
-	EX	fld.d $f24, \base, (24 * FPU_REG_WIDTH)
-	EX	fld.d $f25, \base, (25 * FPU_REG_WIDTH)
-	EX	fld.d $f26, \base, (26 * FPU_REG_WIDTH)
-	EX	fld.d $f27, \base, (27 * FPU_REG_WIDTH)
-	EX	fld.d $f28, \base, (28 * FPU_REG_WIDTH)
-	EX	fld.d $f29, \base, (29 * FPU_REG_WIDTH)
-	EX	fld.d $f30, \base, (30 * FPU_REG_WIDTH)
-	EX	fld.d $f31, \base, (31 * FPU_REG_WIDTH)
+	EX	fld.d	$f0,  \base, (0 * FPU_REG_WIDTH)
+	EX	fld.d	$f1,  \base, (1 * FPU_REG_WIDTH)
+	EX	fld.d	$f2,  \base, (2 * FPU_REG_WIDTH)
+	EX	fld.d	$f3,  \base, (3 * FPU_REG_WIDTH)
+	EX	fld.d	$f4,  \base, (4 * FPU_REG_WIDTH)
+	EX	fld.d	$f5,  \base, (5 * FPU_REG_WIDTH)
+	EX	fld.d	$f6,  \base, (6 * FPU_REG_WIDTH)
+	EX	fld.d	$f7,  \base, (7 * FPU_REG_WIDTH)
+	EX	fld.d	$f8,  \base, (8 * FPU_REG_WIDTH)
+	EX	fld.d	$f9,  \base, (9 * FPU_REG_WIDTH)
+	EX	fld.d	$f10, \base, (10 * FPU_REG_WIDTH)
+	EX	fld.d	$f11, \base, (11 * FPU_REG_WIDTH)
+	EX	fld.d	$f12, \base, (12 * FPU_REG_WIDTH)
+	EX	fld.d	$f13, \base, (13 * FPU_REG_WIDTH)
+	EX	fld.d	$f14, \base, (14 * FPU_REG_WIDTH)
+	EX	fld.d	$f15, \base, (15 * FPU_REG_WIDTH)
+	EX	fld.d	$f16, \base, (16 * FPU_REG_WIDTH)
+	EX	fld.d	$f17, \base, (17 * FPU_REG_WIDTH)
+	EX	fld.d	$f18, \base, (18 * FPU_REG_WIDTH)
+	EX	fld.d	$f19, \base, (19 * FPU_REG_WIDTH)
+	EX	fld.d	$f20, \base, (20 * FPU_REG_WIDTH)
+	EX	fld.d	$f21, \base, (21 * FPU_REG_WIDTH)
+	EX	fld.d	$f22, \base, (22 * FPU_REG_WIDTH)
+	EX	fld.d	$f23, \base, (23 * FPU_REG_WIDTH)
+	EX	fld.d	$f24, \base, (24 * FPU_REG_WIDTH)
+	EX	fld.d	$f25, \base, (25 * FPU_REG_WIDTH)
+	EX	fld.d	$f26, \base, (26 * FPU_REG_WIDTH)
+	EX	fld.d	$f27, \base, (27 * FPU_REG_WIDTH)
+	EX	fld.d	$f28, \base, (28 * FPU_REG_WIDTH)
+	EX	fld.d	$f29, \base, (29 * FPU_REG_WIDTH)
+	EX	fld.d	$f30, \base, (30 * FPU_REG_WIDTH)
+	EX	fld.d	$f31, \base, (31 * FPU_REG_WIDTH)
 	.endm

 	.macro sc_save_fcc base, tmp0, tmp1
 	movcf2gr	\tmp0, $fcc0
-	move	\tmp1, \tmp0
+	move		\tmp1, \tmp0
 	movcf2gr	\tmp0, $fcc1
 	bstrins.d	\tmp1, \tmp0, 15, 8
 	movcf2gr	\tmp0, $fcc2
@@ -113,11 +113,11 @@
 	bstrins.d	\tmp1, \tmp0, 55, 48
 	movcf2gr	\tmp0, $fcc7
 	bstrins.d	\tmp1, \tmp0, 63, 56
-	EX	st.d \tmp1, \base, 0
+	EX	st.d	\tmp1, \base, 0
 	.endm

 	.macro sc_restore_fcc base, tmp0, tmp1
-	EX	ld.d \tmp0, \base, 0
+	EX	ld.d	\tmp0, \base, 0
 	bstrpick.d	\tmp1, \tmp0, 7, 0
 	movgr2cf	$fcc0, \tmp1
 	bstrpick.d	\tmp1, \tmp0, 15, 8
@@ -138,11 +138,11 @@

 	.macro sc_save_fcsr base, tmp0
 	movfcsr2gr	\tmp0, fcsr0
-	EX	st.w \tmp0, \base, 0
+	EX	st.w	\tmp0, \base, 0
 	.endm

 	.macro sc_restore_fcsr base, tmp0
-	EX	ld.w \tmp0, \base, 0
+	EX	ld.w	\tmp0, \base, 0
 	movgr2fcsr	fcsr0, \tmp0
 	.endm

@@ -151,9 +151,9 @@
 */
 SYM_FUNC_START(_save_fp)
 	fpu_save_csr	a0 t1
-	fpu_save_double a0 t1			# clobbers t1
+	fpu_save_double	a0 t1			# clobbers t1
 	fpu_save_cc	a0 t1 t2		# clobbers t1, t2
-	jirl zero, ra, 0
+	jr		ra
 SYM_FUNC_END(_save_fp)
 EXPORT_SYMBOL(_save_fp)

@@ -161,10 +161,10 @@ EXPORT_SYMBOL(_save_fp)
 * Restore a thread's fp context.
 */
 SYM_FUNC_START(_restore_fp)
-	fpu_restore_double a0 t1		# clobbers t1
-	fpu_restore_csr	a0 t1
-	fpu_restore_cc	a0 t1 t2		# clobbers t1, t2
-	jirl zero, ra, 0
+	fpu_restore_double	a0 t1		# clobbers t1
+	fpu_restore_csr		a0 t1
+	fpu_restore_cc		a0 t1 t2	# clobbers t1, t2
+	jr			ra
 SYM_FUNC_END(_restore_fp)

 /*
@@ -216,7 +216,7 @@ SYM_FUNC_START(_init_fpu)
 	movgr2fr.d	$f30, t1
 	movgr2fr.d	$f31, t1

-	jirl zero, ra, 0
+	jr	ra
 SYM_FUNC_END(_init_fpu)

 /*
@@ -225,11 +225,11 @@ SYM_FUNC_END(_init_fpu)
 * a2: fcsr
 */
 SYM_FUNC_START(_save_fp_context)
-	sc_save_fcc a1 t1 t2
-	sc_save_fcsr a2 t1
-	sc_save_fp a0
-	li.w	a0, 0					# success
-	jirl zero, ra, 0
+	sc_save_fcc	a1 t1 t2
+	sc_save_fcsr	a2 t1
+	sc_save_fp	a0
+	li.w		a0, 0				# success
+	jr		ra
 SYM_FUNC_END(_save_fp_context)

 /*
@@ -238,14 +238,14 @@ SYM_FUNC_END(_save_fp_context)
 * a2: fcsr
 */
 SYM_FUNC_START(_restore_fp_context)
-	sc_restore_fp a0
-	sc_restore_fcc a1 t1 t2
-	sc_restore_fcsr a2 t1
-	li.w	a0, 0					# success
-	jirl zero, ra, 0
+	sc_restore_fp	a0
+	sc_restore_fcc	a1 t1 t2
+	sc_restore_fcsr	a2 t1
+	li.w		a0, 0				# success
+	jr		ra
 SYM_FUNC_END(_restore_fp_context)

 SYM_FUNC_START(fault)
 	li.w	a0, -EFAULT				# failure
-	jirl zero, ra, 0
+	jr	ra
 SYM_FUNC_END(fault)
--- a/arch/loongarch/kernel/genex.S
+++ b/arch/loongarch/kernel/genex.S
@@ -28,23 +28,23 @@ SYM_FUNC_START(__arch_cpu_idle)
 	nop
 	idle	0
 	/* end of rollback region */
-1:	jirl	zero, ra, 0
+1:	jr	ra
 SYM_FUNC_END(__arch_cpu_idle)

 SYM_FUNC_START(handle_vint)
 	BACKUP_T0T1
 	SAVE_ALL
 	la.abs	t1, __arch_cpu_idle
-	LONG_L  t0, sp, PT_ERA
+	LONG_L	t0, sp, PT_ERA
 	/* 32 byte rollback region */
 	ori	t0, t0, 0x1f
 	xori	t0, t0, 0x1f
 	bne	t0, t1, 1f
-	LONG_S  t0, sp, PT_ERA
+	LONG_S	t0, sp, PT_ERA
 1:	move	a0, sp
 	move	a1, sp
 	la.abs	t0, do_vint
-	jirl    ra, t0, 0
+	jirl	ra, t0, 0
 	RESTORE_ALL_AND_RET
 SYM_FUNC_END(handle_vint)

@@ -72,7 +72,7 @@ SYM_FUNC_END(except_vec_cex)
 	build_prep_\prep
 	move	a0, sp
 	la.abs	t0, do_\handler
-	jirl    ra, t0, 0
+	jirl	ra, t0, 0
 	RESTORE_ALL_AND_RET
 	SYM_FUNC_END(handle_\exception)
 	.endm
@@ -91,5 +91,5 @@ SYM_FUNC_END(except_vec_cex)

 SYM_FUNC_START(handle_sys)
 	la.abs	t0, handle_syscall
-	jirl    zero, t0, 0
+	jr	t0
 SYM_FUNC_END(handle_sys)
--- a/arch/loongarch/kernel/head.S
+++ b/arch/loongarch/kernel/head.S
@@ -32,7 +32,7 @@ SYM_CODE_START(kernel_entry)			# kernel entry point
 	/* We might not get launched at the address the kernel is linked to,
 	   so we jump there.  */
 	la.abs		t0, 0f
-	jirl		zero, t0, 0
+	jr		t0
 0:
 	la		t0, __bss_start		# clear .bss
 	st.d		zero, t0, 0
@@ -50,7 +50,7 @@ SYM_CODE_START(kernel_entry)			# kernel entry point
 	/* KSave3 used for percpu base, initialized as 0 */
 	csrwr		zero, PERCPU_BASE_KS
 	/* GPR21 used for percpu base (runtime), initialized as 0 */
-	or		u0, zero, zero
+	move		u0, zero

 	la		tp, init_thread_union
 	/* Set the SP after an empty pt_regs.  */
@@ -85,8 +85,8 @@ SYM_CODE_START(smpboot_entry)
 	ld.d		sp, t0, CPU_BOOT_STACK
 	ld.d		tp, t0, CPU_BOOT_TINFO

-	la.abs	t0, 0f
-	jirl	zero, t0, 0
+	la.abs		t0, 0f
+	jr		t0
 0:
 	bl		start_secondary
 SYM_CODE_END(smpboot_entry)
--- a/arch/loongarch/kernel/ptrace.c
+++ b/arch/loongarch/kernel/ptrace.c
@@ -193,7 +193,7 @@ static int fpr_set(struct task_struct *target,
 		   const void *kbuf, const void __user *ubuf)
 {
 	const int fcc_start = NUM_FPU_REGS * sizeof(elf_fpreg_t);
-	const int fcc_end = fcc_start + sizeof(u64);
+	const int fcsr_start = fcc_start + sizeof(u64);
 	int err;

 	BUG_ON(count % sizeof(elf_fpreg_t));
@@ -209,10 +209,12 @@ static int fpr_set(struct task_struct *target,
 	if (err)
 		return err;

-	if (count > 0)
-		err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-					  &target->thread.fpu.fcc,
-					  fcc_start, fcc_end);
+	err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.fpu.fcc, fcc_start,
+				  fcc_start + sizeof(u64));
+	err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.fpu.fcsr, fcsr_start,
+				  fcsr_start + sizeof(u32));

 	return err;
 }
--- a/arch/loongarch/kernel/reset.c
+++ b/arch/loongarch/kernel/reset.c
@@ -13,7 +13,6 @@
 #include <linux/console.h>

 #include <acpi/reboot.h>
-#include <asm/compiler.h>
 #include <asm/idle.h>
 #include <asm/loongarch.h>
 #include <asm/reboot.h>
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -126,7 +126,7 @@ static void __init parse_bios_table(const struct dmi_header *dm)
 	char *dmi_data = (char *)dm;

 	bios_extern = *(dmi_data + SMBIOS_BIOSEXTERN_OFFSET);
-	b_info.bios_size = *(dmi_data + SMBIOS_BIOSSIZE_OFFSET);
+	b_info.bios_size = (*(dmi_data + SMBIOS_BIOSSIZE_OFFSET) + 1) << 6;

 	if (bios_extern & LOONGSON_EFI_ENABLE)
 		set_bit(EFI_BOOT, &efi.flags);
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -278,116 +278,29 @@ void loongson3_cpu_die(unsigned int cpu)
 	mb();
 }

-/*
- * The target CPU should go to XKPRANGE (uncached area) and flush
- * ICache/DCache/VCache before the control CPU can safely disable its clock.
- */
-static void loongson3_play_dead(int *state_addr)
+void play_dead(void)
 {
-	register int val;
-	register void *addr;
+	register uint64_t addr;
 	register void (*init_fn)(void);

-	__asm__ __volatile__(
-		"   li.d %[addr], 0x8000000000000000\n"
-		"1: cacop 0x8, %[addr], 0           \n" /* flush ICache */
-		"   cacop 0x8, %[addr], 1           \n"
-		"   cacop 0x8, %[addr], 2           \n"
-		"   cacop 0x8, %[addr], 3           \n"
-		"   cacop 0x9, %[addr], 0           \n" /* flush DCache */
-		"   cacop 0x9, %[addr], 1           \n"
-		"   cacop 0x9, %[addr], 2           \n"
-		"   cacop 0x9, %[addr], 3           \n"
-		"   addi.w %[sets], %[sets], -1     \n"
-		"   addi.d %[addr], %[addr], 0x40   \n"
-		"   bnez %[sets], 1b                \n"
-		"   li.d %[addr], 0x8000000000000000\n"
-		"2: cacop 0xa, %[addr], 0           \n" /* flush VCache */
-		"   cacop 0xa, %[addr], 1           \n"
-		"   cacop 0xa, %[addr], 2           \n"
-		"   cacop 0xa, %[addr], 3           \n"
-		"   cacop 0xa, %[addr], 4           \n"
-		"   cacop 0xa, %[addr], 5           \n"
-		"   cacop 0xa, %[addr], 6           \n"
-		"   cacop 0xa, %[addr], 7           \n"
-		"   cacop 0xa, %[addr], 8           \n"
-		"   cacop 0xa, %[addr], 9           \n"
-		"   cacop 0xa, %[addr], 10          \n"
-		"   cacop 0xa, %[addr], 11          \n"
-		"   cacop 0xa, %[addr], 12          \n"
-		"   cacop 0xa, %[addr], 13          \n"
-		"   cacop 0xa, %[addr], 14          \n"
-		"   cacop 0xa, %[addr], 15          \n"
-		"   addi.w %[vsets], %[vsets], -1   \n"
-		"   addi.d %[addr], %[addr], 0x40   \n"
-		"   bnez   %[vsets], 2b             \n"
-		"   li.w   %[val], 0x7              \n" /* *state_addr = CPU_DEAD; */
-		"   st.w   %[val], %[state_addr], 0 \n"
-		"   dbar 0                          \n"
-		"   cacop 0x11, %[state_addr], 0    \n" /* flush entry of *state_addr */
-		: [addr] "=&r" (addr), [val] "=&r" (val)
-		: [state_addr] "r" (state_addr),
-		  [sets] "r" (cpu_data[smp_processor_id()].dcache.sets),
-		  [vsets] "r" (cpu_data[smp_processor_id()].vcache.sets));
-
+	idle_task_exit();
 	local_irq_enable();
-	change_csr_ecfg(ECFG0_IM, ECFGF_IPI);
+	set_csr_ecfg(ECFGF_IPI);
+	__this_cpu_write(cpu_state, CPU_DEAD);

-	__asm__ __volatile__(
-		"   idle      0			    \n"
-		"   li.w      $t0, 0x1020	    \n"
-		"   iocsrrd.d %[init_fn], $t0	    \n" /* Get init PC */
-		: [init_fn] "=&r" (addr)
-		: /* No Input */
-		: "a0");
-	init_fn = __va(addr);
+	__smp_mb();
+	do {
+		__asm__ __volatile__("idle 0\n\t");
+		addr = iocsr_read64(LOONGARCH_IOCSR_MBUF0);
+	} while (addr == 0);
+
+	init_fn = (void *)TO_CACHE(addr);
+	iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR);

 	init_fn();
 	unreachable();
 }

-void play_dead(void)
-{
-	int *state_addr;
-	unsigned int cpu = smp_processor_id();
-	void (*play_dead_uncached)(int *s);
-
-	idle_task_exit();
-	play_dead_uncached = (void *)TO_UNCACHE(__pa((unsigned long)loongson3_play_dead));
-	state_addr = &per_cpu(cpu_state, cpu);
-	mb();
-	play_dead_uncached(state_addr);
-}
-
-static int loongson3_enable_clock(unsigned int cpu)
-{
-	uint64_t core_id = cpu_data[cpu].core;
-	uint64_t package_id = cpu_data[cpu].package;
-
-	LOONGSON_FREQCTRL(package_id) |= 1 << (core_id * 4 + 3);
-
-	return 0;
-}
-
-static int loongson3_disable_clock(unsigned int cpu)
-{
-	uint64_t core_id = cpu_data[cpu].core;
-	uint64_t package_id = cpu_data[cpu].package;
-
-	LOONGSON_FREQCTRL(package_id) &= ~(1 << (core_id * 4 + 3));
-
-	return 0;
-}
-
-static int register_loongson3_notifier(void)
-{
-	return cpuhp_setup_state_nocalls(CPUHP_LOONGARCH_SOC_PREPARE,
-					 "loongarch/loongson:prepare",
-					 loongson3_enable_clock,
-					 loongson3_disable_clock);
-}
-early_initcall(register_loongson3_notifier);
-
 #endif

 /*
--- a/arch/loongarch/kernel/switch.S
+++ b/arch/loongarch/kernel/switch.S
@@ -24,8 +24,8 @@ SYM_FUNC_START(__switch_to)
 	move	tp, a2
 	cpu_restore_nonscratch a1

-	li.w	t0, _THREAD_SIZE - 32
-	PTR_ADD	t0, t0, tp
+	li.w		t0, _THREAD_SIZE - 32
+	PTR_ADD		t0, t0, tp
 	set_saved_sp	t0, t1, t2

 	ldptr.d	t1, a1, THREAD_CSRPRMD
--- a/arch/loongarch/lib/clear_user.S
+++ b/arch/loongarch/lib/clear_user.S
@@ -32,7 +32,7 @@ SYM_FUNC_START(__clear_user)
 1:	st.b	zero, a0, 0
 	addi.d	a0, a0, 1
 	addi.d	a1, a1, -1
-	bgt	a1, zero, 1b
+	bgtz	a1, 1b

 2:	move	a0, a1
 	jr	ra
--- a/arch/loongarch/lib/copy_user.S
+++ b/arch/loongarch/lib/copy_user.S
@@ -35,7 +35,7 @@ SYM_FUNC_START(__copy_user)
 	addi.d	a0, a0, 1
 	addi.d	a1, a1, 1
 	addi.d	a2, a2, -1
-	bgt	a2, zero, 1b
+	bgtz	a2, 1b

 3:	move	a0, a2
 	jr	ra
--- a/arch/loongarch/lib/delay.c
+++ b/arch/loongarch/lib/delay.c
@@ -7,7 +7,6 @@
 #include <linux/smp.h>
 #include <linux/timex.h>

-#include <asm/compiler.h>
 #include <asm/processor.h>

 void __delay(unsigned long cycles)
--- a/arch/loongarch/mm/page.S
+++ b/arch/loongarch/mm/page.S
@@ -10,75 +10,75 @@

 	.align 5
 SYM_FUNC_START(clear_page)
-	lu12i.w  t0, 1 << (PAGE_SHIFT - 12)
-	add.d    t0, t0, a0
+	lu12i.w	t0, 1 << (PAGE_SHIFT - 12)
+	add.d	t0, t0, a0
 1:
-	st.d     zero, a0, 0
-	st.d     zero, a0, 8
-	st.d     zero, a0, 16
-	st.d     zero, a0, 24
-	st.d     zero, a0, 32
-	st.d     zero, a0, 40
-	st.d     zero, a0, 48
-	st.d     zero, a0, 56
-	addi.d   a0,   a0, 128
-	st.d     zero, a0, -64
-	st.d     zero, a0, -56
-	st.d     zero, a0, -48
-	st.d     zero, a0, -40
-	st.d     zero, a0, -32
-	st.d     zero, a0, -24
-	st.d     zero, a0, -16
-	st.d     zero, a0, -8
-	bne      t0,   a0, 1b
+	st.d	zero, a0, 0
+	st.d	zero, a0, 8
+	st.d	zero, a0, 16
+	st.d	zero, a0, 24
+	st.d	zero, a0, 32
+	st.d	zero, a0, 40
+	st.d	zero, a0, 48
+	st.d	zero, a0, 56
+	addi.d	a0,   a0, 128
+	st.d	zero, a0, -64
+	st.d	zero, a0, -56
+	st.d	zero, a0, -48
+	st.d	zero, a0, -40
+	st.d	zero, a0, -32
+	st.d	zero, a0, -24
+	st.d	zero, a0, -16
+	st.d	zero, a0, -8
+	bne	t0,   a0, 1b

-	jirl     $r0, ra, 0
+	jr	ra
 SYM_FUNC_END(clear_page)
 EXPORT_SYMBOL(clear_page)

 .align 5
 SYM_FUNC_START(copy_page)
-	lu12i.w  t8, 1 << (PAGE_SHIFT - 12)
-	add.d    t8, t8, a0
+	lu12i.w	t8, 1 << (PAGE_SHIFT - 12)
+	add.d	t8, t8, a0
 1:
-	ld.d     t0, a1,  0
-	ld.d     t1, a1,  8
-	ld.d     t2, a1,  16
-	ld.d     t3, a1,  24
-	ld.d     t4, a1,  32
-	ld.d     t5, a1,  40
-	ld.d     t6, a1,  48
-	ld.d     t7, a1,  56
+	ld.d	t0, a1, 0
+	ld.d	t1, a1, 8
+	ld.d	t2, a1, 16
+	ld.d	t3, a1, 24
+	ld.d	t4, a1, 32
+	ld.d	t5, a1, 40
+	ld.d	t6, a1, 48
+	ld.d	t7, a1, 56

-	st.d     t0, a0,  0
-	st.d     t1, a0,  8
-	ld.d     t0, a1,  64
-	ld.d     t1, a1,  72
-	st.d     t2, a0,  16
-	st.d     t3, a0,  24
-	ld.d     t2, a1,  80
-	ld.d     t3, a1,  88
-	st.d     t4, a0,  32
-	st.d     t5, a0,  40
-	ld.d     t4, a1,  96
-	ld.d     t5, a1,  104
-	st.d     t6, a0,  48
-	st.d     t7, a0,  56
-	ld.d     t6, a1,  112
-	ld.d     t7, a1,  120
-	addi.d   a0, a0,  128
-	addi.d   a1, a1,  128
+	st.d	t0, a0, 0
+	st.d	t1, a0, 8
+	ld.d	t0, a1, 64
+	ld.d	t1, a1, 72
+	st.d	t2, a0, 16
+	st.d	t3, a0, 24
+	ld.d	t2, a1, 80
+	ld.d	t3, a1, 88
+	st.d	t4, a0, 32
+	st.d	t5, a0, 40
+	ld.d	t4, a1, 96
+	ld.d	t5, a1, 104
+	st.d	t6, a0, 48
+	st.d	t7, a0, 56
+	ld.d	t6, a1, 112
+	ld.d	t7, a1, 120
+	addi.d	a0, a0, 128
+	addi.d	a1, a1, 128

-	st.d     t0, a0,  -64
-	st.d     t1, a0,  -56
-	st.d     t2, a0,  -48
-	st.d     t3, a0,  -40
-	st.d     t4, a0,  -32
-	st.d     t5, a0,  -24
-	st.d     t6, a0,  -16
-	st.d     t7, a0,  -8
+	st.d	t0, a0, -64
+	st.d	t1, a0, -56
+	st.d	t2, a0, -48
+	st.d	t3, a0, -40
+	st.d	t4, a0, -32
+	st.d	t5, a0, -24
+	st.d	t6, a0, -16
+	st.d	t7, a0, -8

-	bne      t8, a0, 1b
-	jirl     $r0, ra, 0
+	bne	t8, a0, 1b
+	jr	ra
 SYM_FUNC_END(copy_page)
 EXPORT_SYMBOL(copy_page)
--- a/arch/loongarch/mm/tlbex.S
+++ b/arch/loongarch/mm/tlbex.S
@@ -18,7 +18,7 @@
 	REG_S	a2, sp, PT_BVADDR
 	li.w	a1, \write
 	la.abs	t0, do_page_fault
-	jirl    ra, t0, 0
+	jirl	ra, t0, 0
 	RESTORE_ALL_AND_RET
 	SYM_FUNC_END(tlb_do_page_fault_\write)
 	.endm
@@ -34,7 +34,7 @@ SYM_FUNC_START(handle_tlb_protect)
 	csrrd	a2, LOONGARCH_CSR_BADV
 	REG_S	a2, sp, PT_BVADDR
 	la.abs	t0, do_page_fault
-	jirl    ra, t0, 0
+	jirl	ra, t0, 0
 	RESTORE_ALL_AND_RET
 SYM_FUNC_END(handle_tlb_protect)

@@ -47,7 +47,7 @@ SYM_FUNC_START(handle_tlb_load)
 	 * The vmalloc handling is not in the hotpath.
 	 */
 	csrrd	t0, LOONGARCH_CSR_BADV
-	blt	t0, $r0, vmalloc_load
+	bltz	t0, vmalloc_load
 	csrrd	t1, LOONGARCH_CSR_PGDL

 vmalloc_done_load:
@@ -80,7 +80,7 @@ vmalloc_done_load:
 	 * see if we need to jump to huge tlb processing.
 	 */
 	andi	t0, ra, _PAGE_HUGE
-	bne	t0, $r0, tlb_huge_update_load
+	bnez	t0, tlb_huge_update_load

 	csrrd	t0, LOONGARCH_CSR_BADV
 	srli.d	t0, t0, (PAGE_SHIFT + PTE_ORDER)
@@ -100,12 +100,12 @@ smp_pgtable_change_load:

 	srli.d	ra, t0, _PAGE_PRESENT_SHIFT
 	andi	ra, ra, 1
-	beq	ra, $r0, nopage_tlb_load
+	beqz	ra, nopage_tlb_load

 	ori	t0, t0, _PAGE_VALID
 #ifdef CONFIG_SMP
 	sc.d	t0, t1, 0
-	beq	t0, $r0, smp_pgtable_change_load
+	beqz	t0, smp_pgtable_change_load
 #else
 	st.d	t0, t1, 0
 #endif
@@ -139,23 +139,23 @@ tlb_huge_update_load:
 #endif
 	srli.d	ra, t0, _PAGE_PRESENT_SHIFT
 	andi	ra, ra, 1
-	beq	ra, $r0, nopage_tlb_load
+	beqz	ra, nopage_tlb_load
 	tlbsrch

 	ori	t0, t0, _PAGE_VALID
 #ifdef CONFIG_SMP
 	sc.d	t0, t1, 0
-	beq	t0, $r0, tlb_huge_update_load
+	beqz	t0, tlb_huge_update_load
 	ld.d	t0, t1, 0
 #else
 	st.d	t0, t1, 0
 #endif
-	addu16i.d	t1, $r0, -(CSR_TLBIDX_EHINV >> 16)
-	addi.d	ra, t1, 0
-	csrxchg	ra, t1, LOONGARCH_CSR_TLBIDX
+	addu16i.d	t1, zero, -(CSR_TLBIDX_EHINV >> 16)
+	addi.d		ra, t1, 0
+	csrxchg		ra, t1, LOONGARCH_CSR_TLBIDX
 	tlbwr

-	csrxchg	$r0, t1, LOONGARCH_CSR_TLBIDX
+	csrxchg	zero, t1, LOONGARCH_CSR_TLBIDX

 	/*
 	 * A huge PTE describes an area the size of the
@@ -178,27 +178,27 @@ tlb_huge_update_load:
 	addi.d	t0, ra, 0

 	/* Convert to entrylo1 */
-	addi.d	t1, $r0, 1
+	addi.d	t1, zero, 1
 	slli.d	t1, t1, (HPAGE_SHIFT - 1)
 	add.d	t0, t0, t1
 	csrwr	t0, LOONGARCH_CSR_TLBELO1

 	/* Set huge page tlb entry size */
-	addu16i.d	t0, $r0, (CSR_TLBIDX_PS >> 16)
-	addu16i.d	t1, $r0, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
+	addu16i.d	t0, zero, (CSR_TLBIDX_PS >> 16)
+	addu16i.d	t1, zero, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
 	csrxchg		t1, t0, LOONGARCH_CSR_TLBIDX

 	tlbfill

-	addu16i.d	t0, $r0, (CSR_TLBIDX_PS >> 16)
-	addu16i.d	t1, $r0, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
+	addu16i.d	t0, zero, (CSR_TLBIDX_PS >> 16)
+	addu16i.d	t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
 	csrxchg		t1, t0, LOONGARCH_CSR_TLBIDX

 nopage_tlb_load:
 	dbar	0
 	csrrd	ra, EXCEPTION_KS2
 	la.abs	t0, tlb_do_page_fault_0
-	jirl	$r0, t0, 0
+	jr	t0
 SYM_FUNC_END(handle_tlb_load)

 SYM_FUNC_START(handle_tlb_store)
@@ -210,7 +210,7 @@ SYM_FUNC_START(handle_tlb_store)
 	 * The vmalloc handling is not in the hotpath.
 	 */
 	csrrd	t0, LOONGARCH_CSR_BADV
-	blt	t0, $r0, vmalloc_store
+	bltz	t0, vmalloc_store
 	csrrd	t1, LOONGARCH_CSR_PGDL

 vmalloc_done_store:
@@ -244,7 +244,7 @@ vmalloc_done_store:
 	 * see if we need to jump to huge tlb processing.
 	 */
 	andi	t0, ra, _PAGE_HUGE
-	bne	t0, $r0, tlb_huge_update_store
+	bnez	t0, tlb_huge_update_store

 	csrrd	t0, LOONGARCH_CSR_BADV
 	srli.d	t0, t0, (PAGE_SHIFT + PTE_ORDER)
@@ -265,12 +265,12 @@ smp_pgtable_change_store:
 	srli.d	ra, t0, _PAGE_PRESENT_SHIFT
 	andi	ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
 	xori	ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
-	bne	ra, $r0, nopage_tlb_store
+	bnez	ra, nopage_tlb_store

 	ori	t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
 #ifdef CONFIG_SMP
 	sc.d	t0, t1, 0
-	beq	t0, $r0, smp_pgtable_change_store
+	beqz	t0, smp_pgtable_change_store
 #else
 	st.d	t0, t1, 0
 #endif
@@ -306,24 +306,24 @@ tlb_huge_update_store:
 	srli.d	ra, t0, _PAGE_PRESENT_SHIFT
 	andi	ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
 	xori	ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
-	bne	ra, $r0, nopage_tlb_store
+	bnez	ra, nopage_tlb_store

 	tlbsrch
 	ori	t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)

 #ifdef CONFIG_SMP
 	sc.d	t0, t1, 0
-	beq	t0, $r0, tlb_huge_update_store
+	beqz	t0, tlb_huge_update_store
 	ld.d	t0, t1, 0
 #else
 	st.d	t0, t1, 0
 #endif
-	addu16i.d	t1, $r0, -(CSR_TLBIDX_EHINV >> 16)
-	addi.d	ra, t1, 0
-	csrxchg	ra, t1, LOONGARCH_CSR_TLBIDX
+	addu16i.d	t1, zero, -(CSR_TLBIDX_EHINV >> 16)
+	addi.d		ra, t1, 0
+	csrxchg		ra, t1, LOONGARCH_CSR_TLBIDX
 	tlbwr

-	csrxchg	$r0, t1, LOONGARCH_CSR_TLBIDX
+	csrxchg	zero, t1, LOONGARCH_CSR_TLBIDX
 	/*
 	 * A huge PTE describes an area the size of the
 	 * configured huge page size. This is twice the
@@ -345,28 +345,28 @@ tlb_huge_update_store:
 	addi.d	t0, ra, 0

 	/* Convert to entrylo1 */
-	addi.d	t1, $r0, 1
+	addi.d	t1, zero, 1
 	slli.d	t1, t1, (HPAGE_SHIFT - 1)
 	add.d	t0, t0, t1
 	csrwr	t0, LOONGARCH_CSR_TLBELO1

 	/* Set huge page tlb entry size */
-	addu16i.d	t0, $r0, (CSR_TLBIDX_PS >> 16)
-	addu16i.d	t1, $r0, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
+	addu16i.d	t0, zero, (CSR_TLBIDX_PS >> 16)
+	addu16i.d	t1, zero, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
 	csrxchg		t1, t0, LOONGARCH_CSR_TLBIDX

 	tlbfill

 	/* Reset default page size */
-	addu16i.d	t0, $r0, (CSR_TLBIDX_PS >> 16)
-	addu16i.d	t1, $r0, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
+	addu16i.d	t0, zero, (CSR_TLBIDX_PS >> 16)
+	addu16i.d	t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
 	csrxchg		t1, t0, LOONGARCH_CSR_TLBIDX

 nopage_tlb_store:
 	dbar	0
 	csrrd	ra, EXCEPTION_KS2
 	la.abs	t0, tlb_do_page_fault_1
-	jirl	$r0, t0, 0
+	jr	t0
 SYM_FUNC_END(handle_tlb_store)

 SYM_FUNC_START(handle_tlb_modify)
@@ -378,7 +378,7 @@ SYM_FUNC_START(handle_tlb_modify)
 	 * The vmalloc handling is not in the hotpath.
 	 */
 	csrrd	t0, LOONGARCH_CSR_BADV
-	blt	t0, $r0, vmalloc_modify
+	bltz	t0, vmalloc_modify
 	csrrd	t1, LOONGARCH_CSR_PGDL

 vmalloc_done_modify:
@@ -411,7 +411,7 @@ vmalloc_done_modify:
 	 * see if we need to jump to huge tlb processing.
 	 */
 	andi	t0, ra, _PAGE_HUGE
-	bne	t0, $r0, tlb_huge_update_modify
+	bnez	t0, tlb_huge_update_modify

 	csrrd	t0, LOONGARCH_CSR_BADV
 	srli.d	t0, t0, (PAGE_SHIFT + PTE_ORDER)
@@ -431,12 +431,12 @@ smp_pgtable_change_modify:

 	srli.d	ra, t0, _PAGE_WRITE_SHIFT
 	andi	ra, ra, 1
-	beq	ra, $r0, nopage_tlb_modify
+	beqz	ra, nopage_tlb_modify

 	ori	t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
 #ifdef CONFIG_SMP
 	sc.d	t0, t1, 0
-	beq	t0, $r0, smp_pgtable_change_modify
+	beqz	t0, smp_pgtable_change_modify
 #else
 	st.d	t0, t1, 0
 #endif
@@ -454,7 +454,7 @@ leave_modify:
 	ertn
 #ifdef CONFIG_64BIT
 vmalloc_modify:
-	la.abs  t1, swapper_pg_dir
+	la.abs	t1, swapper_pg_dir
 	b	vmalloc_done_modify
 #endif

@@ -471,14 +471,14 @@ tlb_huge_update_modify:

 	srli.d	ra, t0, _PAGE_WRITE_SHIFT
 	andi	ra, ra, 1
-	beq	ra, $r0, nopage_tlb_modify
+	beqz	ra, nopage_tlb_modify

 	tlbsrch
 	ori	t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)

 #ifdef CONFIG_SMP
 	sc.d	t0, t1, 0
-	beq	t0, $r0, tlb_huge_update_modify
+	beqz	t0, tlb_huge_update_modify
 	ld.d	t0, t1, 0
 #else
 	st.d	t0, t1, 0
@@ -504,28 +504,28 @@ tlb_huge_update_modify:
 	addi.d	t0, ra, 0

 	/* Convert to entrylo1 */
-	addi.d	t1, $r0, 1
+	addi.d	t1, zero, 1
 	slli.d	t1, t1, (HPAGE_SHIFT - 1)
 	add.d	t0, t0, t1
 	csrwr	t0, LOONGARCH_CSR_TLBELO1

 	/* Set huge page tlb entry size */
-	addu16i.d	t0, $r0, (CSR_TLBIDX_PS >> 16)
-	addu16i.d	t1, $r0, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
-	csrxchg	t1, t0, LOONGARCH_CSR_TLBIDX
+	addu16i.d	t0, zero, (CSR_TLBIDX_PS >> 16)
+	addu16i.d	t1, zero, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
+	csrxchg		t1, t0, LOONGARCH_CSR_TLBIDX

 	tlbwr

 	/* Reset default page size */
-	addu16i.d	t0, $r0, (CSR_TLBIDX_PS >> 16)
-	addu16i.d	t1, $r0, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
-	csrxchg	t1, t0, LOONGARCH_CSR_TLBIDX
+	addu16i.d	t0, zero, (CSR_TLBIDX_PS >> 16)
+	addu16i.d	t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
+	csrxchg		t1, t0, LOONGARCH_CSR_TLBIDX

 nopage_tlb_modify:
 	dbar	0
 	csrrd	ra, EXCEPTION_KS2
 	la.abs	t0, tlb_do_page_fault_1
-	jirl	$r0, t0, 0
+	jr	t0
 SYM_FUNC_END(handle_tlb_modify)

 SYM_FUNC_START(handle_tlb_refill)
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -256,6 +256,7 @@ config PPC
 	select IRQ_FORCED_THREADING
 	select MMU_GATHER_PAGE_SIZE
 	select MMU_GATHER_RCU_TABLE_FREE
+	select MMU_GATHER_MERGE_VMAS
 	select MODULES_USE_ELF_RELA
 	select NEED_DMA_MAP_STATE		if PPC64 || NOT_COHERENT_CACHE
 	select NEED_PER_CPU_EMBED_FIRST_CHUNK	if PPC64
@@ -281,6 +282,10 @@ config PPC
 	# Please keep this list sorted alphabetically.
 	#

+config PPC_LONG_DOUBLE_128
+	depends on PPC64
+	def_bool $(success,test "$(shell,echo __LONG_DOUBLE_128__ | $(CC) -E -P -)" = 1)
+
 config PPC_BARRIER_NOSPEC
 	bool
 	default y
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -19,8 +19,6 @@

 #include <linux/pagemap.h>

-#define tlb_start_vma(tlb, vma)	do { } while (0)
-#define tlb_end_vma(tlb, vma)	do { } while (0)
 #define __tlb_remove_tlb_entry	__tlb_remove_tlb_entry

 #define tlb_flush tlb_flush
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -20,6 +20,7 @@ CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
 CFLAGS_prom_init.o += -fno-stack-protector
 CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING
 CFLAGS_prom_init.o += -ffreestanding
+CFLAGS_prom_init.o += $(call cc-option, -ftrivial-auto-var-init=uninitialized)

 ifdef CONFIG_FUNCTION_TRACER
 # Do not trace early boot code
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -73,6 +73,7 @@ ifeq ($(CONFIG_PERF_EVENTS),y)
 endif

 KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax)
+KBUILD_AFLAGS_MODULE += $(call as-option,-Wa$(comma)-mno-relax)

 # GCC versions that support the "-mstrict-align" option default to allowing
 # unaligned accesses.  While unaligned accesses are explicitly allowed in the
@@ -110,7 +111,7 @@ PHONY += vdso_install
 vdso_install:
 	$(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@
 	$(if $(CONFIG_COMPAT),$(Q)$(MAKE) \
-		$(build)=arch/riscv/kernel/compat_vdso $@)
+		$(build)=arch/riscv/kernel/compat_vdso compat_$@)

 ifeq ($(KBUILD_EXTMOD),)
 ifeq ($(CONFIG_MMU),y)
--- a/arch/riscv/boot/dts/canaan/canaan_kd233.dts
+++ b/arch/riscv/boot/dts/canaan/canaan_kd233.dts
@@ -35,7 +35,7 @@
 	gpio-keys {
 		compatible = "gpio-keys";

-		key0 {
+		key {
 			label = "KEY0";
 			linux,code = <BTN_0>;
 			gpios = <&gpio0 10 GPIO_ACTIVE_LOW>;
--- a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
@@ -47,7 +47,7 @@
 	gpio-keys {
 		compatible = "gpio-keys";

-		boot {
+		key-boot {
 			label = "BOOT";
 			linux,code = <BTN_0>;
 			gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
--- a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
@@ -52,7 +52,7 @@
 	gpio-keys {
 		compatible = "gpio-keys";

-		boot {
+		key-boot {
 			label = "BOOT";
 			linux,code = <BTN_0>;
 			gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
--- a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
@@ -46,19 +46,19 @@
 	gpio-keys {
 		compatible = "gpio-keys";

-		up {
+		key-up {
 			label = "UP";
 			linux,code = <BTN_1>;
 			gpios = <&gpio1_0 7 GPIO_ACTIVE_LOW>;
 		};

-		press {
+		key-press {
 			label = "PRESS";
 			linux,code = <BTN_0>;
 			gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
 		};

-		down {
+		key-down {
 			label = "DOWN";
 			linux,code = <BTN_2>;
 			gpios = <&gpio0 1 GPIO_ACTIVE_LOW>;
--- a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
@@ -23,7 +23,7 @@
 	gpio-keys {
 		compatible = "gpio-keys";

-		boot {
+		key-boot {
 			label = "BOOT";
 			linux,code = <BTN_0>;
 			gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -78,7 +78,7 @@ obj-$(CONFIG_SMP) += cpu_ops_sbi.o
 endif
 obj-$(CONFIG_HOTPLUG_CPU)	+= cpu-hotplug.o
 obj-$(CONFIG_KGDB)		+= kgdb.o
-obj-$(CONFIG_KEXEC)		+= kexec_relocate.o crash_save_regs.o machine_kexec.o
+obj-$(CONFIG_KEXEC_CORE)	+= kexec_relocate.o crash_save_regs.o machine_kexec.o
 obj-$(CONFIG_KEXEC_FILE)	+= elf_kexec.o machine_kexec_file.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o

--- a/arch/riscv/kernel/elf_kexec.c
+++ b/arch/riscv/kernel/elf_kexec.c
@@ -349,7 +349,7 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
 {
 	const char *strtab, *name, *shstrtab;
 	const Elf_Shdr *sechdrs;
-	Elf_Rela *relas;
+	Elf64_Rela *relas;
 	int i, r_type;

 	/* String & section header string table */
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -204,6 +204,7 @@ config S390
 	select IOMMU_SUPPORT		if PCI
 	select MMU_GATHER_NO_GATHER
 	select MMU_GATHER_RCU_TABLE_FREE
+	select MMU_GATHER_MERGE_VMAS
 	select MODULES_USE_ELF_RELA
 	select NEED_DMA_MAP_STATE	if PCI
 	select NEED_SG_DMA_LENGTH	if PCI
--- a/arch/s390/include/asm/archrandom.h
+++ b/arch/s390/include/asm/archrandom.h
@@ -2,7 +2,7 @@
 /*
 * Kernel interface for the s390 arch_random_* functions
 *
- * Copyright IBM Corp. 2017, 2020
+ * Copyright IBM Corp. 2017, 2022
 *
 * Author: Harald Freudenberger <freude@de.ibm.com>
 *
@@ -14,6 +14,7 @@
 #ifdef CONFIG_ARCH_RANDOM

 #include <linux/static_key.h>
+#include <linux/preempt.h>
 #include <linux/atomic.h>
 #include <asm/cpacf.h>

@@ -32,7 +33,8 @@ static inline bool __must_check arch_get_random_int(unsigned int *v)

 static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
 {
-	if (static_branch_likely(&s390_arch_random_available)) {
+	if (static_branch_likely(&s390_arch_random_available) &&
+	    in_task()) {
 		cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v));
 		atomic64_add(sizeof(*v), &s390_arch_random_counter);
 		return true;
@@ -42,7 +44,8 @@ static inline bool __must_check arch_get_random_seed_long(unsigned long *v)

 static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
 {
-	if (static_branch_likely(&s390_arch_random_available)) {
+	if (static_branch_likely(&s390_arch_random_available) &&
+	    in_task()) {
 		cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v));
 		atomic64_add(sizeof(*v), &s390_arch_random_counter);
 		return true;
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -27,9 +27,6 @@ static inline void tlb_flush(struct mmu_gather *tlb);
 static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
 					  struct page *page, int page_size);

-#define tlb_start_vma(tlb, vma)			do { } while (0)
-#define tlb_end_vma(tlb, vma)			do { } while (0)
-
 #define tlb_flush tlb_flush
 #define pte_free_tlb pte_free_tlb
 #define pmd_free_tlb pmd_free_tlb
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -67,6 +67,8 @@ config SPARC64
 	select HAVE_KRETPROBES
 	select HAVE_KPROBES
 	select MMU_GATHER_RCU_TABLE_FREE if SMP
+	select MMU_GATHER_MERGE_VMAS
+	select MMU_GATHER_NO_FLUSH_CACHE
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FTRACE_MCOUNT_RECORD
--- a/arch/sparc/include/asm/tlb_64.h
+++ b/arch/sparc/include/asm/tlb_64.h
@@ -22,8 +22,6 @@ void smp_flush_tlb_mm(struct mm_struct *mm);
 void __flush_tlb_pending(unsigned long, unsigned long, unsigned long *);
 void flush_tlb_pending(void);

-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma)	do { } while (0)
 #define tlb_flush(tlb)	flush_tlb_pending()

 /*
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -245,6 +245,7 @@ config X86
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select MMU_GATHER_RCU_TABLE_FREE	if PARAVIRT
+	select MMU_GATHER_MERGE_VMAS
 	select HAVE_POSIX_CPU_TIMERS_TASK_WORK
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RELIABLE_STACKTRACE		if UNWINDER_ORC || STACK_VALIDATION
@@ -2473,7 +2474,7 @@ config RETHUNK
 	bool "Enable return-thunks"
 	depends on RETPOLINE && CC_HAS_RETURN_THUNK
 	select OBJTOOL if HAVE_OBJTOOL
-	default y
+	default y if X86_64
 	help
 	  Compile the kernel with the return-thunks compiler option to guard
 	  against kernel-to-user data leaks by avoiding return speculation.
@@ -2482,21 +2483,21 @@ config RETHUNK

 config CPU_UNRET_ENTRY
 	bool "Enable UNRET on kernel entry"
-	depends on CPU_SUP_AMD && RETHUNK
+	depends on CPU_SUP_AMD && RETHUNK && X86_64
 	default y
 	help
 	  Compile the kernel with support for the retbleed=unret mitigation.

 config CPU_IBPB_ENTRY
 	bool "Enable IBPB on kernel entry"
-	depends on CPU_SUP_AMD
+	depends on CPU_SUP_AMD && X86_64
 	default y
 	help
 	  Compile the kernel with support for the retbleed=ibpb mitigation.

 config CPU_IBRS_ENTRY
 	bool "Enable IBRS on kernel entry"
-	depends on CPU_SUP_INTEL
+	depends on CPU_SUP_INTEL && X86_64
 	default y
 	help
 	  Compile the kernel with support for the spectre_v2=ibrs mitigation.
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -27,6 +27,7 @@ RETHUNK_CFLAGS		:= -mfunction-return=thunk-extern
 RETPOLINE_CFLAGS	+= $(RETHUNK_CFLAGS)
 endif

+export RETHUNK_CFLAGS
 export RETPOLINE_CFLAGS
 export RETPOLINE_VDSO_CFLAGS

--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -278,9 +278,9 @@ enum {
 };

 /*
- * For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in
- * MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported, but when
- * TSX is not supported they have no consistent behavior:
+ * For format LBR_FORMAT_EIP_FLAGS2, bits 61:62 in MSR_LAST_BRANCH_FROM_x
+ * are the TSX flags when TSX is supported, but when TSX is not supported
+ * they have no consistent behavior:
 *
 *   - For wrmsr(), bits 61:62 are considered part of the sign extension.
 *   - For HW updates (branch captures) bits 61:62 are always OFF and are not
@@ -288,7 +288,7 @@ enum {
 *
 * Therefore, if:
 *
- *   1) LBR has TSX format
+ *   1) LBR format LBR_FORMAT_EIP_FLAGS2
 *   2) CPU has no TSX support enabled
 *
 * ... then any value passed to wrmsr() must be sign extended to 63 bits and any
@@ -300,7 +300,7 @@ static inline bool lbr_from_signext_quirk_needed(void)
 	bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
 			   boot_cpu_has(X86_FEATURE_RTM);

-	return !tsx_support && x86_pmu.lbr_has_tsx;
+	return !tsx_support;
 }

 static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
@@ -1609,9 +1609,6 @@ void intel_pmu_lbr_init_hsw(void)
 	x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;

 	x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
-
-	if (lbr_from_signext_quirk_needed())
-		static_branch_enable(&lbr_from_quirk_key);
 }

 /* skylake */
@@ -1702,7 +1699,11 @@ void intel_pmu_lbr_init(void)
 	switch (x86_pmu.intel_cap.lbr_format) {
 	case LBR_FORMAT_EIP_FLAGS2:
 		x86_pmu.lbr_has_tsx = 1;
-		fallthrough;
+		x86_pmu.lbr_from_flags = 1;
+		if (lbr_from_signext_quirk_needed())
+			static_branch_enable(&lbr_from_quirk_key);
+		break;
+
 	case LBR_FORMAT_EIP_FLAGS:
 		x86_pmu.lbr_from_flags = 1;
 		break;
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -302,6 +302,7 @@
 #define X86_FEATURE_RETPOLINE_LFENCE	(11*32+13) /* "" Use LFENCE for Spectre variant 2 */
 #define X86_FEATURE_RETHUNK		(11*32+14) /* "" Use REturn THUNK */
 #define X86_FEATURE_UNRET		(11*32+15) /* "" AMD BTB untrain return */
+#define X86_FEATURE_USE_IBPB_FW		(11*32+16) /* "" Use IBPB during runtime firmware calls */

 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX_VNNI		(12*32+ 4) /* AVX VNNI instructions */
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -297,6 +297,8 @@ do {									\
 	alternative_msr_write(MSR_IA32_SPEC_CTRL,			\
 			      spec_ctrl_current() | SPEC_CTRL_IBRS,	\
 			      X86_FEATURE_USE_IBRS_FW);			\
+	alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,		\
+			      X86_FEATURE_USE_IBPB_FW);			\
 } while (0)

 #define firmware_restrict_branch_speculation_end()			\
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -72,7 +72,6 @@ static inline u64 lower_bits(u64 val, unsigned int bits)

 struct real_mode_header;
 enum stack_type;
-struct ghcb;

 /* Early IDT entry points for #VC handler */
 extern void vc_no_ghcb(void);
@@ -156,11 +155,7 @@ static __always_inline void sev_es_nmi_complete(void)
 		__sev_es_nmi_complete();
 }
 extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd);
-extern enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
-					  bool set_ghcb_msr,
-					  struct es_em_ctxt *ctxt,
-					  u64 exit_code, u64 exit_info_1,
-					  u64 exit_info_2);
+
 static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs)
 {
 	int rc;
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -2,9 +2,6 @@
 #ifndef _ASM_X86_TLB_H
 #define _ASM_X86_TLB_H

-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-
 #define tlb_flush tlb_flush
 static inline void tlb_flush(struct mmu_gather *tlb);

--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -555,7 +555,9 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end)
 			dest = addr + insn.length + insn.immediate.value;

 		if (__static_call_fixup(addr, op, dest) ||
-		    WARN_ON_ONCE(dest != &__x86_return_thunk))
+		    WARN_ONCE(dest != &__x86_return_thunk,
+			      "missing return thunk: %pS-%pS: %*ph",
+			      addr, dest, 5, addr))
 			continue;

 		DPRINTK("return thunk at: %pS (%px) len: %d to: %pS",
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -975,6 +975,7 @@ static inline const char *spectre_v2_module_string(void) { return ""; }
 #define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n"
 #define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n"
 #define SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS+LFENCE mitigation and SMT, data leaks possible via Spectre v2 BHB attacks!\n"
+#define SPECTRE_V2_IBRS_PERF_MSG "WARNING: IBRS mitigation selected on Enhanced IBRS CPU, this may cause unnecessary performance loss\n"

 #ifdef CONFIG_BPF_SYSCALL
 void unpriv_ebpf_notify(int new_state)
@@ -1415,6 +1416,8 @@ static void __init spectre_v2_select_mitigation(void)

 	case SPECTRE_V2_IBRS:
 		setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS);
+		if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED))
+			pr_warn(SPECTRE_V2_IBRS_PERF_MSG);
 		break;

 	case SPECTRE_V2_LFENCE:
@@ -1516,7 +1519,17 @@ static void __init spectre_v2_select_mitigation(void)
 	 * the CPU supports Enhanced IBRS, kernel might un-intentionally not
 	 * enable IBRS around firmware calls.
 	 */
-	if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) {
+	if (boot_cpu_has_bug(X86_BUG_RETBLEED) &&
+	    boot_cpu_has(X86_FEATURE_IBPB) &&
+	    (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+	     boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)) {
+
+		if (retbleed_cmd != RETBLEED_CMD_IBPB) {
+			setup_force_cpu_cap(X86_FEATURE_USE_IBPB_FW);
+			pr_info("Enabling Speculation Barrier for firmware calls\n");
+		}
+
+	} else if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) {
 		setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
 		pr_info("Enabling Restricted Speculation for firmware calls\n");
 	}
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -219,9 +219,10 @@ static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt
 	return ES_VMM_ERROR;
 }

-enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, bool set_ghcb_msr,
-				   struct es_em_ctxt *ctxt, u64 exit_code,
-				   u64 exit_info_1, u64 exit_info_2)
+static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
+					  struct es_em_ctxt *ctxt,
+					  u64 exit_code, u64 exit_info_1,
+					  u64 exit_info_2)
 {
 	/* Fill in protocol and format specifiers */
 	ghcb->protocol_version = ghcb_version;
@@ -231,14 +232,7 @@ enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, bool set_ghcb_msr,
 	ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
 	ghcb_set_sw_exit_info_2(ghcb, exit_info_2);

-	/*
-	 * Hyper-V unenlightened guests use a paravisor for communicating and
-	 * GHCB pages are being allocated and set up by that paravisor. Linux
-	 * should not change the GHCB page's physical address.
-	 */
-	if (set_ghcb_msr)
-		sev_es_wr_ghcb_msr(__pa(ghcb));
-
+	sev_es_wr_ghcb_msr(__pa(ghcb));
 	VMGEXIT();

 	return verify_exception_info(ghcb, ctxt);
@@ -795,7 +789,7 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
 		 */
 		sw_scratch = __pa(ghcb) + offsetof(struct ghcb, shared_buffer);
 		ghcb_set_sw_scratch(ghcb, sw_scratch);
-		ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_IOIO,
+		ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO,
 					  exit_info_1, exit_info_2);
 		if (ret != ES_OK)
 			return ret;
@@ -837,8 +831,7 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)

 		ghcb_set_rax(ghcb, rax);

-		ret = sev_es_ghcb_hv_call(ghcb, true, ctxt,
-					  SVM_EXIT_IOIO, exit_info_1, 0);
+		ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, exit_info_1, 0);
 		if (ret != ES_OK)
 			return ret;

@@ -894,7 +887,7 @@ static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
 		/* xgetbv will cause #GP - use reset value for xcr0 */
 		ghcb_set_xcr0(ghcb, 1);

-	ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_CPUID, 0, 0);
+	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
 	if (ret != ES_OK)
 		return ret;

@@ -919,7 +912,7 @@ static enum es_result vc_handle_rdtsc(struct ghcb *ghcb,
 	bool rdtscp = (exit_code == SVM_EXIT_RDTSCP);
 	enum es_result ret;

-	ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, exit_code, 0, 0);
+	ret = sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, 0, 0);
 	if (ret != ES_OK)
 		return ret;

--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -786,7 +786,7 @@ static int vmgexit_psc(struct snp_psc_desc *desc)
 		ghcb_set_sw_scratch(ghcb, (u64)__pa(data));

 		/* This will advance the shared buffer data points to. */
-		ret = sev_es_ghcb_hv_call(ghcb, true, &ctxt, SVM_VMGEXIT_PSC, 0, 0);
+		ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0);

 		/*
 		 * Page State Change VMGEXIT can pass error code through
@@ -1212,8 +1212,7 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
 		ghcb_set_rdx(ghcb, regs->dx);
 	}

-	ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_MSR,
-				  exit_info_1, 0);
+	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, exit_info_1, 0);

 	if ((ret == ES_OK) && (!exit_info_1)) {
 		regs->ax = ghcb->save.rax;
@@ -1452,7 +1451,7 @@ static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,

 	ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));

-	return sev_es_ghcb_hv_call(ghcb, true, ctxt, exit_code, exit_info_1, exit_info_2);
+	return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
 }

 /*
@@ -1628,7 +1627,7 @@ static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,

 	/* Using a value of 0 for ExitInfo1 means RAX holds the value */
 	ghcb_set_rax(ghcb, val);
-	ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
+	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
 	if (ret != ES_OK)
 		return ret;

@@ -1658,7 +1657,7 @@ static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
 static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
 				       struct es_em_ctxt *ctxt)
 {
-	return sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_WBINVD, 0, 0);
+	return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0);
 }

 static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
@@ -1667,7 +1666,7 @@ static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt

 	ghcb_set_rcx(ghcb, ctxt->regs->cx);

-	ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_RDPMC, 0, 0);
+	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0);
 	if (ret != ES_OK)
 		return ret;

@@ -1708,7 +1707,7 @@ static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
 	if (x86_platform.hyper.sev_es_hcall_prepare)
 		x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);

-	ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_VMMCALL, 0, 0);
+	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0);
 	if (ret != ES_OK)
 		return ret;

@@ -2197,7 +2196,7 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned
 		ghcb_set_rbx(ghcb, input->data_npages);
 	}

-	ret = sev_es_ghcb_hv_call(ghcb, true, &ctxt, exit_code, input->req_gpa, input->resp_gpa);
+	ret = sev_es_ghcb_hv_call(ghcb, &ctxt, exit_code, input->req_gpa, input->resp_gpa);
 	if (ret)
 		goto e_put;

--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6029,6 +6029,11 @@ split_irqchip_unlock:
 		r = 0;
 		break;
 	case KVM_CAP_X86_USER_SPACE_MSR:
+		r = -EINVAL;
+		if (cap->args[0] & ~(KVM_MSR_EXIT_REASON_INVAL |
+				     KVM_MSR_EXIT_REASON_UNKNOWN |
+				     KVM_MSR_EXIT_REASON_FILTER))
+			break;
 		kvm->arch.user_space_msr_mask = cap->args[0];
 		r = 0;
 		break;
@@ -6183,6 +6188,9 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
 	if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
 		return -EFAULT;

+	if (filter.flags & ~KVM_MSR_FILTER_DEFAULT_DENY)
+		return -EINVAL;
+
 	for (i = 0; i < ARRAY_SIZE(filter.ranges); i++)
 		empty &= !filter.ranges[i].nmsrs;

--- a/certs/Kconfig
+++ b/certs/Kconfig
@@ -43,6 +43,7 @@ config SYSTEM_TRUSTED_KEYRING
 	bool "Provide system-wide ring of trusted keys"
 	depends on KEYS
 	depends on ASYMMETRIC_KEY_TYPE
+	depends on X509_CERTIFICATE_PARSER
 	help
 	  Provide a system keyring to which trusted keys can be added.  Keys in
 	  the keyring are considered to be trusted.  Keys may be added at will
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -782,7 +782,8 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)

 					if (!osc_cpc_flexible_adr_space_confirmed) {
 						pr_debug("Flexible address space capability not supported\n");
-						goto out_free;
+						if (!cpc_supported_by_cpu())
+							goto out_free;
 					}

 					addr = ioremap(gas_t->address, gas_t->bit_width/8);
@@ -809,7 +810,8 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
 				}
 				if (!osc_cpc_flexible_adr_space_confirmed) {
 					pr_debug("Flexible address space capability not supported\n");
-					goto out_free;
+					if (!cpc_supported_by_cpu())
+						goto out_free;
 				}
 			} else {
 				if (gas_t->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE || !cpc_ffh_supported()) {
--- a/drivers/clk/clk-lan966x.c
+++ b/drivers/clk/clk-lan966x.c
@@ -213,7 +213,7 @@ static int lan966x_gate_clk_register(struct device *dev,

 		hw_data->hws[i] =
 			devm_clk_hw_register_gate(dev, clk_gate_desc[idx].name,
-						  "lan966x", 0, base,
+						  "lan966x", 0, gate_base,
 						  clk_gate_desc[idx].bit_idx,
 						  0, &clk_gate_lock);

--- a/drivers/clk/sunxi-ng/ccu-sun50i-h6-r.c
+++ b/drivers/clk/sunxi-ng/ccu-sun50i-h6-r.c
@@ -138,6 +138,7 @@ static struct ccu_common *sun50i_h6_r_ccu_clks[] = {
 	&r_apb2_rsb_clk.common,
 	&r_apb1_ir_clk.common,
 	&r_apb1_w1_clk.common,
+	&r_apb1_rtc_clk.common,
 	&ir_clk.common,
 	&w1_clk.common,
 };
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -103,9 +103,14 @@ static void dimm_setup_label(struct dimm_info *dimm, u16 handle)

 	dmi_memdev_name(handle, &bank, &device);

-	/* both strings must be non-zero */
-	if (bank && *bank && device && *device)
-		snprintf(dimm->label, sizeof(dimm->label), "%s %s", bank, device);
+	/*
+	 * Set to an empty string when both bank and device are NULL or empty.
+	 * In this case, the label assigned by default will be preserved.
+	 */
+	snprintf(dimm->label, sizeof(dimm->label), "%s%s%s",
+		 (bank && *bank) ? bank : "",
+		 (bank && *bank && device && *device) ? " " : "",
+		 (device && *device) ? device : "");
 }

 static void assign_dmi_dimm_info(struct dimm_info *dimm, struct memdev_dmi_entry *entry)
--- a/drivers/edac/synopsys_edac.c
+++ b/drivers/edac/synopsys_edac.c
@@ -514,6 +514,28 @@ static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p)
 	memset(p, 0, sizeof(*p));
 }

+static void enable_intr(struct synps_edac_priv *priv)
+{
+	/* Enable UE/CE Interrupts */
+	if (priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)
+		writel(DDR_UE_MASK | DDR_CE_MASK,
+		       priv->baseaddr + ECC_CLR_OFST);
+	else
+		writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK,
+		       priv->baseaddr + DDR_QOS_IRQ_EN_OFST);
+
+}
+
+static void disable_intr(struct synps_edac_priv *priv)
+{
+	/* Disable UE/CE Interrupts */
+	if (priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)
+		writel(0x0, priv->baseaddr + ECC_CLR_OFST);
+	else
+		writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK,
+		       priv->baseaddr + DDR_QOS_IRQ_DB_OFST);
+}
+
 /**
 * intr_handler - Interrupt Handler for ECC interrupts.
 * @irq:        IRQ number.
@@ -555,6 +577,9 @@ static irqreturn_t intr_handler(int irq, void *dev_id)
 	/* v3.0 of the controller does not have this register */
 	if (!(priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR))
 		writel(regval, priv->baseaddr + DDR_QOS_IRQ_STAT_OFST);
+	else
+		enable_intr(priv);
+
 	return IRQ_HANDLED;
 }

@@ -837,25 +862,6 @@ static void mc_init(struct mem_ctl_info *mci, struct platform_device *pdev)
 	init_csrows(mci);
 }

-static void enable_intr(struct synps_edac_priv *priv)
-{
-	/* Enable UE/CE Interrupts */
-	if (priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)
-		writel(DDR_UE_MASK | DDR_CE_MASK,
-		       priv->baseaddr + ECC_CLR_OFST);
-	else
-		writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK,
-		       priv->baseaddr + DDR_QOS_IRQ_EN_OFST);
-
-}
-
-static void disable_intr(struct synps_edac_priv *priv)
-{
-	/* Disable UE/CE Interrupts */
-	writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK,
-			priv->baseaddr + DDR_QOS_IRQ_DB_OFST);
-}
-
 static int setup_irq(struct mem_ctl_info *mci,
 		     struct platform_device *pdev)
 {
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -351,6 +351,9 @@ static const struct regmap_config pca953x_i2c_regmap = {
 	.reg_bits = 8,
 	.val_bits = 8,

+	.use_single_read = true,
+	.use_single_write = true,
+
 	.readable_reg = pca953x_readable_register,
 	.writeable_reg = pca953x_writeable_register,
 	.volatile_reg = pca953x_volatile_register,
@@ -906,15 +909,18 @@ static int pca953x_irq_setup(struct pca953x_chip *chip,
 static int device_pca95xx_init(struct pca953x_chip *chip, u32 invert)
 {
 	DECLARE_BITMAP(val, MAX_LINE);
+	u8 regaddr;
 	int ret;

-	ret = regcache_sync_region(chip->regmap, chip->regs->output,
-				   chip->regs->output + NBANK(chip));
+	regaddr = pca953x_recalc_addr(chip, chip->regs->output, 0);
+	ret = regcache_sync_region(chip->regmap, regaddr,
+				   regaddr + NBANK(chip) - 1);
 	if (ret)
 		goto out;

-	ret = regcache_sync_region(chip->regmap, chip->regs->direction,
-				   chip->regs->direction + NBANK(chip));
+	regaddr = pca953x_recalc_addr(chip, chip->regs->direction, 0);
+	ret = regcache_sync_region(chip->regmap, regaddr,
+				   regaddr + NBANK(chip) - 1);
 	if (ret)
 		goto out;

@@ -1127,14 +1133,14 @@ static int pca953x_regcache_sync(struct device *dev)
 	 * sync these registers first and only then sync the rest.
 	 */
 	regaddr = pca953x_recalc_addr(chip, chip->regs->direction, 0);
-	ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip));
+	ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip) - 1);
 	if (ret) {
 		dev_err(dev, "Failed to sync GPIO dir registers: %d\n", ret);
 		return ret;
 	}

 	regaddr = pca953x_recalc_addr(chip, chip->regs->output, 0);
-	ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip));
+	ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip) - 1);
 	if (ret) {
 		dev_err(dev, "Failed to sync GPIO out registers: %d\n", ret);
 		return ret;
@@ -1144,7 +1150,7 @@ static int pca953x_regcache_sync(struct device *dev)
 	if (chip->driver_data & PCA_PCAL) {
 		regaddr = pca953x_recalc_addr(chip, PCAL953X_IN_LATCH, 0);
 		ret = regcache_sync_region(chip->regmap, regaddr,
-					   regaddr + NBANK(chip));
+					   regaddr + NBANK(chip) - 1);
 		if (ret) {
 			dev_err(dev, "Failed to sync INT latch registers: %d\n",
 				ret);
@@ -1153,7 +1159,7 @@ static int pca953x_regcache_sync(struct device *dev)

 		regaddr = pca953x_recalc_addr(chip, PCAL953X_INT_MASK, 0);
 		ret = regcache_sync_region(chip->regmap, regaddr,
-					   regaddr + NBANK(chip));
+					   regaddr + NBANK(chip) - 1);
 		if (ret) {
 			dev_err(dev, "Failed to sync INT mask registers: %d\n",
 				ret);
--- a/drivers/gpio/gpio-xilinx.c
+++ b/drivers/gpio/gpio-xilinx.c
@@ -99,7 +99,7 @@ static inline void xgpio_set_value32(unsigned long *map, int bit, u32 v)
 	const unsigned long offset = (bit % BITS_PER_LONG) & BIT(5);

 	map[index] &= ~(0xFFFFFFFFul << offset);
-	map[index] |= v << offset;
+	map[index] |= (unsigned long)v << offset;
 }

 static inline int xgpio_regoffset(struct xgpio_instance *chip, int ch)
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -421,6 +421,10 @@ out_free_lh:
 * @work: the worker that implements software debouncing
 * @sw_debounced: flag indicating if the software debouncer is active
 * @level: the current debounced physical level of the line
+ * @hdesc: the Hardware Timestamp Engine (HTE) descriptor
+ * @raw_level: the line level at the time of event
+ * @total_discard_seq: the running counter of the discarded events
+ * @last_seqno: the last sequence number before debounce period expires
 */
 struct line {
 	struct gpio_desc *desc;
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1364,16 +1364,10 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
 				    struct amdgpu_vm *vm)
 {
 	struct amdkfd_process_info *process_info = vm->process_info;
-	struct amdgpu_bo *pd = vm->root.bo;

 	if (!process_info)
 		return;

-	/* Release eviction fence from PD */
-	amdgpu_bo_reserve(pd, false);
-	amdgpu_bo_fence(pd, NULL, false);
-	amdgpu_bo_unreserve(pd);
-
 	/* Update process info */
 	mutex_lock(&process_info->lock);
 	process_info->n_vms--;
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -40,7 +40,7 @@ static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu)
 {
 	struct amdgpu_bo_list *list = container_of(rcu, struct amdgpu_bo_list,
 						   rhead);
-
+	mutex_destroy(&list->bo_list_mutex);
 	kvfree(list);
 }

@@ -136,6 +136,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,

 	trace_amdgpu_cs_bo_status(list->num_entries, total_size);

+	mutex_init(&list->bo_list_mutex);
 	*result = list;
 	return 0;

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -47,6 +47,10 @@ struct amdgpu_bo_list {
 	struct amdgpu_bo *oa_obj;
 	unsigned first_userptr;
 	unsigned num_entries;
+
+	/* Protect access during command submission.
+	 */
+	struct mutex bo_list_mutex;
 };

 int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -519,6 +519,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 			return r;
 	}

+	mutex_lock(&p->bo_list->bo_list_mutex);
+
 	/* One for TTM and one for the CS job */
 	amdgpu_bo_list_for_each_entry(e, p->bo_list)
 		e->tv.num_shared = 2;
@@ -651,6 +653,7 @@ out_free_user_pages:
 			kvfree(e->user_pages);
 			e->user_pages = NULL;
 		}
+		mutex_unlock(&p->bo_list->bo_list_mutex);
 	}
 	return r;
 }
@@ -690,9 +693,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 {
 	unsigned i;

-	if (error && backoff)
+	if (error && backoff) {
 		ttm_eu_backoff_reservation(&parser->ticket,
 					   &parser->validated);
+		mutex_unlock(&parser->bo_list->bo_list_mutex);
+	}

 	for (i = 0; i < parser->num_post_deps; i++) {
 		drm_syncobj_put(parser->post_deps[i].syncobj);
@@ -832,12 +837,16 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 			continue;

 		r = amdgpu_vm_bo_update(adev, bo_va, false);
-		if (r)
+		if (r) {
+			mutex_unlock(&p->bo_list->bo_list_mutex);
 			return r;
+		}

 		r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
-		if (r)
+		if (r) {
+			mutex_unlock(&p->bo_list->bo_list_mutex);
 			return r;
+		}
 	}

 	r = amdgpu_vm_handle_moved(adev, vm);
@@ -1278,6 +1287,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,

 	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
 	mutex_unlock(&p->adev->notifier_lock);
+	mutex_unlock(&p->bo_list->bo_list_mutex);

 	return 0;

--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -6,7 +6,7 @@ config DRM_AMD_DC
 	bool "AMD DC - Enable new display engine"
 	default y
 	select SND_HDA_COMPONENT if SND_HDA_CORE
-	select DRM_AMD_DC_DCN if X86 && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)
+	select DRM_AMD_DC_DCN if (X86 || PPC_LONG_DOUBLE_128) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)
 	help
 	  Choose this option if you want to use the new display engine
 	  support for AMDGPU. This adds required support for Vega and
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1653,7 +1653,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
 	adev->dm.crc_rd_wrk = amdgpu_dm_crtc_secure_display_create_work();
 #endif
-	if (dc_enable_dmub_notifications(adev->dm.dc)) {
+	if (dc_is_dmub_outbox_supported(adev->dm.dc)) {
 		init_completion(&adev->dm.dmub_aux_transfer_done);
 		adev->dm.dmub_notify = kzalloc(sizeof(struct dmub_notification), GFP_KERNEL);
 		if (!adev->dm.dmub_notify) {
@@ -1689,6 +1689,13 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 		goto error;
 	}

+	/* Enable outbox notification only after IRQ handlers are registered and DMUB is alive.
+	 * It is expected that DMUB will resend any pending notifications at this point, for
+	 * example HPD from DPIA.
+	 */
+	if (dc_is_dmub_outbox_supported(adev->dm.dc))
+		dc_enable_dmub_outbox(adev->dm.dc);
+
 	/* create fake encoders for MST */
 	dm_dp_create_fake_mst_encoders(adev);

@@ -2678,9 +2685,6 @@ static int dm_resume(void *handle)
 		 */
 		link_enc_cfg_copy(adev->dm.dc->current_state, dc_state);

-		if (dc_enable_dmub_notifications(adev->dm.dc))
-			amdgpu_dm_outbox_init(adev);
-
 		r = dm_dmub_hw_init(adev);
 		if (r)
 			DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r);
@@ -2698,6 +2702,11 @@ static int dm_resume(void *handle)
 			}
 		}

+		if (dc_is_dmub_outbox_supported(adev->dm.dc)) {
+			amdgpu_dm_outbox_init(adev);
+			dc_enable_dmub_outbox(adev->dm.dc);
+		}
+
 		WARN_ON(!dc_commit_state(dm->dc, dc_state));

 		dm_gpureset_commit_state(dm->cached_dc_state, dm);
@@ -2719,13 +2728,15 @@ static int dm_resume(void *handle)
 	/* TODO: Remove dc_state->dccg, use dc->dccg directly. */
 	dc_resource_state_construct(dm->dc, dm_state->context);

-	/* Re-enable outbox interrupts for DPIA. */
-	if (dc_enable_dmub_notifications(adev->dm.dc))
-		amdgpu_dm_outbox_init(adev);
-
 	/* Before powering on DC we need to re-initialize DMUB. */
 	dm_dmub_hw_resume(adev);

+	/* Re-enable outbox interrupts for DPIA. */
+	if (dc_is_dmub_outbox_supported(adev->dm.dc)) {
+		amdgpu_dm_outbox_init(adev);
+		dc_enable_dmub_outbox(adev->dm.dc);
+	}
+
 	/* power on hardware */
 	dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);

--- a/drivers/gpu/drm/drm_gem_ttm_helper.c
+++ b/drivers/gpu/drm/drm_gem_ttm_helper.c
@@ -64,8 +64,13 @@ int drm_gem_ttm_vmap(struct drm_gem_object *gem,
 		     struct iosys_map *map)
 {
 	struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem);
+	int ret;

-	return ttm_bo_vmap(bo, map);
+	dma_resv_lock(gem->resv, NULL);
+	ret = ttm_bo_vmap(bo, map);
+	dma_resv_unlock(gem->resv);
+
+	return ret;
 }
 EXPORT_SYMBOL(drm_gem_ttm_vmap);

@@ -82,7 +87,9 @@ void drm_gem_ttm_vunmap(struct drm_gem_object *gem,
 {
 	struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem);

+	dma_resv_lock(gem->resv, NULL);
 	ttm_bo_vunmap(bo, map);
+	dma_resv_unlock(gem->resv);
 }
 EXPORT_SYMBOL(drm_gem_ttm_vunmap);

--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -273,10 +273,17 @@ struct intel_context {
 		u8 child_index;
 		/** @guc: GuC specific members for parallel submission */
 		struct {
-			/** @wqi_head: head pointer in work queue */
+			/** @wqi_head: cached head pointer in work queue */
 			u16 wqi_head;
-			/** @wqi_tail: tail pointer in work queue */
+			/** @wqi_tail: cached tail pointer in work queue */
 			u16 wqi_tail;
+			/** @wq_head: pointer to the actual head in work queue */
+			u32 *wq_head;
+			/** @wq_tail: pointer to the actual tail in work queue */
+			u32 *wq_tail;
+			/** @wq_status: pointer to the status in work queue */
+			u32 *wq_status;
+
 			/**
 			 * @parent_page: page in context state (ce->state) used
 			 * by parent for work queue, process descriptor
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -201,6 +201,8 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine);
 int intel_engine_stop_cs(struct intel_engine_cs *engine);
 void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);

+void intel_engine_wait_for_pending_mi_fw(struct intel_engine_cs *engine);
+
 void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask);

 u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1282,10 +1282,10 @@ static int __intel_engine_stop_cs(struct intel_engine_cs *engine,
 	intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));

 	/*
-	 * Wa_22011802037 : gen12, Prior to doing a reset, ensure CS is
+	 * Wa_22011802037 : gen11, gen12, Prior to doing a reset, ensure CS is
 	 * stopped, set ring stop bit and prefetch disable bit to halt CS
 	 */
-	if (GRAPHICS_VER(engine->i915) == 12)
+	if (IS_GRAPHICS_VER(engine->i915, 11, 12))
 		intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base),
 				      _MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE));

@@ -1308,6 +1308,18 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine)
 		return -ENODEV;

 	ENGINE_TRACE(engine, "\n");
+	/*
+	 * TODO: Find out why occasionally stopping the CS times out. Seen
+	 * especially with gem_eio tests.
+	 *
+	 * Occasionally trying to stop the cs times out, but does not adversely
+	 * affect functionality. The timeout is set as a config parameter that
+	 * defaults to 100ms. In most cases the follow up operation is to wait
+	 * for pending MI_FORCE_WAKEUPs. The assumption is that this timeout is
+	 * sufficient for any pending MI_FORCE_WAKEUPs to complete. Once root
+	 * caused, the caller must check and handle the return from this
+	 * function.
+	 */
 	if (__intel_engine_stop_cs(engine, 1000, stop_timeout(engine))) {
 		ENGINE_TRACE(engine,
 			     "timed out on STOP_RING -> IDLE; HEAD:%04x, TAIL:%04x\n",
@@ -1334,6 +1346,78 @@ void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
 	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
 }

+static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine)
+{
+	static const i915_reg_t _reg[I915_NUM_ENGINES] = {
+		[RCS0] = MSG_IDLE_CS,
+		[BCS0] = MSG_IDLE_BCS,
+		[VCS0] = MSG_IDLE_VCS0,
+		[VCS1] = MSG_IDLE_VCS1,
+		[VCS2] = MSG_IDLE_VCS2,
+		[VCS3] = MSG_IDLE_VCS3,
+		[VCS4] = MSG_IDLE_VCS4,
+		[VCS5] = MSG_IDLE_VCS5,
+		[VCS6] = MSG_IDLE_VCS6,
+		[VCS7] = MSG_IDLE_VCS7,
+		[VECS0] = MSG_IDLE_VECS0,
+		[VECS1] = MSG_IDLE_VECS1,
+		[VECS2] = MSG_IDLE_VECS2,
+		[VECS3] = MSG_IDLE_VECS3,
+		[CCS0] = MSG_IDLE_CS,
+		[CCS1] = MSG_IDLE_CS,
+		[CCS2] = MSG_IDLE_CS,
+		[CCS3] = MSG_IDLE_CS,
+	};
+	u32 val;
+
+	if (!_reg[engine->id].reg) {
+		drm_err(&engine->i915->drm,
+			"MSG IDLE undefined for engine id %u\n", engine->id);
+		return 0;
+	}
+
+	val = intel_uncore_read(engine->uncore, _reg[engine->id]);
+
+	/* bits[29:25] & bits[13:9] >> shift */
+	return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT;
+}
+
+static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask)
+{
+	int ret;
+
+	/* Ensure GPM receives fw up/down after CS is stopped */
+	udelay(1);
+
+	/* Wait for forcewake request to complete in GPM */
+	ret =  __intel_wait_for_register_fw(gt->uncore,
+					    GEN9_PWRGT_DOMAIN_STATUS,
+					    fw_mask, fw_mask, 5000, 0, NULL);
+
+	/* Ensure CS receives fw ack from GPM */
+	udelay(1);
+
+	if (ret)
+		GT_TRACE(gt, "Failed to complete pending forcewake %d\n", ret);
+}
+
+/*
+ * Wa_22011802037:gen11/gen12: In addition to stopping the cs, we need to wait
+ * for any pending MI_FORCE_WAKEUP requests that the CS has initiated to
+ * complete. The pending status is indicated by bits[13:9] (masked by
+ * bits[29:25]) in the MSG_IDLE register. There's one MSG_IDLE register per
+ * reset domain. Since we are concerned only with the gt reset here, we use a
+ * logical OR of pending forcewakeups from all reset domains and then wait for
+ * them to complete by querying PWRGT_DOMAIN_STATUS.
+ */
+void intel_engine_wait_for_pending_mi_fw(struct intel_engine_cs *engine)
+{
+	u32 fw_pending = __cs_pending_mi_force_wakes(engine);
+
+	if (fw_pending)
+		__gpm_wait_for_fw_complete(engine->gt, fw_pending);
+}
+
 static u32
 read_subslice_reg(const struct intel_engine_cs *engine,
 		  int slice, int subslice, i915_reg_t reg)
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -661,6 +661,16 @@ static inline void execlists_schedule_out(struct i915_request *rq)
 	i915_request_put(rq);
 }

+static u32 map_i915_prio_to_lrc_desc_prio(int prio)
+{
+	if (prio > I915_PRIORITY_NORMAL)
+		return GEN12_CTX_PRIORITY_HIGH;
+	else if (prio < I915_PRIORITY_NORMAL)
+		return GEN12_CTX_PRIORITY_LOW;
+	else
+		return GEN12_CTX_PRIORITY_NORMAL;
+}
+
 static u64 execlists_update_context(struct i915_request *rq)
 {
 	struct intel_context *ce = rq->context;
@@ -669,7 +679,7 @@ static u64 execlists_update_context(struct i915_request *rq)

 	desc = ce->lrc.desc;
 	if (rq->engine->flags & I915_ENGINE_HAS_EU_PRIORITY)
-		desc |= lrc_desc_priority(rq_prio(rq));
+		desc |= map_i915_prio_to_lrc_desc_prio(rq_prio(rq));

 	/*
 	 * WaIdleLiteRestore:bdw,skl
@@ -2958,6 +2968,13 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
 	ring_set_paused(engine, 1);
 	intel_engine_stop_cs(engine);

+	/*
+	 * Wa_22011802037:gen11/gen12: In addition to stopping the cs, we need
+	 * to wait for any pending mi force wakeups
+	 */
+	if (IS_GRAPHICS_VER(engine->i915, 11, 12))
+		intel_engine_wait_for_pending_mi_fw(engine);
+
 	engine->execlists.reset_ccid = active_ccid(engine);
 }

--- a/Show More
+++ b/Show More