Revert "gfs2: Force withdraw to replay journals and wait for it to finish" (3/6)

The current withdraw code duplicates the journal recovery code gfs2
already has for dealing with node failures, and it does so poorly.  That
code was added because when releasing a lockspace, we didn't have a way
to indicate that the lockspace needs recovery.  We now do have this
feature, so the current withdraw code can be removed almost entirely.
This is one of several steps towards that.

Reverts parts of commit 601ef0d52e ("gfs2: Force withdraw to replay
journals and wait for it to finish").

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
This commit is contained in:
Andreas Gruenbacher
2025-07-25 21:40:45 +02:00
parent 2aae092dc4
commit 4cee5b0f7a
4 changed files with 0 additions and 86 deletions

View File

@@ -30,8 +30,6 @@
struct workqueue_struct *gfs2_freeze_wq;
extern struct workqueue_struct *gfs2_control_wq;
static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
@@ -638,55 +636,6 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote)
}
}
/**
 * nondisk_go_callback - handle a demote request on a nondisk glock
 * @gl: the nondisk glock
 * @remote: true if this came from a different cluster node
 *
 * Only remote requests against the live lock (GFS2_LIVE_LOCK) are acted on.
 * The demote request itself is always cancelled; when it asks for
 * LM_ST_UNLOCKED it is treated as a signal that another node has withdrawn,
 * and the control work item is queued so the check for journal recovery runs
 * outside of this callback.
 */
static void nondisk_go_callback(struct gfs2_glock *gl, bool remote)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	/* Nothing to do for local callbacks or for any glock but the live lock. */
	if (!(remote && gl->gl_name.ln_number == GFS2_LIVE_LOCK))
		return;

	/*
	 * Cancel the demote request before anything else: a nondisk glock is
	 * not really meant to be demoted, the request only serves to inform us
	 * of another node's withdraw.  The glock is kept in SH mode.
	 */
	clear_bit(GLF_DEMOTE, &gl->gl_flags);
	clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);

	/* Ignore the unlock if we're withdrawn, unmounting, or in recovery. */
	if (test_bit(SDF_NORECOVERY, &sdp->sd_flags))
		return;
	if (test_bit(SDF_WITHDRAWN, &sdp->sd_flags))
		return;
	if (test_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags))
		return;

	/*
	 * Only a request to unlock matters, because that means the remote
	 * node wants a journal recovered.
	 */
	if (gl->gl_demote_state != LM_ST_UNLOCKED)
		return;

	if (!sdp->sd_args.ar_spectator) {
		fs_warn(sdp, "Some node has withdrawn; checking for recovery.\n");
		set_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags);
		/*
		 * Neither remote_withdraw nor gfs2_recover_journal can be
		 * called directly from here: this runs from the glock unlock
		 * function, and remote_withdraw needs to enqueue and dequeue
		 * the very "live" glock we were called from.  Hand the work
		 * off to the control work queue in lock_dlm instead.
		 */
		queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0);
		return;
	}

	fs_warn(sdp, "Spectator node cannot recover journals.\n");
}
const struct gfs2_glock_operations gfs2_meta_glops = {
.go_type = LM_TYPE_META,
.go_flags = GLOF_NONDISK,
@@ -734,7 +683,6 @@ const struct gfs2_glock_operations gfs2_flock_glops = {
/*
 * Glock operations for LM_TYPE_NONDISK glocks.  The go_callback hook is
 * nondisk_go_callback.
 */
const struct gfs2_glock_operations gfs2_nondisk_glops = {
	.go_callback = nondisk_go_callback,
	.go_flags = GLOF_NONDISK,
	.go_type = LM_TYPE_NONDISK,
};
const struct gfs2_glock_operations gfs2_quota_glops = {

View File

@@ -597,7 +597,6 @@ enum {
SDF_SKIP_DLM_UNLOCK = 8,
SDF_FORCE_AIL_FLUSH = 9,
SDF_FREEZE_INITIATOR = 10,
SDF_REMOTE_WITHDRAW = 13, /* Performing remote recovery */
SDF_KILL = 15,
SDF_EVICTING = 16,
SDF_FROZEN = 17,

View File

@@ -15,7 +15,6 @@
#include <linux/sched/signal.h>
#include "incore.h"
#include "recovery.h"
#include "util.h"
#include "sys.h"
#include "trace_gfs2.h"
@@ -395,7 +394,6 @@ static void gdlm_cancel(struct gfs2_glock *gl)
/*
* dlm/gfs2 recovery coordination using dlm_recover callbacks
*
* 0. gfs2 checks for another cluster node withdraw, needing journal replay
* 1. dlm_controld sees lockspace members change
* 2. dlm_controld blocks dlm-kernel locking activity
* 3. dlm_controld within dlm-kernel notifies gfs2 (recover_prep)
@@ -653,28 +651,6 @@ static int control_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags)
&ls->ls_control_lksb, "control_lock");
}
/**
 * remote_withdraw - react to a node withdrawing from the file system
 * @sdp: The superblock
 *
 * Walks the journal index list and calls gfs2_recover_journal() on every
 * journal whose jd_jid differs from sd_lockstruct.ls_jid (presumably this
 * node's own journal).  The walk stops at the first journal for which
 * gfs2_recover_journal() returns non-zero.  The number of journals handled
 * without error and the last return value are logged.
 */
static void remote_withdraw(struct gfs2_sbd *sdp)
{
	struct gfs2_jdesc *jd;
	int checked = 0;
	int error = 0;

	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
		if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
			error = gfs2_recover_journal(jd, true);
			if (error)
				break;
			checked++;
		}
	}

	/* Report the outcome; the count excludes a journal that failed. */
	fs_err(sdp, "Journals checked: %d, ret = %d.\n", checked, error);
}
static void gfs2_control_func(struct work_struct *work)
{
struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work);
@@ -685,13 +661,6 @@ static void gfs2_control_func(struct work_struct *work)
int recover_size;
int i, error;
/* First check for other nodes that may have done a withdraw. */
if (test_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags)) {
remote_withdraw(sdp);
clear_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags);
return;
}
spin_lock(&ls->ls_recover_spin);
/*
* No MOUNT_DONE means we're still mounting; control_mount()

View File

@@ -84,7 +84,6 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf)
"Force AIL Flush: %d\n"
"FS Freeze Initiator: %d\n"
"FS Frozen: %d\n"
"Remote Withdraw: %d\n"
"Killing: %d\n"
"sd_log_error: %d\n"
"sd_log_flush_lock: %d\n"
@@ -114,7 +113,6 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf)
test_bit(SDF_FORCE_AIL_FLUSH, &f),
test_bit(SDF_FREEZE_INITIATOR, &f),
test_bit(SDF_FROZEN, &f),
test_bit(SDF_REMOTE_WITHDRAW, &f),
test_bit(SDF_KILL, &f),
sdp->sd_log_error,
rwsem_is_locked(&sdp->sd_log_flush_lock),