mirror of
https://github.com/torvalds/linux.git
synced 2025-12-07 20:06:24 +00:00
staging/lustre: Replace jobid acquiring with per node setting
Instead of meddling directly in process environment variables (which is also not possible on certain platforms because the required symbols are not exported), create a jobid_name proc file to represent this info (to be filled by job scheduler epilogue). Signed-off-by: Oleg Drokin <oleg.drokin@intel.com> CC: Andreas Dilger <andreas.dilger@intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
committed by
Greg Kroah-Hartman
parent
7bc3dfa37b
commit
76133e66b1
@@ -56,7 +56,6 @@
|
|||||||
/* check if task is running in compat mode.*/
|
/* check if task is running in compat mode.*/
|
||||||
#define current_pid() (current->pid)
|
#define current_pid() (current->pid)
|
||||||
#define current_comm() (current->comm)
|
#define current_comm() (current->comm)
|
||||||
int cfs_get_environ(const char *key, char *value, int *val_len);
|
|
||||||
|
|
||||||
typedef __u32 cfs_cap_t;
|
typedef __u32 cfs_cap_t;
|
||||||
|
|
||||||
|
|||||||
@@ -369,6 +369,7 @@ static inline void s2dhms(struct dhms *ts, time_t secs)
|
|||||||
#define JOBSTATS_JOBID_VAR_MAX_LEN 20
|
#define JOBSTATS_JOBID_VAR_MAX_LEN 20
|
||||||
#define JOBSTATS_DISABLE "disable"
|
#define JOBSTATS_DISABLE "disable"
|
||||||
#define JOBSTATS_PROCNAME_UID "procname_uid"
|
#define JOBSTATS_PROCNAME_UID "procname_uid"
|
||||||
|
#define JOBSTATS_NODELOCAL "nodelocal"
|
||||||
|
|
||||||
extern int lprocfs_write_frac_helper(const char *buffer, unsigned long count,
|
extern int lprocfs_write_frac_helper(const char *buffer, unsigned long count,
|
||||||
int *val, int mult);
|
int *val, int mult);
|
||||||
|
|||||||
@@ -2182,6 +2182,9 @@ void class_exit_uuidlist(void);
|
|||||||
int mea_name2idx(struct lmv_stripe_md *mea, const char *name, int namelen);
|
int mea_name2idx(struct lmv_stripe_md *mea, const char *name, int namelen);
|
||||||
int raw_name2idx(int hashtype, int count, const char *name, int namelen);
|
int raw_name2idx(int hashtype, int count, const char *name, int namelen);
|
||||||
|
|
||||||
|
/* class_obd.c */
|
||||||
|
extern char obd_jobid_node[];
|
||||||
|
|
||||||
/* prng.c */
|
/* prng.c */
|
||||||
#define ll_generate_random_uuid(uuid_out) cfs_get_random_bytes(uuid_out, sizeof(class_uuid_t))
|
#define ll_generate_random_uuid(uuid_out) cfs_get_random_bytes(uuid_out, sizeof(class_uuid_t))
|
||||||
|
|
||||||
|
|||||||
@@ -100,158 +100,6 @@ cfs_cap_t cfs_curproc_cap_pack(void)
|
|||||||
return cap;
|
return cap;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int cfs_access_process_vm(struct task_struct *tsk, unsigned long addr,
|
|
||||||
void *buf, int len, int write)
|
|
||||||
{
|
|
||||||
/* Just copied from kernel for the kernels which doesn't
|
|
||||||
* have access_process_vm() exported */
|
|
||||||
struct mm_struct *mm;
|
|
||||||
struct vm_area_struct *vma;
|
|
||||||
struct page *page;
|
|
||||||
void *old_buf = buf;
|
|
||||||
|
|
||||||
mm = get_task_mm(tsk);
|
|
||||||
if (!mm)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
down_read(&mm->mmap_sem);
|
|
||||||
/* ignore errors, just check how much was successfully transferred */
|
|
||||||
while (len) {
|
|
||||||
int bytes, rc, offset;
|
|
||||||
void *maddr;
|
|
||||||
|
|
||||||
rc = get_user_pages(tsk, mm, addr, 1,
|
|
||||||
write, 1, &page, &vma);
|
|
||||||
if (rc <= 0)
|
|
||||||
break;
|
|
||||||
|
|
||||||
bytes = len;
|
|
||||||
offset = addr & (PAGE_SIZE-1);
|
|
||||||
if (bytes > PAGE_SIZE-offset)
|
|
||||||
bytes = PAGE_SIZE-offset;
|
|
||||||
|
|
||||||
maddr = kmap(page);
|
|
||||||
if (write) {
|
|
||||||
copy_to_user_page(vma, page, addr,
|
|
||||||
maddr + offset, buf, bytes);
|
|
||||||
set_page_dirty_lock(page);
|
|
||||||
} else {
|
|
||||||
copy_from_user_page(vma, page, addr,
|
|
||||||
buf, maddr + offset, bytes);
|
|
||||||
}
|
|
||||||
kunmap(page);
|
|
||||||
page_cache_release(page);
|
|
||||||
len -= bytes;
|
|
||||||
buf += bytes;
|
|
||||||
addr += bytes;
|
|
||||||
}
|
|
||||||
up_read(&mm->mmap_sem);
|
|
||||||
mmput(mm);
|
|
||||||
|
|
||||||
return buf - old_buf;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Read the environment variable of current process specified by @key. */
|
|
||||||
int cfs_get_environ(const char *key, char *value, int *val_len)
|
|
||||||
{
|
|
||||||
struct mm_struct *mm;
|
|
||||||
char *buffer, *tmp_buf = NULL;
|
|
||||||
int buf_len = PAGE_CACHE_SIZE;
|
|
||||||
int key_len = strlen(key);
|
|
||||||
unsigned long addr;
|
|
||||||
int rc;
|
|
||||||
|
|
||||||
buffer = kmalloc(buf_len, GFP_USER);
|
|
||||||
if (!buffer)
|
|
||||||
return -ENOMEM;
|
|
||||||
|
|
||||||
mm = get_task_mm(current);
|
|
||||||
if (!mm) {
|
|
||||||
kfree(buffer);
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Avoid deadlocks on mmap_sem if called from sys_mmap_pgoff(),
|
|
||||||
* which is already holding mmap_sem for writes. If some other
|
|
||||||
* thread gets the write lock in the meantime, this thread will
|
|
||||||
* block, but at least it won't deadlock on itself. LU-1735 */
|
|
||||||
if (down_read_trylock(&mm->mmap_sem) == 0) {
|
|
||||||
kfree(buffer);
|
|
||||||
return -EDEADLK;
|
|
||||||
}
|
|
||||||
up_read(&mm->mmap_sem);
|
|
||||||
|
|
||||||
addr = mm->env_start;
|
|
||||||
while (addr < mm->env_end) {
|
|
||||||
int this_len, retval, scan_len;
|
|
||||||
char *env_start, *env_end;
|
|
||||||
|
|
||||||
memset(buffer, 0, buf_len);
|
|
||||||
|
|
||||||
this_len = min_t(int, mm->env_end - addr, buf_len);
|
|
||||||
retval = cfs_access_process_vm(current, addr, buffer,
|
|
||||||
this_len, 0);
|
|
||||||
if (retval != this_len)
|
|
||||||
break;
|
|
||||||
|
|
||||||
addr += retval;
|
|
||||||
|
|
||||||
/* Parse the buffer to find out the specified key/value pair.
|
|
||||||
* The "key=value" entries are separated by '\0'. */
|
|
||||||
env_start = buffer;
|
|
||||||
scan_len = this_len;
|
|
||||||
while (scan_len) {
|
|
||||||
char *entry;
|
|
||||||
int entry_len;
|
|
||||||
|
|
||||||
env_end = memscan(env_start, '\0', scan_len);
|
|
||||||
LASSERT(env_end >= env_start &&
|
|
||||||
env_end <= env_start + scan_len);
|
|
||||||
|
|
||||||
/* The last entry of this buffer cross the buffer
|
|
||||||
* boundary, reread it in next cycle. */
|
|
||||||
if (unlikely(env_end - env_start == scan_len)) {
|
|
||||||
/* This entry is too large to fit in buffer */
|
|
||||||
if (unlikely(scan_len == this_len)) {
|
|
||||||
CERROR("Too long env variable.\n");
|
|
||||||
GOTO(out, rc = -EINVAL);
|
|
||||||
}
|
|
||||||
addr -= scan_len;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
entry = env_start;
|
|
||||||
entry_len = env_end - env_start;
|
|
||||||
|
|
||||||
/* Key length + length of '=' */
|
|
||||||
if (entry_len > key_len + 1 &&
|
|
||||||
!memcmp(entry, key, key_len)) {
|
|
||||||
entry += key_len + 1;
|
|
||||||
entry_len -= key_len + 1;
|
|
||||||
/* The 'value' buffer passed in is too small.*/
|
|
||||||
if (entry_len >= *val_len)
|
|
||||||
GOTO(out, rc = -EOVERFLOW);
|
|
||||||
|
|
||||||
memcpy(value, entry, entry_len);
|
|
||||||
*val_len = entry_len;
|
|
||||||
GOTO(out, rc = 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
scan_len -= (env_end - env_start + 1);
|
|
||||||
env_start = env_end + 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
GOTO(out, rc = -ENOENT);
|
|
||||||
|
|
||||||
out:
|
|
||||||
mmput(mm);
|
|
||||||
kfree((void *)buffer);
|
|
||||||
if (tmp_buf)
|
|
||||||
kfree((void *)tmp_buf);
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL(cfs_get_environ);
|
|
||||||
|
|
||||||
EXPORT_SYMBOL(cfs_cap_raise);
|
EXPORT_SYMBOL(cfs_cap_raise);
|
||||||
EXPORT_SYMBOL(cfs_cap_lower);
|
EXPORT_SYMBOL(cfs_cap_lower);
|
||||||
EXPORT_SYMBOL(cfs_cap_raised);
|
EXPORT_SYMBOL(cfs_cap_raised);
|
||||||
|
|||||||
@@ -102,23 +102,17 @@ EXPORT_SYMBOL(obd_dirty_transit_pages);
|
|||||||
char obd_jobid_var[JOBSTATS_JOBID_VAR_MAX_LEN + 1] = JOBSTATS_DISABLE;
|
char obd_jobid_var[JOBSTATS_JOBID_VAR_MAX_LEN + 1] = JOBSTATS_DISABLE;
|
||||||
EXPORT_SYMBOL(obd_jobid_var);
|
EXPORT_SYMBOL(obd_jobid_var);
|
||||||
|
|
||||||
/* Get jobid of current process by reading the environment variable
|
char obd_jobid_node[JOBSTATS_JOBID_SIZE + 1];
|
||||||
|
|
||||||
|
/* Get jobid of current process from stored variable or calculate
|
||||||
|
* it from pid and user_id.
|
||||||
|
*
|
||||||
|
* Historically this was also done by reading the environment variable
|
||||||
* stored in between the "env_start" & "env_end" of task struct.
|
* stored in between the "env_start" & "env_end" of task struct.
|
||||||
*
|
* This is now deprecated.
|
||||||
* TODO:
|
|
||||||
* It's better to cache the jobid for later use if there is any
|
|
||||||
* efficient way, the cl_env code probably could be reused for this
|
|
||||||
* purpose.
|
|
||||||
*
|
|
||||||
* If some job scheduler doesn't store jobid in the "env_start/end",
|
|
||||||
* then an upcall could be issued here to get the jobid by utilizing
|
|
||||||
* the userspace tools/api. Then, the jobid must be cached.
|
|
||||||
*/
|
*/
|
||||||
int lustre_get_jobid(char *jobid)
|
int lustre_get_jobid(char *jobid)
|
||||||
{
|
{
|
||||||
int jobid_len = JOBSTATS_JOBID_SIZE;
|
|
||||||
int rc = 0;
|
|
||||||
|
|
||||||
memset(jobid, 0, JOBSTATS_JOBID_SIZE);
|
memset(jobid, 0, JOBSTATS_JOBID_SIZE);
|
||||||
/* Jobstats isn't enabled */
|
/* Jobstats isn't enabled */
|
||||||
if (strcmp(obd_jobid_var, JOBSTATS_DISABLE) == 0)
|
if (strcmp(obd_jobid_var, JOBSTATS_DISABLE) == 0)
|
||||||
@@ -132,31 +126,13 @@ int lustre_get_jobid(char *jobid)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
rc = cfs_get_environ(obd_jobid_var, jobid, &jobid_len);
|
/* Whole node dedicated to single job */
|
||||||
if (rc) {
|
if (strcmp(obd_jobid_var, JOBSTATS_NODELOCAL) == 0) {
|
||||||
if (rc == -EOVERFLOW) {
|
strcpy(jobid, obd_jobid_node);
|
||||||
/* For the PBS_JOBID and LOADL_STEP_ID keys (which are
|
return 0;
|
||||||
* variable length strings instead of just numbers), it
|
|
||||||
* might make sense to keep the unique parts for JobID,
|
|
||||||
* instead of just returning an error. That means a
|
|
||||||
* larger temp buffer for cfs_get_environ(), then
|
|
||||||
* truncating the string at some separator to fit into
|
|
||||||
* the specified jobid_len. Fix later if needed. */
|
|
||||||
static bool printed;
|
|
||||||
if (unlikely(!printed)) {
|
|
||||||
LCONSOLE_ERROR_MSG(0x16b, "%s value too large "
|
|
||||||
"for JobID buffer (%d)\n",
|
|
||||||
obd_jobid_var, jobid_len);
|
|
||||||
printed = true;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
CDEBUG((rc == -ENOENT || rc == -EINVAL ||
|
|
||||||
rc == -EDEADLK) ? D_INFO : D_ERROR,
|
|
||||||
"Get jobid for (%s) failed: rc = %d\n",
|
|
||||||
obd_jobid_var, rc);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return rc;
|
|
||||||
|
return -ENOENT;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(lustre_get_jobid);
|
EXPORT_SYMBOL(lustre_get_jobid);
|
||||||
|
|
||||||
|
|||||||
@@ -292,6 +292,31 @@ static ssize_t obd_proc_jobid_var_seq_write(struct file *file, const char *buffe
|
|||||||
}
|
}
|
||||||
LPROC_SEQ_FOPS(obd_proc_jobid_var);
|
LPROC_SEQ_FOPS(obd_proc_jobid_var);
|
||||||
|
|
||||||
|
static int obd_proc_jobid_name_seq_show(struct seq_file *m, void *v)
|
||||||
|
{
|
||||||
|
return seq_printf(m, "%s\n", obd_jobid_var);
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t obd_proc_jobid_name_seq_write(struct file *file,
|
||||||
|
const char __user *buffer,
|
||||||
|
size_t count, loff_t *off)
|
||||||
|
{
|
||||||
|
if (!count || count > JOBSTATS_JOBID_SIZE)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
if (copy_from_user(obd_jobid_node, buffer, count))
|
||||||
|
return -EFAULT;
|
||||||
|
|
||||||
|
obd_jobid_node[count] = 0;
|
||||||
|
|
||||||
|
/* Trim the trailing '\n' if any */
|
||||||
|
if (obd_jobid_node[count - 1] == '\n')
|
||||||
|
obd_jobid_node[count - 1] = 0;
|
||||||
|
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
LPROC_SEQ_FOPS(obd_proc_jobid_name);
|
||||||
|
|
||||||
/* Root for /proc/fs/lustre */
|
/* Root for /proc/fs/lustre */
|
||||||
struct proc_dir_entry *proc_lustre_root = NULL;
|
struct proc_dir_entry *proc_lustre_root = NULL;
|
||||||
EXPORT_SYMBOL(proc_lustre_root);
|
EXPORT_SYMBOL(proc_lustre_root);
|
||||||
@@ -301,6 +326,8 @@ struct lprocfs_vars lprocfs_base[] = {
|
|||||||
{ "pinger", &obd_proc_pinger_fops },
|
{ "pinger", &obd_proc_pinger_fops },
|
||||||
{ "health_check", &obd_proc_health_fops },
|
{ "health_check", &obd_proc_health_fops },
|
||||||
{ "jobid_var", &obd_proc_jobid_var_fops },
|
{ "jobid_var", &obd_proc_jobid_var_fops },
|
||||||
|
{ .name = "jobid_name",
|
||||||
|
.fops = &obd_proc_jobid_name_fops},
|
||||||
{ 0 }
|
{ 0 }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user