diff -urN linux-2.5.14-base/include/linux/init_task.h linux-2.5.14-rcu_poll_preempt/include/linux/init_task.h
--- linux-2.5.14-base/include/linux/init_task.h Mon May 6 09:07:54 2002
+++ linux-2.5.14-rcu_poll_preempt/include/linux/init_task.h Tue May 7 17:23:51 2002
@@ -79,6 +79,7 @@
 	blocked:	{{0}},				\
 	alloc_lock:	SPIN_LOCK_UNLOCKED,		\
 	journal_info:	NULL,				\
+	cpu_preempt_cntr: NULL,				\
 }
diff -urN linux-2.5.14-base/include/linux/rcupdate.h linux-2.5.14-rcu_poll_preempt/include/linux/rcupdate.h
--- linux-2.5.14-base/include/linux/rcupdate.h Thu Jan 1 05:30:00 1970
+++ linux-2.5.14-rcu_poll_preempt/include/linux/rcupdate.h Tue May 7 17:23:51 2002
@@ -0,0 +1,71 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (c) International Business Machines Corp., 2001
+ *
+ * Author: Dipankar Sarma <dipankar@in.ibm.com>
+ *
+ * Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com>
+ * and inputs from Andrea Arcangeli, Rusty Russell, Andi Kleen etc.
+ * Papers:
+ * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
+ * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * http://lse.sourceforge.net/locking/rcupdate.html
+ *
+ */
+
+#ifndef __LINUX_RCUPDATE_H
+#define __LINUX_RCUPDATE_H
+
+#include <linux/list.h>
+
+/*
+ * Callback structure for use with call_rcu().
+ */
+struct rcu_head {
+	struct list_head list;
+	void (*func)(void *obj);
+	void *arg;
+};
+
+#define RCU_HEAD_INIT(head) { LIST_HEAD_INIT(head.list), NULL, NULL }
+#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT(head)
+#define INIT_RCU_HEAD(ptr) do { \
+	INIT_LIST_HEAD(&(ptr)->list); (ptr)->func = NULL; (ptr)->arg = NULL; \
+} while (0)
+
+
+extern void FASTCALL(call_rcu(struct rcu_head *head, void (*func)(void *arg), void *arg));
+
+#ifdef CONFIG_PREEMPT
+extern void FASTCALL(call_rcu_preempt(struct rcu_head *head, void (*func)(void *arg), void *arg));
+#else
+static inline void call_rcu_preempt(struct rcu_head *head,
+		void (*func)(void *arg), void *arg)
+{
+	call_rcu(head, func, arg);
+}
+#endif
+
+extern void synchronize_kernel(void);
+
+extern void rcu_init(void);
+
+#endif /* __LINUX_RCUPDATE_H */
diff -urN linux-2.5.14-base/include/linux/sched.h linux-2.5.14-rcu_poll_preempt/include/linux/sched.h
--- linux-2.5.14-base/include/linux/sched.h Mon May 6 09:07:54 2002
+++ linux-2.5.14-rcu_poll_preempt/include/linux/sched.h Tue May 7 17:23:51 2002
@@ -28,6 +28,7 @@
 #include <linux/securebits.h>
 #include <linux/fs_struct.h>
 #include <linux/compiler.h>
+#include <linux/percpu.h>
 
 struct exec_domain;
 
@@ -162,6 +163,7 @@
 extern void flush_scheduled_tasks(void);
 extern int start_context_thread(void);
 extern int current_is_keventd(void);
+extern void force_cpu_reschedule(int cpu);
 
 struct namespace;
 
@@ -347,6 +349,7 @@
 /* journalling filesystem info */
 	void *journal_info;
 	struct dentry *proc_dentry;
+	atomic_t *cpu_preempt_cntr;
 };
 
 extern void __put_task_struct(struct task_struct *tsk);
@@ -419,6 +422,7 @@
 
 extern struct mm_struct init_mm;
 extern struct task_struct *init_tasks[NR_CPUS];
+extern long cpu_quiescent __per_cpu_data;
 
 /* PID hashing. (shouldnt this be dynamic?) */
 #define PIDHASH_SZ (4096 >> 2)
@@ -876,6 +880,53 @@
 	clear_thread_flag(TIF_SIGPENDING);
 }
 
+#ifdef CONFIG_PREEMPT
+
+extern atomic_t rcu_preempt_cntr[2] __per_cpu_data;
+extern atomic_t *curr_preempt_cntr __per_cpu_data;
+extern atomic_t *next_preempt_cntr __per_cpu_data;
+
+static inline void rcu_switch_preempt_cntr(int cpu)
+{
+	atomic_t *tmp;
+	tmp = per_cpu(curr_preempt_cntr, cpu);
+	per_cpu(curr_preempt_cntr, cpu) = per_cpu(next_preempt_cntr, cpu);
+	per_cpu(next_preempt_cntr, cpu) = tmp;
+
+}
+
+static inline void rcu_preempt_put(void)
+{
+	if (unlikely(current->cpu_preempt_cntr != NULL)) {
+		atomic_dec(current->cpu_preempt_cntr);
+		current->cpu_preempt_cntr = NULL;
+	}
+}
+
+/* Must not be preempted */
+static inline void rcu_preempt_get(void)
+{
+	if (likely(current->cpu_preempt_cntr == NULL)) {
+		current->cpu_preempt_cntr =
+			this_cpu(next_preempt_cntr);
+		atomic_inc(current->cpu_preempt_cntr);
+	}
+}
+
+static inline int rcu_cpu_preempted(int cpu)
+{
+	return (atomic_read(per_cpu(curr_preempt_cntr, cpu)) != 0);
+}
+#else
+
+#define rcu_init_preempt_cntr(cpu)	do { } while(0)
+#define rcu_switch_preempt_cntr(cpu)	do { } while(0)
+#define rcu_preempt_put()		do { } while(0)
+#define rcu_preempt_get()		do { } while(0)
+#define rcu_cpu_preempted(cpu)		(0)
+
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif
diff -urN linux-2.5.14-base/init/main.c linux-2.5.14-rcu_poll_preempt/init/main.c
--- linux-2.5.14-base/init/main.c Mon May 6 09:07:56 2002
+++ linux-2.5.14-rcu_poll_preempt/init/main.c Tue May 7 17:23:51 2002
@@ -28,6 +28,7 @@
 #include <linux/bootmem.h>
 #include <linux/tty.h>
 #include <linux/percpu.h>
+#include <linux/rcupdate.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -346,6 +347,7 @@
 	printk("Kernel command line: %s\n", saved_command_line);
 	parse_options(command_line);
 	trap_init();
+	rcu_init();
 	init_IRQ();
 	sched_init();
 	softirq_init();
diff -urN linux-2.5.14-base/kernel/Makefile linux-2.5.14-rcu_poll_preempt/kernel/Makefile
--- linux-2.5.14-base/kernel/Makefile Mon May 6 09:07:56 2002
+++ linux-2.5.14-rcu_poll_preempt/kernel/Makefile Tue May 7 17:23:51 2002
@@ -10,12 +10,12 @@
 O_TARGET := kernel.o
 
 export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o exec_domain.o \
-	printk.o platform.o
+	printk.o platform.o rcupdate.o
 
 obj-y = sched.o dma.o fork.o exec_domain.o panic.o printk.o \
 	module.o exit.o itimer.o info.o time.o softirq.o resource.o \
 	sysctl.o capability.o ptrace.o timer.o user.o \
-	signal.o sys.o kmod.o context.o futex.o platform.o
+	signal.o sys.o kmod.o context.o futex.o platform.o rcupdate.o
 
 obj-$(CONFIG_UID16) += uid16.o
 obj-$(CONFIG_MODULES) += ksyms.o
diff -urN linux-2.5.14-base/kernel/exit.c linux-2.5.14-rcu_poll_preempt/kernel/exit.c
--- linux-2.5.14-base/kernel/exit.c Mon May 6 09:07:59 2002
+++ linux-2.5.14-rcu_poll_preempt/kernel/exit.c Tue May 7 17:23:51 2002
@@ -550,6 +550,7 @@
 
 	tsk->exit_code = code;
 	exit_notify();
+	rcu_preempt_put();
 	schedule();
 	BUG();
 /*
diff -urN linux-2.5.14-base/kernel/fork.c linux-2.5.14-rcu_poll_preempt/kernel/fork.c
--- linux-2.5.14-base/kernel/fork.c Mon May 6 09:07:54 2002
+++ linux-2.5.14-rcu_poll_preempt/kernel/fork.c Tue May 7 17:23:51 2002
@@ -117,6 +117,7 @@
 	tsk->thread_info = ti;
 	ti->task = tsk;
 	atomic_set(&tsk->usage,1);
+	tsk->cpu_preempt_cntr = NULL;
 
 	return tsk;
 }
diff -urN linux-2.5.14-base/kernel/rcupdate.c linux-2.5.14-rcu_poll_preempt/kernel/rcupdate.c
--- linux-2.5.14-base/kernel/rcupdate.c Thu Jan 1 05:30:00 1970
+++ linux-2.5.14-rcu_poll_preempt/kernel/rcupdate.c Tue May 7 17:23:51 2002
@@ -0,0 +1,285 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (c) International Business Machines Corp., 2001
+ * Copyright (C) Andrea Arcangeli <andrea@suse.de> SuSE, 2001
+ *
+ * Author: Dipankar Sarma <dipankar@in.ibm.com>,
+ *	   Andrea Arcangeli <andrea@suse.de>
+ *
+ * Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com>
+ * and inputs from Andrea Arcangeli, Rusty Russell, Andi Kleen etc.
+ * Papers:
+ * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
+ * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * http://lse.sourceforge.net/locking/rcupdate.html
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/completion.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+
+/* Definition for rcupdate internal data. */
+struct rcu_data {
+	spinlock_t		lock;
+	struct list_head	nxtlist;
+	struct list_head	curlist;
+	struct tasklet_struct	tasklet;
+	unsigned long		qsmask;
+	int			polling_in_progress;
+	long			quiescent_checkpoint[NR_CPUS];
+};
+
+struct rcu_data rcu_data;
+
+#ifdef CONFIG_PREEMPT
+struct rcu_data rcu_data_preempt;
+#endif
+
+#define RCU_quiescent(cpu)	per_cpu(cpu_quiescent, cpu)
+
+/*
+ * Register a new rcu callback. This will be invoked as soon
+ * as all CPUs have performed a context switch or been seen in the
+ * idle loop or in a user process. It can be called only from
+ * process or BH context, however can be made to work from irq
+ * context too with minor code changes if necessary.
+ */
+void call_rcu(struct rcu_head *head, void (*func)(void *arg), void *arg)
+{
+	head->func = func;
+	head->arg = arg;
+
+	spin_lock_bh(&rcu_data.lock);
+	list_add(&head->list, &rcu_data.nxtlist);
+	spin_unlock_bh(&rcu_data.lock);
+
+	tasklet_hi_schedule(&rcu_data.tasklet);
+}
+
+#ifdef CONFIG_PREEMPT
+/*
+ * Same as call_rcu except that you don't need to disable preemption
+ * during the read-side of the RCU critical section. Preemption is
+ * handled transparently here.
+ */
+void call_rcu_preempt(struct rcu_head *head, void (*func)(void *arg), void *arg)
+{
+	head->func = func;
+	head->arg = arg;
+
+	spin_lock_bh(&rcu_data_preempt.lock);
+	list_add(&head->list, &rcu_data_preempt.nxtlist);
+	spin_unlock_bh(&rcu_data_preempt.lock);
+
+	tasklet_hi_schedule(&rcu_data_preempt.tasklet);
+}
+static inline void rcu_setup_grace_period(struct rcu_data *rdata, int cpu)
+{
+	rdata->qsmask |= 1UL << cpu;
+	rdata->quiescent_checkpoint[cpu] = RCU_quiescent(cpu);
+	if (rdata == &rcu_data_preempt)
+		rcu_switch_preempt_cntr(cpu);
+	force_cpu_reschedule(cpu);
+}
+static inline int rcu_grace_period_complete(struct rcu_data *rdata, int cpu)
+{
+	if (rdata == &rcu_data) {
+		return ((rdata->qsmask & (1UL << cpu)) &&
+			(rdata->quiescent_checkpoint[cpu] !=
+			RCU_quiescent(cpu)));
+	} else {
+		return ((rdata->qsmask & (1UL << cpu)) &&
+			(rdata->quiescent_checkpoint[cpu] !=
+			RCU_quiescent(cpu)) &&
+			!rcu_cpu_preempted(cpu));
+	}
+}
+#else
+static inline void rcu_setup_grace_period(struct rcu_data *rdata, int cpu)
+{
+	rdata->qsmask |= 1UL << cpu;
+	rdata->quiescent_checkpoint[cpu] = RCU_quiescent(cpu);
+	force_cpu_reschedule(cpu);
+}
+static inline int rcu_grace_period_complete(struct rcu_data *rdata, int cpu)
+{
+	return ((rdata->qsmask & (1UL << cpu)) &&
+		(rdata->quiescent_checkpoint[cpu] != RCU_quiescent(cpu)));
+}
+#endif
+
+
+static int rcu_prepare_polling(struct rcu_data *rdata)
+{
+	int stop;
+	int i;
+
+#ifdef DEBUG
+	if (!list_empty(&rdata->curlist))
+		BUG();
+#endif
+
+	stop = 1;
+	if (!list_empty(&rdata->nxtlist)) {
+		list_splice(&rdata->nxtlist, &rdata->curlist);
+		INIT_LIST_HEAD(&rdata->nxtlist);
+
+		rdata->polling_in_progress = 1;
+
+		for (i = 0; i < smp_num_cpus; i++) {
+			int cpu = cpu_logical_map(i);
+			rcu_setup_grace_period(rdata, cpu);
+		}
+		stop = 0;
+	}
+
+	return stop;
+}
+
+/*
+ * Invoke the completed RCU callbacks.
+ */
+static void rcu_invoke_callbacks(struct rcu_data *rdata)
+{
+	struct list_head *entry;
+	struct rcu_head *head;
+
+#ifdef DEBUG
+	if (list_empty(&rdata->curlist))
+		BUG();
+#endif
+
+	entry = rdata->curlist.prev;
+	do {
+		head = list_entry(entry, struct rcu_head, list);
+		entry = entry->prev;
+
+		head->func(head->arg);
+	} while (entry != &rdata->curlist);
+
+	INIT_LIST_HEAD(&rdata->curlist);
+}
+
+static int rcu_completion(struct rcu_data *rdata)
+{
+	int stop;
+
+	rdata->polling_in_progress = 0;
+	rcu_invoke_callbacks(rdata);
+
+	stop = rcu_prepare_polling(rdata);
+
+	return stop;
+}
+
+static int rcu_polling(struct rcu_data *rdata)
+{
+	int i;
+	int stop;
+
+	for (i = 0; i < smp_num_cpus; i++) {
+		int cpu = cpu_logical_map(i);
+
+		if (rcu_grace_period_complete(rdata, cpu))
+			rdata->qsmask &= ~(1UL << cpu);
+	}
+
+	stop = 0;
+	if (!rdata->qsmask)
+		stop = rcu_completion(rdata);
+
+	return stop;
+}
+
+/*
+ * Look into the per-cpu callback information to see if there is
+ * any processing necessary - if so do it.
+ */
+static void rcu_process_callbacks(unsigned long data)
+{
+	int stop;
+	struct rcu_data *rdata = (struct rcu_data *)data;
+
+	spin_lock(&rdata->lock);
+	if (!rdata->polling_in_progress)
+		stop = rcu_prepare_polling(rdata);
+	else
+		stop = rcu_polling(rdata);
+	spin_unlock(&rdata->lock);
+
+	if (!stop)
+		tasklet_hi_schedule(&rdata->tasklet);
+}
+
+/* Because of FASTCALL declaration of complete, we use this wrapper */
+static void wakeme_after_rcu(void *completion)
+{
+	complete(completion);
+}
+
+static void rcu_init_data(struct rcu_data *rdata)
+{
+	tasklet_init(&rdata->tasklet, rcu_process_callbacks,
+			(unsigned long)rdata);
+	INIT_LIST_HEAD(&rdata->nxtlist);
+	INIT_LIST_HEAD(&rdata->curlist);
+	spin_lock_init(&rdata->lock);
+}
+
+/*
+ * Initializes rcu mechanism. Assumed to be called early.
+ * That is before local timer(SMP) or jiffie timer (uniproc) is setup.
+ */
+void __init rcu_init(void)
+{
+	rcu_init_data(&rcu_data);
+#ifdef CONFIG_PREEMPT
+	rcu_init_data(&rcu_data_preempt);
+#endif
+}
+
+/*
+ * Wait until all the CPUs have gone through a "quiescent" state.
+ */
+void synchronize_kernel(void)
+{
+	struct rcu_head rcu;
+	DECLARE_COMPLETION(completion);
+
+	/* Will wake me after RCU finished */
+	call_rcu_preempt(&rcu, wakeme_after_rcu, &completion);
+
+	/* Wait for it */
+	wait_for_completion(&completion);
+}
+
+EXPORT_SYMBOL(call_rcu);
+#ifdef CONFIG_PREEMPT
+EXPORT_SYMBOL(call_rcu_preempt);
+#endif
+EXPORT_SYMBOL(synchronize_kernel);
diff -urN linux-2.5.14-base/kernel/sched.c linux-2.5.14-rcu_poll_preempt/kernel/sched.c
--- linux-2.5.14-base/kernel/sched.c Mon May 6 09:07:57 2002
+++ linux-2.5.14-rcu_poll_preempt/kernel/sched.c Tue May 7 17:23:51 2002
@@ -22,6 +22,7 @@
 #include <linux/interrupt.h>
 #include <linux/completion.h>
 #include <linux/kernel_stat.h>
+#include <linux/percpu.h>
 
 /*
  * Priority of a process goes from 0 to 139. The 0-99
@@ -156,12 +157,32 @@
 
 static struct runqueue runqueues[NR_CPUS] __cacheline_aligned;
 
+long cpu_quiescent __per_cpu_data;
+
 #define cpu_rq(cpu)		(runqueues + (cpu))
 #define this_rq()		cpu_rq(smp_processor_id())
 #define task_rq(p)		cpu_rq((p)->thread_info->cpu)
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 #define rt_task(p)		((p)->prio < MAX_RT_PRIO)
+
+#ifdef CONFIG_PREEMPT
+atomic_t rcu_preempt_cntr[2] __per_cpu_data;
+atomic_t *curr_preempt_cntr __per_cpu_data;
+atomic_t *next_preempt_cntr __per_cpu_data;
+#endif
+
+#ifdef CONFIG_PREEMPT
+static inline void rcu_init_preempt_cntr(int cpu)
+{
+	atomic_set(&per_cpu(rcu_preempt_cntr[0], cpu), 0);
+	atomic_set(&per_cpu(rcu_preempt_cntr[1], cpu), 0);
+	per_cpu(curr_preempt_cntr, cpu) =
+		&per_cpu(rcu_preempt_cntr[1], cpu);
+	per_cpu(next_preempt_cntr, cpu) =
+		&per_cpu(rcu_preempt_cntr[0], cpu);
+}
+#endif
+
 static inline runqueue_t *task_rq_lock(task_t *p, unsigned long *flags)
 {
 	struct runqueue *rq;
@@ -773,8 +794,11 @@
 	 * if entering from preempt_schedule, off a kernel preemption,
 	 * go straight to picking the next task.
 	 */
-	if (unlikely(preempt_get_count() & PREEMPT_ACTIVE))
+	if (unlikely(preempt_get_count() & PREEMPT_ACTIVE)) {
 		goto pick_next_task;
+	} else {
+		rcu_preempt_put();
+	}
 
 	switch (prev->state) {
 	case TASK_INTERRUPTIBLE:
@@ -817,6 +841,7 @@
 switch_tasks:
 	prefetch(next);
 	clear_tsk_need_resched(prev);
+	per_cpu(cpu_quiescent, prev->thread_info->cpu)++;
 
 	if (likely(prev != next)) {
 		rq->nr_switches++;
@@ -857,6 +882,7 @@
 		return;
 
 	ti->preempt_count = PREEMPT_ACTIVE;
+	rcu_preempt_get();
 	schedule();
 	ti->preempt_count = 0;
 	barrier();
@@ -1031,6 +1057,21 @@
 	task_rq_unlock(rq, &flags);
 }
 
+void force_cpu_reschedule(int cpu)
+{
+	unsigned long flags;
+	struct runqueue *rq, *newrq;
+	struct task_struct *p;
+
+	rq = cpu_rq(cpu);
+	p = rq->curr;
+	newrq = task_rq_lock(p, &flags);
+	if (newrq == rq)
+		resched_task(p);
+	task_rq_unlock(newrq, &flags);
+}
+
+
 #ifndef __alpha__
 
 /*
@@ -1592,6 +1633,7 @@
 		spin_lock_init(&rq->lock);
 		spin_lock_init(&rq->frozen);
 		INIT_LIST_HEAD(&rq->migration_queue);
+		rcu_init_preempt_cntr(i);
 
 		for (j = 0; j < 2; j++) {
 			array = rq->arrays + j;
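
For readers following the interface added above, here is a minimal usage sketch. It is not part of the patch: the structure, list and lock names (my_entry, my_list, my_lock) are hypothetical, while struct rcu_head, call_rcu(), call_rcu_preempt() and synchronize_kernel() are the interfaces introduced by include/linux/rcupdate.h above. It shows the writer side unlinking an element under a lock and deferring the kfree() until every CPU has passed through a quiescent state, so lock-free readers still traversing the list are never freed out from under them.

/*
 * Usage sketch only -- not part of the patch.  Assumes a hypothetical
 * list of my_entry structures protected against concurrent writers by
 * my_lock; readers traverse it without taking any lock.
 */
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>

struct my_entry {
	struct list_head node;
	struct rcu_head rcu;
	int key;
};

static LIST_HEAD(my_list);
static spinlock_t my_lock = SPIN_LOCK_UNLOCKED;

/* RCU callback: runs only after all CPUs have passed a quiescent state. */
static void my_entry_free(void *arg)
{
	kfree(arg);
}

/* Writer side: unlink under the lock, defer the actual free to RCU. */
static void my_entry_delete(struct my_entry *e)
{
	spin_lock(&my_lock);
	list_del(&e->node);
	spin_unlock(&my_lock);

	/*
	 * Readers may still hold a reference to 'e'.  call_rcu_preempt()
	 * (or plain call_rcu() when readers run with preemption disabled)
	 * invokes my_entry_free() only once they are all done.
	 */
	call_rcu_preempt(&e->rcu, my_entry_free, e);
}

A writer that is allowed to sleep can instead call synchronize_kernel() after the list_del() and free the entry directly once it returns.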