patch-2.4.0-test3 linux/kernel/sched.c
- Lines: 464
- Date: Mon Jul 10 13:18:57 2000
- Orig file: v2.4.0-test2/linux/kernel/sched.c
- Orig date: Fri Jun 23 21:55:23 2000
diff -u --recursive --new-file v2.4.0-test2/linux/kernel/sched.c linux/kernel/sched.c
@@ -43,6 +43,31 @@
extern void mem_use(void);
/*
+ * Scheduling quanta.
+ *
+ * NOTE! The unix "nice" value influences how long a process
+ * gets. The nice value ranges from -20 to +19, where a -20
+ * is a "high-priority" task, and a "+10" is a low-priority
+ * task.
+ *
+ * We want the time-slice to be around 50ms or so, so this
+ * calculation depends on the value of HZ.
+ */
+#if HZ < 200
+#define LOG2_HZ 7
+#elif HZ < 400
+#define LOG2_HZ 8
+#elif HZ < 800
+#define LOG2_HZ 9
+#elif HZ < 1600
+#define LOG2_HZ 10
+#else
+#define LOG2_HZ 11
+#endif
+
+#define NICE_TO_TICKS(nice) ((((20)-(nice)) >> (LOG2_HZ-5))+1)
+
+/*
* Init task must be ok at boot for the ix86 as we will check its signals
* via the SMP irq return path.
*/
@@ -60,8 +85,8 @@
* The run-queue lock locks the parts that actually access
* and change the run-queues, and have to be interrupt-safe.
*/
-__cacheline_aligned spinlock_t runqueue_lock = SPIN_LOCK_UNLOCKED; /* second */
-__cacheline_aligned rwlock_t tasklist_lock = RW_LOCK_UNLOCKED; /* third */
+spinlock_t runqueue_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED; /* second */
+rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* third */
static LIST_HEAD(runqueue_head);
@@ -78,18 +103,20 @@
} aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}};
#define cpu_curr(cpu) aligned_data[(cpu)].schedule_data.curr
+#define last_schedule(cpu) aligned_data[(cpu)].schedule_data.last_schedule
struct kernel_stat kstat = { 0 };
#ifdef CONFIG_SMP
#define idle_task(cpu) (init_tasks[cpu_number_map(cpu)])
-#define can_schedule(p) (!(p)->has_cpu)
+#define can_schedule(p,cpu) ((!(p)->has_cpu) && \
+ ((p)->cpus_allowed & (1 << cpu)))
#else
#define idle_task(cpu) (&init_task)
-#define can_schedule(p) (1)
+#define can_schedule(p,cpu) (1)
#endif
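
The extra cpu argument lets both reschedule_idle() and the schedule() selection loop skip CPUs the task is not allowed to run on. A minimal sketch of the new can_schedule() test (the struct and values below are invented for illustration; only the bitmask logic mirrors the patch):

#include <stdio.h>

struct fake_task {
	int has_cpu;			/* already running somewhere? */
	unsigned long cpus_allowed;	/* bit N set => may run on CPU N */
};

static int can_schedule(const struct fake_task *p, int cpu)
{
	return !p->has_cpu && (p->cpus_allowed & (1UL << cpu));
}

int main(void)
{
	struct fake_task p = { 0, 0x5 };	/* allowed on CPUs 0 and 2 only */
	int cpu;

	for (cpu = 0; cpu < 4; cpu++)
		printf("CPU %d: %s\n", cpu,
		       can_schedule(&p, cpu) ? "eligible" : "skipped");
	return 0;
}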
@@ -144,7 +171,7 @@
/* .. and a slight advantage to the current MM */
if (p->mm == this_mm || !p->mm)
weight += 1;
- weight += p->priority;
+ weight += 20 - p->nice;
out:
return weight;
@@ -183,87 +210,108 @@
* up unlocking it early, so the caller must not unlock the
* runqueue, it's always done by reschedule_idle().
*/
-static inline void reschedule_idle(struct task_struct * p, unsigned long flags)
+static void reschedule_idle(struct task_struct * p, unsigned long flags)
{
#ifdef CONFIG_SMP
- int this_cpu = smp_processor_id(), target_cpu;
- struct task_struct *tsk;
- int cpu, best_cpu, i;
+ int this_cpu = smp_processor_id();
+ struct task_struct *tsk, *target_tsk;
+ int cpu, best_cpu, i, max_prio;
+ cycles_t oldest_idle;
/*
* shortcut if the woken up task's last CPU is
* idle now.
*/
best_cpu = p->processor;
- tsk = idle_task(best_cpu);
- if (cpu_curr(best_cpu) == tsk)
- goto send_now;
+ if (can_schedule(p, best_cpu)) {
+ tsk = idle_task(best_cpu);
+ if (cpu_curr(best_cpu) == tsk)
+ goto send_now_idle;
+
+ /*
+ * Maybe this process has enough priority to preempt
+ * its preferred CPU. (this is a shortcut):
+ */
+ tsk = cpu_curr(best_cpu);
+ if (preemption_goodness(tsk, p, best_cpu) > 1)
+ goto preempt_now;
+ }
/*
* We know that the preferred CPU has a cache-affine current
* process, lets try to find a new idle CPU for the woken-up
- * process:
- */
- for (i = smp_num_cpus - 1; i >= 0; i--) {
+ * process. Select the least recently active idle CPU. (that
+ * one will have the least active cache context.) Also find
+ * the executing process which has the least priority.
+ */
+ oldest_idle = -1ULL;
+ target_tsk = NULL;
+ max_prio = 1;
+
+ for (i = 0; i < smp_num_cpus; i++) {
cpu = cpu_logical_map(i);
- if (cpu == best_cpu)
+ if (!can_schedule(p, cpu))
continue;
tsk = cpu_curr(cpu);
/*
- * We use the last available idle CPU. This creates
+ * We use the first available idle CPU. This creates
* a priority list between idle CPUs, but this is not
* a problem.
*/
- if (tsk == idle_task(cpu))
- goto send_now;
- }
-
- /*
- * No CPU is idle, but maybe this process has enough priority
- * to preempt it's preferred CPU.
- */
- tsk = cpu_curr(best_cpu);
- if (preemption_goodness(tsk, p, best_cpu) > 0)
- goto send_now;
+ if (tsk == idle_task(cpu)) {
+ if (last_schedule(cpu) < oldest_idle) {
+ oldest_idle = last_schedule(cpu);
+ target_tsk = tsk;
+ }
+ } else {
+ if (oldest_idle == -1ULL) {
+ int prio = preemption_goodness(tsk, p, cpu);
- /*
- * We will get here often - or in the high CPU contention
- * case. No CPU is idle and this process is either lowprio or
- * the preferred CPU is highprio. Try to preempt some other CPU
- * only if it's RT or if it's iteractive and the preferred
- * cpu won't reschedule shortly.
- */
- if (p->avg_slice < cacheflush_time || (p->policy & ~SCHED_YIELD) != SCHED_OTHER) {
- for (i = smp_num_cpus - 1; i >= 0; i--) {
- cpu = cpu_logical_map(i);
- if (cpu == best_cpu)
- continue;
- tsk = cpu_curr(cpu);
- if (preemption_goodness(tsk, p, cpu) > 0)
- goto send_now;
+ if (prio > max_prio) {
+ max_prio = prio;
+ target_tsk = tsk;
+ }
+ }
}
}
+ tsk = target_tsk;
+ if (tsk) {
+ if (oldest_idle != -1ULL)
+ goto send_now_idle;
+ goto preempt_now;
+ }
spin_unlock_irqrestore(&runqueue_lock, flags);
return;
-send_now:
- target_cpu = tsk->processor;
+send_now_idle:
+ /*
+ * If need_resched == -1 then we can skip sending the IPI
+ * altogether, tsk->need_resched is actively watched by the
+ * idle thread.
+ */
+ if (!tsk->need_resched)
+ smp_send_reschedule(tsk->processor);
+ tsk->need_resched = 1;
+ spin_unlock_irqrestore(&runqueue_lock, flags);
+ return;
+
+preempt_now:
tsk->need_resched = 1;
spin_unlock_irqrestore(&runqueue_lock, flags);
/*
* the APIC stuff can go outside of the lock because
* it uses no task information, only CPU#.
*/
- if (target_cpu != this_cpu)
- smp_send_reschedule(target_cpu);
+ if (tsk->processor != this_cpu)
+ smp_send_reschedule(tsk->processor);
return;
#else /* UP */
int this_cpu = smp_processor_id();
struct task_struct *tsk;
tsk = cpu_curr(this_cpu);
- if (preemption_goodness(tsk, p, this_cpu) > 0)
+ if (preemption_goodness(tsk, p, this_cpu) > 1)
tsk->need_resched = 1;
spin_unlock_irqrestore(&runqueue_lock, flags);
#endif
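
The rewritten reschedule_idle() makes a single pass over all CPUs: it remembers the idle CPU with the oldest last_schedule timestamp (the one least likely to hold useful cache context) and, only while no idle CPU has been seen, the running task over which p has the largest preemption goodness above 1. A rough userspace model of that selection, with the per-CPU state replaced by invented arrays:

/*
 * Userspace model of the selection loop: prefer the longest-idle CPU,
 * otherwise the CPU whose current task loses by the largest
 * preemption goodness > 1.  The per-CPU data here is made up.
 */
#include <stdio.h>

#define NCPUS 4

int main(void)
{
	int is_idle[NCPUS] = { 0, 1, 0, 1 };
	unsigned long long last_schedule[NCPUS] = { 0, 900, 0, 400 };
	int prio_gain[NCPUS] = { 2, 0, 3, 0 };	/* preemption_goodness() stand-in */

	unsigned long long oldest_idle = ~0ULL;
	int target = -1, max_prio = 1, cpu;

	for (cpu = 0; cpu < NCPUS; cpu++) {
		if (is_idle[cpu]) {
			if (last_schedule[cpu] < oldest_idle) {
				oldest_idle = last_schedule[cpu];
				target = cpu;
			}
		} else if (oldest_idle == ~0ULL && prio_gain[cpu] > max_prio) {
			max_prio = prio_gain[cpu];
			target = cpu;
		}
	}

	if (target < 0)
		printf("no CPU worth disturbing\n");
	else
		printf("wake on CPU %d (%s)\n", target,
		       oldest_idle != ~0ULL ? "idle" : "preempt");
	return 0;
}

As in the patch, once any idle CPU has been found the preemption branch is ignored, and a preemption target is only accepted when the goodness difference exceeds 1.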
@@ -413,10 +461,12 @@
unsigned long flags;
spin_lock_irqsave(&runqueue_lock, flags);
+ prev->has_cpu = 0;
reschedule_idle(prev, flags); // spin_unlocks runqueue
+ } else {
+ wmb();
+ prev->has_cpu = 0;
}
- wmb();
- prev->has_cpu = 0;
#endif /* CONFIG_SMP */
}
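
In this hunk prev->has_cpu is cleared under the runqueue lock before reschedule_idle() runs, and in the non-reschedule path it is published only after a wmb(). A userspace analogy of that publish ordering, using C11 release semantics in place of wmb() (an assumption for illustration, not kernel code):

/*
 * Analogy only: every write done while switching the task out must be
 * visible before has_cpu is cleared, so a remote CPU that observes
 * has_cpu == 0 also observes the saved context.
 */
#include <stdatomic.h>
#include <stdio.h>

struct fake_task {
	int saved_context;	/* stands in for the switched-out state */
	atomic_int has_cpu;
};

static void finish_switch(struct fake_task *prev)
{
	prev->saved_context = 42;			/* written first  */
	atomic_store_explicit(&prev->has_cpu, 0,
			      memory_order_release);	/* published last */
}

int main(void)
{
	struct fake_task prev = { 0, 1 };

	finish_switch(&prev);
	printf("has_cpu=%d context=%d\n",
	       atomic_load(&prev.has_cpu), prev.saved_context);
	return 0;
}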
@@ -501,7 +551,7 @@
still_running_back:
list_for_each(tmp, &runqueue_head) {
p = list_entry(tmp, struct task_struct, run_list);
- if (can_schedule(p)) {
+ if (can_schedule(p, this_cpu)) {
int weight = goodness(p, this_cpu, prev->active_mm);
if (weight > c)
c = weight, next = p;
@@ -540,13 +590,6 @@
t = get_cycles();
this_slice = t - sched_data->last_schedule;
sched_data->last_schedule = t;
-
- /*
- * Exponentially fading average calculation, with
- * some weight so it doesnt get fooled easily by
- * smaller irregularities.
- */
- prev->avg_slice = (this_slice*1 + prev->avg_slice*1)/2;
}
/*
@@ -604,7 +647,7 @@
spin_unlock_irq(&runqueue_lock);
read_lock(&tasklist_lock);
for_each_task(p)
- p->counter = (p->counter >> 1) + p->priority;
+ p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice);
read_unlock(&tasklist_lock);
spin_lock_irq(&runqueue_lock);
}
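
The recalculation now feeds NICE_TO_TICKS(p->nice) into the same decay as before: a task that keeps sleeping retains half of its leftover counter plus one fresh slice, so the counter is bounded at roughly twice the slice. A small sketch of that behavior, again assuming HZ = 100 (LOG2_HZ = 7) as in the first hunk:

#include <stdio.h>

#define LOG2_HZ 7
#define NICE_TO_TICKS(nice) ((((20)-(nice)) >> (LOG2_HZ-5))+1)

int main(void)
{
	int counter = 0, round;

	for (round = 1; round <= 6; round++) {
		counter = (counter >> 1) + NICE_TO_TICKS(0);
		printf("after recalc %d: counter = %d ticks\n", round, counter);
	}
	/* converges toward 2 * NICE_TO_TICKS(0) - 1 = 11 ticks */
	return 0;
}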
@@ -630,7 +673,7 @@
move_rr_last:
if (!prev->counter) {
- prev->counter = prev->priority;
+ prev->counter = NICE_TO_TICKS(prev->nice);
move_last_runqueue(prev);
}
goto move_rr_back;
@@ -641,15 +684,20 @@
return;
}
-static inline void __wake_up_common(wait_queue_head_t *q, unsigned int mode, const int sync)
+static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode,
+ const int sync)
{
struct list_head *tmp, *head;
- struct task_struct *p;
+ struct task_struct *p, *best_exclusive;
unsigned long flags;
+ int best_cpu, irq;
if (!q)
goto out;
+ best_cpu = smp_processor_id();
+ irq = in_interrupt();
+ best_exclusive = NULL;
wq_write_lock_irqsave(&q->lock, flags);
#if WAITQUEUE_DEBUG
@@ -661,10 +709,13 @@
if (!head->next || !head->prev)
WQ_BUG();
#endif
- list_for_each(tmp, head) {
+ tmp = head->next;
+ while (tmp != head) {
unsigned int state;
wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
+ tmp = tmp->next;
+
#if WAITQUEUE_DEBUG
CHECK_MAGIC(curr->__magic);
#endif
@@ -674,15 +725,37 @@
#if WAITQUEUE_DEBUG
curr->__waker = (long)__builtin_return_address(0);
#endif
- if (sync)
- wake_up_process_synchronous(p);
- else
- wake_up_process(p);
- if (state & mode & TASK_EXCLUSIVE)
- break;
+ /*
+ * If waking up from an interrupt context then
+ * prefer processes which are affine to this
+ * CPU.
+ */
+ if (irq && (state & mode & TASK_EXCLUSIVE)) {
+ if (!best_exclusive)
+ best_exclusive = p;
+ else if ((p->processor == best_cpu) &&
+ (best_exclusive->processor != best_cpu))
+ best_exclusive = p;
+ } else {
+ if (sync)
+ wake_up_process_synchronous(p);
+ else
+ wake_up_process(p);
+ if (state & mode & TASK_EXCLUSIVE)
+ break;
+ }
}
}
+ if (best_exclusive)
+ best_exclusive->state = TASK_RUNNING;
wq_write_unlock_irqrestore(&q->lock, flags);
+
+ if (best_exclusive) {
+ if (sync)
+ wake_up_process_synchronous(best_exclusive);
+ else
+ wake_up_process(best_exclusive);
+ }
out:
return;
}
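
For wakeups from interrupt context, exclusive waiters are no longer taken in plain list order: the loop remembers one "best" exclusive waiter, preferring one whose last CPU is the CPU handling the interrupt, marks it TASK_RUNNING under the wait-queue lock, and wakes it only after the lock is dropped. A small model of that choice (the waiter list is invented, and every entry is taken to be an exclusive waiter in the right state):

#include <stdio.h>

struct waiter {
	const char *name;
	int processor;		/* CPU the waiter last ran on */
};

int main(void)
{
	struct waiter queue[] = { { "A", 2 }, { "B", 0 }, { "C", 1 } };
	int best_cpu = 1;	/* CPU the interrupt arrived on */
	struct waiter *best_exclusive = NULL;
	unsigned i;

	for (i = 0; i < sizeof(queue) / sizeof(queue[0]); i++) {
		if (!best_exclusive)
			best_exclusive = &queue[i];
		else if (queue[i].processor == best_cpu &&
			 best_exclusive->processor != best_cpu)
			best_exclusive = &queue[i];
	}
	printf("wake exclusive waiter %s (cpu %d)\n",
	       best_exclusive->name, best_exclusive->processor);
	return 0;
}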
@@ -697,6 +770,7 @@
__wake_up_common(q, mode, 1);
}
+
#define SLEEP_ON_VAR \
unsigned long flags; \
wait_queue_t wait; \
@@ -772,50 +846,28 @@
asmlinkage long sys_nice(int increment)
{
- unsigned long newprio;
- int increase = 0;
+ long newprio;
/*
* Setpriority might change our priority at the same moment.
* We don't have to worry. Conceptually one call occurs first
* and we have a single winner.
*/
-
- newprio = increment;
if (increment < 0) {
if (!capable(CAP_SYS_NICE))
return -EPERM;
- newprio = -increment;
- increase = 1;
+ if (increment < -40)
+ increment = -40;
}
+ if (increment > 40)
+ increment = 40;
- if (newprio > 40)
- newprio = 40;
- /*
- * do a "normalization" of the priority (traditionally
- * Unix nice values are -20 to 20; Linux doesn't really
- * use that kind of thing, but uses the length of the
- * timeslice instead (default 200 ms). The rounding is
- * why we want to avoid negative values.
- */
- newprio = (newprio * DEF_PRIORITY + 10) / 20;
- increment = newprio;
- if (increase)
- increment = -increment;
- /*
- * Current->priority can change between this point
- * and the assignment. We are assigning not doing add/subs
- * so thats ok. Conceptually a process might just instantaneously
- * read the value we stomp over. I don't think that is an issue
- * unless posix makes it one. If so we can loop on changes
- * to current->priority.
- */
- newprio = current->priority - increment;
- if ((signed) newprio < 1)
- newprio = 1;
- if (newprio > DEF_PRIORITY*2)
- newprio = DEF_PRIORITY*2;
- current->priority = newprio;
+ newprio = current->nice + increment;
+ if (newprio < -20)
+ newprio = -20;
+ if (newprio > 19)
+ newprio = 19;
+ current->nice = newprio;
return 0;
}
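
sys_nice() now works directly in nice units: the increment is clamped to [-40, 40] (negative increments still require CAP_SYS_NICE) and the resulting nice value to the usual [-20, 19] range. A standalone sketch of just the clamping arithmetic, with the capability check omitted and current->nice replaced by a plain parameter:

#include <stdio.h>

static int nice_after(int current_nice, int increment)
{
	long newprio;

	if (increment < -40)
		increment = -40;
	if (increment > 40)
		increment = 40;

	newprio = current_nice + increment;
	if (newprio < -20)
		newprio = -20;
	if (newprio > 19)
		newprio = 19;
	return (int)newprio;
}

int main(void)
{
	printf("nice 0  + 100 -> %d\n", nice_after(0, 100));	/* 19  */
	printf("nice 10 - 100 -> %d\n", nice_after(10, -100));	/* -20 */
	return 0;
}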
@@ -1007,12 +1059,23 @@
asmlinkage long sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
{
struct timespec t;
+ struct task_struct *p;
+ int retval = -EINVAL;
- t.tv_sec = 0;
- t.tv_nsec = 150000;
- if (copy_to_user(interval, &t, sizeof(struct timespec)))
- return -EFAULT;
- return 0;
+ if (pid < 0)
+ goto out_nounlock;
+
+ retval = -ESRCH;
+ read_lock(&tasklist_lock);
+ p = find_process_by_pid(pid);
+ if (p)
+ jiffies_to_timespec(p->policy & SCHED_FIFO ? 0 : NICE_TO_TICKS(p->nice),
+ &t);
+ read_unlock(&tasklist_lock);
+ if (p)
+ retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
+out_nounlock:
+ return retval;
}
static void show_task(struct task_struct * p)
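
Instead of a hard-coded 150000 ns, sys_sched_rr_get_interval() now looks the task up and converts its slice to a timespec, reporting 0 for SCHED_FIFO tasks. The conversion behind jiffies_to_timespec() amounts to splitting jiffies * (1e9 / HZ) nanoseconds into seconds and nanoseconds; the sketch below only illustrates that arithmetic, assuming HZ = 100 and a nice-0 task (6-tick slice as above), and is not the kernel helper itself:

#include <stdio.h>

#define HZ 100

struct timespec_sketch {
	long tv_sec;
	long tv_nsec;
};

static void jiffies_to_timespec_sketch(unsigned long jiffies,
				       struct timespec_sketch *t)
{
	t->tv_sec = jiffies / HZ;
	t->tv_nsec = (jiffies % HZ) * (1000000000L / HZ);
}

int main(void)
{
	struct timespec_sketch t;

	jiffies_to_timespec_sketch(6, &t);	/* NICE_TO_TICKS(0) at HZ=100 */
	printf("rr interval: %ld s %ld ns\n", t.tv_sec, t.tv_nsec);
	return 0;
}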