#define root_task_group init_task_group
-/* task_group_lock serializes add/remove of task groups and also changes to
- * a task group's cpu shares.
- */
+/* task_group_lock serializes the addition/removal of task groups */
static DEFINE_SPINLOCK(task_group_lock);
#ifdef CONFIG_FAIR_GROUP_SCHED
* leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This
* list is used during load balance.
*/
+ int on_list;
struct list_head leaf_cfs_rq_list;
struct task_group *tg; /* group that "owns" this runqueue */
#ifdef CONFIG_FAIR_GROUP_SCHED
-static void update_cfs_load(struct cfs_rq *cfs_rq);
+static void update_cfs_load(struct cfs_rq *cfs_rq, int lb);
static void update_cfs_shares(struct cfs_rq *cfs_rq);
/*
raw_spin_lock_irqsave(&rq->lock, flags);
update_rq_clock(rq);
- update_cfs_load(cfs_rq);
+ update_cfs_load(cfs_rq, 1);
load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1);
load_avg -= cfs_rq->load_contribution;
#ifdef CONFIG_FAIR_GROUP_SCHED
static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
- struct sched_entity *se, int cpu, int add,
+ struct sched_entity *se, int cpu,
struct sched_entity *parent)
{
struct rq *rq = cpu_rq(cpu);
tg->cfs_rq[cpu] = cfs_rq;
init_cfs_rq(cfs_rq, rq);
cfs_rq->tg = tg;
- if (add)
- list_add(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
tg->se[cpu] = se;
/* se could be NULL for init_task_group */
#ifdef CONFIG_RT_GROUP_SCHED
static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
- struct sched_rt_entity *rt_se, int cpu, int add,
+ struct sched_rt_entity *rt_se, int cpu,
struct sched_rt_entity *parent)
{
struct rq *rq = cpu_rq(cpu);
init_rt_rq(rt_rq, rq);
rt_rq->tg = tg;
rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
- if (add)
- list_add(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
tg->rt_se[cpu] = rt_se;
if (!rt_se)
* We achieve this by letting init_task_group's tasks sit
* directly in rq->cfs (i.e init_task_group->se[] = NULL).
*/
- init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, 1, NULL);
+ init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, NULL);
#endif
#endif /* CONFIG_FAIR_GROUP_SCHED */
#ifdef CONFIG_RT_GROUP_SCHED
INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
#ifdef CONFIG_CGROUP_SCHED
- init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, 1, NULL);
+ init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, NULL);
#endif
#endif
if (!se)
goto err_free_rq;
- init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
+ init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
}
return 1;
return 0;
}
-static inline void register_fair_sched_group(struct task_group *tg, int cpu)
-{
- list_add_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list,
- &cpu_rq(cpu)->leaf_cfs_rq_list);
-}
-
static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
{
- list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list);
+ struct rq *rq = cpu_rq(cpu);
+ unsigned long flags;
+ int i;
+
+ /*
+ * Only empty task groups can be destroyed; so we can speculatively
+ * check on_list without danger of it being re-added.
+ */
+ if (!tg->cfs_rq[cpu]->on_list)
+ return;
+
+ raw_spin_lock_irqsave(&rq->lock, flags);
+ list_del_leaf_cfs_rq(tg->cfs_rq[i]);
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
}
#else /* !CONFG_FAIR_GROUP_SCHED */
static inline void free_fair_sched_group(struct task_group *tg)
return 1;
}
-static inline void register_fair_sched_group(struct task_group *tg, int cpu)
-{
-}
-
static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
{
}
if (!rt_se)
goto err_free_rq;
- init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
+ init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
}
return 1;
err:
return 0;
}
-
-static inline void register_rt_sched_group(struct task_group *tg, int cpu)
-{
- list_add_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list,
- &cpu_rq(cpu)->leaf_rt_rq_list);
-}
-
-static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
-{
- list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list);
-}
#else /* !CONFIG_RT_GROUP_SCHED */
static inline void free_rt_sched_group(struct task_group *tg)
{
{
return 1;
}
-
-static inline void register_rt_sched_group(struct task_group *tg, int cpu)
-{
-}
-
-static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
-{
-}
#endif /* CONFIG_RT_GROUP_SCHED */
#ifdef CONFIG_CGROUP_SCHED
{
struct task_group *tg;
unsigned long flags;
- int i;
tg = kzalloc(sizeof(*tg), GFP_KERNEL);
if (!tg)
goto err;
spin_lock_irqsave(&task_group_lock, flags);
- for_each_possible_cpu(i) {
- register_fair_sched_group(tg, i);
- register_rt_sched_group(tg, i);
- }
list_add_rcu(&tg->list, &task_groups);
WARN_ON(!parent); /* root should already exist */
unsigned long flags;
int i;
- spin_lock_irqsave(&task_group_lock, flags);
- for_each_possible_cpu(i) {
+ /* end participation in shares distribution */
+ for_each_possible_cpu(i)
unregister_fair_sched_group(tg, i);
- unregister_rt_sched_group(tg, i);
- }
+
+ spin_lock_irqsave(&task_group_lock, flags);
list_del_rcu(&tg->list);
list_del_rcu(&tg->siblings);
spin_unlock_irqrestore(&task_group_lock, flags);
int sched_group_set_shares(struct task_group *tg, unsigned long shares)
{
int i;
- unsigned long flags;
/*
* We can't change the weight of the root cgroup.
if (tg->shares == shares)
goto done;
- spin_lock_irqsave(&task_group_lock, flags);
- for_each_possible_cpu(i)
- unregister_fair_sched_group(tg, i);
- list_del_rcu(&tg->siblings);
- spin_unlock_irqrestore(&task_group_lock, flags);
-
- /* wait for any ongoing reference to this group to finish */
- synchronize_sched();
-
- /*
- * Now we are free to modify the group's share on each cpu
- * w/o tripping rebalance_share or load_balance_fair.
- */
tg->shares = shares;
for_each_possible_cpu(i) {
/*
set_se_shares(tg->se[i], shares);
}
- /*
- * Enable load balance activity on this group, by inserting it back on
- * each cpu's rq->leaf_cfs_rq_list.
- */
- spin_lock_irqsave(&task_group_lock, flags);
- for_each_possible_cpu(i)
- register_fair_sched_group(tg, i);
- list_add_rcu(&tg->siblings, &tg->parent->children);
- spin_unlock_irqrestore(&task_group_lock, flags);
done:
mutex_unlock(&shares_mutex);
return 0;
return cfs_rq->tg->cfs_rq[this_cpu];
}
+static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
+{
+ if (!cfs_rq->on_list) {
+ list_add_rcu(&cfs_rq->leaf_cfs_rq_list,
+ &rq_of(cfs_rq)->leaf_cfs_rq_list);
+
+ cfs_rq->on_list = 1;
+ }
+}
+
+static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
+{
+ if (cfs_rq->on_list) {
+ list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
+ cfs_rq->on_list = 0;
+ }
+}
+
/* Iterate thr' all leaf cfs_rq's on a runqueue */
#define for_each_leaf_cfs_rq(rq, cfs_rq) \
list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
return &cpu_rq(this_cpu)->cfs;
}
+static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
+{
+}
+
+static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
+{
+}
+
#define for_each_leaf_cfs_rq(rq, cfs_rq) \
for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL)
}
#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
-static void update_cfs_load(struct cfs_rq *cfs_rq)
+static void update_cfs_load(struct cfs_rq *cfs_rq, int lb)
{
u64 period = sched_avg_period();
u64 now, delta;
cfs_rq->load_period /= 2;
cfs_rq->load_avg /= 2;
}
+
+ if (lb && !cfs_rq->nr_running) {
+ if (cfs_rq->load_avg < (period / 8))
+ list_del_leaf_cfs_rq(cfs_rq);
+ }
}
static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
reweight_entity(cfs_rq_of(se), se, shares);
}
#else /* CONFIG_FAIR_GROUP_SCHED */
-static inline void update_cfs_load(struct cfs_rq *cfs_rq)
+static inline void update_cfs_load(struct cfs_rq *cfs_rq, int lb)
{
}
* Update run-time statistics of the 'current'.
*/
update_curr(cfs_rq);
- update_cfs_load(cfs_rq);
+ update_cfs_load(cfs_rq, 0);
account_entity_enqueue(cfs_rq, se);
update_cfs_shares(cfs_rq);
if (se != cfs_rq->curr)
__enqueue_entity(cfs_rq, se);
se->on_rq = 1;
+
+ if (cfs_rq->nr_running == 1)
+ list_add_leaf_cfs_rq(cfs_rq);
}
static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
if (se != cfs_rq->curr)
__dequeue_entity(cfs_rq, se);
se->on_rq = 0;
- update_cfs_load(cfs_rq);
+ update_cfs_load(cfs_rq, 0);
account_entity_dequeue(cfs_rq, se);
update_min_vruntime(cfs_rq);
update_cfs_shares(cfs_rq);
for_each_sched_entity(se) {
struct cfs_rq *cfs_rq = cfs_rq_of(se);
- update_cfs_load(cfs_rq);
+ update_cfs_load(cfs_rq, 0);
update_cfs_shares(cfs_rq);
}
for_each_sched_entity(se) {
struct cfs_rq *cfs_rq = cfs_rq_of(se);
- update_cfs_load(cfs_rq);
+ update_cfs_load(cfs_rq, 0);
update_cfs_shares(cfs_rq);
}