From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 2 Jan 2009 19:44:09 +0000 (-0800)
Subject: Merge branch 'cpus4096-for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel... 
X-Git-Url: http://git.cdn.openwrt.org/?a=commitdiff_plain;h=b840d79631c882786925303c2b0f4fefc31845ed;p=openwrt%2Fstaging%2Fblogic.git

Merge branch 'cpus4096-for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'cpus4096-for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (66 commits)
  x86: export vector_used_by_percpu_irq
  x86: use logical apicid in x2apic_cluster's x2apic_cpu_mask_to_apicid_and()
  sched: nominate preferred wakeup cpu, fix
  x86: fix lguest used_vectors breakage, -v2
  x86: fix warning in arch/x86/kernel/io_apic.c
  sched: fix warning in kernel/sched.c
  sched: move test_sd_parent() to an SMP section of sched.h
  sched: add SD_BALANCE_NEWIDLE at MC and CPU level for sched_mc>0
  sched: activate active load balancing in new idle cpus
  sched: bias task wakeups to preferred semi-idle packages
  sched: nominate preferred wakeup cpu
  sched: favour lower logical cpu number for sched_mc balance
  sched: framework for sched_mc/smt_power_savings=N
  sched: convert BALANCE_FOR_xx_POWER to inline functions
  x86: use possible_cpus=NUM to extend the possible cpus allowed
  x86: fix cpu_mask_to_apicid_and to include cpu_online_mask
  x86: update io_apic.c to the new cpumask code
  x86: Introduce topology_core_cpumask()/topology_thread_cpumask()
  x86: xen: use smp_call_function_many()
  x86: use work_on_cpu in x86/kernel/cpu/mcheck/mce_amd_64.c
  ...

Fixed up trivial conflict in kernel/time/tick-sched.c manually
---

b840d79631c882786925303c2b0f4fefc31845ed
diff --cc arch/powerpc/kernel/smp.c
index 8ac3f721d235,d1165566f064..65484b2200b3
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@@ -57,10 -57,9 +57,8 @@@
  #define DBG(fmt...)
  #endif
  
 -int smp_hw_index[NR_CPUS];
  struct thread_info *secondary_ti;
  
- cpumask_t cpu_possible_map = CPU_MASK_NONE;
- cpumask_t cpu_online_map = CPU_MASK_NONE;
  DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
  DEFINE_PER_CPU(cpumask_t, cpu_core_map) = CPU_MASK_NONE;
  
diff --cc arch/sparc/kernel/irq_64.c
index a3ea2bcb95de,000000000000..cab8e0286871
mode 100644,000000..100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@@ -1,1101 -1,0 +1,1104 @@@
 +/* irq.c: UltraSparc IRQ handling/init/registry.
 + *
 + * Copyright (C) 1997, 2007, 2008 David S. Miller (davem@davemloft.net)
 + * Copyright (C) 1998  Eddie C. Dost    (ecd@skynet.be)
 + * Copyright (C) 1998  Jakub Jelinek    (jj@ultra.linux.cz)
 + */
 +
 +#include <linux/module.h>
 +#include <linux/sched.h>
 +#include <linux/linkage.h>
 +#include <linux/ptrace.h>
 +#include <linux/errno.h>
 +#include <linux/kernel_stat.h>
 +#include <linux/signal.h>
 +#include <linux/mm.h>
 +#include <linux/interrupt.h>
 +#include <linux/slab.h>
 +#include <linux/random.h>
 +#include <linux/init.h>
 +#include <linux/delay.h>
 +#include <linux/proc_fs.h>
 +#include <linux/seq_file.h>
 +#include <linux/bootmem.h>
 +#include <linux/irq.h>
 +
 +#include <asm/ptrace.h>
 +#include <asm/processor.h>
 +#include <asm/atomic.h>
 +#include <asm/system.h>
 +#include <asm/irq.h>
 +#include <asm/io.h>
 +#include <asm/iommu.h>
 +#include <asm/upa.h>
 +#include <asm/oplib.h>
 +#include <asm/prom.h>
 +#include <asm/timer.h>
 +#include <asm/smp.h>
 +#include <asm/starfire.h>
 +#include <asm/uaccess.h>
 +#include <asm/cache.h>
 +#include <asm/cpudata.h>
 +#include <asm/auxio.h>
 +#include <asm/head.h>
 +#include <asm/hypervisor.h>
 +#include <asm/cacheflush.h>
 +
 +#include "entry.h"
 +
 +#define NUM_IVECS	(IMAP_INR + 1)
 +
 +struct ino_bucket *ivector_table;
 +unsigned long ivector_table_pa;
 +
 +/* On several sun4u processors, it is illegal to mix bypass and
 + * non-bypass accesses.  Therefore we access all INO buckets
 + * using bypass accesses only.
 + */
 +static unsigned long bucket_get_chain_pa(unsigned long bucket_pa)
 +{
 +	unsigned long ret;
 +
 +	__asm__ __volatile__("ldxa	[%1] %2, %0"
 +			     : "=&r" (ret)
 +			     : "r" (bucket_pa +
 +				    offsetof(struct ino_bucket,
 +					     __irq_chain_pa)),
 +			       "i" (ASI_PHYS_USE_EC));
 +
 +	return ret;
 +}
 +
 +static void bucket_clear_chain_pa(unsigned long bucket_pa)
 +{
 +	__asm__ __volatile__("stxa	%%g0, [%0] %1"
 +			     : /* no outputs */
 +			     : "r" (bucket_pa +
 +				    offsetof(struct ino_bucket,
 +					     __irq_chain_pa)),
 +			       "i" (ASI_PHYS_USE_EC));
 +}
 +
 +static unsigned int bucket_get_virt_irq(unsigned long bucket_pa)
 +{
 +	unsigned int ret;
 +
 +	__asm__ __volatile__("lduwa	[%1] %2, %0"
 +			     : "=&r" (ret)
 +			     : "r" (bucket_pa +
 +				    offsetof(struct ino_bucket,
 +					     __virt_irq)),
 +			       "i" (ASI_PHYS_USE_EC));
 +
 +	return ret;
 +}
 +
 +static void bucket_set_virt_irq(unsigned long bucket_pa,
 +				unsigned int virt_irq)
 +{
 +	__asm__ __volatile__("stwa	%0, [%1] %2"
 +			     : /* no outputs */
 +			     : "r" (virt_irq),
 +			       "r" (bucket_pa +
 +				    offsetof(struct ino_bucket,
 +					     __virt_irq)),
 +			       "i" (ASI_PHYS_USE_EC));
 +}
 +
 +/* Address of the given cpu's irq_worklist_pa slot in trap_block[].
 + * The expansion is fully parenthesized so that it stays a single
 + * operand wherever the macro is used.
 + */
 +#define irq_work_pa(__cpu)	(&(trap_block[(__cpu)].irq_worklist_pa))
 +
 +static struct {
 +	unsigned int dev_handle;
 +	unsigned int dev_ino;
 +	unsigned int in_use;
 +} virt_irq_table[NR_IRQS];
 +static DEFINE_SPINLOCK(virt_irq_alloc_lock);
 +
 +/* Claim a free slot in virt_irq_table[] and record in it the device
 + * handle and device INO that the slot stands for.
 + *
 + * Returns the index of the claimed slot, or 0 when every slot from 1
 + * to NR_IRQS - 1 is already in use (slot 0 is never handed out, so 0
 + * doubles as the failure value).
 + */
 +unsigned char virt_irq_alloc(unsigned int dev_handle,
 +			     unsigned int dev_ino)
 +{
 +	unsigned long flags;
 +	unsigned char ent;
 +
 +	/* The index is returned as an unsigned char, so it must fit. */
 +	BUILD_BUG_ON(NR_IRQS >= 256);
 +
 +	spin_lock_irqsave(&virt_irq_alloc_lock, flags);
 +
 +	/* Linear search for the first unused slot, skipping slot 0. */
 +	for (ent = 1; ent < NR_IRQS; ent++) {
 +		if (!virt_irq_table[ent].in_use)
 +			break;
 +	}
 +	if (ent >= NR_IRQS) {
 +		printk(KERN_ERR "IRQ: Out of virtual IRQs.\n");
 +		ent = 0;
 +	} else {
 +		virt_irq_table[ent].dev_handle = dev_handle;
 +		virt_irq_table[ent].dev_ino = dev_ino;
 +		virt_irq_table[ent].in_use = 1;
 +	}
 +
 +	spin_unlock_irqrestore(&virt_irq_alloc_lock, flags);
 +
 +	return ent;
 +}
 +
 +#ifdef CONFIG_PCI_MSI
 +void virt_irq_free(unsigned int virt_irq)
 +{
 +	unsigned long flags;
 +
 +	if (virt_irq >= NR_IRQS)
 +		return;
 +
 +	spin_lock_irqsave(&virt_irq_alloc_lock, flags);
 +
 +	virt_irq_table[virt_irq].in_use = 0;
 +
 +	spin_unlock_irqrestore(&virt_irq_alloc_lock, flags);
 +}
 +#endif
 +
 +/*
 + * /proc/interrupts printing:
 + */
 +
 +int show_interrupts(struct seq_file *p, void *v)
 +{
 +	int i = *(loff_t *) v, j;
 +	struct irqaction * action;
 +	unsigned long flags;
 +
 +	if (i == 0) {
 +		seq_printf(p, "           ");
 +		for_each_online_cpu(j)
 +			seq_printf(p, "CPU%d       ",j);
 +		seq_putc(p, '\n');
 +	}
 +
 +	if (i < NR_IRQS) {
 +		spin_lock_irqsave(&irq_desc[i].lock, flags);
 +		action = irq_desc[i].action;
 +		if (!action)
 +			goto skip;
 +		seq_printf(p, "%3d: ",i);
 +#ifndef CONFIG_SMP
 +		seq_printf(p, "%10u ", kstat_irqs(i));
 +#else
 +		for_each_online_cpu(j)
 +			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 +#endif
 +		seq_printf(p, " %9s", irq_desc[i].chip->typename);
 +		seq_printf(p, "  %s", action->name);
 +
 +		for (action=action->next; action; action = action->next)
 +			seq_printf(p, ", %s", action->name);
 +
 +		seq_putc(p, '\n');
 +skip:
 +		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
 +	}
 +	return 0;
 +}
 +
 +/* Compute the target-id bits of an IMAP register value that route an
 + * interrupt to the given cpu.
 + *
 + * The encoding depends on the platform:
 + *  - Starfire: the id comes from starfire_translate() and is placed
 + *    in the UPA TID field.
 + *  - cheetah / cheetah_plus: %ver is read; Jalapeno and Serrano put
 + *    cpuid in the JBUS TID field, all others split cpuid into two
 + *    5-bit values stored in the Safari AID and NID fields.
 + *  - everything else: cpuid goes into the UPA TID field.
 + *
 + * Returns only the target-id bits; the caller merges them into IMAP.
 + */
 +static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid)
 +{
 +	unsigned int tid;
 +
 +	if (this_is_starfire) {
 +		tid = starfire_translate(imap, cpuid);
 +		tid <<= IMAP_TID_SHIFT;
 +		tid &= IMAP_TID_UPA;
 +	} else {
 +		if (tlb_type == cheetah || tlb_type == cheetah_plus) {
 +			unsigned long ver;
 +
 +			__asm__ ("rdpr %%ver, %0" : "=r" (ver));
 +			if ((ver >> 32UL) == __JALAPENO_ID ||
 +			    (ver >> 32UL) == __SERRANO_ID) {
 +				tid = cpuid << IMAP_TID_SHIFT;
 +				tid &= IMAP_TID_JBUS;
 +			} else {
 +				/* Low 5 bits -> AID field, next 5 -> NID field. */
 +				unsigned int a = cpuid & 0x1f;
 +				unsigned int n = (cpuid >> 5) & 0x1f;
 +
 +				tid = ((a << IMAP_AID_SHIFT) |
 +				       (n << IMAP_NID_SHIFT));
 +				tid &= (IMAP_AID_SAFARI |
 +					IMAP_NID_SAFARI);
 +			}
 +		} else {
 +			tid = cpuid << IMAP_TID_SHIFT;
 +			tid &= IMAP_TID_UPA;
 +		}
 +	}
 +
 +	return tid;
 +}
 +
 +struct irq_handler_data {
 +	unsigned long	iclr;
 +	unsigned long	imap;
 +
 +	void		(*pre_handler)(unsigned int, void *, void *);
 +	void		*arg1;
 +	void		*arg2;
 +};
 +
 +#ifdef CONFIG_SMP
 +/* Pick the cpu that should service virt_irq, based on a copy of
 + * irq_desc[virt_irq].affinity.
 + *
 + * If the affinity is CPU_MASK_ALL, or none of the cpus in it is
 + * online, online cpus are handed out in round-robin order using a
 + * rover shared by all IRQs.  Otherwise the first online cpu in the
 + * affinity mask is returned.
 + */
 +static int irq_choose_cpu(unsigned int virt_irq)
 +{
 +	cpumask_t mask = irq_desc[virt_irq].affinity;
 +	int cpuid;
 +
 +	if (cpus_equal(mask, CPU_MASK_ALL)) {
 +		/* Rover state persists across calls; the lock guards it. */
 +		static int irq_rover;
 +		static DEFINE_SPINLOCK(irq_rover_lock);
 +		unsigned long flags;
 +
 +		/* Round-robin distribution... */
 +	do_round_robin:
 +		spin_lock_irqsave(&irq_rover_lock, flags);
 +
 +		/* Skip forward (with wrap-around) to an online cpu... */
 +		while (!cpu_online(irq_rover)) {
 +			if (++irq_rover >= NR_CPUS)
 +				irq_rover = 0;
 +		}
 +		cpuid = irq_rover;
 +		/* ...then leave the rover on the next online cpu so the
 +		 * following call starts from there.
 +		 */
 +		do {
 +			if (++irq_rover >= NR_CPUS)
 +				irq_rover = 0;
 +		} while (!cpu_online(irq_rover));
 +
 +		spin_unlock_irqrestore(&irq_rover_lock, flags);
 +	} else {
 +		cpumask_t tmp;
 +
 +		cpus_and(tmp, cpu_online_map, mask);
 +
 +		/* No cpu in the affinity mask is online: fall back to the
 +		 * round-robin path above.
 +		 */
 +		if (cpus_empty(tmp))
 +			goto do_round_robin;
 +
 +		cpuid = first_cpu(tmp);
 +	}
 +
 +	return cpuid;
 +}
 +#else
 +static int irq_choose_cpu(unsigned int virt_irq)
 +{
 +	return real_hard_smp_processor_id();
 +}
 +#endif
 +
 +static void sun4u_irq_enable(unsigned int virt_irq)
 +{
 +	struct irq_handler_data *data = get_irq_chip_data(virt_irq);
 +
 +	if (likely(data)) {
 +		unsigned long cpuid, imap, val;
 +		unsigned int tid;
 +
 +		cpuid = irq_choose_cpu(virt_irq);
 +		imap = data->imap;
 +
 +		tid = sun4u_compute_tid(imap, cpuid);
 +
 +		val = upa_readq(imap);
 +		val &= ~(IMAP_TID_UPA | IMAP_TID_JBUS |
 +			 IMAP_AID_SAFARI | IMAP_NID_SAFARI);
 +		val |= tid | IMAP_VALID;
 +		upa_writeq(val, imap);
 +		upa_writeq(ICLR_IDLE, data->iclr);
 +	}
 +}
 +
- static void sun4u_set_affinity(unsigned int virt_irq, cpumask_t mask)
++static void sun4u_set_affinity(unsigned int virt_irq,
++			       const struct cpumask *mask)
 +{
 +	sun4u_irq_enable(virt_irq);
 +}
 +
 +static void sun4u_irq_disable(unsigned int virt_irq)
 +{
 +	struct irq_handler_data *data = get_irq_chip_data(virt_irq);
 +
 +	if (likely(data)) {
 +		unsigned long imap = data->imap;
 +		unsigned long tmp = upa_readq(imap);
 +
 +		tmp &= ~IMAP_VALID;
 +		upa_writeq(tmp, imap);
 +	}
 +}
 +
 +static void sun4u_irq_eoi(unsigned int virt_irq)
 +{
 +	struct irq_handler_data *data = get_irq_chip_data(virt_irq);
 +	struct irq_desc *desc = irq_desc + virt_irq;
 +
 +	if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS)))
 +		return;
 +
 +	if (likely(data))
 +		upa_writeq(ICLR_IDLE, data->iclr);
 +}
 +
 +static void sun4v_irq_enable(unsigned int virt_irq)
 +{
 +	unsigned int ino = virt_irq_table[virt_irq].dev_ino;
 +	unsigned long cpuid = irq_choose_cpu(virt_irq);
 +	int err;
 +
 +	err = sun4v_intr_settarget(ino, cpuid);
 +	if (err != HV_EOK)
 +		printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): "
 +		       "err(%d)\n", ino, cpuid, err);
 +	err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE);
 +	if (err != HV_EOK)
 +		printk(KERN_ERR "sun4v_intr_setstate(%x): "
 +		       "err(%d)\n", ino, err);
 +	err = sun4v_intr_setenabled(ino, HV_INTR_ENABLED);
 +	if (err != HV_EOK)
 +		printk(KERN_ERR "sun4v_intr_setenabled(%x): err(%d)\n",
 +		       ino, err);
 +}
 +
- static void sun4v_set_affinity(unsigned int virt_irq, cpumask_t mask)
++static void sun4v_set_affinity(unsigned int virt_irq,
++			       const struct cpumask *mask)
 +{
 +	unsigned int ino = virt_irq_table[virt_irq].dev_ino;
 +	unsigned long cpuid = irq_choose_cpu(virt_irq);
 +	int err;
 +
 +	err = sun4v_intr_settarget(ino, cpuid);
 +	if (err != HV_EOK)
 +		printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): "
 +		       "err(%d)\n", ino, cpuid, err);
 +}
 +
 +static void sun4v_irq_disable(unsigned int virt_irq)
 +{
 +	unsigned int ino = virt_irq_table[virt_irq].dev_ino;
 +	int err;
 +
 +	err = sun4v_intr_setenabled(ino, HV_INTR_DISABLED);
 +	if (err != HV_EOK)
 +		printk(KERN_ERR "sun4v_intr_setenabled(%x): "
 +		       "err(%d)\n", ino, err);
 +}
 +
 +static void sun4v_irq_eoi(unsigned int virt_irq)
 +{
 +	unsigned int ino = virt_irq_table[virt_irq].dev_ino;
 +	struct irq_desc *desc = irq_desc + virt_irq;
 +	int err;
 +
 +	if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS)))
 +		return;
 +
 +	err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE);
 +	if (err != HV_EOK)
 +		printk(KERN_ERR "sun4v_intr_setstate(%x): "
 +		       "err(%d)\n", ino, err);
 +}
 +
 +/* Enable a sun4v cookie-based virtual interrupt.
 + *
 + * Chooses a target cpu, then issues three hypervisor calls for the
 + * (dev_handle, dev_ino) pair recorded in virt_irq_table[]: set the
 + * target, set the state to idle, and mark the interrupt valid.  Each
 + * failure is logged and the remaining calls are still attempted.
 + */
 +static void sun4v_virq_enable(unsigned int virt_irq)
 +{
 +	unsigned long cpuid, dev_handle, dev_ino;
 +	int err;
 +
 +	cpuid = irq_choose_cpu(virt_irq);
 +
 +	dev_handle = virt_irq_table[virt_irq].dev_handle;
 +	dev_ino = virt_irq_table[virt_irq].dev_ino;
 +
 +	err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid);
 +	if (err != HV_EOK)
 +		printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
 +		       "err(%d)\n",
 +		       dev_handle, dev_ino, cpuid, err);
 +	err = sun4v_vintr_set_state(dev_handle, dev_ino,
 +				    HV_INTR_STATE_IDLE);
 +	if (err != HV_EOK)
 +		printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx,"
 +		       "HV_INTR_STATE_IDLE): err(%d)\n",
 +		       dev_handle, dev_ino, err);
 +	err = sun4v_vintr_set_valid(dev_handle, dev_ino,
 +				    HV_INTR_ENABLED);
 +	/* Name the call that actually failed, not set_state. */
 +	if (err != HV_EOK)
 +		printk(KERN_ERR "sun4v_vintr_set_valid(%lx,%lx,"
 +		       "HV_INTR_ENABLED): err(%d)\n",
 +		       dev_handle, dev_ino, err);
 +}
 +
- static void sun4v_virt_set_affinity(unsigned int virt_irq, cpumask_t mask)
++static void sun4v_virt_set_affinity(unsigned int virt_irq,
++				    const struct cpumask *mask)
 +{
 +	unsigned long cpuid, dev_handle, dev_ino;
 +	int err;
 +
 +	cpuid = irq_choose_cpu(virt_irq);
 +
 +	dev_handle = virt_irq_table[virt_irq].dev_handle;
 +	dev_ino = virt_irq_table[virt_irq].dev_ino;
 +
 +	err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid);
 +	if (err != HV_EOK)
 +		printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
 +		       "err(%d)\n",
 +		       dev_handle, dev_ino, cpuid, err);
 +}
 +
 +/* Disable a sun4v cookie-based virtual interrupt by asking the
 + * hypervisor to mark the (dev_handle, dev_ino) pair recorded in
 + * virt_irq_table[] as HV_INTR_DISABLED.  A failure is only logged.
 + */
 +static void sun4v_virq_disable(unsigned int virt_irq)
 +{
 +	unsigned long dev_handle, dev_ino;
 +	int err;
 +
 +	dev_handle = virt_irq_table[virt_irq].dev_handle;
 +	dev_ino = virt_irq_table[virt_irq].dev_ino;
 +
 +	err = sun4v_vintr_set_valid(dev_handle, dev_ino,
 +				    HV_INTR_DISABLED);
 +	/* Name the call that actually failed, not set_state. */
 +	if (err != HV_EOK)
 +		printk(KERN_ERR "sun4v_vintr_set_valid(%lx,%lx,"
 +		       "HV_INTR_DISABLED): err(%d)\n",
 +		       dev_handle, dev_ino, err);
 +}
 +
 +static void sun4v_virq_eoi(unsigned int virt_irq)
 +{
 +	struct irq_desc *desc = irq_desc + virt_irq;
 +	unsigned long dev_handle, dev_ino;
 +	int err;
 +
 +	if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS)))
 +		return;
 +
 +	dev_handle = virt_irq_table[virt_irq].dev_handle;
 +	dev_ino = virt_irq_table[virt_irq].dev_ino;
 +
 +	err = sun4v_vintr_set_state(dev_handle, dev_ino,
 +				    HV_INTR_STATE_IDLE);
 +	if (err != HV_EOK)
 +		printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx,"
 +		       "HV_INTR_STATE_IDLE): err(%d)\n",
 +		       dev_handle, dev_ino, err);
 +}
 +
 +static struct irq_chip sun4u_irq = {
 +	.typename	= "sun4u",
 +	.enable		= sun4u_irq_enable,
 +	.disable	= sun4u_irq_disable,
 +	.eoi		= sun4u_irq_eoi,
 +	.set_affinity	= sun4u_set_affinity,
 +};
 +
 +static struct irq_chip sun4v_irq = {
 +	.typename	= "sun4v",
 +	.enable		= sun4v_irq_enable,
 +	.disable	= sun4v_irq_disable,
 +	.eoi		= sun4v_irq_eoi,
 +	.set_affinity	= sun4v_set_affinity,
 +};
 +
 +static struct irq_chip sun4v_virq = {
 +	.typename	= "vsun4v",
 +	.enable		= sun4v_virq_enable,
 +	.disable	= sun4v_virq_disable,
 +	.eoi		= sun4v_virq_eoi,
 +	.set_affinity	= sun4v_virt_set_affinity,
 +};
 +
 +static void pre_flow_handler(unsigned int virt_irq,
 +				      struct irq_desc *desc)
 +{
 +	struct irq_handler_data *data = get_irq_chip_data(virt_irq);
 +	unsigned int ino = virt_irq_table[virt_irq].dev_ino;
 +
 +	data->pre_handler(ino, data->arg1, data->arg2);
 +
 +	handle_fasteoi_irq(virt_irq, desc);
 +}
 +
 +void irq_install_pre_handler(int virt_irq,
 +			     void (*func)(unsigned int, void *, void *),
 +			     void *arg1, void *arg2)
 +{
 +	struct irq_handler_data *data = get_irq_chip_data(virt_irq);
 +	struct irq_desc *desc = irq_desc + virt_irq;
 +
 +	data->pre_handler = func;
 +	data->arg1 = arg1;
 +	data->arg2 = arg2;
 +
 +	desc->handle_irq = pre_flow_handler;
 +}
 +
 +unsigned int build_irq(int inofixup, unsigned long iclr, unsigned long imap)
 +{
 +	struct ino_bucket *bucket;
 +	struct irq_handler_data *data;
 +	unsigned int virt_irq;
 +	int ino;
 +
 +	BUG_ON(tlb_type == hypervisor);
 +
 +	ino = (upa_readq(imap) & (IMAP_IGN | IMAP_INO)) + inofixup;
 +	bucket = &ivector_table[ino];
 +	virt_irq = bucket_get_virt_irq(__pa(bucket));
 +	if (!virt_irq) {
 +		virt_irq = virt_irq_alloc(0, ino);
 +		bucket_set_virt_irq(__pa(bucket), virt_irq);
 +		set_irq_chip_and_handler_name(virt_irq,
 +					      &sun4u_irq,
 +					      handle_fasteoi_irq,
 +					      "IVEC");
 +	}
 +
 +	data = get_irq_chip_data(virt_irq);
 +	if (unlikely(data))
 +		goto out;
 +
 +	data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
 +	if (unlikely(!data)) {
 +		prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n");
 +		prom_halt();
 +	}
 +	set_irq_chip_data(virt_irq, data);
 +
 +	data->imap  = imap;
 +	data->iclr  = iclr;
 +
 +out:
 +	return virt_irq;
 +}
 +
 +static unsigned int sun4v_build_common(unsigned long sysino,
 +				       struct irq_chip *chip)
 +{
 +	struct ino_bucket *bucket;
 +	struct irq_handler_data *data;
 +	unsigned int virt_irq;
 +
 +	BUG_ON(tlb_type != hypervisor);
 +
 +	bucket = &ivector_table[sysino];
 +	virt_irq = bucket_get_virt_irq(__pa(bucket));
 +	if (!virt_irq) {
 +		virt_irq = virt_irq_alloc(0, sysino);
 +		bucket_set_virt_irq(__pa(bucket), virt_irq);
 +		set_irq_chip_and_handler_name(virt_irq, chip,
 +					      handle_fasteoi_irq,
 +					      "IVEC");
 +	}
 +
 +	data = get_irq_chip_data(virt_irq);
 +	if (unlikely(data))
 +		goto out;
 +
 +	data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
 +	if (unlikely(!data)) {
 +		prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n");
 +		prom_halt();
 +	}
 +	set_irq_chip_data(virt_irq, data);
 +
 +	/* Catch accidental accesses to these things.  IMAP/ICLR handling
 +	 * is done by hypervisor calls on sun4v platforms, not by direct
 +	 * register accesses.
 +	 */
 +	data->imap = ~0UL;
 +	data->iclr = ~0UL;
 +
 +out:
 +	return virt_irq;
 +}
 +
 +unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino)
 +{
 +	unsigned long sysino = sun4v_devino_to_sysino(devhandle, devino);
 +
 +	return sun4v_build_common(sysino, &sun4v_irq);
 +}
 +
 +/* Create a virtual IRQ for a sun4v cookie-based interrupt source.
 + *
 + * A private ino_bucket is allocated for the source and tied to a newly
 + * claimed virt_irq; the inverted physical address of that bucket is
 + * registered with the hypervisor as the interrupt cookie.
 + *
 + * Returns the new virt_irq, or 0 if memory or a virt_irq slot could
 + * not be obtained (nothing is left allocated in that case).  Halts
 + * through the PROM if the hypervisor refuses the cookie.
 + */
 +unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino)
 +{
 +	struct irq_handler_data *data;
 +	unsigned long hv_err, cookie;
 +	struct ino_bucket *bucket;
 +	struct irq_desc *desc;
 +	unsigned int virt_irq;
 +
 +	bucket = kzalloc(sizeof(struct ino_bucket), GFP_ATOMIC);
 +	if (unlikely(!bucket))
 +		return 0;
 +
 +	/* Do every allocation before claiming a virt_irq slot, so that a
 +	 * failure can be unwound completely.
 +	 */
 +	data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
 +	if (unlikely(!data))
 +		goto out_free_bucket;
 +
 +	/* The bucket is only touched through physical-address accesses
 +	 * from here on, so push its zeroed contents out of the D-cache.
 +	 */
 +	__flush_dcache_range((unsigned long) bucket,
 +			     ((unsigned long) bucket +
 +			      sizeof(struct ino_bucket)));
 +
 +	/* virt_irq_alloc() returns 0 when the table is full; do not go on
 +	 * to reprogram irq 0 in that case.
 +	 */
 +	virt_irq = virt_irq_alloc(devhandle, devino);
 +	if (unlikely(!virt_irq))
 +		goto out_free_data;
 +	bucket_set_virt_irq(__pa(bucket), virt_irq);
 +
 +	set_irq_chip_and_handler_name(virt_irq, &sun4v_virq,
 +				      handle_fasteoi_irq,
 +				      "IVEC");
 +
 +	/* In order to make the LDC channel startup sequence easier,
 +	 * especially wrt. locking, we do not let request_irq() enable
 +	 * the interrupt.
 +	 */
 +	desc = irq_desc + virt_irq;
 +	desc->status |= IRQ_NOAUTOEN;
 +
 +	set_irq_chip_data(virt_irq, data);
 +
 +	/* Catch accidental accesses to these things.  IMAP/ICLR handling
 +	 * is done by hypervisor calls on sun4v platforms, not by direct
 +	 * register accesses.
 +	 */
 +	data->imap = ~0UL;
 +	data->iclr = ~0UL;
 +
 +	cookie = ~__pa(bucket);
 +	hv_err = sun4v_vintr_set_cookie(devhandle, devino, cookie);
 +	if (hv_err) {
 +		prom_printf("IRQ: Fatal, cannot set cookie for [%x:%x] "
 +			    "err=%lu\n", devhandle, devino, hv_err);
 +		prom_halt();
 +	}
 +
 +	return virt_irq;
 +
 +out_free_data:
 +	kfree(data);
 +out_free_bucket:
 +	kfree(bucket);
 +	return 0;
 +}
 +
 +/* Report an interrupt nobody expected.  The INO recorded for virt_irq
 + * is printed; when none is recorded (zero) the marker value 0xdeadbeef
 + * is shown in its place.
 + */
 +void ack_bad_irq(unsigned int virt_irq)
 +{
 +	unsigned int recorded = virt_irq_table[virt_irq].dev_ino;
 +	unsigned int shown = recorded ? recorded : 0xdeadbeef;
 +
 +	printk(KERN_CRIT "Unexpected IRQ from ino[%x] virt_irq[%u]\n",
 +	       shown, virt_irq);
 +}
 +
 +void *hardirq_stack[NR_CPUS];
 +void *softirq_stack[NR_CPUS];
 +
 +static __attribute__((always_inline)) void *set_hardirq_stack(void)
 +{
 +	void *orig_sp, *sp = hardirq_stack[smp_processor_id()];
 +
 +	__asm__ __volatile__("mov %%sp, %0" : "=r" (orig_sp));
 +	if (orig_sp < sp ||
 +	    orig_sp > (sp + THREAD_SIZE)) {
 +		sp += THREAD_SIZE - 192 - STACK_BIAS;
 +		__asm__ __volatile__("mov %0, %%sp" : : "r" (sp));
 +	}
 +
 +	return orig_sp;
 +}
 +static __attribute__((always_inline)) void restore_hardirq_stack(void *orig_sp)
 +{
 +	__asm__ __volatile__("mov %0, %%sp" : : "r" (orig_sp));
 +}
 +
 +/* Dispatch every interrupt vector queued on this cpu's worklist.
 + *
 + * irq:  softint level being serviced; its softint bit is cleared first.
 + * regs: register frame installed as the current irq regs while the
 + *       handlers run, and restored to the previous value afterwards.
 + */
 +void handler_irq(int irq, struct pt_regs *regs)
 +{
 +	unsigned long pstate, bucket_pa;
 +	struct pt_regs *old_regs;
 +	void *orig_sp;
 +
 +	clear_softint(1 << irq);
 +
 +	old_regs = set_irq_regs(regs);
 +	irq_enter();
 +
 +	/* Grab an atomic snapshot of the pending IVECs.  */
 +	/* The sequence saves %pstate, rewrites it with PSTATE_IE as the
 +	 * second wrpr operand (presumably masking interrupts - confirm
 +	 * against the wrpr definition), loads the worklist head into
 +	 * bucket_pa, stores zero in its place, and restores %pstate.
 +	 */
 +	__asm__ __volatile__("rdpr	%%pstate, %0\n\t"
 +			     "wrpr	%0, %3, %%pstate\n\t"
 +			     "ldx	[%2], %1\n\t"
 +			     "stx	%%g0, [%2]\n\t"
 +			     "wrpr	%0, 0x0, %%pstate\n\t"
 +			     : "=&r" (pstate), "=&r" (bucket_pa)
 +			     : "r" (irq_work_pa(smp_processor_id())),
 +			       "i" (PSTATE_IE)
 +			     : "memory");
 +
 +	/* Run the handlers on this cpu's hardirq stack. */
 +	orig_sp = set_hardirq_stack();
 +
 +	/* Walk the chain of buckets by physical address. */
 +	while (bucket_pa) {
 +		struct irq_desc *desc;
 +		unsigned long next_pa;
 +		unsigned int virt_irq;
 +
 +		/* Read the link and virt_irq first, then unlink the
 +		 * bucket, all before its handler is invoked.
 +		 */
 +		next_pa = bucket_get_chain_pa(bucket_pa);
 +		virt_irq = bucket_get_virt_irq(bucket_pa);
 +		bucket_clear_chain_pa(bucket_pa);
 +
 +		desc = irq_desc + virt_irq;
 +
 +		desc->handle_irq(virt_irq, desc);
 +
 +		bucket_pa = next_pa;
 +	}
 +
 +	restore_hardirq_stack(orig_sp);
 +
 +	irq_exit();
 +	set_irq_regs(old_regs);
 +}
 +
 +void do_softirq(void)
 +{
 +	unsigned long flags;
 +
 +	if (in_interrupt())
 +		return;
 +
 +	local_irq_save(flags);
 +
 +	if (local_softirq_pending()) {
 +		void *orig_sp, *sp = softirq_stack[smp_processor_id()];
 +
 +		sp += THREAD_SIZE - 192 - STACK_BIAS;
 +
 +		__asm__ __volatile__("mov %%sp, %0\n\t"
 +				     "mov %1, %%sp"
 +				     : "=&r" (orig_sp)
 +				     : "r" (sp));
 +		__do_softirq();
 +		__asm__ __volatile__("mov %0, %%sp"
 +				     : : "r" (orig_sp));
 +	}
 +
 +	local_irq_restore(flags);
 +}
 +
 +static void unhandled_perf_irq(struct pt_regs *regs)
 +{
 +	unsigned long pcr, pic;
 +
 +	read_pcr(pcr);
 +	read_pic(pic);
 +
 +	write_pcr(0);
 +
 +	printk(KERN_EMERG "CPU %d: Got unexpected perf counter IRQ.\n",
 +	       smp_processor_id());
 +	printk(KERN_EMERG "CPU %d: PCR[%016lx] PIC[%016lx]\n",
 +	       smp_processor_id(), pcr, pic);
 +}
 +
 +/* Almost a direct copy of the powerpc PMC code.  */
 +static DEFINE_SPINLOCK(perf_irq_lock);
 +static void *perf_irq_owner_caller; /* mostly for debugging */
 +static void (*perf_irq)(struct pt_regs *regs) = unhandled_perf_irq;
 +
 +/* Invoked from level 15 PIL handler in trap table.  */
 +void perfctr_irq(int irq, struct pt_regs *regs)
 +{
 +	clear_softint(1 << irq);
 +	perf_irq(regs);
 +}
 +
 +int register_perfctr_intr(void (*handler)(struct pt_regs *))
 +{
 +	int ret;
 +
 +	if (!handler)
 +		return -EINVAL;
 +
 +	spin_lock(&perf_irq_lock);
 +	if (perf_irq != unhandled_perf_irq) {
 +		printk(KERN_WARNING "register_perfctr_intr: "
 +		       "perf IRQ busy (reserved by caller %p)\n",
 +		       perf_irq_owner_caller);
 +		ret = -EBUSY;
 +		goto out;
 +	}
 +
 +	perf_irq_owner_caller = __builtin_return_address(0);
 +	perf_irq = handler;
 +
 +	ret = 0;
 +out:
 +	spin_unlock(&perf_irq_lock);
 +
 +	return ret;
 +}
 +EXPORT_SYMBOL_GPL(register_perfctr_intr);
 +
 +/* Give the perf counter interrupt back: under perf_irq_lock, forget
 + * the recorded owner and reinstall unhandled_perf_irq as the hook.
 + *
 + * NOTE(review): 'handler' is never looked at, so the hook is reset
 + * even if the caller is not the one that registered it - confirm
 + * whether that is intended.
 + */
 +void release_perfctr_intr(void (*handler)(struct pt_regs *))
 +{
 +	spin_lock(&perf_irq_lock);
 +	perf_irq_owner_caller = NULL;
 +	perf_irq = unhandled_perf_irq;
 +	spin_unlock(&perf_irq_lock);
 +}
 +EXPORT_SYMBOL_GPL(release_perfctr_intr);
 +
 +#ifdef CONFIG_HOTPLUG_CPU
 +void fixup_irqs(void)
 +{
 +	unsigned int irq;
 +
 +	for (irq = 0; irq < NR_IRQS; irq++) {
 +		unsigned long flags;
 +
 +		spin_lock_irqsave(&irq_desc[irq].lock, flags);
 +		if (irq_desc[irq].action &&
 +		    !(irq_desc[irq].status & IRQ_PER_CPU)) {
 +			if (irq_desc[irq].chip->set_affinity)
 +				irq_desc[irq].chip->set_affinity(irq,
- 					irq_desc[irq].affinity);
++					&irq_desc[irq].affinity);
 +		}
 +		spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
 +	}
 +
 +	tick_ops->disable_irq();
 +}
 +#endif
 +
 +struct sun5_timer {
 +	u64	count0;
 +	u64	limit0;
 +	u64	count1;
 +	u64	limit1;
 +};
 +
 +static struct sun5_timer *prom_timers;
 +static u64 prom_limit0, prom_limit1;
 +
 +static void map_prom_timers(void)
 +{
 +	struct device_node *dp;
 +	const unsigned int *addr;
 +
 +	/* PROM timer node hangs out in the top level of device siblings... */
 +	dp = of_find_node_by_path("/");
 +	dp = dp->child;
 +	while (dp) {
 +		if (!strcmp(dp->name, "counter-timer"))
 +			break;
 +		dp = dp->sibling;
 +	}
 +
 +	/* Assume if node is not present, PROM uses different tick mechanism
 +	 * which we should not care about.
 +	 */
 +	if (!dp) {
 +		prom_timers = (struct sun5_timer *) 0;
 +		return;
 +	}
 +
 +	/* If PROM is really using this, it must be mapped by him. */
 +	addr = of_get_property(dp, "address", NULL);
 +	if (!addr) {
 +		prom_printf("PROM does not have timer mapped, trying to continue.\n");
 +		prom_timers = (struct sun5_timer *) 0;
 +		return;
 +	}
 +	prom_timers = (struct sun5_timer *) ((unsigned long)addr[0]);
 +}
 +
 +static void kill_prom_timer(void)
 +{
 +	if (!prom_timers)
 +		return;
 +
 +	/* Save them away for later. */
 +	prom_limit0 = prom_timers->limit0;
 +	prom_limit1 = prom_timers->limit1;
 +
 +	/* Just as in sun4c/sun4m PROM uses timer which ticks at IRQ 14.
 +	 * We turn both off here just to be paranoid.
 +	 */
 +	prom_timers->limit0 = 0;
 +	prom_timers->limit1 = 0;
 +
 +	/* Wheee, eat the interrupt packet too... */
 +	__asm__ __volatile__(
 +"	mov	0x40, %%g2\n"
 +"	ldxa	[%%g0] %0, %%g1\n"
 +"	ldxa	[%%g2] %1, %%g1\n"
 +"	stxa	%%g0, [%%g0] %0\n"
 +"	membar	#Sync\n"
 +	: /* no outputs */
 +	: "i" (ASI_INTR_RECEIVE), "i" (ASI_INTR_R)
 +	: "g1", "g2");
 +}
 +
 +void notrace init_irqwork_curcpu(void)
 +{
 +	int cpu = hard_smp_processor_id();
 +
 +	trap_block[cpu].irq_worklist_pa = 0UL;
 +}
 +
 +/* Please be very careful with register_one_mondo() and
 + * sun4v_register_mondo_queues().
 + *
 + * On SMP this gets invoked from the CPU trampoline before
 + * the cpu has fully taken over the trap table from OBP,
 + * and its kernel stack + %g6 thread register state is
 + * not fully cooked yet.
 + *
 + * Therefore you cannot make any OBP calls, not even prom_printf,
 + * from these two routines.
 + */
 +/* Register one queue with the hypervisor via sun4v_cpu_qconf().
 + *
 + * paddr: physical address of the queue.
 + * type:  queue type passed straight through to the hypervisor call.
 + * qmask: queue mask; the entry count is derived as (qmask + 1) / 64
 + *        (presumably 64 bytes per entry - confirm).
 + *
 + * NOTE(review): the comment preceding this function says no OBP calls
 + * may be made from here, yet the failure path below calls
 + * prom_printf() and prom_halt() - confirm whether that is acceptable.
 + */
 +static void __cpuinit register_one_mondo(unsigned long paddr, unsigned long type, unsigned long qmask)
 +{
 +	unsigned long num_entries = (qmask + 1) / 64;
 +	unsigned long status;
 +
 +	status = sun4v_cpu_qconf(type, paddr, num_entries);
 +	if (status != HV_EOK) {
 +		prom_printf("SUN4V: sun4v_cpu_qconf(%lu:%lx:%lu) failed, "
 +			    "err %lu\n", type, paddr, num_entries, status);
 +		prom_halt();
 +	}
 +}
 +
 +void __cpuinit notrace sun4v_register_mondo_queues(int this_cpu)
 +{
 +	struct trap_per_cpu *tb = &trap_block[this_cpu];
 +
 +	register_one_mondo(tb->cpu_mondo_pa, HV_CPU_QUEUE_CPU_MONDO,
 +			   tb->cpu_mondo_qmask);
 +	register_one_mondo(tb->dev_mondo_pa, HV_CPU_QUEUE_DEVICE_MONDO,
 +			   tb->dev_mondo_qmask);
 +	register_one_mondo(tb->resum_mondo_pa, HV_CPU_QUEUE_RES_ERROR,
 +			   tb->resum_qmask);
 +	register_one_mondo(tb->nonresum_mondo_pa, HV_CPU_QUEUE_NONRES_ERROR,
 +			   tb->nonresum_qmask);
 +}
 +
 +static void __init alloc_one_mondo(unsigned long *pa_ptr, unsigned long qmask)
 +{
 +	unsigned long size = PAGE_ALIGN(qmask + 1);
 +	void *p = __alloc_bootmem(size, size, 0);
 +	if (!p) {
 +		prom_printf("SUN4V: Error, cannot allocate mondo queue.\n");
 +		prom_halt();
 +	}
 +
 +	*pa_ptr = __pa(p);
 +}
 +
 +static void __init alloc_one_kbuf(unsigned long *pa_ptr, unsigned long qmask)
 +{
 +	unsigned long size = PAGE_ALIGN(qmask + 1);
 +	void *p = __alloc_bootmem(size, size, 0);
 +
 +	if (!p) {
 +		prom_printf("SUN4V: Error, cannot allocate kbuf page.\n");
 +		prom_halt();
 +	}
 +
 +	*pa_ptr = __pa(p);
 +}
 +
 +static void __init init_cpu_send_mondo_info(struct trap_per_cpu *tb)
 +{
 +#ifdef CONFIG_SMP
 +	void *page;
 +
 +	BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64));
 +
 +	page = alloc_bootmem_pages(PAGE_SIZE);
 +	if (!page) {
 +		prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n");
 +		prom_halt();
 +	}
 +
 +	tb->cpu_mondo_block_pa = __pa(page);
 +	tb->cpu_list_pa = __pa(page + 64);
 +#endif
 +}
 +
 +/* Allocate mondo and error queues for all possible cpus.  */
 +static void __init sun4v_init_mondo_queues(void)
 +{
 +	int cpu;
 +
 +	for_each_possible_cpu(cpu) {
 +		struct trap_per_cpu *tb = &trap_block[cpu];
 +
 +		alloc_one_mondo(&tb->cpu_mondo_pa, tb->cpu_mondo_qmask);
 +		alloc_one_mondo(&tb->dev_mondo_pa, tb->dev_mondo_qmask);
 +		alloc_one_mondo(&tb->resum_mondo_pa, tb->resum_qmask);
 +		alloc_one_kbuf(&tb->resum_kernel_buf_pa, tb->resum_qmask);
 +		alloc_one_mondo(&tb->nonresum_mondo_pa, tb->nonresum_qmask);
 +		alloc_one_kbuf(&tb->nonresum_kernel_buf_pa,
 +			       tb->nonresum_qmask);
 +	}
 +}
 +
 +static void __init init_send_mondo_info(void)
 +{
 +	int cpu;
 +
 +	for_each_possible_cpu(cpu) {
 +		struct trap_per_cpu *tb = &trap_block[cpu];
 +
 +		init_cpu_send_mondo_info(tb);
 +	}
 +}
 +
 +static struct irqaction timer_irq_action = {
 +	.name = "timer",
 +};
 +
 +/* Only invoked on boot processor. */
 +void __init init_IRQ(void)
 +{
 +	unsigned long size;
 +
 +	map_prom_timers();
 +	kill_prom_timer();
 +
 +	size = sizeof(struct ino_bucket) * NUM_IVECS;
 +	ivector_table = alloc_bootmem(size);
 +	if (!ivector_table) {
 +		prom_printf("Fatal error, cannot allocate ivector_table\n");
 +		prom_halt();
 +	}
 +	__flush_dcache_range((unsigned long) ivector_table,
 +			     ((unsigned long) ivector_table) + size);
 +
 +	ivector_table_pa = __pa(ivector_table);
 +
 +	if (tlb_type == hypervisor)
 +		sun4v_init_mondo_queues();
 +
 +	init_send_mondo_info();
 +
 +	if (tlb_type == hypervisor) {
 +		/* Load up the boot cpu's entries.  */
 +		sun4v_register_mondo_queues(hard_smp_processor_id());
 +	}
 +
 +	/* We need to clear any IRQ's pending in the soft interrupt
 +	 * registers, a spurious one could be left around from the
 +	 * PROM timer which we just disabled.
 +	 */
 +	clear_softint(get_softint());
 +
 +	/* Now that ivector table is initialized, it is safe
 +	 * to receive IRQ vector traps.  We will normally take
 +	 * one or two right now, in case some device PROM used
 +	 * to boot us wants to speak to us.  We just ignore them.
 +	 */
 +	__asm__ __volatile__("rdpr	%%pstate, %%g1\n\t"
 +			     "or	%%g1, %0, %%g1\n\t"
 +			     "wrpr	%%g1, 0x0, %%pstate"
 +			     : /* No outputs */
 +			     : "i" (PSTATE_IE)
 +			     : "g1");
 +
 +	irq_desc[0].action = &timer_irq_action;
 +}
diff --cc arch/sparc/kernel/of_device_64.c
index 46e231f7c5ce,000000000000..322046cdf85f
mode 100644,000000..100644
--- a/arch/sparc/kernel/of_device_64.c
+++ b/arch/sparc/kernel/of_device_64.c
@@@ -1,898 -1,0 +1,898 @@@
 +#include <linux/string.h>
 +#include <linux/kernel.h>
 +#include <linux/of.h>
 +#include <linux/init.h>
 +#include <linux/module.h>
 +#include <linux/mod_devicetable.h>
 +#include <linux/slab.h>
 +#include <linux/errno.h>
 +#include <linux/irq.h>
 +#include <linux/of_device.h>
 +#include <linux/of_platform.h>
 +
 +void __iomem *of_ioremap(struct resource *res, unsigned long offset, unsigned long size, char *name)
 +{
 +	unsigned long ret = res->start + offset;
 +	struct resource *r;
 +
 +	if (res->flags & IORESOURCE_MEM)
 +		r = request_mem_region(ret, size, name);
 +	else
 +		r = request_region(ret, size, name);
 +	if (!r)
 +		ret = 0;
 +
 +	return (void __iomem *) ret;
 +}
 +EXPORT_SYMBOL(of_ioremap);
 +
 +void of_iounmap(struct resource *res, void __iomem *base, unsigned long size)
 +{
 +	if (res->flags & IORESOURCE_MEM)
 +		release_mem_region((unsigned long) base, size);
 +	else
 +		release_region((unsigned long) base, size);
 +}
 +EXPORT_SYMBOL(of_iounmap);
 +
 +static int node_match(struct device *dev, void *data)
 +{
 +	struct of_device *op = to_of_device(dev);
 +	struct device_node *dp = data;
 +
 +	return (op->node == dp);
 +}
 +
 +struct of_device *of_find_device_by_node(struct device_node *dp)
 +{
 +	struct device *dev = bus_find_device(&of_platform_bus_type, NULL,
 +					     dp, node_match);
 +
 +	if (dev)
 +		return to_of_device(dev);
 +
 +	return NULL;
 +}
 +EXPORT_SYMBOL(of_find_device_by_node);
 +
 +unsigned int irq_of_parse_and_map(struct device_node *node, int index)
 +{
 +	struct of_device *op = of_find_device_by_node(node);
 +
 +	if (!op || index >= op->num_irqs)
 +		return 0;
 +
 +	return op->irqs[index];
 +}
 +EXPORT_SYMBOL(irq_of_parse_and_map);
 +
 +/* Take the archdata values for IOMMU, STC, HOSTDATA, and NUMA node
 + * found in BUS and propagate to all child of_device objects.
 + */
 +void of_propagate_archdata(struct of_device *bus)
 +{
 +	struct dev_archdata *bus_sd = &bus->dev.archdata;
 +	struct device_node *bus_dp = bus->node;
 +	struct device_node *dp;
 +
 +	for (dp = bus_dp->child; dp; dp = dp->sibling) {
 +		struct of_device *op = of_find_device_by_node(dp);
 +
 +		op->dev.archdata.iommu = bus_sd->iommu;
 +		op->dev.archdata.stc = bus_sd->stc;
 +		op->dev.archdata.host_controller = bus_sd->host_controller;
 +		op->dev.archdata.numa_node = bus_sd->numa_node;
 +
 +		if (dp->child)
 +			of_propagate_archdata(op);
 +	}
 +}
 +
 +struct bus_type of_platform_bus_type;
 +EXPORT_SYMBOL(of_platform_bus_type);
 +
 +static inline u64 of_read_addr(const u32 *cell, int size)
 +{
 +	u64 r = 0;
 +	while (size--)
 +		r = (r << 32) | *(cell++);
 +	return r;
 +}
 +
 +static void __init get_cells(struct device_node *dp,
 +			     int *addrc, int *sizec)
 +{
 +	if (addrc)
 +		*addrc = of_n_addr_cells(dp);
 +	if (sizec)
 +		*sizec = of_n_size_cells(dp);
 +}
 +
 +/* Max address size we deal with */
 +#define OF_MAX_ADDR_CELLS	4
 +
 +struct of_bus {
 +	const char	*name;
 +	const char	*addr_prop_name;
 +	int		(*match)(struct device_node *parent);
 +	void		(*count_cells)(struct device_node *child,
 +				       int *addrc, int *sizec);
 +	int		(*map)(u32 *addr, const u32 *range,
 +			       int na, int ns, int pna);
 +	unsigned long	(*get_flags)(const u32 *addr, unsigned long);
 +};
 +
 +/*
 + * Default translator (generic bus)
 + */
 +
 +static void of_bus_default_count_cells(struct device_node *dev,
 +				       int *addrc, int *sizec)
 +{
 +	get_cells(dev, addrc, sizec);
 +}
 +
 +/* Make sure the least significant 64-bits are in-range.  Even
 + * for 3 or 4 cell values it is a good enough approximation.
 + */
 +static int of_out_of_range(const u32 *addr, const u32 *base,
 +			   const u32 *size, int na, int ns)
 +{
 +	u64 a = of_read_addr(addr, na);
 +	u64 b = of_read_addr(base, na);
 +
 +	if (a < b)
 +		return 1;
 +
 +	b += of_read_addr(size, ns);
 +	if (a >= b)
 +		return 1;
 +
 +	return 0;
 +}
 +
 +static int of_bus_default_map(u32 *addr, const u32 *range,
 +			      int na, int ns, int pna)
 +{
 +	u32 result[OF_MAX_ADDR_CELLS];
 +	int i;
 +
 +	if (ns > 2) {
 +		printk("of_device: Cannot handle size cells (%d) > 2.", ns);
 +		return -EINVAL;
 +	}
 +
 +	if (of_out_of_range(addr, range, range + na + pna, na, ns))
 +		return -EINVAL;
 +
 +	/* Start with the parent range base.  */
 +	memcpy(result, range + na, pna * 4);
 +
 +	/* Add in the child address offset.  */
 +	for (i = 0; i < na; i++)
 +		result[pna - 1 - i] +=
 +			(addr[na - 1 - i] -
 +			 range[na - 1 - i]);
 +
 +	memcpy(addr, result, pna * 4);
 +
 +	return 0;
 +}
 +
 +static unsigned long of_bus_default_get_flags(const u32 *addr, unsigned long flags)
 +{
 +	if (flags)
 +		return flags;
 +	return IORESOURCE_MEM;
 +}
 +
 +/*
 + * PCI bus specific translator
 + */
 +
 +static int of_bus_pci_match(struct device_node *np)
 +{
 +	if (!strcmp(np->name, "pci")) {
 +		const char *model = of_get_property(np, "model", NULL);
 +
 +		if (model && !strcmp(model, "SUNW,simba"))
 +			return 0;
 +
 +		/* Do not do PCI specific frobbing if the
 +		 * PCI bridge lacks a ranges property.  We
 +		 * want to pass it through up to the next
 +		 * parent as-is, not with the PCI translate
 +		 * method which chops off the top address cell.
 +		 */
 +		if (!of_find_property(np, "ranges", NULL))
 +			return 0;
 +
 +		return 1;
 +	}
 +
 +	return 0;
 +}
 +
 +static int of_bus_simba_match(struct device_node *np)
 +{
 +	const char *model = of_get_property(np, "model", NULL);
 +
 +	if (model && !strcmp(model, "SUNW,simba"))
 +		return 1;
 +
 +	/* Treat PCI busses lacking ranges property just like
 +	 * simba.
 +	 */
 +	if (!strcmp(np->name, "pci")) {
 +		if (!of_find_property(np, "ranges", NULL))
 +			return 1;
 +	}
 +
 +	return 0;
 +}
 +
 +static int of_bus_simba_map(u32 *addr, const u32 *range,
 +			    int na, int ns, int pna)
 +{
 +	return 0;
 +}
 +
 +static void of_bus_pci_count_cells(struct device_node *np,
 +				   int *addrc, int *sizec)
 +{
 +	if (addrc)
 +		*addrc = 3;
 +	if (sizec)
 +		*sizec = 2;
 +}
 +
 +static int of_bus_pci_map(u32 *addr, const u32 *range,
 +			  int na, int ns, int pna)
 +{
 +	u32 result[OF_MAX_ADDR_CELLS];
 +	int i;
 +
 +	/* Check address type match */
 +	if ((addr[0] ^ range[0]) & 0x03000000)
 +		return -EINVAL;
 +
 +	if (of_out_of_range(addr + 1, range + 1, range + na + pna,
 +			    na - 1, ns))
 +		return -EINVAL;
 +
 +	/* Start with the parent range base.  */
 +	memcpy(result, range + na, pna * 4);
 +
 +	/* Add in the child address offset, skipping high cell.  */
 +	for (i = 0; i < na - 1; i++)
 +		result[pna - 1 - i] +=
 +			(addr[na - 1 - i] -
 +			 range[na - 1 - i]);
 +
 +	memcpy(addr, result, pna * 4);
 +
 +	return 0;
 +}
 +
 +static unsigned long of_bus_pci_get_flags(const u32 *addr, unsigned long flags)
 +{
 +	u32 w = addr[0];
 +
 +	/* For PCI, we override whatever child busses may have used.  */
 +	flags = 0;
 +	switch((w >> 24) & 0x03) {
 +	case 0x01:
 +		flags |= IORESOURCE_IO;
 +		break;
 +
 +	case 0x02: /* 32 bits */
 +	case 0x03: /* 64 bits */
 +		flags |= IORESOURCE_MEM;
 +		break;
 +	}
 +	if (w & 0x40000000)
 +		flags |= IORESOURCE_PREFETCH;
 +	return flags;
 +}
 +
 +/*
 + * SBUS bus specific translator
 + */
 +
 +static int of_bus_sbus_match(struct device_node *np)
 +{
 +	return !strcmp(np->name, "sbus") ||
 +		!strcmp(np->name, "sbi");
 +}
 +
 +static void of_bus_sbus_count_cells(struct device_node *child,
 +				   int *addrc, int *sizec)
 +{
 +	if (addrc)
 +		*addrc = 2;
 +	if (sizec)
 +		*sizec = 1;
 +}
 +
 +/*
 + * FHC/Central bus specific translator.
 + *
 + * This is just needed to hard-code the address and size cell
 + * counts.  'fhc' and 'central' nodes lack the #address-cells and
 + * #size-cells properties, and if you walk to the root on such
 + * Enterprise boxes all you'll get is a #size-cells of 2 which is
 + * not what we want to use.
 + */
 +static int of_bus_fhc_match(struct device_node *np)
 +{
 +	return !strcmp(np->name, "fhc") ||
 +		!strcmp(np->name, "central");
 +}
 +
 +#define of_bus_fhc_count_cells of_bus_sbus_count_cells
 +
 +/*
 + * Array of bus specific translators
 + */
 +
 +static struct of_bus of_busses[] = {
 +	/* PCI */
 +	{
 +		.name = "pci",
 +		.addr_prop_name = "assigned-addresses",
 +		.match = of_bus_pci_match,
 +		.count_cells = of_bus_pci_count_cells,
 +		.map = of_bus_pci_map,
 +		.get_flags = of_bus_pci_get_flags,
 +	},
 +	/* SIMBA */
 +	{
 +		.name = "simba",
 +		.addr_prop_name = "assigned-addresses",
 +		.match = of_bus_simba_match,
 +		.count_cells = of_bus_pci_count_cells,
 +		.map = of_bus_simba_map,
 +		.get_flags = of_bus_pci_get_flags,
 +	},
 +	/* SBUS */
 +	{
 +		.name = "sbus",
 +		.addr_prop_name = "reg",
 +		.match = of_bus_sbus_match,
 +		.count_cells = of_bus_sbus_count_cells,
 +		.map = of_bus_default_map,
 +		.get_flags = of_bus_default_get_flags,
 +	},
 +	/* FHC */
 +	{
 +		.name = "fhc",
 +		.addr_prop_name = "reg",
 +		.match = of_bus_fhc_match,
 +		.count_cells = of_bus_fhc_count_cells,
 +		.map = of_bus_default_map,
 +		.get_flags = of_bus_default_get_flags,
 +	},
 +	/* Default */
 +	{
 +		.name = "default",
 +		.addr_prop_name = "reg",
 +		.match = NULL,
 +		.count_cells = of_bus_default_count_cells,
 +		.map = of_bus_default_map,
 +		.get_flags = of_bus_default_get_flags,
 +	},
 +};
 +
 +static struct of_bus *of_match_bus(struct device_node *np)
 +{
 +	int i;
 +
 +	for (i = 0; i < ARRAY_SIZE(of_busses); i ++)
 +		if (!of_busses[i].match || of_busses[i].match(np))
 +			return &of_busses[i];
 +	BUG();
 +	return NULL;
 +}
 +
 +static int __init build_one_resource(struct device_node *parent,
 +				     struct of_bus *bus,
 +				     struct of_bus *pbus,
 +				     u32 *addr,
 +				     int na, int ns, int pna)
 +{
 +	const u32 *ranges;
 +	int rone, rlen;
 +
 +	ranges = of_get_property(parent, "ranges", &rlen);
 +	if (ranges == NULL || rlen == 0) {
 +		u32 result[OF_MAX_ADDR_CELLS];
 +		int i;
 +
 +		memset(result, 0, pna * 4);
 +		for (i = 0; i < na; i++)
 +			result[pna - 1 - i] =
 +				addr[na - 1 - i];
 +
 +		memcpy(addr, result, pna * 4);
 +		return 0;
 +	}
 +
 +	/* Now walk through the ranges */
 +	rlen /= 4;
 +	rone = na + pna + ns;
 +	for (; rlen >= rone; rlen -= rone, ranges += rone) {
 +		if (!bus->map(addr, ranges, na, ns, pna))
 +			return 0;
 +	}
 +
 +	/* When we miss an I/O space match on PCI, just pass it up
 +	 * to the next PCI bridge and/or controller.
 +	 */
 +	if (!strcmp(bus->name, "pci") &&
 +	    (addr[0] & 0x03000000) == 0x01000000)
 +		return 0;
 +
 +	return 1;
 +}
 +
 +static int __init use_1to1_mapping(struct device_node *pp)
 +{
 +	/* If we have a ranges property in the parent, use it.  */
 +	if (of_find_property(pp, "ranges", NULL) != NULL)
 +		return 0;
 +
 +	/* If the parent is the dma node of an ISA bus, pass
 +	 * the translation up to the root.
 +	 *
 +	 * Some SBUS devices use intermediate nodes to express
 +	 * hierarchy within the device itself.  These aren't
 +	 * real bus nodes, and don't have a 'ranges' property.
 +	 * But, we should still pass the translation work up
 +	 * to the SBUS itself.
 +	 */
 +	if (!strcmp(pp->name, "dma") ||
 +	    !strcmp(pp->name, "espdma") ||
 +	    !strcmp(pp->name, "ledma") ||
 +	    !strcmp(pp->name, "lebuffer"))
 +		return 0;
 +
 +	/* Similarly for all PCI bridges, if we get this far
 +	 * it lacks a ranges property, and this will include
 +	 * cases like Simba.
 +	 */
 +	if (!strcmp(pp->name, "pci"))
 +		return 0;
 +
 +	return 1;
 +}
 +
 +static int of_resource_verbose;
 +
 +static void __init build_device_resources(struct of_device *op,
 +					  struct device *parent)
 +{
 +	struct of_device *p_op;
 +	struct of_bus *bus;
 +	int na, ns;
 +	int index, num_reg;
 +	const void *preg;
 +
 +	if (!parent)
 +		return;
 +
 +	p_op = to_of_device(parent);
 +	bus = of_match_bus(p_op->node);
 +	bus->count_cells(op->node, &na, &ns);
 +
 +	preg = of_get_property(op->node, bus->addr_prop_name, &num_reg);
 +	if (!preg || num_reg == 0)
 +		return;
 +
 +	/* Convert to num-cells.  */
 +	num_reg /= 4;
 +
 +	/* Convert to num-entries.  */
 +	num_reg /= na + ns;
 +
 +	/* Prevent overrunning the op->resource[] array.  */
 +	if (num_reg > PROMREG_MAX) {
 +		printk(KERN_WARNING "%s: Too many regs (%d), "
 +		       "limiting to %d.\n",
 +		       op->node->full_name, num_reg, PROMREG_MAX);
 +		num_reg = PROMREG_MAX;
 +	}
 +
 +	for (index = 0; index < num_reg; index++) {
 +		struct resource *r = &op->resource[index];
 +		u32 addr[OF_MAX_ADDR_CELLS];
 +		const u32 *reg = (preg + (index * ((na + ns) * 4)));
 +		struct device_node *dp = op->node;
 +		struct device_node *pp = p_op->node;
 +		struct of_bus *pbus, *dbus;
 +		u64 size, result = OF_BAD_ADDR;
 +		unsigned long flags;
 +		int dna, dns;
 +		int pna, pns;
 +
 +		size = of_read_addr(reg + na, ns);
 +		memcpy(addr, reg, na * 4);
 +
 +		flags = bus->get_flags(addr, 0);
 +
 +		if (use_1to1_mapping(pp)) {
 +			result = of_read_addr(addr, na);
 +			goto build_res;
 +		}
 +
 +		dna = na;
 +		dns = ns;
 +		dbus = bus;
 +
 +		while (1) {
 +			dp = pp;
 +			pp = dp->parent;
 +			if (!pp) {
 +				result = of_read_addr(addr, dna);
 +				break;
 +			}
 +
 +			pbus = of_match_bus(pp);
 +			pbus->count_cells(dp, &pna, &pns);
 +
 +			if (build_one_resource(dp, dbus, pbus, addr,
 +					       dna, dns, pna))
 +				break;
 +
 +			flags = pbus->get_flags(addr, flags);
 +
 +			dna = pna;
 +			dns = pns;
 +			dbus = pbus;
 +		}
 +
 +	build_res:
 +		memset(r, 0, sizeof(*r));
 +
 +		if (of_resource_verbose)
 +			printk("%s reg[%d] -> %lx\n",
 +			       op->node->full_name, index,
 +			       result);
 +
 +		if (result != OF_BAD_ADDR) {
 +			if (tlb_type == hypervisor)
 +				result &= 0x0fffffffffffffffUL;
 +
 +			r->start = result;
 +			r->end = result + size - 1;
 +			r->flags = flags;
 +		}
 +		r->name = op->node->name;
 +	}
 +}
 +
 +static struct device_node * __init
 +apply_interrupt_map(struct device_node *dp, struct device_node *pp,
 +		    const u32 *imap, int imlen, const u32 *imask,
 +		    unsigned int *irq_p)
 +{
 +	struct device_node *cp;
 +	unsigned int irq = *irq_p;
 +	struct of_bus *bus;
 +	phandle handle;
 +	const u32 *reg;
 +	int na, num_reg, i;
 +
 +	bus = of_match_bus(pp);
 +	bus->count_cells(dp, &na, NULL);
 +
 +	reg = of_get_property(dp, "reg", &num_reg);
 +	if (!reg || !num_reg)
 +		return NULL;
 +
 +	imlen /= ((na + 3) * 4);
 +	handle = 0;
 +	for (i = 0; i < imlen; i++) {
 +		int j;
 +
 +		for (j = 0; j < na; j++) {
 +			if ((reg[j] & imask[j]) != imap[j])
 +				goto next;
 +		}
 +		if (imap[na] == irq) {
 +			handle = imap[na + 1];
 +			irq = imap[na + 2];
 +			break;
 +		}
 +
 +	next:
 +		imap += (na + 3);
 +	}
 +	if (i == imlen) {
 +		/* Psycho and Sabre PCI controllers can have 'interrupt-map'
 +		 * properties that do not include the on-board device
 +		 * interrupts.  Instead, the device's 'interrupts' property
 +		 * is already a fully specified INO value.
 +		 *
 +		 * Handle this by deciding that, if we didn't get a
 +		 * match in the parent's 'interrupt-map', and the
 +		 * parent is an IRQ translator, then use the parent as
 +		 * our IRQ controller.
 +		 */
 +		if (pp->irq_trans)
 +			return pp;
 +
 +		return NULL;
 +	}
 +
 +	*irq_p = irq;
 +	cp = of_find_node_by_phandle(handle);
 +
 +	return cp;
 +}
 +
 +static unsigned int __init pci_irq_swizzle(struct device_node *dp,
 +					   struct device_node *pp,
 +					   unsigned int irq)
 +{
 +	const struct linux_prom_pci_registers *regs;
 +	unsigned int bus, devfn, slot, ret;
 +
 +	if (irq < 1 || irq > 4)
 +		return irq;
 +
 +	regs = of_get_property(dp, "reg", NULL);
 +	if (!regs)
 +		return irq;
 +
 +	bus = (regs->phys_hi >> 16) & 0xff;
 +	devfn = (regs->phys_hi >> 8) & 0xff;
 +	slot = (devfn >> 3) & 0x1f;
 +
 +	if (pp->irq_trans) {
 +		/* Derived from Table 8-3, U2P User's Manual.  This branch
 +		 * is handling a PCI controller that lacks a proper set of
 +		 * interrupt-map and interrupt-map-mask properties.  The
 +		 * Ultra-E450 is one example.
 +		 *
 +		 * The bit layout is BSSLL, where:
 +		 * B: 0 on bus A, 1 on bus B
 +		 * S: 2-bit slot number, derived from PCI device number as
 +		 *    (dev - 1) for bus A, or (dev - 2) for bus B
 +		 * L: 2-bit line number
 +		 */
 +		if (bus & 0x80) {
 +			/* PBM-A */
 +			bus  = 0x00;
 +			slot = (slot - 1) << 2;
 +		} else {
 +			/* PBM-B */
 +			bus  = 0x10;
 +			slot = (slot - 2) << 2;
 +		}
 +		irq -= 1;
 +
 +		ret = (bus | slot | irq);
 +	} else {
 +		/* Going through a PCI-PCI bridge that lacks a set of
 +		 * interrupt-map and interrupt-map-mask properties.
 +		 */
 +		ret = ((irq - 1 + (slot & 3)) & 3) + 1;
 +	}
 +
 +	return ret;
 +}
 +
 +static int of_irq_verbose;
 +
 +static unsigned int __init build_one_device_irq(struct of_device *op,
 +						struct device *parent,
 +						unsigned int irq)
 +{
 +	struct device_node *dp = op->node;
 +	struct device_node *pp, *ip;
 +	unsigned int orig_irq = irq;
 +	int nid;
 +
 +	if (irq == 0xffffffff)
 +		return irq;
 +
 +	if (dp->irq_trans) {
 +		irq = dp->irq_trans->irq_build(dp, irq,
 +					       dp->irq_trans->data);
 +
 +		if (of_irq_verbose)
 +			printk("%s: direct translate %x --> %x\n",
 +			       dp->full_name, orig_irq, irq);
 +
 +		goto out;
 +	}
 +
 +	/* Something more complicated.  Walk up to the root, applying
 +	 * interrupt-map or bus specific translations, until we hit
 +	 * an IRQ translator.
 +	 *
 +	 * If we hit a bus type or situation we cannot handle, we
 +	 * stop and assume that the original IRQ number was in a
 +	 * format which has special meaning to its immediate parent.
 +	 */
 +	pp = dp->parent;
 +	ip = NULL;
 +	while (pp) {
 +		const void *imap, *imsk;
 +		int imlen;
 +
 +		imap = of_get_property(pp, "interrupt-map", &imlen);
 +		imsk = of_get_property(pp, "interrupt-map-mask", NULL);
 +		if (imap && imsk) {
 +			struct device_node *iret;
 +			int this_orig_irq = irq;
 +
 +			iret = apply_interrupt_map(dp, pp,
 +						   imap, imlen, imsk,
 +						   &irq);
 +
 +			if (of_irq_verbose)
 +				printk("%s: Apply [%s:%x] imap --> [%s:%x]\n",
 +				       op->node->full_name,
 +				       pp->full_name, this_orig_irq,
 +				       (iret ? iret->full_name : "NULL"), irq);
 +
 +			if (!iret)
 +				break;
 +
 +			if (iret->irq_trans) {
 +				ip = iret;
 +				break;
 +			}
 +		} else {
 +			if (!strcmp(pp->name, "pci")) {
 +				unsigned int this_orig_irq = irq;
 +
 +				irq = pci_irq_swizzle(dp, pp, irq);
 +				if (of_irq_verbose)
 +					printk("%s: PCI swizzle [%s] "
 +					       "%x --> %x\n",
 +					       op->node->full_name,
 +					       pp->full_name, this_orig_irq,
 +					       irq);
 +
 +			}
 +
 +			if (pp->irq_trans) {
 +				ip = pp;
 +				break;
 +			}
 +		}
 +		dp = pp;
 +		pp = pp->parent;
 +	}
 +	if (!ip)
 +		return orig_irq;
 +
 +	irq = ip->irq_trans->irq_build(op->node, irq,
 +				       ip->irq_trans->data);
 +	if (of_irq_verbose)
 +		printk("%s: Apply IRQ trans [%s] %x --> %x\n",
 +		       op->node->full_name, ip->full_name, orig_irq, irq);
 +
 +out:
 +	nid = of_node_to_nid(dp);
 +	if (nid != -1) {
 +		cpumask_t numa_mask = node_to_cpumask(nid);
 +
- 		irq_set_affinity(irq, numa_mask);
++		irq_set_affinity(irq, &numa_mask);
 +	}
 +
 +	return irq;
 +}
 +
 +static struct of_device * __init scan_one_device(struct device_node *dp,
 +						 struct device *parent)
 +{
 +	struct of_device *op = kzalloc(sizeof(*op), GFP_KERNEL);
 +	const unsigned int *irq;
 +	struct dev_archdata *sd;
 +	int len, i;
 +
 +	if (!op)
 +		return NULL;
 +
 +	sd = &op->dev.archdata;
 +	sd->prom_node = dp;
 +	sd->op = op;
 +
 +	op->node = dp;
 +
 +	op->clock_freq = of_getintprop_default(dp, "clock-frequency",
 +					       (25*1000*1000));
 +	op->portid = of_getintprop_default(dp, "upa-portid", -1);
 +	if (op->portid == -1)
 +		op->portid = of_getintprop_default(dp, "portid", -1);
 +
 +	irq = of_get_property(dp, "interrupts", &len);
 +	if (irq) {
 +		op->num_irqs = len / 4;
 +
 +		/* Prevent overrunning the op->irqs[] array.  */
 +		if (op->num_irqs > PROMINTR_MAX) {
 +			printk(KERN_WARNING "%s: Too many irqs (%d), "
 +			       "limiting to %d.\n",
 +			       dp->full_name, op->num_irqs, PROMINTR_MAX);
 +			op->num_irqs = PROMINTR_MAX;
 +		}
 +		memcpy(op->irqs, irq, op->num_irqs * 4);
 +	} else {
 +		op->num_irqs = 0;
 +	}
 +
 +	build_device_resources(op, parent);
 +	for (i = 0; i < op->num_irqs; i++)
 +		op->irqs[i] = build_one_device_irq(op, parent, op->irqs[i]);
 +
 +	op->dev.parent = parent;
 +	op->dev.bus = &of_platform_bus_type;
 +	if (!parent)
 +		dev_set_name(&op->dev, "root");
 +	else
 +		dev_set_name(&op->dev, "%08x", dp->node);
 +
 +	if (of_device_register(op)) {
 +		printk("%s: Could not register of device.\n",
 +		       dp->full_name);
 +		kfree(op);
 +		op = NULL;
 +	}
 +
 +	return op;
 +}
 +
 +static void __init scan_tree(struct device_node *dp, struct device *parent)
 +{
 +	while (dp) {
 +		struct of_device *op = scan_one_device(dp, parent);
 +
 +		if (op)
 +			scan_tree(dp->child, &op->dev);
 +
 +		dp = dp->sibling;
 +	}
 +}
 +
 +static void __init scan_of_devices(void)
 +{
 +	struct device_node *root = of_find_node_by_path("/");
 +	struct of_device *parent;
 +
 +	parent = scan_one_device(root, NULL);
 +	if (!parent)
 +		return;
 +
 +	scan_tree(root->child, &parent->dev);
 +}
 +
 +static int __init of_bus_driver_init(void)
 +{
 +	int err;
 +
 +	err = of_bus_type_init(&of_platform_bus_type, "of");
 +	if (!err)
 +		scan_of_devices();
 +
 +	return err;
 +}
 +
 +postcore_initcall(of_bus_driver_init);
 +
 +static int __init of_debug(char *str)
 +{
 +	int val = 0;
 +
 +	get_option(&str, &val);
 +	if (val & 1)
 +		of_resource_verbose = 1;
 +	if (val & 2)
 +		of_irq_verbose = 1;
 +	return 1;
 +}
 +
 +__setup("of_debug=", of_debug);
diff --cc arch/sparc/kernel/pci_msi.c
index 2e680f34f727,000000000000..0d0cd815e83e
mode 100644,000000..100644
--- a/arch/sparc/kernel/pci_msi.c
+++ b/arch/sparc/kernel/pci_msi.c
@@@ -1,447 -1,0 +1,447 @@@
 +/* pci_msi.c: Sparc64 MSI support common layer.
 + *
 + * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
 + */
 +#include <linux/kernel.h>
 +#include <linux/interrupt.h>
 +#include <linux/irq.h>
 +
 +#include "pci_impl.h"
 +
 +static irqreturn_t sparc64_msiq_interrupt(int irq, void *cookie)
 +{
 +	struct sparc64_msiq_cookie *msiq_cookie = cookie;
 +	struct pci_pbm_info *pbm = msiq_cookie->pbm;
 +	unsigned long msiqid = msiq_cookie->msiqid;
 +	const struct sparc64_msiq_ops *ops;
 +	unsigned long orig_head, head;
 +	int err;
 +
 +	ops = pbm->msi_ops;
 +
 +	err = ops->get_head(pbm, msiqid, &head);
 +	if (unlikely(err < 0))
 +		goto err_get_head;
 +
 +	orig_head = head;
 +	for (;;) {
 +		unsigned long msi;
 +
 +		err = ops->dequeue_msi(pbm, msiqid, &head, &msi);
 +		if (likely(err > 0)) {
 +			struct irq_desc *desc;
 +			unsigned int virt_irq;
 +
 +			virt_irq = pbm->msi_irq_table[msi - pbm->msi_first];
 +			desc = irq_desc + virt_irq;
 +
 +			desc->handle_irq(virt_irq, desc);
 +		}
 +
 +		if (unlikely(err < 0))
 +			goto err_dequeue;
 +
 +		if (err == 0)
 +			break;
 +	}
 +	if (likely(head != orig_head)) {
 +		err = ops->set_head(pbm, msiqid, head);
 +		if (unlikely(err < 0))
 +			goto err_set_head;
 +	}
 +	return IRQ_HANDLED;
 +
 +err_get_head:
 +	printk(KERN_EMERG "MSI: Get head on msiqid[%lu] gives error %d\n",
 +	       msiqid, err);
 +	goto err_out;
 +
 +err_dequeue:
 +	printk(KERN_EMERG "MSI: Dequeue head[%lu] from msiqid[%lu] "
 +	       "gives error %d\n",
 +	       head, msiqid, err);
 +	goto err_out;
 +
 +err_set_head:
 +	printk(KERN_EMERG "MSI: Set head[%lu] on msiqid[%lu] "
 +	       "gives error %d\n",
 +	       head, msiqid, err);
 +	goto err_out;
 +
 +err_out:
 +	return IRQ_NONE;
 +}
 +
 +static u32 pick_msiq(struct pci_pbm_info *pbm)
 +{
 +	static DEFINE_SPINLOCK(rotor_lock);
 +	unsigned long flags;
 +	u32 ret, rotor;
 +
 +	spin_lock_irqsave(&rotor_lock, flags);
 +
 +	rotor = pbm->msiq_rotor;
 +	ret = pbm->msiq_first + rotor;
 +
 +	if (++rotor >= pbm->msiq_num)
 +		rotor = 0;
 +	pbm->msiq_rotor = rotor;
 +
 +	spin_unlock_irqrestore(&rotor_lock, flags);
 +
 +	return ret;
 +}
 +
 +
 +static int alloc_msi(struct pci_pbm_info *pbm)
 +{
 +	int i;
 +
 +	for (i = 0; i < pbm->msi_num; i++) {
 +		if (!test_and_set_bit(i, pbm->msi_bitmap))
 +			return i + pbm->msi_first;
 +	}
 +
 +	return -ENOENT;
 +}
 +
 +static void free_msi(struct pci_pbm_info *pbm, int msi_num)
 +{
 +	msi_num -= pbm->msi_first;
 +	clear_bit(msi_num, pbm->msi_bitmap);
 +}
 +
 +static struct irq_chip msi_irq = {
 +	.typename	= "PCI-MSI",
 +	.mask		= mask_msi_irq,
 +	.unmask		= unmask_msi_irq,
 +	.enable		= unmask_msi_irq,
 +	.disable	= mask_msi_irq,
 +	/* XXX affinity XXX */
 +};
 +
 +static int sparc64_setup_msi_irq(unsigned int *virt_irq_p,
 +				 struct pci_dev *pdev,
 +				 struct msi_desc *entry)
 +{
 +	struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
 +	const struct sparc64_msiq_ops *ops = pbm->msi_ops;
 +	struct msi_msg msg;
 +	int msi, err;
 +	u32 msiqid;
 +
 +	*virt_irq_p = virt_irq_alloc(0, 0);
 +	err = -ENOMEM;
 +	if (!*virt_irq_p)
 +		goto out_err;
 +
 +	set_irq_chip_and_handler_name(*virt_irq_p, &msi_irq,
 +				      handle_simple_irq, "MSI");
 +
 +	err = alloc_msi(pbm);
 +	if (unlikely(err < 0))
 +		goto out_virt_irq_free;
 +
 +	msi = err;
 +
 +	msiqid = pick_msiq(pbm);
 +
 +	err = ops->msi_setup(pbm, msiqid, msi,
 +			     (entry->msi_attrib.is_64 ? 1 : 0));
 +	if (err)
 +		goto out_msi_free;
 +
 +	pbm->msi_irq_table[msi - pbm->msi_first] = *virt_irq_p;
 +
 +	if (entry->msi_attrib.is_64) {
 +		msg.address_hi = pbm->msi64_start >> 32;
 +		msg.address_lo = pbm->msi64_start & 0xffffffff;
 +	} else {
 +		msg.address_hi = 0;
 +		msg.address_lo = pbm->msi32_start;
 +	}
 +	msg.data = msi;
 +
 +	set_irq_msi(*virt_irq_p, entry);
 +	write_msi_msg(*virt_irq_p, &msg);
 +
 +	return 0;
 +
 +out_msi_free:
 +	free_msi(pbm, msi);
 +
 +out_virt_irq_free:
 +	set_irq_chip(*virt_irq_p, NULL);
 +	virt_irq_free(*virt_irq_p);
 +	*virt_irq_p = 0;
 +
 +out_err:
 +	return err;
 +}
 +
 +static void sparc64_teardown_msi_irq(unsigned int virt_irq,
 +				     struct pci_dev *pdev)
 +{
 +	struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
 +	const struct sparc64_msiq_ops *ops = pbm->msi_ops;
 +	unsigned int msi_num;
 +	int i, err;
 +
 +	for (i = 0; i < pbm->msi_num; i++) {
 +		if (pbm->msi_irq_table[i] == virt_irq)
 +			break;
 +	}
 +	if (i >= pbm->msi_num) {
 +		printk(KERN_ERR "%s: teardown: No MSI for irq %u\n",
 +		       pbm->name, virt_irq);
 +		return;
 +	}
 +
 +	msi_num = pbm->msi_first + i;
 +	pbm->msi_irq_table[i] = ~0U;
 +
 +	err = ops->msi_teardown(pbm, msi_num);
 +	if (err) {
 +		printk(KERN_ERR "%s: teardown: ops->teardown() on MSI %u, "
 +		       "irq %u, gives error %d\n",
 +		       pbm->name, msi_num, virt_irq, err);
 +		return;
 +	}
 +
 +	free_msi(pbm, msi_num);
 +
 +	set_irq_chip(virt_irq, NULL);
 +	virt_irq_free(virt_irq);
 +}
 +
 +static int msi_bitmap_alloc(struct pci_pbm_info *pbm)
 +{
 +	unsigned long size, bits_per_ulong;
 +
 +	bits_per_ulong = sizeof(unsigned long) * 8;
 +	size = (pbm->msi_num + (bits_per_ulong - 1)) & ~(bits_per_ulong - 1);
 +	size /= 8;
 +	BUG_ON(size % sizeof(unsigned long));
 +
 +	pbm->msi_bitmap = kzalloc(size, GFP_KERNEL);
 +	if (!pbm->msi_bitmap)
 +		return -ENOMEM;
 +
 +	return 0;
 +}
 +
 +static void msi_bitmap_free(struct pci_pbm_info *pbm)
 +{
 +	kfree(pbm->msi_bitmap);
 +	pbm->msi_bitmap = NULL;
 +}
 +
 +static int msi_table_alloc(struct pci_pbm_info *pbm)
 +{
 +	int size, i;
 +
 +	size = pbm->msiq_num * sizeof(struct sparc64_msiq_cookie);
 +	pbm->msiq_irq_cookies = kzalloc(size, GFP_KERNEL);
 +	if (!pbm->msiq_irq_cookies)
 +		return -ENOMEM;
 +
 +	for (i = 0; i < pbm->msiq_num; i++) {
 +		struct sparc64_msiq_cookie *p;
 +
 +		p = &pbm->msiq_irq_cookies[i];
 +		p->pbm = pbm;
 +		p->msiqid = pbm->msiq_first + i;
 +	}
 +
 +	size = pbm->msi_num * sizeof(unsigned int);
 +	pbm->msi_irq_table = kzalloc(size, GFP_KERNEL);
 +	if (!pbm->msi_irq_table) {
 +		kfree(pbm->msiq_irq_cookies);
 +		pbm->msiq_irq_cookies = NULL;
 +		return -ENOMEM;
 +	}
 +
 +	return 0;
 +}
 +
 +static void msi_table_free(struct pci_pbm_info *pbm)
 +{
 +	kfree(pbm->msiq_irq_cookies);
 +	pbm->msiq_irq_cookies = NULL;
 +
 +	kfree(pbm->msi_irq_table);
 +	pbm->msi_irq_table = NULL;
 +}
 +
 +static int bringup_one_msi_queue(struct pci_pbm_info *pbm,
 +				 const struct sparc64_msiq_ops *ops,
 +				 unsigned long msiqid,
 +				 unsigned long devino)
 +{
 +	int irq = ops->msiq_build_irq(pbm, msiqid, devino);
 +	int err, nid;
 +
 +	if (irq < 0)
 +		return irq;
 +
 +	nid = pbm->numa_node;
 +	if (nid != -1) {
 +		cpumask_t numa_mask = node_to_cpumask(nid);
 +
- 		irq_set_affinity(irq, numa_mask);
++		irq_set_affinity(irq, &numa_mask);
 +	}
 +	err = request_irq(irq, sparc64_msiq_interrupt, 0,
 +			  "MSIQ",
 +			  &pbm->msiq_irq_cookies[msiqid - pbm->msiq_first]);
 +	if (err)
 +		return err;
 +
 +	return 0;
 +}
 +
 +static int sparc64_bringup_msi_queues(struct pci_pbm_info *pbm,
 +				      const struct sparc64_msiq_ops *ops)
 +{
 +	int i;
 +
 +	for (i = 0; i < pbm->msiq_num; i++) {
 +		unsigned long msiqid = i + pbm->msiq_first;
 +		unsigned long devino = i + pbm->msiq_first_devino;
 +		int err;
 +
 +		err = bringup_one_msi_queue(pbm, ops, msiqid, devino);
 +		if (err)
 +			return err;
 +	}
 +
 +	return 0;
 +}
 +
 +void sparc64_pbm_msi_init(struct pci_pbm_info *pbm,
 +			  const struct sparc64_msiq_ops *ops)
 +{
 +	const u32 *val;
 +	int len;
 +
 +	val = of_get_property(pbm->op->node, "#msi-eqs", &len);
 +	if (!val || len != 4)
 +		goto no_msi;
 +	pbm->msiq_num = *val;
 +	if (pbm->msiq_num) {
 +		const struct msiq_prop {
 +			u32 first_msiq;
 +			u32 num_msiq;
 +			u32 first_devino;
 +		} *mqp;
 +		const struct msi_range_prop {
 +			u32 first_msi;
 +			u32 num_msi;
 +		} *mrng;
 +		const struct addr_range_prop {
 +			u32 msi32_high;
 +			u32 msi32_low;
 +			u32 msi32_len;
 +			u32 msi64_high;
 +			u32 msi64_low;
 +			u32 msi64_len;
 +		} *arng;
 +
 +		val = of_get_property(pbm->op->node, "msi-eq-size", &len);
 +		if (!val || len != 4)
 +			goto no_msi;
 +
 +		pbm->msiq_ent_count = *val;
 +
 +		mqp = of_get_property(pbm->op->node,
 +				      "msi-eq-to-devino", &len);
 +		if (!mqp)
 +			mqp = of_get_property(pbm->op->node,
 +					      "msi-eq-devino", &len);
 +		if (!mqp || len != sizeof(struct msiq_prop))
 +			goto no_msi;
 +
 +		pbm->msiq_first = mqp->first_msiq;
 +		pbm->msiq_first_devino = mqp->first_devino;
 +
 +		val = of_get_property(pbm->op->node, "#msi", &len);
 +		if (!val || len != 4)
 +			goto no_msi;
 +		pbm->msi_num = *val;
 +
 +		mrng = of_get_property(pbm->op->node, "msi-ranges", &len);
 +		if (!mrng || len != sizeof(struct msi_range_prop))
 +			goto no_msi;
 +		pbm->msi_first = mrng->first_msi;
 +
 +		val = of_get_property(pbm->op->node, "msi-data-mask", &len);
 +		if (!val || len != 4)
 +			goto no_msi;
 +		pbm->msi_data_mask = *val;
 +
 +		val = of_get_property(pbm->op->node, "msix-data-width", &len);
 +		if (!val || len != 4)
 +			goto no_msi;
 +		pbm->msix_data_width = *val;
 +
 +		arng = of_get_property(pbm->op->node, "msi-address-ranges",
 +				       &len);
 +		if (!arng || len != sizeof(struct addr_range_prop))
 +			goto no_msi;
 +		pbm->msi32_start = ((u64)arng->msi32_high << 32) |
 +			(u64) arng->msi32_low;
 +		pbm->msi64_start = ((u64)arng->msi64_high << 32) |
 +			(u64) arng->msi64_low;
 +		pbm->msi32_len = arng->msi32_len;
 +		pbm->msi64_len = arng->msi64_len;
 +
 +		if (msi_bitmap_alloc(pbm))
 +			goto no_msi;
 +
 +		if (msi_table_alloc(pbm)) {
 +			msi_bitmap_free(pbm);
 +			goto no_msi;
 +		}
 +
 +		if (ops->msiq_alloc(pbm)) {
 +			msi_table_free(pbm);
 +			msi_bitmap_free(pbm);
 +			goto no_msi;
 +		}
 +
 +		if (sparc64_bringup_msi_queues(pbm, ops)) {
 +			ops->msiq_free(pbm);
 +			msi_table_free(pbm);
 +			msi_bitmap_free(pbm);
 +			goto no_msi;
 +		}
 +
 +		printk(KERN_INFO "%s: MSI Queue first[%u] num[%u] count[%u] "
 +		       "devino[0x%x]\n",
 +		       pbm->name,
 +		       pbm->msiq_first, pbm->msiq_num,
 +		       pbm->msiq_ent_count,
 +		       pbm->msiq_first_devino);
 +		printk(KERN_INFO "%s: MSI first[%u] num[%u] mask[0x%x] "
 +		       "width[%u]\n",
 +		       pbm->name,
 +		       pbm->msi_first, pbm->msi_num, pbm->msi_data_mask,
 +		       pbm->msix_data_width);
 +		printk(KERN_INFO "%s: MSI addr32[0x%lx:0x%x] "
 +		       "addr64[0x%lx:0x%x]\n",
 +		       pbm->name,
 +		       pbm->msi32_start, pbm->msi32_len,
 +		       pbm->msi64_start, pbm->msi64_len);
 +		printk(KERN_INFO "%s: MSI queues at RA [%016lx]\n",
 +		       pbm->name,
 +		       __pa(pbm->msi_queues));
 +
 +		pbm->msi_ops = ops;
 +		pbm->setup_msi_irq = sparc64_setup_msi_irq;
 +		pbm->teardown_msi_irq = sparc64_teardown_msi_irq;
 +	}
 +	return;
 +
 +no_msi:
 +	pbm->msiq_num = 0;
 +	printk(KERN_INFO "%s: No MSI support.\n", pbm->name);
 +}
diff --cc arch/sparc/kernel/smp_32.c
index e396c1f17a92,000000000000..1e5ac4e282e1
mode 100644,000000..100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c
@@@ -1,423 -1,0 +1,421 @@@
 +/* smp.c: Sparc SMP support.
 + *
 + * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
 + * Copyright (C) 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
 + * Copyright (C) 2004 Keith M Wesolowski (wesolows@foobazco.org)
 + */
 +
 +#include <asm/head.h>
 +
 +#include <linux/kernel.h>
 +#include <linux/sched.h>
 +#include <linux/threads.h>
 +#include <linux/smp.h>
 +#include <linux/interrupt.h>
 +#include <linux/kernel_stat.h>
 +#include <linux/init.h>
 +#include <linux/spinlock.h>
 +#include <linux/mm.h>
 +#include <linux/fs.h>
 +#include <linux/seq_file.h>
 +#include <linux/cache.h>
 +#include <linux/delay.h>
 +
 +#include <asm/ptrace.h>
 +#include <asm/atomic.h>
 +
 +#include <asm/irq.h>
 +#include <asm/page.h>
 +#include <asm/pgalloc.h>
 +#include <asm/pgtable.h>
 +#include <asm/oplib.h>
 +#include <asm/cacheflush.h>
 +#include <asm/tlbflush.h>
 +#include <asm/cpudata.h>
 +
 +#include "irq.h"
 +
 +volatile unsigned long cpu_callin_map[NR_CPUS] __cpuinitdata = {0,};
 +unsigned char boot_cpu_id = 0;
 +unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */
 +
- cpumask_t cpu_online_map = CPU_MASK_NONE;
- cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
 +cpumask_t smp_commenced_mask = CPU_MASK_NONE;
 +
 +/* The only guaranteed locking primitive available on all Sparc
 + * processors is 'ldstub [%reg + immediate], %dest_reg' which atomically
 + * places the current byte at the effective address into dest_reg and
 + * places 0xff there afterwards.  Pretty lame locking primitive
 + * compared to the Alpha and the Intel no?  Most Sparcs have 'swap'
 + * instruction which is much better...
 + */
 +
 +void __cpuinit smp_store_cpu_info(int id)
 +{
 +	int cpu_node;
 +
 +	cpu_data(id).udelay_val = loops_per_jiffy;
 +
 +	cpu_find_by_mid(id, &cpu_node);
 +	cpu_data(id).clock_tick = prom_getintdefault(cpu_node,
 +						     "clock-frequency", 0);
 +	cpu_data(id).prom_node = cpu_node;
 +	cpu_data(id).mid = cpu_get_hwmid(cpu_node);
 +
 +	if (cpu_data(id).mid < 0)
 +		panic("No MID found for CPU%d at node 0x%08d", id, cpu_node);
 +}
 +
 +void __init smp_cpus_done(unsigned int max_cpus)
 +{
 +	extern void smp4m_smp_done(void);
 +	extern void smp4d_smp_done(void);
 +	unsigned long bogosum = 0;
 +	int cpu, num;
 +
 +	for (cpu = 0, num = 0; cpu < NR_CPUS; cpu++)
 +		if (cpu_online(cpu)) {
 +			num++;
 +			bogosum += cpu_data(cpu).udelay_val;
 +		}
 +
 +	printk("Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
 +		num, bogosum/(500000/HZ),
 +		(bogosum/(5000/HZ))%100);
 +
 +	switch(sparc_cpu_model) {
 +	case sun4:
 +		printk("SUN4\n");
 +		BUG();
 +		break;
 +	case sun4c:
 +		printk("SUN4C\n");
 +		BUG();
 +		break;
 +	case sun4m:
 +		smp4m_smp_done();
 +		break;
 +	case sun4d:
 +		smp4d_smp_done();
 +		break;
 +	case sun4e:
 +		printk("SUN4E\n");
 +		BUG();
 +		break;
 +	case sun4u:
 +		printk("SUN4U\n");
 +		BUG();
 +		break;
 +	default:
 +		printk("UNKNOWN!\n");
 +		BUG();
 +		break;
 +	};
 +}
 +
 +void cpu_panic(void)
 +{
 +	printk("CPU[%d]: Returns from cpu_idle!\n", smp_processor_id());
 +	panic("SMP bolixed\n");
 +}
 +
 +struct linux_prom_registers smp_penguin_ctable __cpuinitdata = { 0 };
 +
 +void smp_send_reschedule(int cpu)
 +{
 +	/* See sparc64 */
 +}
 +
 +void smp_send_stop(void)
 +{
 +}
 +
 +void smp_flush_cache_all(void)
 +{
 +	xc0((smpfunc_t) BTFIXUP_CALL(local_flush_cache_all));
 +	local_flush_cache_all();
 +}
 +
 +void smp_flush_tlb_all(void)
 +{
 +	xc0((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_all));
 +	local_flush_tlb_all();
 +}
 +
 +void smp_flush_cache_mm(struct mm_struct *mm)
 +{
 +	if(mm->context != NO_CONTEXT) {
 +		cpumask_t cpu_mask = mm->cpu_vm_mask;
 +		cpu_clear(smp_processor_id(), cpu_mask);
 +		if (!cpus_empty(cpu_mask))
 +			xc1((smpfunc_t) BTFIXUP_CALL(local_flush_cache_mm), (unsigned long) mm);
 +		local_flush_cache_mm(mm);
 +	}
 +}
 +
 +void smp_flush_tlb_mm(struct mm_struct *mm)
 +{
 +	if(mm->context != NO_CONTEXT) {
 +		cpumask_t cpu_mask = mm->cpu_vm_mask;
 +		cpu_clear(smp_processor_id(), cpu_mask);
 +		if (!cpus_empty(cpu_mask)) {
 +			xc1((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_mm), (unsigned long) mm);
 +			if(atomic_read(&mm->mm_users) == 1 && current->active_mm == mm)
 +				mm->cpu_vm_mask = cpumask_of_cpu(smp_processor_id());
 +		}
 +		local_flush_tlb_mm(mm);
 +	}
 +}
 +
 +void smp_flush_cache_range(struct vm_area_struct *vma, unsigned long start,
 +			   unsigned long end)
 +{
 +	struct mm_struct *mm = vma->vm_mm;
 +
 +	if (mm->context != NO_CONTEXT) {
 +		cpumask_t cpu_mask = mm->cpu_vm_mask;
 +		cpu_clear(smp_processor_id(), cpu_mask);
 +		if (!cpus_empty(cpu_mask))
 +			xc3((smpfunc_t) BTFIXUP_CALL(local_flush_cache_range), (unsigned long) vma, start, end);
 +		local_flush_cache_range(vma, start, end);
 +	}
 +}
 +
 +void smp_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 +			 unsigned long end)
 +{
 +	struct mm_struct *mm = vma->vm_mm;
 +
 +	if (mm->context != NO_CONTEXT) {
 +		cpumask_t cpu_mask = mm->cpu_vm_mask;
 +		cpu_clear(smp_processor_id(), cpu_mask);
 +		if (!cpus_empty(cpu_mask))
 +			xc3((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_range), (unsigned long) vma, start, end);
 +		local_flush_tlb_range(vma, start, end);
 +	}
 +}
 +
 +void smp_flush_cache_page(struct vm_area_struct *vma, unsigned long page)
 +{
 +	struct mm_struct *mm = vma->vm_mm;
 +
 +	if(mm->context != NO_CONTEXT) {
 +		cpumask_t cpu_mask = mm->cpu_vm_mask;
 +		cpu_clear(smp_processor_id(), cpu_mask);
 +		if (!cpus_empty(cpu_mask))
 +			xc2((smpfunc_t) BTFIXUP_CALL(local_flush_cache_page), (unsigned long) vma, page);
 +		local_flush_cache_page(vma, page);
 +	}
 +}
 +
 +void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 +{
 +	struct mm_struct *mm = vma->vm_mm;
 +
 +	if(mm->context != NO_CONTEXT) {
 +		cpumask_t cpu_mask = mm->cpu_vm_mask;
 +		cpu_clear(smp_processor_id(), cpu_mask);
 +		if (!cpus_empty(cpu_mask))
 +			xc2((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_page), (unsigned long) vma, page);
 +		local_flush_tlb_page(vma, page);
 +	}
 +}
 +
 +void smp_reschedule_irq(void)
 +{
 +	set_need_resched();
 +}
 +
 +void smp_flush_page_to_ram(unsigned long page)
 +{
 +	/* Current theory is that those who call this are the ones
 +	 * who have just dirtied their cache with the page's contents
 +	 * in kernel space, therefore we only run this on local cpu.
 +	 *
 +	 * XXX This experiment failed, research further... -DaveM
 +	 */
 +#if 1
 +	xc1((smpfunc_t) BTFIXUP_CALL(local_flush_page_to_ram), page);
 +#endif
 +	local_flush_page_to_ram(page);
 +}
 +
 +void smp_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr)
 +{
 +	cpumask_t cpu_mask = mm->cpu_vm_mask;
 +	cpu_clear(smp_processor_id(), cpu_mask);
 +	if (!cpus_empty(cpu_mask))
 +		xc2((smpfunc_t) BTFIXUP_CALL(local_flush_sig_insns), (unsigned long) mm, insn_addr);
 +	local_flush_sig_insns(mm, insn_addr);
 +}
 +
 +extern unsigned int lvl14_resolution;
 +
 +/* /proc/profile writes can call this, don't __init it please. */
 +static DEFINE_SPINLOCK(prof_setup_lock);
 +
 +int setup_profiling_timer(unsigned int multiplier)
 +{
 +	int i;
 +	unsigned long flags;
 +
 +	/* Prevent level14 ticker IRQ flooding. */
 +	if((!multiplier) || (lvl14_resolution / multiplier) < 500)
 +		return -EINVAL;
 +
 +	spin_lock_irqsave(&prof_setup_lock, flags);
 +	for_each_possible_cpu(i) {
 +		load_profile_irq(i, lvl14_resolution / multiplier);
 +		prof_multiplier(i) = multiplier;
 +	}
 +	spin_unlock_irqrestore(&prof_setup_lock, flags);
 +
 +	return 0;
 +}
 +
 +void __init smp_prepare_cpus(unsigned int max_cpus)
 +{
 +	extern void __init smp4m_boot_cpus(void);
 +	extern void __init smp4d_boot_cpus(void);
 +	int i, cpuid, extra;
 +
 +	printk("Entering SMP Mode...\n");
 +
 +	extra = 0;
 +	for (i = 0; !cpu_find_by_instance(i, NULL, &cpuid); i++) {
 +		if (cpuid >= NR_CPUS)
 +			extra++;
 +	}
 +	/* i = number of cpus */
 +	if (extra && max_cpus > i - extra)
 +		printk("Warning: NR_CPUS is too low to start all cpus\n");
 +
 +	smp_store_cpu_info(boot_cpu_id);
 +
 +	switch(sparc_cpu_model) {
 +	case sun4:
 +		printk("SUN4\n");
 +		BUG();
 +		break;
 +	case sun4c:
 +		printk("SUN4C\n");
 +		BUG();
 +		break;
 +	case sun4m:
 +		smp4m_boot_cpus();
 +		break;
 +	case sun4d:
 +		smp4d_boot_cpus();
 +		break;
 +	case sun4e:
 +		printk("SUN4E\n");
 +		BUG();
 +		break;
 +	case sun4u:
 +		printk("SUN4U\n");
 +		BUG();
 +		break;
 +	default:
 +		printk("UNKNOWN!\n");
 +		BUG();
 +		break;
 +	};
 +}
 +
 +/* Set this up early so that things like the scheduler can init
 + * properly.  We use the same cpu mask for both the present and
 + * possible cpu map.
 + */
 +void __init smp_setup_cpu_possible_map(void)
 +{
 +	int instance, mid;
 +
 +	instance = 0;
 +	while (!cpu_find_by_instance(instance, NULL, &mid)) {
 +		if (mid < NR_CPUS) {
- 			cpu_set(mid, phys_cpu_present_map);
++			cpu_set(mid, cpu_possible_map);
 +			cpu_set(mid, cpu_present_map);
 +		}
 +		instance++;
 +	}
 +}
 +
 +void __init smp_prepare_boot_cpu(void)
 +{
 +	int cpuid = hard_smp_processor_id();
 +
 +	if (cpuid >= NR_CPUS) {
 +		prom_printf("Serious problem, boot cpu id >= NR_CPUS\n");
 +		prom_halt();
 +	}
 +	if (cpuid != 0)
 +		printk("boot cpu id != 0, this could work but is untested\n");
 +
 +	current_thread_info()->cpu = cpuid;
 +	cpu_set(cpuid, cpu_online_map);
- 	cpu_set(cpuid, phys_cpu_present_map);
++	cpu_set(cpuid, cpu_possible_map);
 +}
 +
 +int __cpuinit __cpu_up(unsigned int cpu)
 +{
 +	extern int __cpuinit smp4m_boot_one_cpu(int);
 +	extern int __cpuinit smp4d_boot_one_cpu(int);
 +	int ret=0;
 +
 +	switch(sparc_cpu_model) {
 +	case sun4:
 +		printk("SUN4\n");
 +		BUG();
 +		break;
 +	case sun4c:
 +		printk("SUN4C\n");
 +		BUG();
 +		break;
 +	case sun4m:
 +		ret = smp4m_boot_one_cpu(cpu);
 +		break;
 +	case sun4d:
 +		ret = smp4d_boot_one_cpu(cpu);
 +		break;
 +	case sun4e:
 +		printk("SUN4E\n");
 +		BUG();
 +		break;
 +	case sun4u:
 +		printk("SUN4U\n");
 +		BUG();
 +		break;
 +	default:
 +		printk("UNKNOWN!\n");
 +		BUG();
 +		break;
 +	};
 +
 +	if (!ret) {
 +		cpu_set(cpu, smp_commenced_mask);
 +		while (!cpu_online(cpu))
 +			mb();
 +	}
 +	return ret;
 +}
 +
 +void smp_bogo(struct seq_file *m)
 +{
 +	int i;
 +	
 +	for_each_online_cpu(i) {
 +		seq_printf(m,
 +			   "Cpu%dBogo\t: %lu.%02lu\n",
 +			   i,
 +			   cpu_data(i).udelay_val/(500000/HZ),
 +			   (cpu_data(i).udelay_val/(5000/HZ))%100);
 +	}
 +}
 +
 +void smp_info(struct seq_file *m)
 +{
 +	int i;
 +
 +	seq_printf(m, "State:\n");
 +	for_each_online_cpu(i)
 +		seq_printf(m, "CPU%d\t\t: online\n", i);
 +}
diff --cc arch/sparc/kernel/smp_64.c
index bfe99d82d458,000000000000..46329799f346
mode 100644,000000..100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@@ -1,1412 -1,0 +1,1408 @@@
 +/* smp.c: Sparc64 SMP support.
 + *
 + * Copyright (C) 1997, 2007, 2008 David S. Miller (davem@davemloft.net)
 + */
 +
 +#include <linux/module.h>
 +#include <linux/kernel.h>
 +#include <linux/sched.h>
 +#include <linux/mm.h>
 +#include <linux/pagemap.h>
 +#include <linux/threads.h>
 +#include <linux/smp.h>
 +#include <linux/interrupt.h>
 +#include <linux/kernel_stat.h>
 +#include <linux/delay.h>
 +#include <linux/init.h>
 +#include <linux/spinlock.h>
 +#include <linux/fs.h>
 +#include <linux/seq_file.h>
 +#include <linux/cache.h>
 +#include <linux/jiffies.h>
 +#include <linux/profile.h>
 +#include <linux/lmb.h>
 +#include <linux/cpu.h>
 +
 +#include <asm/head.h>
 +#include <asm/ptrace.h>
 +#include <asm/atomic.h>
 +#include <asm/tlbflush.h>
 +#include <asm/mmu_context.h>
 +#include <asm/cpudata.h>
 +#include <asm/hvtramp.h>
 +#include <asm/io.h>
 +#include <asm/timer.h>
 +
 +#include <asm/irq.h>
 +#include <asm/irq_regs.h>
 +#include <asm/page.h>
 +#include <asm/pgtable.h>
 +#include <asm/oplib.h>
 +#include <asm/uaccess.h>
 +#include <asm/starfire.h>
 +#include <asm/tlb.h>
 +#include <asm/sections.h>
 +#include <asm/prom.h>
 +#include <asm/mdesc.h>
 +#include <asm/ldc.h>
 +#include <asm/hypervisor.h>
 +
 +int sparc64_multi_core __read_mostly;
 +
- cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE;
- cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE;
 +DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
 +cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
 +	{ [0 ... NR_CPUS-1] = CPU_MASK_NONE };
 +
- EXPORT_SYMBOL(cpu_possible_map);
- EXPORT_SYMBOL(cpu_online_map);
 +EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
 +EXPORT_SYMBOL(cpu_core_map);
 +
 +static cpumask_t smp_commenced_mask;
 +
 +void smp_info(struct seq_file *m)
 +{
 +	int i;
 +	
 +	seq_printf(m, "State:\n");
 +	for_each_online_cpu(i)
 +		seq_printf(m, "CPU%d:\t\tonline\n", i);
 +}
 +
 +void smp_bogo(struct seq_file *m)
 +{
 +	int i;
 +	
 +	for_each_online_cpu(i)
 +		seq_printf(m,
 +			   "Cpu%dClkTck\t: %016lx\n",
 +			   i, cpu_data(i).clock_tick);
 +}
 +
 +extern void setup_sparc64_timer(void);
 +
 +static volatile unsigned long callin_flag = 0;
 +
 +void __cpuinit smp_callin(void)
 +{
 +	int cpuid = hard_smp_processor_id();
 +
 +	__local_per_cpu_offset = __per_cpu_offset(cpuid);
 +
 +	if (tlb_type == hypervisor)
 +		sun4v_ktsb_register();
 +
 +	__flush_tlb_all();
 +
 +	setup_sparc64_timer();
 +
 +	if (cheetah_pcache_forced_on)
 +		cheetah_enable_pcache();
 +
 +	local_irq_enable();
 +
 +	callin_flag = 1;
 +	__asm__ __volatile__("membar #Sync\n\t"
 +			     "flush  %%g6" : : : "memory");
 +
 +	/* Clear this or we will die instantly when we
 +	 * schedule back to this idler...
 +	 */
 +	current_thread_info()->new_child = 0;
 +
 +	/* Attach to the address space of init_task. */
 +	atomic_inc(&init_mm.mm_count);
 +	current->active_mm = &init_mm;
 +
 +	/* inform the notifiers about the new cpu */
 +	notify_cpu_starting(cpuid);
 +
 +	while (!cpu_isset(cpuid, smp_commenced_mask))
 +		rmb();
 +
 +	ipi_call_lock();
 +	cpu_set(cpuid, cpu_online_map);
 +	ipi_call_unlock();
 +
 +	/* idle thread is expected to have preempt disabled */
 +	preempt_disable();
 +}
 +
 +void cpu_panic(void)
 +{
 +	printk("CPU[%d]: Returns from cpu_idle!\n", smp_processor_id());
 +	panic("SMP bolixed\n");
 +}
 +
 +/* This tick register synchronization scheme is taken entirely from
 + * the ia64 port, see arch/ia64/kernel/smpboot.c for details and credit.
 + *
 + * The only change I've made is to rework it so that the master
 + * initiates the synchronization instead of the slave. -DaveM
 + */
 +
 +#define MASTER	0
 +#define SLAVE	(SMP_CACHE_BYTES/sizeof(unsigned long))
 +
 +#define NUM_ROUNDS	64	/* magic value */
 +#define NUM_ITERS	5	/* likewise */
 +
 +static DEFINE_SPINLOCK(itc_sync_lock);
 +static unsigned long go[SLAVE + 1];
 +
 +#define DEBUG_TICK_SYNC	0
 +
 +static inline long get_delta (long *rt, long *master)
 +{
 +	unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
 +	unsigned long tcenter, t0, t1, tm;
 +	unsigned long i;
 +
 +	for (i = 0; i < NUM_ITERS; i++) {
 +		t0 = tick_ops->get_tick();
 +		go[MASTER] = 1;
 +		membar_safe("#StoreLoad");
 +		while (!(tm = go[SLAVE]))
 +			rmb();
 +		go[SLAVE] = 0;
 +		wmb();
 +		t1 = tick_ops->get_tick();
 +
 +		if (t1 - t0 < best_t1 - best_t0)
 +			best_t0 = t0, best_t1 = t1, best_tm = tm;
 +	}
 +
 +	*rt = best_t1 - best_t0;
 +	*master = best_tm - best_t0;
 +
 +	/* average best_t0 and best_t1 without overflow: */
 +	tcenter = (best_t0/2 + best_t1/2);
 +	if (best_t0 % 2 + best_t1 % 2 == 2)
 +		tcenter++;
 +	return tcenter - best_tm;
 +}
 +
 +void smp_synchronize_tick_client(void)
 +{
 +	long i, delta, adj, adjust_latency = 0, done = 0;
 +	unsigned long flags, rt, master_time_stamp, bound;
 +#if DEBUG_TICK_SYNC
 +	struct {
 +		long rt;	/* roundtrip time */
 +		long master;	/* master's timestamp */
 +		long diff;	/* difference between midpoint and master's timestamp */
 +		long lat;	/* estimate of itc adjustment latency */
 +	} t[NUM_ROUNDS];
 +#endif
 +
 +	go[MASTER] = 1;
 +
 +	while (go[MASTER])
 +		rmb();
 +
 +	local_irq_save(flags);
 +	{
 +		for (i = 0; i < NUM_ROUNDS; i++) {
 +			delta = get_delta(&rt, &master_time_stamp);
 +			if (delta == 0) {
 +				done = 1;	/* let's lock on to this... */
 +				bound = rt;
 +			}
 +
 +			if (!done) {
 +				if (i > 0) {
 +					adjust_latency += -delta;
 +					adj = -delta + adjust_latency/4;
 +				} else
 +					adj = -delta;
 +
 +				tick_ops->add_tick(adj);
 +			}
 +#if DEBUG_TICK_SYNC
 +			t[i].rt = rt;
 +			t[i].master = master_time_stamp;
 +			t[i].diff = delta;
 +			t[i].lat = adjust_latency/4;
 +#endif
 +		}
 +	}
 +	local_irq_restore(flags);
 +
 +#if DEBUG_TICK_SYNC
 +	for (i = 0; i < NUM_ROUNDS; i++)
 +		printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
 +		       t[i].rt, t[i].master, t[i].diff, t[i].lat);
 +#endif
 +
 +	printk(KERN_INFO "CPU %d: synchronized TICK with master CPU "
 +	       "(last diff %ld cycles, maxerr %lu cycles)\n",
 +	       smp_processor_id(), delta, rt);
 +}
 +
 +static void smp_start_sync_tick_client(int cpu);
 +
 +static void smp_synchronize_one_tick(int cpu)
 +{
 +	unsigned long flags, i;
 +
 +	go[MASTER] = 0;
 +
 +	smp_start_sync_tick_client(cpu);
 +
 +	/* wait for client to be ready */
 +	while (!go[MASTER])
 +		rmb();
 +
 +	/* now let the client proceed into his loop */
 +	go[MASTER] = 0;
 +	membar_safe("#StoreLoad");
 +
 +	spin_lock_irqsave(&itc_sync_lock, flags);
 +	{
 +		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) {
 +			while (!go[MASTER])
 +				rmb();
 +			go[MASTER] = 0;
 +			wmb();
 +			go[SLAVE] = tick_ops->get_tick();
 +			membar_safe("#StoreLoad");
 +		}
 +	}
 +	spin_unlock_irqrestore(&itc_sync_lock, flags);
 +}
 +
 +#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
 +/* XXX Put this in some common place. XXX */
 +static unsigned long kimage_addr_to_ra(void *p)
 +{
 +	unsigned long val = (unsigned long) p;
 +
 +	return kern_base + (val - KERNBASE);
 +}
 +
 +static void __cpuinit ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg)
 +{
 +	extern unsigned long sparc64_ttable_tl0;
 +	extern unsigned long kern_locked_tte_data;
 +	struct hvtramp_descr *hdesc;
 +	unsigned long trampoline_ra;
 +	struct trap_per_cpu *tb;
 +	u64 tte_vaddr, tte_data;
 +	unsigned long hv_err;
 +	int i;
 +
 +	hdesc = kzalloc(sizeof(*hdesc) +
 +			(sizeof(struct hvtramp_mapping) *
 +			 num_kernel_image_mappings - 1),
 +			GFP_KERNEL);
 +	if (!hdesc) {
 +		printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate "
 +		       "hvtramp_descr.\n");
 +		return;
 +	}
 +
 +	hdesc->cpu = cpu;
 +	hdesc->num_mappings = num_kernel_image_mappings;
 +
 +	tb = &trap_block[cpu];
 +	tb->hdesc = hdesc;
 +
 +	hdesc->fault_info_va = (unsigned long) &tb->fault_info;
 +	hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info);
 +
 +	hdesc->thread_reg = thread_reg;
 +
 +	tte_vaddr = (unsigned long) KERNBASE;
 +	tte_data = kern_locked_tte_data;
 +
 +	for (i = 0; i < hdesc->num_mappings; i++) {
 +		hdesc->maps[i].vaddr = tte_vaddr;
 +		hdesc->maps[i].tte   = tte_data;
 +		tte_vaddr += 0x400000;
 +		tte_data  += 0x400000;
 +	}
 +
 +	trampoline_ra = kimage_addr_to_ra(hv_cpu_startup);
 +
 +	hv_err = sun4v_cpu_start(cpu, trampoline_ra,
 +				 kimage_addr_to_ra(&sparc64_ttable_tl0),
 +				 __pa(hdesc));
 +	if (hv_err)
 +		printk(KERN_ERR "ldom_startcpu_cpuid: sun4v_cpu_start() "
 +		       "gives error %lu\n", hv_err);
 +}
 +#endif
 +
 +extern unsigned long sparc64_cpu_startup;
 +
 +/* The OBP cpu startup callback truncates the 3rd arg cookie to
 + * 32-bits (I think) so to be safe we have it read the pointer
 + * contained here so we work on >4GB machines. -DaveM
 + */
 +static struct thread_info *cpu_new_thread = NULL;
 +
 +static int __cpuinit smp_boot_one_cpu(unsigned int cpu)
 +{
 +	struct trap_per_cpu *tb = &trap_block[cpu];
 +	unsigned long entry =
 +		(unsigned long)(&sparc64_cpu_startup);
 +	unsigned long cookie =
 +		(unsigned long)(&cpu_new_thread);
 +	struct task_struct *p;
 +	int timeout, ret;
 +
 +	p = fork_idle(cpu);
 +	if (IS_ERR(p))
 +		return PTR_ERR(p);
 +	callin_flag = 0;
 +	cpu_new_thread = task_thread_info(p);
 +
 +	if (tlb_type == hypervisor) {
 +#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
 +		if (ldom_domaining_enabled)
 +			ldom_startcpu_cpuid(cpu,
 +					    (unsigned long) cpu_new_thread);
 +		else
 +#endif
 +			prom_startcpu_cpuid(cpu, entry, cookie);
 +	} else {
 +		struct device_node *dp = of_find_node_by_cpuid(cpu);
 +
 +		prom_startcpu(dp->node, entry, cookie);
 +	}
 +
 +	for (timeout = 0; timeout < 50000; timeout++) {
 +		if (callin_flag)
 +			break;
 +		udelay(100);
 +	}
 +
 +	if (callin_flag) {
 +		ret = 0;
 +	} else {
 +		printk("Processor %d is stuck.\n", cpu);
 +		ret = -ENODEV;
 +	}
 +	cpu_new_thread = NULL;
 +
 +	if (tb->hdesc) {
 +		kfree(tb->hdesc);
 +		tb->hdesc = NULL;
 +	}
 +
 +	return ret;
 +}
 +
 +static void spitfire_xcall_helper(u64 data0, u64 data1, u64 data2, u64 pstate, unsigned long cpu)
 +{
 +	u64 result, target;
 +	int stuck, tmp;
 +
 +	if (this_is_starfire) {
 +		/* map to real upaid */
 +		cpu = (((cpu & 0x3c) << 1) |
 +			((cpu & 0x40) >> 4) |
 +			(cpu & 0x3));
 +	}
 +
 +	target = (cpu << 14) | 0x70;
 +again:
 +	/* Ok, this is the real Spitfire Errata #54.
 +	 * One must read back from a UDB internal register
 +	 * after writes to the UDB interrupt dispatch, but
 +	 * before the membar Sync for that write.
 +	 * So we use the high UDB control register (ASI 0x7f,
 +	 * ADDR 0x20) for the dummy read. -DaveM
 +	 */
 +	tmp = 0x40;
 +	__asm__ __volatile__(
 +	"wrpr	%1, %2, %%pstate\n\t"
 +	"stxa	%4, [%0] %3\n\t"
 +	"stxa	%5, [%0+%8] %3\n\t"
 +	"add	%0, %8, %0\n\t"
 +	"stxa	%6, [%0+%8] %3\n\t"
 +	"membar	#Sync\n\t"
 +	"stxa	%%g0, [%7] %3\n\t"
 +	"membar	#Sync\n\t"
 +	"mov	0x20, %%g1\n\t"
 +	"ldxa	[%%g1] 0x7f, %%g0\n\t"
 +	"membar	#Sync"
 +	: "=r" (tmp)
 +	: "r" (pstate), "i" (PSTATE_IE), "i" (ASI_INTR_W),
 +	  "r" (data0), "r" (data1), "r" (data2), "r" (target),
 +	  "r" (0x10), "0" (tmp)
 +        : "g1");
 +
 +	/* NOTE: PSTATE_IE is still clear. */
 +	stuck = 100000;
 +	do {
 +		__asm__ __volatile__("ldxa [%%g0] %1, %0"
 +			: "=r" (result)
 +			: "i" (ASI_INTR_DISPATCH_STAT));
 +		if (result == 0) {
 +			__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
 +					     : : "r" (pstate));
 +			return;
 +		}
 +		stuck -= 1;
 +		if (stuck == 0)
 +			break;
 +	} while (result & 0x1);
 +	__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
 +			     : : "r" (pstate));
 +	if (stuck == 0) {
 +		printk("CPU[%d]: mondo stuckage result[%016lx]\n",
 +		       smp_processor_id(), result);
 +	} else {
 +		udelay(2);
 +		goto again;
 +	}
 +}
 +
 +static void spitfire_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 +{
 +	u64 *mondo, data0, data1, data2;
 +	u16 *cpu_list;
 +	u64 pstate;
 +	int i;
 +
 +	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
 +	cpu_list = __va(tb->cpu_list_pa);
 +	mondo = __va(tb->cpu_mondo_block_pa);
 +	data0 = mondo[0];
 +	data1 = mondo[1];
 +	data2 = mondo[2];
 +	for (i = 0; i < cnt; i++)
 +		spitfire_xcall_helper(data0, data1, data2, pstate, cpu_list[i]);
 +}
 +
 +/* Cheetah now allows sending the whole 64 bytes of data in the interrupt
 + * packet, but we have no use for that.  However we do take advantage of
 + * the new pipelining feature (ie. dispatch to multiple cpus simultaneously).
 + */
 +static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 +{
 +	int nack_busy_id, is_jbus, need_more;
 +	u64 *mondo, pstate, ver, busy_mask;
 +	u16 *cpu_list;
 +
 +	cpu_list = __va(tb->cpu_list_pa);
 +	mondo = __va(tb->cpu_mondo_block_pa);
 +
 +	/* Unfortunately, someone at Sun had the brilliant idea to make the
 +	 * busy/nack fields hard-coded by ITID number for this Ultra-III
 +	 * derivative processor.
 +	 */
 +	__asm__ ("rdpr %%ver, %0" : "=r" (ver));
 +	is_jbus = ((ver >> 32) == __JALAPENO_ID ||
 +		   (ver >> 32) == __SERRANO_ID);
 +
 +	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
 +
 +retry:
 +	need_more = 0;
 +	__asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
 +			     : : "r" (pstate), "i" (PSTATE_IE));
 +
 +	/* Setup the dispatch data registers. */
 +	__asm__ __volatile__("stxa	%0, [%3] %6\n\t"
 +			     "stxa	%1, [%4] %6\n\t"
 +			     "stxa	%2, [%5] %6\n\t"
 +			     "membar	#Sync\n\t"
 +			     : /* no outputs */
 +			     : "r" (mondo[0]), "r" (mondo[1]), "r" (mondo[2]),
 +			       "r" (0x40), "r" (0x50), "r" (0x60),
 +			       "i" (ASI_INTR_W));
 +
 +	nack_busy_id = 0;
 +	busy_mask = 0;
 +	{
 +		int i;
 +
 +		for (i = 0; i < cnt; i++) {
 +			u64 target, nr;
 +
 +			nr = cpu_list[i];
 +			if (nr == 0xffff)
 +				continue;
 +
 +			target = (nr << 14) | 0x70;
 +			if (is_jbus) {
 +				busy_mask |= (0x1UL << (nr * 2));
 +			} else {
 +				target |= (nack_busy_id << 24);
 +				busy_mask |= (0x1UL <<
 +					      (nack_busy_id * 2));
 +			}
 +			__asm__ __volatile__(
 +				"stxa	%%g0, [%0] %1\n\t"
 +				"membar	#Sync\n\t"
 +				: /* no outputs */
 +				: "r" (target), "i" (ASI_INTR_W));
 +			nack_busy_id++;
 +			if (nack_busy_id == 32) {
 +				need_more = 1;
 +				break;
 +			}
 +		}
 +	}
 +
 +	/* Now, poll for completion. */
 +	{
 +		u64 dispatch_stat, nack_mask;
 +		long stuck;
 +
 +		stuck = 100000 * nack_busy_id;
 +		nack_mask = busy_mask << 1;
 +		do {
 +			__asm__ __volatile__("ldxa	[%%g0] %1, %0"
 +					     : "=r" (dispatch_stat)
 +					     : "i" (ASI_INTR_DISPATCH_STAT));
 +			if (!(dispatch_stat & (busy_mask | nack_mask))) {
 +				__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
 +						     : : "r" (pstate));
 +				if (unlikely(need_more)) {
 +					int i, this_cnt = 0;
 +					for (i = 0; i < cnt; i++) {
 +						if (cpu_list[i] == 0xffff)
 +							continue;
 +						cpu_list[i] = 0xffff;
 +						this_cnt++;
 +						if (this_cnt == 32)
 +							break;
 +					}
 +					goto retry;
 +				}
 +				return;
 +			}
 +			if (!--stuck)
 +				break;
 +		} while (dispatch_stat & busy_mask);
 +
 +		__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
 +				     : : "r" (pstate));
 +
 +		if (dispatch_stat & busy_mask) {
 +			/* Busy bits will not clear, continue instead
 +			 * of freezing up on this cpu.
 +			 */
 +			printk("CPU[%d]: mondo stuckage result[%016lx]\n",
 +			       smp_processor_id(), dispatch_stat);
 +		} else {
 +			int i, this_busy_nack = 0;
 +
 +			/* Delay some random time with interrupts enabled
 +			 * to prevent deadlock.
 +			 */
 +			udelay(2 * nack_busy_id);
 +
 +			/* Clear out the mask bits for cpus which did not
 +			 * NACK us.
 +			 */
 +			for (i = 0; i < cnt; i++) {
 +				u64 check_mask, nr;
 +
 +				nr = cpu_list[i];
 +				if (nr == 0xffff)
 +					continue;
 +
 +				if (is_jbus)
 +					check_mask = (0x2UL << (2*nr));
 +				else
 +					check_mask = (0x2UL <<
 +						      this_busy_nack);
 +				if ((dispatch_stat & check_mask) == 0)
 +					cpu_list[i] = 0xffff;
 +				this_busy_nack += 2;
 +				if (this_busy_nack == 64)
 +					break;
 +			}
 +
 +			goto retry;
 +		}
 +	}
 +}
 +
 +/* Multi-cpu list version.  */
 +static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 +{
 +	int retries, this_cpu, prev_sent, i, saw_cpu_error;
 +	unsigned long status;
 +	u16 *cpu_list;
 +
 +	this_cpu = smp_processor_id();
 +
 +	cpu_list = __va(tb->cpu_list_pa);
 +
 +	saw_cpu_error = 0;
 +	retries = 0;
 +	prev_sent = 0;
 +	do {
 +		int forward_progress, n_sent;
 +
 +		status = sun4v_cpu_mondo_send(cnt,
 +					      tb->cpu_list_pa,
 +					      tb->cpu_mondo_block_pa);
 +
 +		/* HV_EOK means all cpus received the xcall, we're done.  */
 +		if (likely(status == HV_EOK))
 +			break;
 +
 +		/* First, see if we made any forward progress.
 +		 *
 +		 * The hypervisor indicates successful sends by setting
 +		 * cpu list entries to the value 0xffff.
 +		 */
 +		n_sent = 0;
 +		for (i = 0; i < cnt; i++) {
 +			if (likely(cpu_list[i] == 0xffff))
 +				n_sent++;
 +		}
 +
 +		forward_progress = 0;
 +		if (n_sent > prev_sent)
 +			forward_progress = 1;
 +
 +		prev_sent = n_sent;
 +
 +		/* If we get a HV_ECPUERROR, then one or more of the cpus
 +		 * in the list are in error state.  Use the cpu_state()
 +		 * hypervisor call to find out which cpus are in error state.
 +		 */
 +		if (unlikely(status == HV_ECPUERROR)) {
 +			for (i = 0; i < cnt; i++) {
 +				long err;
 +				u16 cpu;
 +
 +				cpu = cpu_list[i];
 +				if (cpu == 0xffff)
 +					continue;
 +
 +				err = sun4v_cpu_state(cpu);
 +				if (err == HV_CPU_STATE_ERROR) {
 +					saw_cpu_error = (cpu + 1);
 +					cpu_list[i] = 0xffff;
 +				}
 +			}
 +		} else if (unlikely(status != HV_EWOULDBLOCK))
 +			goto fatal_mondo_error;
 +
 +		/* Don't bother rewriting the CPU list, just leave the
 +		 * 0xffff and non-0xffff entries in there and the
 +		 * hypervisor will do the right thing.
 +		 *
 +		 * Only advance timeout state if we didn't make any
 +		 * forward progress.
 +		 */
 +		if (unlikely(!forward_progress)) {
 +			if (unlikely(++retries > 10000))
 +				goto fatal_mondo_timeout;
 +
 +			/* Delay a little bit to let other cpus catch up
 +			 * on their cpu mondo queue work.
 +			 */
 +			udelay(2 * cnt);
 +		}
 +	} while (1);
 +
 +	if (unlikely(saw_cpu_error))
 +		goto fatal_mondo_cpu_error;
 +
 +	return;
 +
 +fatal_mondo_cpu_error:
 +	printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus "
 +	       "(including %d) were in error state\n",
 +	       this_cpu, saw_cpu_error - 1);
 +	return;
 +
 +fatal_mondo_timeout:
 +	printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward "
 +	       " progress after %d retries.\n",
 +	       this_cpu, retries);
 +	goto dump_cpu_list_and_out;
 +
 +fatal_mondo_error:
 +	printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n",
 +	       this_cpu, status);
 +	printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
 +	       "mondo_block_pa(%lx)\n",
 +	       this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
 +
 +dump_cpu_list_and_out:
 +	printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu);
 +	for (i = 0; i < cnt; i++)
 +		printk("%u ", cpu_list[i]);
 +	printk("]\n");
 +}
 +
 +static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);
 +
 +static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
 +{
 +	struct trap_per_cpu *tb;
 +	int this_cpu, i, cnt;
 +	unsigned long flags;
 +	u16 *cpu_list;
 +	u64 *mondo;
 +
 +	/* We have to do this whole thing with interrupts fully disabled.
 +	 * Otherwise if we send an xcall from interrupt context it will
 +	 * corrupt both our mondo block and cpu list state.
 +	 *
 +	 * One consequence of this is that we cannot use timeout mechanisms
 +	 * that depend upon interrupts being delivered locally.  So, for
 +	 * example, we cannot sample jiffies and expect it to advance.
 +	 *
 +	 * Fortunately, udelay() uses %stick/%tick so we can use that.
 +	 */
 +	local_irq_save(flags);
 +
 +	this_cpu = smp_processor_id();
 +	tb = &trap_block[this_cpu];
 +
 +	mondo = __va(tb->cpu_mondo_block_pa);
 +	mondo[0] = data0;
 +	mondo[1] = data1;
 +	mondo[2] = data2;
 +	wmb();
 +
 +	cpu_list = __va(tb->cpu_list_pa);
 +
 +	/* Setup the initial cpu list.  */
 +	cnt = 0;
 +	for_each_cpu(i, mask) {
 +		if (i == this_cpu || !cpu_online(i))
 +			continue;
 +		cpu_list[cnt++] = i;
 +	}
 +
 +	if (cnt)
 +		xcall_deliver_impl(tb, cnt);
 +
 +	local_irq_restore(flags);
 +}
 +
 +/* Send cross call to all processors mentioned in MASK
 + * except self.  Really, there are only two cases currently,
 + * "&cpu_online_map" and "&mm->cpu_vm_mask".
 + */
 +static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask)
 +{
 +	u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff));
 +
 +	xcall_deliver(data0, data1, data2, mask);
 +}
 +
 +/* Send cross call to all processors except self. */
 +static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2)
 +{
 +	smp_cross_call_masked(func, ctx, data1, data2, &cpu_online_map);
 +}
 +
 +extern unsigned long xcall_sync_tick;
 +
 +static void smp_start_sync_tick_client(int cpu)
 +{
 +	xcall_deliver((u64) &xcall_sync_tick, 0, 0,
 +		      &cpumask_of_cpu(cpu));
 +}
 +
 +extern unsigned long xcall_call_function;
 +
 +void arch_send_call_function_ipi(cpumask_t mask)
 +{
 +	xcall_deliver((u64) &xcall_call_function, 0, 0, &mask);
 +}
 +
 +extern unsigned long xcall_call_function_single;
 +
 +void arch_send_call_function_single_ipi(int cpu)
 +{
 +	xcall_deliver((u64) &xcall_call_function_single, 0, 0,
 +		      &cpumask_of_cpu(cpu));
 +}
 +
 +void smp_call_function_client(int irq, struct pt_regs *regs)
 +{
 +	clear_softint(1 << irq);
 +	generic_smp_call_function_interrupt();
 +}
 +
 +void smp_call_function_single_client(int irq, struct pt_regs *regs)
 +{
 +	clear_softint(1 << irq);
 +	generic_smp_call_function_single_interrupt();
 +}
 +
 +static void tsb_sync(void *info)
 +{
 +	struct trap_per_cpu *tp = &trap_block[raw_smp_processor_id()];
 +	struct mm_struct *mm = info;
 +
 +	/* It is not valid to test "current->active_mm == mm" here.
 +	 *
 +	 * The value of "current" is not changed atomically with
 +	 * switch_mm().  But that's OK, we just need to check the
 +	 * current cpu's trap block PGD physical address.
 +	 */
 +	if (tp->pgd_paddr == __pa(mm->pgd))
 +		tsb_context_switch(mm);
 +}
 +
 +void smp_tsb_sync(struct mm_struct *mm)
 +{
 +	smp_call_function_mask(mm->cpu_vm_mask, tsb_sync, mm, 1);
 +}
 +
 +extern unsigned long xcall_flush_tlb_mm;
 +extern unsigned long xcall_flush_tlb_pending;
 +extern unsigned long xcall_flush_tlb_kernel_range;
 +extern unsigned long xcall_fetch_glob_regs;
 +extern unsigned long xcall_receive_signal;
 +extern unsigned long xcall_new_mmu_context_version;
 +#ifdef CONFIG_KGDB
 +extern unsigned long xcall_kgdb_capture;
 +#endif
 +
 +#ifdef DCACHE_ALIASING_POSSIBLE
 +extern unsigned long xcall_flush_dcache_page_cheetah;
 +#endif
 +extern unsigned long xcall_flush_dcache_page_spitfire;
 +
 +#ifdef CONFIG_DEBUG_DCFLUSH
 +extern atomic_t dcpage_flushes;
 +extern atomic_t dcpage_flushes_xcall;
 +#endif
 +
 +static inline void __local_flush_dcache_page(struct page *page)
 +{
 +#ifdef DCACHE_ALIASING_POSSIBLE
 +	__flush_dcache_page(page_address(page),
 +			    ((tlb_type == spitfire) &&
 +			     page_mapping(page) != NULL));
 +#else
 +	if (page_mapping(page) != NULL &&
 +	    tlb_type == spitfire)
 +		__flush_icache_page(__pa(page_address(page)));
 +#endif
 +}
 +
 +void smp_flush_dcache_page_impl(struct page *page, int cpu)
 +{
 +	int this_cpu;
 +
 +	if (tlb_type == hypervisor)
 +		return;
 +
 +#ifdef CONFIG_DEBUG_DCFLUSH
 +	atomic_inc(&dcpage_flushes);
 +#endif
 +
 +	this_cpu = get_cpu();
 +
 +	if (cpu == this_cpu) {
 +		__local_flush_dcache_page(page);
 +	} else if (cpu_online(cpu)) {
 +		void *pg_addr = page_address(page);
 +		u64 data0 = 0;
 +
 +		if (tlb_type == spitfire) {
 +			data0 = ((u64)&xcall_flush_dcache_page_spitfire);
 +			if (page_mapping(page) != NULL)
 +				data0 |= ((u64)1 << 32);
 +		} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
 +#ifdef DCACHE_ALIASING_POSSIBLE
 +			data0 =	((u64)&xcall_flush_dcache_page_cheetah);
 +#endif
 +		}
 +		if (data0) {
 +			xcall_deliver(data0, __pa(pg_addr),
 +				      (u64) pg_addr, &cpumask_of_cpu(cpu));
 +#ifdef CONFIG_DEBUG_DCFLUSH
 +			atomic_inc(&dcpage_flushes_xcall);
 +#endif
 +		}
 +	}
 +
 +	put_cpu();
 +}
 +
 +void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
 +{
 +	void *pg_addr;
 +	int this_cpu;
 +	u64 data0;
 +
 +	if (tlb_type == hypervisor)
 +		return;
 +
 +	this_cpu = get_cpu();
 +
 +#ifdef CONFIG_DEBUG_DCFLUSH
 +	atomic_inc(&dcpage_flushes);
 +#endif
 +	data0 = 0;
 +	pg_addr = page_address(page);
 +	if (tlb_type == spitfire) {
 +		data0 = ((u64)&xcall_flush_dcache_page_spitfire);
 +		if (page_mapping(page) != NULL)
 +			data0 |= ((u64)1 << 32);
 +	} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
 +#ifdef DCACHE_ALIASING_POSSIBLE
 +		data0 = ((u64)&xcall_flush_dcache_page_cheetah);
 +#endif
 +	}
 +	if (data0) {
 +		xcall_deliver(data0, __pa(pg_addr),
 +			      (u64) pg_addr, &cpu_online_map);
 +#ifdef CONFIG_DEBUG_DCFLUSH
 +		atomic_inc(&dcpage_flushes_xcall);
 +#endif
 +	}
 +	__local_flush_dcache_page(page);
 +
 +	put_cpu();
 +}
 +
 +void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
 +{
 +	struct mm_struct *mm;
 +	unsigned long flags;
 +
 +	clear_softint(1 << irq);
 +
 +	/* See if we need to allocate a new TLB context because
 +	 * the version of the one we are using is now out of date.
 +	 */
 +	mm = current->active_mm;
 +	if (unlikely(!mm || (mm == &init_mm)))
 +		return;
 +
 +	spin_lock_irqsave(&mm->context.lock, flags);
 +
 +	if (unlikely(!CTX_VALID(mm->context)))
 +		get_new_mmu_context(mm);
 +
 +	spin_unlock_irqrestore(&mm->context.lock, flags);
 +
 +	load_secondary_context(mm);
 +	__flush_tlb_mm(CTX_HWBITS(mm->context),
 +		       SECONDARY_CONTEXT);
 +}
 +
 +void smp_new_mmu_context_version(void)
 +{
 +	smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0);
 +}
 +
 +#ifdef CONFIG_KGDB
 +void kgdb_roundup_cpus(unsigned long flags)
 +{
 +	smp_cross_call(&xcall_kgdb_capture, 0, 0, 0);
 +}
 +#endif
 +
 +void smp_fetch_global_regs(void)
 +{
 +	smp_cross_call(&xcall_fetch_glob_regs, 0, 0, 0);
 +}
 +
 +/* We know that the window frames of the user have been flushed
 + * to the stack before we get here because all callers of us
 + * are flush_tlb_*() routines, and these run after flush_cache_*()
 + * which performs the flushw.
 + *
 + * The SMP TLB coherency scheme we use works as follows:
 + *
 + * 1) mm->cpu_vm_mask is a bit mask of which cpus an address
 + *    space has (potentially) executed on, this is the heuristic
 + *    we use to avoid doing cross calls.
 + *
 + *    Also, for flushing from kswapd and also for clones, we
 + *    use cpu_vm_mask as the list of cpus that must run the TLB flush.
 + *
 + * 2) TLB context numbers are shared globally across all processors
 + *    in the system, this allows us to play several games to avoid
 + *    cross calls.
 + *
 + *    One invariant is that when a cpu switches to a process, and
 + *    that process's tsk->active_mm->cpu_vm_mask does not have the
 + *    current cpu's bit set, that tlb context is flushed locally.
 + *
 + *    If the address space is non-shared (ie. mm->count == 1) we avoid
 + *    cross calls when we want to flush the currently running process's
 + *    tlb state.  This is done by clearing all cpu bits except the current
 + *    processor's in current->active_mm->cpu_vm_mask and performing the
 + *    flush locally only.  This will force any subsequent cpus which run
 + *    this task to flush the context from the local tlb if the process
 + *    migrates to another cpu (again).
 + *
 + * 3) For shared address spaces (threads) and swapping we bite the
 + *    bullet for most cases and perform the cross call (but only to
 + *    the cpus listed in cpu_vm_mask).
 + *
 + *    The performance gain from "optimizing" away the cross call for threads is
 + *    questionable (in theory the big win for threads is the massive sharing of
 + *    address space state across processors).
 + */
 +
 +/* This currently is only used by the hugetlb arch pre-fault
 + * hook on UltraSPARC-III+ and later when changing the pagesize
 + * bits of the context register for an address space.
 + */
 +void smp_flush_tlb_mm(struct mm_struct *mm)
 +{
 +	u32 ctx = CTX_HWBITS(mm->context);
 +	int cpu = get_cpu();
 +
 +	if (atomic_read(&mm->mm_users) == 1) {
 +		mm->cpu_vm_mask = cpumask_of_cpu(cpu);
 +		goto local_flush_and_out;
 +	}
 +
 +	smp_cross_call_masked(&xcall_flush_tlb_mm,
 +			      ctx, 0, 0,
 +			      &mm->cpu_vm_mask);
 +
 +local_flush_and_out:
 +	__flush_tlb_mm(ctx, SECONDARY_CONTEXT);
 +
 +	put_cpu();
 +}
 +
 +void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long *vaddrs)
 +{
 +	u32 ctx = CTX_HWBITS(mm->context);
 +	int cpu = get_cpu();
 +
 +	if (mm == current->active_mm && atomic_read(&mm->mm_users) == 1)
 +		mm->cpu_vm_mask = cpumask_of_cpu(cpu);
 +	else
 +		smp_cross_call_masked(&xcall_flush_tlb_pending,
 +				      ctx, nr, (unsigned long) vaddrs,
 +				      &mm->cpu_vm_mask);
 +
 +	__flush_tlb_pending(ctx, nr, vaddrs);
 +
 +	put_cpu();
 +}
 +
 +void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end)
 +{
 +	start &= PAGE_MASK;
 +	end    = PAGE_ALIGN(end);
 +	if (start != end) {
 +		smp_cross_call(&xcall_flush_tlb_kernel_range,
 +			       0, start, end);
 +
 +		__flush_tlb_kernel_range(start, end);
 +	}
 +}
 +
 +/* CPU capture. */
 +/* #define CAPTURE_DEBUG */
 +extern unsigned long xcall_capture;
 +
 +static atomic_t smp_capture_depth = ATOMIC_INIT(0);
 +static atomic_t smp_capture_registry = ATOMIC_INIT(0);
 +static unsigned long penguins_are_doing_time;
 +
 +void smp_capture(void)
 +{
 +	int result = atomic_add_ret(1, &smp_capture_depth);
 +
 +	if (result == 1) {
 +		int ncpus = num_online_cpus();
 +
 +#ifdef CAPTURE_DEBUG
 +		printk("CPU[%d]: Sending penguins to jail...",
 +		       smp_processor_id());
 +#endif
 +		penguins_are_doing_time = 1;
 +		atomic_inc(&smp_capture_registry);
 +		smp_cross_call(&xcall_capture, 0, 0, 0);
 +		while (atomic_read(&smp_capture_registry) != ncpus)
 +			rmb();
 +#ifdef CAPTURE_DEBUG
 +		printk("done\n");
 +#endif
 +	}
 +}
 +
 +void smp_release(void)
 +{
 +	if (atomic_dec_and_test(&smp_capture_depth)) {
 +#ifdef CAPTURE_DEBUG
 +		printk("CPU[%d]: Giving pardon to "
 +		       "imprisoned penguins\n",
 +		       smp_processor_id());
 +#endif
 +		penguins_are_doing_time = 0;
 +		membar_safe("#StoreLoad");
 +		atomic_dec(&smp_capture_registry);
 +	}
 +}
 +
 +/* Imprisoned penguins run with %pil == PIL_NORMAL_MAX, but PSTATE_IE
 + * set, so they can service tlb flush xcalls...
 + */
 +extern void prom_world(int);
 +
 +void smp_penguin_jailcell(int irq, struct pt_regs *regs)
 +{
 +	clear_softint(1 << irq);
 +
 +	preempt_disable();
 +
 +	__asm__ __volatile__("flushw");
 +	prom_world(1);
 +	atomic_inc(&smp_capture_registry);
 +	membar_safe("#StoreLoad");
 +	while (penguins_are_doing_time)
 +		rmb();
 +	atomic_dec(&smp_capture_registry);
 +	prom_world(0);
 +
 +	preempt_enable();
 +}
 +
 +/* /proc/profile writes can call this, don't __init it please. */
 +int setup_profiling_timer(unsigned int multiplier)
 +{
 +	return -EINVAL;
 +}
 +
 +void __init smp_prepare_cpus(unsigned int max_cpus)
 +{
 +}
 +
 +void __devinit smp_prepare_boot_cpu(void)
 +{
 +}
 +
 +void __init smp_setup_processor_id(void)
 +{
 +	if (tlb_type == spitfire)
 +		xcall_deliver_impl = spitfire_xcall_deliver;
 +	else if (tlb_type == cheetah || tlb_type == cheetah_plus)
 +		xcall_deliver_impl = cheetah_xcall_deliver;
 +	else
 +		xcall_deliver_impl = hypervisor_xcall_deliver;
 +}
 +
 +void __devinit smp_fill_in_sib_core_maps(void)
 +{
 +	unsigned int i;
 +
 +	for_each_present_cpu(i) {
 +		unsigned int j;
 +
 +		cpus_clear(cpu_core_map[i]);
 +		if (cpu_data(i).core_id == 0) {
 +			cpu_set(i, cpu_core_map[i]);
 +			continue;
 +		}
 +
 +		for_each_present_cpu(j) {
 +			if (cpu_data(i).core_id ==
 +			    cpu_data(j).core_id)
 +				cpu_set(j, cpu_core_map[i]);
 +		}
 +	}
 +
 +	for_each_present_cpu(i) {
 +		unsigned int j;
 +
 +		cpus_clear(per_cpu(cpu_sibling_map, i));
 +		if (cpu_data(i).proc_id == -1) {
 +			cpu_set(i, per_cpu(cpu_sibling_map, i));
 +			continue;
 +		}
 +
 +		for_each_present_cpu(j) {
 +			if (cpu_data(i).proc_id ==
 +			    cpu_data(j).proc_id)
 +				cpu_set(j, per_cpu(cpu_sibling_map, i));
 +		}
 +	}
 +}
 +
 +int __cpuinit __cpu_up(unsigned int cpu)
 +{
 +	int ret = smp_boot_one_cpu(cpu);
 +
 +	if (!ret) {
 +		cpu_set(cpu, smp_commenced_mask);
 +		while (!cpu_isset(cpu, cpu_online_map))
 +			mb();
 +		if (!cpu_isset(cpu, cpu_online_map)) {
 +			ret = -ENODEV;
 +		} else {
 +			/* On SUN4V, writes to %tick and %stick are
 +			 * not allowed.
 +			 */
 +			if (tlb_type != hypervisor)
 +				smp_synchronize_one_tick(cpu);
 +		}
 +	}
 +	return ret;
 +}
 +
 +#ifdef CONFIG_HOTPLUG_CPU
 +void cpu_play_dead(void)
 +{
 +	int cpu = smp_processor_id();
 +	unsigned long pstate;
 +
 +	idle_task_exit();
 +
 +	if (tlb_type == hypervisor) {
 +		struct trap_per_cpu *tb = &trap_block[cpu];
 +
 +		sun4v_cpu_qconf(HV_CPU_QUEUE_CPU_MONDO,
 +				tb->cpu_mondo_pa, 0);
 +		sun4v_cpu_qconf(HV_CPU_QUEUE_DEVICE_MONDO,
 +				tb->dev_mondo_pa, 0);
 +		sun4v_cpu_qconf(HV_CPU_QUEUE_RES_ERROR,
 +				tb->resum_mondo_pa, 0);
 +		sun4v_cpu_qconf(HV_CPU_QUEUE_NONRES_ERROR,
 +				tb->nonresum_mondo_pa, 0);
 +	}
 +
 +	cpu_clear(cpu, smp_commenced_mask);
 +	membar_safe("#Sync");
 +
 +	local_irq_disable();
 +
 +	__asm__ __volatile__(
 +		"rdpr	%%pstate, %0\n\t"
 +		"wrpr	%0, %1, %%pstate"
 +		: "=r" (pstate)
 +		: "i" (PSTATE_IE));
 +
 +	while (1)
 +		barrier();
 +}
 +
 +int __cpu_disable(void)
 +{
 +	int cpu = smp_processor_id();
 +	cpuinfo_sparc *c;
 +	int i;
 +
 +	for_each_cpu_mask(i, cpu_core_map[cpu])
 +		cpu_clear(cpu, cpu_core_map[i]);
 +	cpus_clear(cpu_core_map[cpu]);
 +
 +	for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu))
 +		cpu_clear(cpu, per_cpu(cpu_sibling_map, i));
 +	cpus_clear(per_cpu(cpu_sibling_map, cpu));
 +
 +	c = &cpu_data(cpu);
 +
 +	c->core_id = 0;
 +	c->proc_id = -1;
 +
 +	smp_wmb();
 +
 +	/* Make sure no interrupts point to this cpu.  */
 +	fixup_irqs();
 +
 +	local_irq_enable();
 +	mdelay(1);
 +	local_irq_disable();
 +
 +	ipi_call_lock();
 +	cpu_clear(cpu, cpu_online_map);
 +	ipi_call_unlock();
 +
 +	return 0;
 +}
 +
 +void __cpu_die(unsigned int cpu)
 +{
 +	int i;
 +
 +	for (i = 0; i < 100; i++) {
 +		smp_rmb();
 +		if (!cpu_isset(cpu, smp_commenced_mask))
 +			break;
 +		msleep(100);
 +	}
 +	if (cpu_isset(cpu, smp_commenced_mask)) {
 +		printk(KERN_ERR "CPU %u didn't die...\n", cpu);
 +	} else {
 +#if defined(CONFIG_SUN_LDOMS)
 +		unsigned long hv_err;
 +		int limit = 100;
 +
 +		do {
 +			hv_err = sun4v_cpu_stop(cpu);
 +			if (hv_err == HV_EOK) {
 +				cpu_clear(cpu, cpu_present_map);
 +				break;
 +			}
 +		} while (--limit > 0);
 +		if (limit <= 0) {
 +			printk(KERN_ERR "sun4v_cpu_stop() fails err=%lu\n",
 +			       hv_err);
 +		}
 +#endif
 +	}
 +}
 +#endif
 +
 +void __init smp_cpus_done(unsigned int max_cpus)
 +{
 +}
 +
 +void smp_send_reschedule(int cpu)
 +{
 +	xcall_deliver((u64) &xcall_receive_signal, 0, 0,
 +		      &cpumask_of_cpu(cpu));
 +}
 +
 +void smp_receive_signal_client(int irq, struct pt_regs *regs)
 +{
 +	clear_softint(1 << irq);
 +}
 +
 +/* This is a nop because we capture all other cpus
 + * anyways when making the PROM active.
 + */
 +void smp_send_stop(void)
 +{
 +}
 +
 +unsigned long __per_cpu_base __read_mostly;
 +unsigned long __per_cpu_shift __read_mostly;
 +
 +EXPORT_SYMBOL(__per_cpu_base);
 +EXPORT_SYMBOL(__per_cpu_shift);
 +
 +void __init real_setup_per_cpu_areas(void)
 +{
 +	unsigned long paddr, goal, size, i;
 +	char *ptr;
 +
 +	/* Copy section for each CPU (we discard the original) */
 +	goal = PERCPU_ENOUGH_ROOM;
 +
 +	__per_cpu_shift = PAGE_SHIFT;
 +	for (size = PAGE_SIZE; size < goal; size <<= 1UL)
 +		__per_cpu_shift++;
 +
 +	paddr = lmb_alloc(size * NR_CPUS, PAGE_SIZE);
 +	if (!paddr) {
 +		prom_printf("Cannot allocate per-cpu memory.\n");
 +		prom_halt();
 +	}
 +
 +	ptr = __va(paddr);
 +	__per_cpu_base = ptr - __per_cpu_start;
 +
 +	for (i = 0; i < NR_CPUS; i++, ptr += size)
 +		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
 +
 +	/* Setup %g5 for the boot cpu.  */
 +	__local_per_cpu_offset = __per_cpu_offset(smp_processor_id());
 +}
diff --cc arch/sparc/kernel/sparc_ksyms_32.c
index a4d45fc29b21,000000000000..e1e97639231b
mode 100644,000000..100644
--- a/arch/sparc/kernel/sparc_ksyms_32.c
+++ b/arch/sparc/kernel/sparc_ksyms_32.c
@@@ -1,257 -1,0 +1,253 @@@
 +/*
 + * arch/sparc/kernel/sparc_ksyms_32.c: Sparc specific ksyms support.
 + *
 + * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
 + * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
 + */
 +
 +/* Tell string.h we don't want memcpy etc. as cpp defines */
 +#define EXPORT_SYMTAB_STROPS
 +#define PROMLIB_INTERNAL
 +
 +#include <linux/module.h>
 +#include <linux/init.h>
 +#include <linux/smp.h>
 +#include <linux/types.h>
 +#include <linux/string.h>
 +#include <linux/sched.h>
 +#include <linux/interrupt.h>
 +#include <linux/in6.h>
 +#include <linux/spinlock.h>
 +#include <linux/mm.h>
 +#include <linux/syscalls.h>
 +#ifdef CONFIG_PCI
 +#include <linux/pci.h>
 +#endif
 +#include <linux/pm.h>
 +#ifdef CONFIG_HIGHMEM
 +#include <linux/highmem.h>
 +#endif
 +
 +#include <asm/oplib.h>
 +#include <asm/delay.h>
 +#include <asm/system.h>
 +#include <asm/auxio.h>
 +#include <asm/pgtable.h>
 +#include <asm/io.h>
 +#include <asm/irq.h>
 +#include <asm/idprom.h>
 +#include <asm/head.h>
 +#include <asm/smp.h>
 +#include <asm/ptrace.h>
 +#include <asm/uaccess.h>
 +#include <asm/checksum.h>
 +#ifdef CONFIG_SBUS
 +#include <asm/dma.h>
 +#endif
 +#include <asm/io-unit.h>
 +#include <asm/bug.h>
 +
 +extern spinlock_t rtc_lock;
 +
 +struct poll {
 +	int fd;
 +	short events;
 +	short revents;
 +};
 +
 +extern void (*__copy_1page)(void *, const void *);
 +extern void __memmove(void *, const void *, __kernel_size_t);
 +extern void (*bzero_1page)(void *);
 +extern void *__bzero(void *, size_t);
 +extern void *__memscan_zero(void *, size_t);
 +extern void *__memscan_generic(void *, int, size_t);
 +extern int __strncmp(const char *, const char *, __kernel_size_t);
 +
 +extern int __ashrdi3(int, int);
 +extern int __ashldi3(int, int);
 +extern int __lshrdi3(int, int);
 +extern int __muldi3(int, int);
 +extern int __divdi3(int, int);
 +
 +/* Private functions with odd calling conventions. */
 +extern void ___atomic24_add(void);
 +extern void ___atomic24_sub(void);
 +extern void ___rw_read_enter(void);
 +extern void ___rw_read_try(void);
 +extern void ___rw_read_exit(void);
 +extern void ___rw_write_enter(void);
 +
 +/* Alias functions whose names begin with "." and export the aliases.
 + * The module references will be fixed up by module_frob_arch_sections.
 + */
 +extern int _Div(int, int);
 +extern int _Mul(int, int);
 +extern int _Rem(int, int);
 +extern unsigned _Udiv(unsigned, unsigned);
 +extern unsigned _Umul(unsigned, unsigned);
 +extern unsigned _Urem(unsigned, unsigned);
 +
 +/* used by various drivers */
 +EXPORT_SYMBOL(sparc_cpu_model);
 +EXPORT_SYMBOL(kernel_thread);
 +#ifdef CONFIG_SMP
 +// XXX find what uses (or used) these.   AV: see asm/spinlock.h
 +EXPORT_SYMBOL(___rw_read_enter);
 +EXPORT_SYMBOL(___rw_read_try);
 +EXPORT_SYMBOL(___rw_read_exit);
 +EXPORT_SYMBOL(___rw_write_enter);
 +#endif
 +
 +EXPORT_SYMBOL(sparc_valid_addr_bitmap);
 +EXPORT_SYMBOL(phys_base);
 +EXPORT_SYMBOL(pfn_base);
 +
 +/* Atomic operations. */
 +EXPORT_SYMBOL(___atomic24_add);
 +EXPORT_SYMBOL(___atomic24_sub);
 +
 +/* Per-CPU information table */
 +EXPORT_PER_CPU_SYMBOL(__cpu_data);
 +
 +#ifdef CONFIG_SMP
 +/* IRQ implementation. */
 +EXPORT_SYMBOL(synchronize_irq);
- 
- /* CPU online map and active count. */
- EXPORT_SYMBOL(cpu_online_map);
- EXPORT_SYMBOL(phys_cpu_present_map);
 +#endif
 +
 +EXPORT_SYMBOL(__udelay);
 +EXPORT_SYMBOL(__ndelay);
 +EXPORT_SYMBOL(rtc_lock);
 +EXPORT_SYMBOL(set_auxio);
 +EXPORT_SYMBOL(get_auxio);
 +EXPORT_SYMBOL(io_remap_pfn_range);
 +
 +#ifndef CONFIG_SMP
 +EXPORT_SYMBOL(BTFIXUP_CALL(___xchg32));
 +#else
 +EXPORT_SYMBOL(BTFIXUP_CALL(__hard_smp_processor_id));
 +#endif
 +EXPORT_SYMBOL(BTFIXUP_CALL(mmu_unlockarea));
 +EXPORT_SYMBOL(BTFIXUP_CALL(mmu_lockarea));
 +EXPORT_SYMBOL(BTFIXUP_CALL(mmu_get_scsi_sgl));
 +EXPORT_SYMBOL(BTFIXUP_CALL(mmu_get_scsi_one));
 +EXPORT_SYMBOL(BTFIXUP_CALL(mmu_release_scsi_sgl));
 +EXPORT_SYMBOL(BTFIXUP_CALL(mmu_release_scsi_one));
 +
 +EXPORT_SYMBOL(BTFIXUP_CALL(pgprot_noncached));
 +
 +#ifdef CONFIG_SBUS
 +EXPORT_SYMBOL(sbus_set_sbus64);
 +#endif
 +#ifdef CONFIG_PCI
 +EXPORT_SYMBOL(insb);
 +EXPORT_SYMBOL(outsb);
 +EXPORT_SYMBOL(insw);
 +EXPORT_SYMBOL(outsw);
 +EXPORT_SYMBOL(insl);
 +EXPORT_SYMBOL(outsl);
 +EXPORT_SYMBOL(pci_alloc_consistent);
 +EXPORT_SYMBOL(pci_free_consistent);
 +EXPORT_SYMBOL(pci_map_single);
 +EXPORT_SYMBOL(pci_unmap_single);
 +EXPORT_SYMBOL(pci_dma_sync_single_for_cpu);
 +EXPORT_SYMBOL(pci_dma_sync_single_for_device);
 +EXPORT_SYMBOL(pci_dma_sync_sg_for_cpu);
 +EXPORT_SYMBOL(pci_dma_sync_sg_for_device);
 +EXPORT_SYMBOL(pci_map_sg);
 +EXPORT_SYMBOL(pci_unmap_sg);
 +EXPORT_SYMBOL(pci_map_page);
 +EXPORT_SYMBOL(pci_unmap_page);
 +/* Actually, ioremap/iounmap are not PCI specific. But it is ok for drivers. */
 +EXPORT_SYMBOL(ioremap);
 +EXPORT_SYMBOL(iounmap);
 +#endif
 +
 +/* in arch/sparc/mm/highmem.c */
 +#ifdef CONFIG_HIGHMEM
 +EXPORT_SYMBOL(kmap_atomic);
 +EXPORT_SYMBOL(kunmap_atomic);
 +#endif
 +
 +/* prom symbols */
 +EXPORT_SYMBOL(idprom);
 +EXPORT_SYMBOL(prom_root_node);
 +EXPORT_SYMBOL(prom_getchild);
 +EXPORT_SYMBOL(prom_getsibling);
 +EXPORT_SYMBOL(prom_searchsiblings);
 +EXPORT_SYMBOL(prom_firstprop);
 +EXPORT_SYMBOL(prom_nextprop);
 +EXPORT_SYMBOL(prom_getproplen);
 +EXPORT_SYMBOL(prom_getproperty);
 +EXPORT_SYMBOL(prom_node_has_property);
 +EXPORT_SYMBOL(prom_setprop);
 +EXPORT_SYMBOL(saved_command_line);
 +EXPORT_SYMBOL(prom_apply_obio_ranges);
 +EXPORT_SYMBOL(prom_feval);
 +EXPORT_SYMBOL(prom_getbool);
 +EXPORT_SYMBOL(prom_getstring);
 +EXPORT_SYMBOL(prom_getint);
 +EXPORT_SYMBOL(prom_getintdefault);
 +EXPORT_SYMBOL(prom_finddevice);
 +EXPORT_SYMBOL(romvec);
 +EXPORT_SYMBOL(__prom_getchild);
 +EXPORT_SYMBOL(__prom_getsibling);
 +
 +/* sparc library symbols */
 +EXPORT_SYMBOL(memscan);
 +EXPORT_SYMBOL(strlen);
 +EXPORT_SYMBOL(strncmp);
 +EXPORT_SYMBOL(page_kernel);
 +
 +/* Special internal versions of library functions. */
 +EXPORT_SYMBOL(__copy_1page);
 +EXPORT_SYMBOL(__memcpy);
 +EXPORT_SYMBOL(__memset);
 +EXPORT_SYMBOL(bzero_1page);
 +EXPORT_SYMBOL(__bzero);
 +EXPORT_SYMBOL(__memscan_zero);
 +EXPORT_SYMBOL(__memscan_generic);
 +EXPORT_SYMBOL(__strncmp);
 +EXPORT_SYMBOL(__memmove);
 +
 +/* Moving data to/from userspace. */
 +EXPORT_SYMBOL(__copy_user);
 +EXPORT_SYMBOL(__strncpy_from_user);
 +EXPORT_SYMBOL(__strnlen_user);
 +
 +/* Networking helper routines. */
 +EXPORT_SYMBOL(__csum_partial_copy_sparc_generic);
 +EXPORT_SYMBOL(csum_partial);
 +
 +/* Cache flushing.  */
 +EXPORT_SYMBOL(sparc_flush_page_to_ram);
 +
 +/* For when serial stuff is built as modules. */
 +EXPORT_SYMBOL(sun_do_break);
 +
 +EXPORT_SYMBOL(__ret_efault);
 +
 +EXPORT_SYMBOL(memcmp);
 +EXPORT_SYMBOL(memcpy);
 +EXPORT_SYMBOL(memset);
 +EXPORT_SYMBOL(memmove);
 +EXPORT_SYMBOL(__ashrdi3);
 +EXPORT_SYMBOL(__ashldi3);
 +EXPORT_SYMBOL(__lshrdi3);
 +EXPORT_SYMBOL(__muldi3);
 +EXPORT_SYMBOL(__divdi3);
 +
 +EXPORT_SYMBOL(_Rem);
 +EXPORT_SYMBOL(_Urem);
 +EXPORT_SYMBOL(_Mul);
 +EXPORT_SYMBOL(_Umul);
 +EXPORT_SYMBOL(_Div);
 +EXPORT_SYMBOL(_Udiv);
 +
 +#ifdef CONFIG_DEBUG_BUGVERBOSE
 +EXPORT_SYMBOL(do_BUG);
 +#endif
 +
 +/* Sun Power Management Idle Handler */
 +EXPORT_SYMBOL(pm_idle);
 +
 +EXPORT_SYMBOL(empty_zero_page);
diff --cc arch/sparc/kernel/time_64.c
index 141da3759091,000000000000..9df8f095a8b1
mode 100644,000000..100644
--- a/arch/sparc/kernel/time_64.c
+++ b/arch/sparc/kernel/time_64.c
@@@ -1,862 -1,0 +1,862 @@@
 +/* time.c: UltraSparc timer and TOD clock support.
 + *
 + * Copyright (C) 1997, 2008 David S. Miller (davem@davemloft.net)
 + * Copyright (C) 1998 Eddie C. Dost   (ecd@skynet.be)
 + *
 + * Based largely on code which is:
 + *
 + * Copyright (C) 1996 Thomas K. Dyas (tdyas@eden.rutgers.edu)
 + */
 +
 +#include <linux/errno.h>
 +#include <linux/module.h>
 +#include <linux/sched.h>
 +#include <linux/smp_lock.h>
 +#include <linux/kernel.h>
 +#include <linux/param.h>
 +#include <linux/string.h>
 +#include <linux/mm.h>
 +#include <linux/interrupt.h>
 +#include <linux/time.h>
 +#include <linux/timex.h>
 +#include <linux/init.h>
 +#include <linux/ioport.h>
 +#include <linux/mc146818rtc.h>
 +#include <linux/delay.h>
 +#include <linux/profile.h>
 +#include <linux/bcd.h>
 +#include <linux/jiffies.h>
 +#include <linux/cpufreq.h>
 +#include <linux/percpu.h>
 +#include <linux/miscdevice.h>
 +#include <linux/rtc.h>
 +#include <linux/rtc/m48t59.h>
 +#include <linux/kernel_stat.h>
 +#include <linux/clockchips.h>
 +#include <linux/clocksource.h>
 +#include <linux/of_device.h>
 +#include <linux/platform_device.h>
 +
 +#include <asm/oplib.h>
 +#include <asm/timer.h>
 +#include <asm/irq.h>
 +#include <asm/io.h>
 +#include <asm/prom.h>
 +#include <asm/starfire.h>
 +#include <asm/smp.h>
 +#include <asm/sections.h>
 +#include <asm/cpudata.h>
 +#include <asm/uaccess.h>
 +#include <asm/irq_regs.h>
 +
 +#include "entry.h"
 +
 +DEFINE_SPINLOCK(rtc_lock);
 +
 +#define TICK_PRIV_BIT	(1UL << 63)
 +#define TICKCMP_IRQ_BIT	(1UL << 63)
 +
 +#ifdef CONFIG_SMP
 +unsigned long profile_pc(struct pt_regs *regs)
 +{
 +	unsigned long pc = instruction_pointer(regs);
 +
 +	if (in_lock_functions(pc))
 +		return regs->u_regs[UREG_RETPC];
 +	return pc;
 +}
 +EXPORT_SYMBOL(profile_pc);
 +#endif
 +
 +static void tick_disable_protection(void)
 +{
 +	/* Set things up so user can access tick register for profiling
 +	 * purposes.  Also workaround BB_ERRATA_1 by doing a dummy
 +	 * read back of %tick after writing it.
 +	 */
 +	__asm__ __volatile__(
 +	"	ba,pt	%%xcc, 1f\n"
 +	"	 nop\n"
 +	"	.align	64\n"
 +	"1:	rd	%%tick, %%g2\n"
 +	"	add	%%g2, 6, %%g2\n"
 +	"	andn	%%g2, %0, %%g2\n"
 +	"	wrpr	%%g2, 0, %%tick\n"
 +	"	rdpr	%%tick, %%g0"
 +	: /* no outputs */
 +	: "r" (TICK_PRIV_BIT)
 +	: "g2");
 +}
 +
 +static void tick_disable_irq(void)
 +{
 +	__asm__ __volatile__(
 +	"	ba,pt	%%xcc, 1f\n"
 +	"	 nop\n"
 +	"	.align	64\n"
 +	"1:	wr	%0, 0x0, %%tick_cmpr\n"
 +	"	rd	%%tick_cmpr, %%g0"
 +	: /* no outputs */
 +	: "r" (TICKCMP_IRQ_BIT));
 +}
 +
 +static void tick_init_tick(void)
 +{
 +	tick_disable_protection();
 +	tick_disable_irq();
 +}
 +
 +static unsigned long tick_get_tick(void)
 +{
 +	unsigned long ret;
 +
 +	__asm__ __volatile__("rd	%%tick, %0\n\t"
 +			     "mov	%0, %0"
 +			     : "=r" (ret));
 +
 +	return ret & ~TICK_PRIV_BIT;
 +}
 +
 +static int tick_add_compare(unsigned long adj)
 +{
 +	unsigned long orig_tick, new_tick, new_compare;
 +
 +	__asm__ __volatile__("rd	%%tick, %0"
 +			     : "=r" (orig_tick));
 +
 +	orig_tick &= ~TICKCMP_IRQ_BIT;
 +
 +	/* Workaround for Spitfire Errata (#54 I think??), I discovered
 +	 * this via Sun BugID 4008234, mentioned in Solaris-2.5.1 patch
 +	 * number 103640.
 +	 *
 +	 * On Blackbird writes to %tick_cmpr can fail, the
 +	 * workaround seems to be to execute the wr instruction
 +	 * at the start of an I-cache line, and perform a dummy
 +	 * read back from %tick_cmpr right after writing to it. -DaveM
 +	 */
 +	__asm__ __volatile__("ba,pt	%%xcc, 1f\n\t"
 +			     " add	%1, %2, %0\n\t"
 +			     ".align	64\n"
 +			     "1:\n\t"
 +			     "wr	%0, 0, %%tick_cmpr\n\t"
 +			     "rd	%%tick_cmpr, %%g0\n\t"
 +			     : "=r" (new_compare)
 +			     : "r" (orig_tick), "r" (adj));
 +
 +	__asm__ __volatile__("rd	%%tick, %0"
 +			     : "=r" (new_tick));
 +	new_tick &= ~TICKCMP_IRQ_BIT;
 +
 +	return ((long)(new_tick - (orig_tick+adj))) > 0L;
 +}
 +
 +static unsigned long tick_add_tick(unsigned long adj)
 +{
 +	unsigned long new_tick;
 +
 +	/* Also need to handle Blackbird bug here too. */
 +	__asm__ __volatile__("rd	%%tick, %0\n\t"
 +			     "add	%0, %1, %0\n\t"
 +			     "wrpr	%0, 0, %%tick\n\t"
 +			     : "=&r" (new_tick)
 +			     : "r" (adj));
 +
 +	return new_tick;
 +}
 +
 +static struct sparc64_tick_ops tick_operations __read_mostly = {
 +	.name		=	"tick",
 +	.init_tick	=	tick_init_tick,
 +	.disable_irq	=	tick_disable_irq,
 +	.get_tick	=	tick_get_tick,
 +	.add_tick	=	tick_add_tick,
 +	.add_compare	=	tick_add_compare,
 +	.softint_mask	=	1UL << 0,
 +};
 +
 +struct sparc64_tick_ops *tick_ops __read_mostly = &tick_operations;
 +
 +static void stick_disable_irq(void)
 +{
 +	__asm__ __volatile__(
 +	"wr	%0, 0x0, %%asr25"
 +	: /* no outputs */
 +	: "r" (TICKCMP_IRQ_BIT));
 +}
 +
 +static void stick_init_tick(void)
 +{
 +	/* Writes to the %tick and %stick register are not
 +	 * allowed on sun4v.  The Hypervisor controls that
 +	 * bit, per-strand.
 +	 */
 +	if (tlb_type != hypervisor) {
 +		tick_disable_protection();
 +		tick_disable_irq();
 +
 +		/* Let the user get at STICK too. */
 +		__asm__ __volatile__(
 +		"	rd	%%asr24, %%g2\n"
 +		"	andn	%%g2, %0, %%g2\n"
 +		"	wr	%%g2, 0, %%asr24"
 +		: /* no outputs */
 +		: "r" (TICK_PRIV_BIT)
 +		: "g1", "g2");
 +	}
 +
 +	stick_disable_irq();
 +}
 +
 +static unsigned long stick_get_tick(void)
 +{
 +	unsigned long ret;
 +
 +	__asm__ __volatile__("rd	%%asr24, %0"
 +			     : "=r" (ret));
 +
 +	return ret & ~TICK_PRIV_BIT;
 +}
 +
 +static unsigned long stick_add_tick(unsigned long adj)
 +{
 +	unsigned long new_tick;
 +
 +	__asm__ __volatile__("rd	%%asr24, %0\n\t"
 +			     "add	%0, %1, %0\n\t"
 +			     "wr	%0, 0, %%asr24\n\t"
 +			     : "=&r" (new_tick)
 +			     : "r" (adj));
 +
 +	return new_tick;
 +}
 +
 +static int stick_add_compare(unsigned long adj)
 +{
 +	unsigned long orig_tick, new_tick;
 +
 +	__asm__ __volatile__("rd	%%asr24, %0"
 +			     : "=r" (orig_tick));
 +	orig_tick &= ~TICKCMP_IRQ_BIT;
 +
 +	__asm__ __volatile__("wr	%0, 0, %%asr25"
 +			     : /* no outputs */
 +			     : "r" (orig_tick + adj));
 +
 +	__asm__ __volatile__("rd	%%asr24, %0"
 +			     : "=r" (new_tick));
 +	new_tick &= ~TICKCMP_IRQ_BIT;
 +
 +	return ((long)(new_tick - (orig_tick+adj))) > 0L;
 +}
 +
 +static struct sparc64_tick_ops stick_operations __read_mostly = {
 +	.name		=	"stick",
 +	.init_tick	=	stick_init_tick,
 +	.disable_irq	=	stick_disable_irq,
 +	.get_tick	=	stick_get_tick,
 +	.add_tick	=	stick_add_tick,
 +	.add_compare	=	stick_add_compare,
 +	.softint_mask	=	1UL << 16,
 +};
 +
 +/* On Hummingbird the STICK/STICK_CMPR register is implemented
 + * in I/O space.  There are two 64-bit registers each, the
 + * first holds the low 32-bits of the value and the second holds
 + * the high 32-bits.
 + *
 + * Since STICK is constantly updating, we have to access it carefully.
 + *
 + * The sequence we use to read is:
 + * 1) read high
 + * 2) read low
 + * 3) read high again, if it rolled re-read both low and high again.
 + *
 + * Writing STICK safely is also tricky:
 + * 1) write low to zero
 + * 2) write high
 + * 3) write low
 + */
 +#define HBIRD_STICKCMP_ADDR	0x1fe0000f060UL
 +#define HBIRD_STICK_ADDR	0x1fe0000f070UL
 +
 +static unsigned long __hbird_read_stick(void)
 +{
 +	unsigned long ret, tmp1, tmp2, tmp3;
 +	unsigned long addr = HBIRD_STICK_ADDR+8;
 +
 +	__asm__ __volatile__("ldxa	[%1] %5, %2\n"
 +			     "1:\n\t"
 +			     "sub	%1, 0x8, %1\n\t"
 +			     "ldxa	[%1] %5, %3\n\t"
 +			     "add	%1, 0x8, %1\n\t"
 +			     "ldxa	[%1] %5, %4\n\t"
 +			     "cmp	%4, %2\n\t"
 +			     "bne,a,pn	%%xcc, 1b\n\t"
 +			     " mov	%4, %2\n\t"
 +			     "sllx	%4, 32, %4\n\t"
 +			     "or	%3, %4, %0\n\t"
 +			     : "=&r" (ret), "=&r" (addr),
 +			       "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3)
 +			     : "i" (ASI_PHYS_BYPASS_EC_E), "1" (addr));
 +
 +	return ret;
 +}
 +
 +static void __hbird_write_stick(unsigned long val)
 +{
 +	unsigned long low = (val & 0xffffffffUL);
 +	unsigned long high = (val >> 32UL);
 +	unsigned long addr = HBIRD_STICK_ADDR;
 +
 +	__asm__ __volatile__("stxa	%%g0, [%0] %4\n\t"
 +			     "add	%0, 0x8, %0\n\t"
 +			     "stxa	%3, [%0] %4\n\t"
 +			     "sub	%0, 0x8, %0\n\t"
 +			     "stxa	%2, [%0] %4"
 +			     : "=&r" (addr)
 +			     : "0" (addr), "r" (low), "r" (high),
 +			       "i" (ASI_PHYS_BYPASS_EC_E));
 +}
 +
 +static void __hbird_write_compare(unsigned long val)
 +{
 +	unsigned long low = (val & 0xffffffffUL);
 +	unsigned long high = (val >> 32UL);
 +	unsigned long addr = HBIRD_STICKCMP_ADDR + 0x8UL;
 +
 +	__asm__ __volatile__("stxa	%3, [%0] %4\n\t"
 +			     "sub	%0, 0x8, %0\n\t"
 +			     "stxa	%2, [%0] %4"
 +			     : "=&r" (addr)
 +			     : "0" (addr), "r" (low), "r" (high),
 +			       "i" (ASI_PHYS_BYPASS_EC_E));
 +}
 +
 +static void hbtick_disable_irq(void)
 +{
 +	__hbird_write_compare(TICKCMP_IRQ_BIT);
 +}
 +
 +static void hbtick_init_tick(void)
 +{
 +	tick_disable_protection();
 +
 +	/* XXX This seems to be necessary to 'jumpstart' Hummingbird
 +	 * XXX into actually sending STICK interrupts.  I think because
 +	 * XXX of how we store %tick_cmpr in head.S this somehow resets the
 +	 * XXX {TICK + STICK} interrupt mux.  -DaveM
 +	 */
 +	__hbird_write_stick(__hbird_read_stick());
 +
 +	hbtick_disable_irq();
 +}
 +
 +static unsigned long hbtick_get_tick(void)
 +{
 +	return __hbird_read_stick() & ~TICK_PRIV_BIT;
 +}
 +
 +static unsigned long hbtick_add_tick(unsigned long adj)
 +{
 +	unsigned long val;
 +
 +	val = __hbird_read_stick() + adj;
 +	__hbird_write_stick(val);
 +
 +	return val;
 +}
 +
 +static int hbtick_add_compare(unsigned long adj)
 +{
 +	unsigned long val = __hbird_read_stick();
 +	unsigned long val2;
 +
 +	val &= ~TICKCMP_IRQ_BIT;
 +	val += adj;
 +	__hbird_write_compare(val);
 +
 +	val2 = __hbird_read_stick() & ~TICKCMP_IRQ_BIT;
 +
 +	return ((long)(val2 - val)) > 0L;
 +}
 +
 +static struct sparc64_tick_ops hbtick_operations __read_mostly = {
 +	.name		=	"hbtick",
 +	.init_tick	=	hbtick_init_tick,
 +	.disable_irq	=	hbtick_disable_irq,
 +	.get_tick	=	hbtick_get_tick,
 +	.add_tick	=	hbtick_add_tick,
 +	.add_compare	=	hbtick_add_compare,
 +	.softint_mask	=	1UL << 0,
 +};
 +
 +static unsigned long timer_ticks_per_nsec_quotient __read_mostly;
 +
 +int update_persistent_clock(struct timespec now)
 +{
 +	struct rtc_device *rtc = rtc_class_open("rtc0");
 +	int err = -1;
 +
 +	if (rtc) {
 +		err = rtc_set_mmss(rtc, now.tv_sec);
 +		rtc_class_close(rtc);
 +	}
 +
 +	return err;
 +}
 +
 +unsigned long cmos_regs;
 +EXPORT_SYMBOL(cmos_regs);
 +
 +static struct resource rtc_cmos_resource;
 +
 +static struct platform_device rtc_cmos_device = {
 +	.name		= "rtc_cmos",
 +	.id		= -1,
 +	.resource	= &rtc_cmos_resource,
 +	.num_resources	= 1,
 +};
 +
 +static int __devinit rtc_probe(struct of_device *op, const struct of_device_id *match)
 +{
 +	struct resource *r;
 +
 +	printk(KERN_INFO "%s: RTC regs at 0x%lx\n",
 +	       op->node->full_name, op->resource[0].start);
 +
 +	/* The CMOS RTC driver only accepts IORESOURCE_IO, so cons
 +	 * up a fake resource so that the probe works for all cases.
 +	 * When the RTC is behind an ISA bus it will have IORESOURCE_IO
 +	 * already, whereas when it's behind EBUS it will be IORESOURCE_MEM.
 +	 */
 +
 +	r = &rtc_cmos_resource;
 +	r->flags = IORESOURCE_IO;
 +	r->name = op->resource[0].name;
 +	r->start = op->resource[0].start;
 +	r->end = op->resource[0].end;
 +
 +	cmos_regs = op->resource[0].start;
 +	return platform_device_register(&rtc_cmos_device);
 +}
 +
 +static struct of_device_id __initdata rtc_match[] = {
 +	{
 +		.name = "rtc",
 +		.compatible = "m5819",
 +	},
 +	{
 +		.name = "rtc",
 +		.compatible = "isa-m5819p",
 +	},
 +	{
 +		.name = "rtc",
 +		.compatible = "isa-m5823p",
 +	},
 +	{
 +		.name = "rtc",
 +		.compatible = "ds1287",
 +	},
 +	{},
 +};
 +
 +static struct of_platform_driver rtc_driver = {
 +	.match_table	= rtc_match,
 +	.probe		= rtc_probe,
 +	.driver		= {
 +		.name	= "rtc",
 +	},
 +};
 +
 +static struct platform_device rtc_bq4802_device = {
 +	.name		= "rtc-bq4802",
 +	.id		= -1,
 +	.num_resources	= 1,
 +};
 +
 +static int __devinit bq4802_probe(struct of_device *op, const struct of_device_id *match)
 +{
 +
 +	printk(KERN_INFO "%s: BQ4802 regs at 0x%lx\n",
 +	       op->node->full_name, op->resource[0].start);
 +
 +	rtc_bq4802_device.resource = &op->resource[0];
 +	return platform_device_register(&rtc_bq4802_device);
 +}
 +
 +static struct of_device_id __initdata bq4802_match[] = {
 +	{
 +		.name = "rtc",
 +		.compatible = "bq4802",
 +	},
 +	{},
 +};
 +
 +static struct of_platform_driver bq4802_driver = {
 +	.match_table	= bq4802_match,
 +	.probe		= bq4802_probe,
 +	.driver		= {
 +		.name	= "bq4802",
 +	},
 +};
 +
 +static unsigned char mostek_read_byte(struct device *dev, u32 ofs)
 +{
 +	struct platform_device *pdev = to_platform_device(dev);
 +	void __iomem *regs = (void __iomem *) pdev->resource[0].start;
 +
 +	return readb(regs + ofs);
 +}
 +
 +static void mostek_write_byte(struct device *dev, u32 ofs, u8 val)
 +{
 +	struct platform_device *pdev = to_platform_device(dev);
 +	void __iomem *regs = (void __iomem *) pdev->resource[0].start;
 +
 +	writeb(val, regs + ofs);
 +}
 +
 +static struct m48t59_plat_data m48t59_data = {
 +	.read_byte	= mostek_read_byte,
 +	.write_byte	= mostek_write_byte,
 +};
 +
 +static struct platform_device m48t59_rtc = {
 +	.name		= "rtc-m48t59",
 +	.id		= 0,
 +	.num_resources	= 1,
 +	.dev	= {
 +		.platform_data = &m48t59_data,
 +	},
 +};
 +
 +static int __devinit mostek_probe(struct of_device *op, const struct of_device_id *match)
 +{
 +	struct device_node *dp = op->node;
 +
 +	/* On an Enterprise system there can be multiple mostek clocks.
 +	 * We should only match the one that is on the central FHC bus.
 +	 */
 +	if (!strcmp(dp->parent->name, "fhc") &&
 +	    strcmp(dp->parent->parent->name, "central") != 0)
 +		return -ENODEV;
 +
 +	printk(KERN_INFO "%s: Mostek regs at 0x%lx\n",
 +	       dp->full_name, op->resource[0].start);
 +
 +	m48t59_rtc.resource = &op->resource[0];
 +	return platform_device_register(&m48t59_rtc);
 +}
 +
 +static struct of_device_id __initdata mostek_match[] = {
 +	{
 +		.name = "eeprom",
 +	},
 +	{},
 +};
 +
 +static struct of_platform_driver mostek_driver = {
 +	.match_table	= mostek_match,
 +	.probe		= mostek_probe,
 +	.driver		= {
 +		.name	= "mostek",
 +	},
 +};
 +
 +static struct platform_device rtc_sun4v_device = {
 +	.name		= "rtc-sun4v",
 +	.id		= -1,
 +};
 +
 +static struct platform_device rtc_starfire_device = {
 +	.name		= "rtc-starfire",
 +	.id		= -1,
 +};
 +
 +static int __init clock_init(void)
 +{
 +	if (this_is_starfire)
 +		return platform_device_register(&rtc_starfire_device);
 +
 +	if (tlb_type == hypervisor)
 +		return platform_device_register(&rtc_sun4v_device);
 +
 +	(void) of_register_driver(&rtc_driver, &of_platform_bus_type);
 +	(void) of_register_driver(&mostek_driver, &of_platform_bus_type);
 +	(void) of_register_driver(&bq4802_driver, &of_platform_bus_type);
 +
 +	return 0;
 +}
 +
 +/* Must be after subsys_initcall() so that busses are probed.  Must
 + * be before device_initcall() because things like the RTC driver
 + * need to see the clock registers.
 + */
 +fs_initcall(clock_init);
 +
 +/* This gets the master TICK_INT timer going. */
 +static unsigned long sparc64_init_timers(void)
 +{
 +	struct device_node *dp;
 +	unsigned long freq;
 +
 +	dp = of_find_node_by_path("/");
 +	if (tlb_type == spitfire) {
 +		unsigned long ver, manuf, impl;
 +
 +		__asm__ __volatile__ ("rdpr %%ver, %0"
 +				      : "=&r" (ver));
 +		manuf = ((ver >> 48) & 0xffff);
 +		impl = ((ver >> 32) & 0xffff);
 +		if (manuf == 0x17 && impl == 0x13) {
 +			/* Hummingbird, aka Ultra-IIe */
 +			tick_ops = &hbtick_operations;
 +			freq = of_getintprop_default(dp, "stick-frequency", 0);
 +		} else {
 +			tick_ops = &tick_operations;
 +			freq = local_cpu_data().clock_tick;
 +		}
 +	} else {
 +		tick_ops = &stick_operations;
 +		freq = of_getintprop_default(dp, "stick-frequency", 0);
 +	}
 +
 +	return freq;
 +}
 +
 +struct freq_table {
 +	unsigned long clock_tick_ref;
 +	unsigned int ref_freq;
 +};
 +static DEFINE_PER_CPU(struct freq_table, sparc64_freq_table) = { 0, 0 };
 +
 +unsigned long sparc64_get_clock_tick(unsigned int cpu)
 +{
 +	struct freq_table *ft = &per_cpu(sparc64_freq_table, cpu);
 +
 +	if (ft->clock_tick_ref)
 +		return ft->clock_tick_ref;
 +	return cpu_data(cpu).clock_tick;
 +}
 +
 +#ifdef CONFIG_CPU_FREQ
 +
 +static int sparc64_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 +				    void *data)
 +{
 +	struct cpufreq_freqs *freq = data;
 +	unsigned int cpu = freq->cpu;
 +	struct freq_table *ft = &per_cpu(sparc64_freq_table, cpu);
 +
 +	if (!ft->ref_freq) {
 +		ft->ref_freq = freq->old;
 +		ft->clock_tick_ref = cpu_data(cpu).clock_tick;
 +	}
 +	if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) ||
 +	    (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
 +	    (val == CPUFREQ_RESUMECHANGE)) {
 +		cpu_data(cpu).clock_tick =
 +			cpufreq_scale(ft->clock_tick_ref,
 +				      ft->ref_freq,
 +				      freq->new);
 +	}
 +
 +	return 0;
 +}
 +
 +static struct notifier_block sparc64_cpufreq_notifier_block = {
 +	.notifier_call	= sparc64_cpufreq_notifier
 +};
 +
 +static int __init register_sparc64_cpufreq_notifier(void)
 +{
 +
 +	cpufreq_register_notifier(&sparc64_cpufreq_notifier_block,
 +				  CPUFREQ_TRANSITION_NOTIFIER);
 +	return 0;
 +}
 +
 +core_initcall(register_sparc64_cpufreq_notifier);
 +
 +#endif /* CONFIG_CPU_FREQ */
 +
 +static int sparc64_next_event(unsigned long delta,
 +			      struct clock_event_device *evt)
 +{
 +	return tick_ops->add_compare(delta) ? -ETIME : 0;
 +}
 +
 +static void sparc64_timer_setup(enum clock_event_mode mode,
 +				struct clock_event_device *evt)
 +{
 +	switch (mode) {
 +	case CLOCK_EVT_MODE_ONESHOT:
 +	case CLOCK_EVT_MODE_RESUME:
 +		break;
 +
 +	case CLOCK_EVT_MODE_SHUTDOWN:
 +		tick_ops->disable_irq();
 +		break;
 +
 +	case CLOCK_EVT_MODE_PERIODIC:
 +	case CLOCK_EVT_MODE_UNUSED:
 +		WARN_ON(1);
 +		break;
 +	};
 +}
 +
 +static struct clock_event_device sparc64_clockevent = {
 +	.features	= CLOCK_EVT_FEAT_ONESHOT,
 +	.set_mode	= sparc64_timer_setup,
 +	.set_next_event	= sparc64_next_event,
 +	.rating		= 100,
 +	.shift		= 30,
 +	.irq		= -1,
 +};
 +static DEFINE_PER_CPU(struct clock_event_device, sparc64_events);
 +
 +void timer_interrupt(int irq, struct pt_regs *regs)
 +{
 +	struct pt_regs *old_regs = set_irq_regs(regs);
 +	unsigned long tick_mask = tick_ops->softint_mask;
 +	int cpu = smp_processor_id();
 +	struct clock_event_device *evt = &per_cpu(sparc64_events, cpu);
 +
 +	clear_softint(tick_mask);
 +
 +	irq_enter();
 +
 +	kstat_this_cpu.irqs[0]++;
 +
 +	if (unlikely(!evt->event_handler)) {
 +		printk(KERN_WARNING
 +		       "Spurious SPARC64 timer interrupt on cpu %d\n", cpu);
 +	} else
 +		evt->event_handler(evt);
 +
 +	irq_exit();
 +
 +	set_irq_regs(old_regs);
 +}
 +
 +void __devinit setup_sparc64_timer(void)
 +{
 +	struct clock_event_device *sevt;
 +	unsigned long pstate;
 +
 +	/* Guarantee that the following sequences execute
 +	 * uninterrupted.
 +	 */
 +	__asm__ __volatile__("rdpr	%%pstate, %0\n\t"
 +			     "wrpr	%0, %1, %%pstate"
 +			     : "=r" (pstate)
 +			     : "i" (PSTATE_IE));
 +
 +	tick_ops->init_tick();
 +
 +	/* Restore PSTATE_IE. */
 +	__asm__ __volatile__("wrpr	%0, 0x0, %%pstate"
 +			     : /* no outputs */
 +			     : "r" (pstate));
 +
 +	sevt = &__get_cpu_var(sparc64_events);
 +
 +	memcpy(sevt, &sparc64_clockevent, sizeof(*sevt));
- 	sevt->cpumask = cpumask_of_cpu(smp_processor_id());
++	sevt->cpumask = cpumask_of(smp_processor_id());
 +
 +	clockevents_register_device(sevt);
 +}
 +
 +#define SPARC64_NSEC_PER_CYC_SHIFT	10UL
 +
 +static struct clocksource clocksource_tick = {
 +	.rating		= 100,
 +	.mask		= CLOCKSOURCE_MASK(64),
 +	.shift		= 16,
 +	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 +};
 +
 +static void __init setup_clockevent_multiplier(unsigned long hz)
 +{
 +	unsigned long mult, shift = 32;
 +
 +	while (1) {
 +		mult = div_sc(hz, NSEC_PER_SEC, shift);
 +		if (mult && (mult >> 32UL) == 0UL)
 +			break;
 +
 +		shift--;
 +	}
 +
 +	sparc64_clockevent.shift = shift;
 +	sparc64_clockevent.mult = mult;
 +}
 +
 +static unsigned long tb_ticks_per_usec __read_mostly;
 +
 +void __delay(unsigned long loops)
 +{
 +	unsigned long bclock, now;
 +
 +	bclock = tick_ops->get_tick();
 +	do {
 +		now = tick_ops->get_tick();
 +	} while ((now-bclock) < loops);
 +}
 +EXPORT_SYMBOL(__delay);
 +
 +void udelay(unsigned long usecs)
 +{
 +	__delay(tb_ticks_per_usec * usecs);
 +}
 +EXPORT_SYMBOL(udelay);
 +
 +void __init time_init(void)
 +{
 +	unsigned long freq = sparc64_init_timers();
 +
 +	tb_ticks_per_usec = freq / USEC_PER_SEC;
 +
 +	timer_ticks_per_nsec_quotient =
 +		clocksource_hz2mult(freq, SPARC64_NSEC_PER_CYC_SHIFT);
 +
 +	clocksource_tick.name = tick_ops->name;
 +	clocksource_tick.mult =
 +		clocksource_hz2mult(freq,
 +				    clocksource_tick.shift);
 +	clocksource_tick.read = tick_ops->get_tick;
 +
 +	printk("clocksource: mult[%x] shift[%d]\n",
 +	       clocksource_tick.mult, clocksource_tick.shift);
 +
 +	clocksource_register(&clocksource_tick);
 +
 +	sparc64_clockevent.name = tick_ops->name;
 +
 +	setup_clockevent_multiplier(freq);
 +
 +	sparc64_clockevent.max_delta_ns =
 +		clockevent_delta2ns(0x7fffffffffffffffUL, &sparc64_clockevent);
 +	sparc64_clockevent.min_delta_ns =
 +		clockevent_delta2ns(0xF, &sparc64_clockevent);
 +
 +	printk("clockevent: mult[%lx] shift[%d]\n",
 +	       sparc64_clockevent.mult, sparc64_clockevent.shift);
 +
 +	setup_sparc64_timer();
 +}
 +
 +unsigned long long sched_clock(void)
 +{
 +	unsigned long ticks = tick_ops->get_tick();
 +
 +	return (ticks * timer_ticks_per_nsec_quotient)
 +		>> SPARC64_NSEC_PER_CYC_SHIFT;
 +}
 +
 +int __devinit read_current_timer(unsigned long *timer_val)
 +{
 +	*timer_val = tick_ops->get_tick();
 +	return 0;
 +}
diff --cc arch/x86/include/asm/irq.h
index 28e409fc73f3,4bb732e45a85..592688ed04d3
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@@ -31,9 -31,13 +31,9 @@@ static inline int irq_canonicalize(int 
  # endif
  #endif
  
 -#ifdef CONFIG_IRQBALANCE
 -extern int irqbalance_disable(char *str);
 -#endif
 -
  #ifdef CONFIG_HOTPLUG_CPU
  #include <linux/cpumask.h>
- extern void fixup_irqs(cpumask_t map);
+ extern void fixup_irqs(void);
  #endif
  
  extern unsigned int do_IRQ(struct pt_regs *regs);
diff --cc arch/x86/kernel/io_apic.c
index 74917658b004,1cbf7c8d46e0..62ecfc991e1e
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@@ -152,25 -152,25 +152,25 @@@ static struct irq_cfg irq_cfgx[] = 
  #else
  static struct irq_cfg irq_cfgx[NR_IRQS] = {
  #endif
- 	[0]  = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
- 	[1]  = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
- 	[2]  = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
- 	[3]  = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
- 	[4]  = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
- 	[5]  = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
- 	[6]  = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
- 	[7]  = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
- 	[8]  = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
- 	[9]  = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
- 	[10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
- 	[11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
- 	[12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
- 	[13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
- 	[14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
- 	[15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+ 	[0]  = { .vector = IRQ0_VECTOR,  },
+ 	[1]  = { .vector = IRQ1_VECTOR,  },
+ 	[2]  = { .vector = IRQ2_VECTOR,  },
+ 	[3]  = { .vector = IRQ3_VECTOR,  },
+ 	[4]  = { .vector = IRQ4_VECTOR,  },
+ 	[5]  = { .vector = IRQ5_VECTOR,  },
+ 	[6]  = { .vector = IRQ6_VECTOR,  },
+ 	[7]  = { .vector = IRQ7_VECTOR,  },
+ 	[8]  = { .vector = IRQ8_VECTOR,  },
+ 	[9]  = { .vector = IRQ9_VECTOR,  },
+ 	[10] = { .vector = IRQ10_VECTOR, },
+ 	[11] = { .vector = IRQ11_VECTOR, },
+ 	[12] = { .vector = IRQ12_VECTOR, },
+ 	[13] = { .vector = IRQ13_VECTOR, },
+ 	[14] = { .vector = IRQ14_VECTOR, },
+ 	[15] = { .vector = IRQ15_VECTOR, },
  };
  
 -void __init arch_early_irq_init(void)
 +int __init arch_early_irq_init(void)
  {
  	struct irq_cfg *cfg;
  	struct irq_desc *desc;
@@@ -183,9 -183,11 +183,13 @@@
  	for (i = 0; i < count; i++) {
  		desc = irq_to_desc(i);
  		desc->chip_data = &cfg[i];
+ 		alloc_bootmem_cpumask_var(&cfg[i].domain);
+ 		alloc_bootmem_cpumask_var(&cfg[i].old_domain);
+ 		if (i < NR_IRQS_LEGACY)
+ 			cpumask_setall(cfg[i].domain);
  	}
 +
 +	return 0;
  }
  
  #ifdef CONFIG_SPARSE_IRQ
@@@ -1349,8 -1400,10 +1404,8 @@@ void __setup_vector_irq(int cpu
  
  	/* Mark the inuse vectors */
  	for_each_irq_desc(irq, desc) {
 -		if (!desc)
 -			continue;
  		cfg = desc->chip_data;
- 		if (!cpu_isset(cpu, cfg->domain))
+ 		if (!cpumask_test_cpu(cpu, cfg->domain))
  			continue;
  		vector = cfg->vector;
  		per_cpu(vector_irq, cpu)[vector] = irq;
diff --cc kernel/sched.c
index fff1c4a20b65,756d981d91a4..27ba1d642f0f
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@@ -5426,8 -5477,17 +5495,16 @@@ long sched_setaffinity(pid_t pid, cons
  	get_task_struct(p);
  	read_unlock(&tasklist_lock);
  
+ 	if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
+ 		retval = -ENOMEM;
+ 		goto out_put_task;
+ 	}
+ 	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) {
+ 		retval = -ENOMEM;
+ 		goto out_free_cpus_allowed;
+ 	}
  	retval = -EPERM;
 -	if ((current->euid != p->euid) && (current->euid != p->uid) &&
 -			!capable(CAP_SYS_NICE))
 +	if (!check_same_owner(p) && !capable(CAP_SYS_NICE))
  		goto out_unlock;
  
  	retval = security_task_setscheduler(p, 0, NULL);
diff --cc kernel/time/tick-sched.c
index 8f3fc2582d38,70f872c71f4e..76a574bbef97
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@@ -282,31 -282,8 +282,31 @@@ void tick_nohz_stop_sched_tick(int inid
  	/* Schedule the tick, if we are at least one jiffie off */
  	if ((long)delta_jiffies >= 1) {
  
 +		/*
 +		* calculate the expiry time for the next timer wheel
 +		* timer
 +		*/
 +		expires = ktime_add_ns(last_update, tick_period.tv64 *
 +				   delta_jiffies);
 +
 +		/*
 +		 * If this cpu is the one which updates jiffies, then
 +		 * give up the assignment and let it be taken by the
 +		 * cpu which runs the tick timer next, which might be
 +		 * this cpu as well. If we don't drop this here the
 +		 * jiffies might be stale and do_timer() never
 +		 * invoked.
 +		 */
 +		if (cpu == tick_do_timer_cpu)
 +			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
 +
  		if (delta_jiffies > 1)
- 			cpu_set(cpu, nohz_cpu_mask);
+ 			cpumask_set_cpu(cpu, nohz_cpu_mask);
 +
 +		/* Skip reprogram of event if it's not changed */
 +		if (ts->tick_stopped && ktime_equal(expires, dev->next_event))
 +			goto out;
 +
  		/*
  		 * nohz_stop_sched_tick can be called several times before
  		 * the nohz_restart_sched_tick is called. This happens when