/*
 *	x86 SMP booting functions
 *
 *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
 *	(c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
 *	Copyright 2001 Andi Kleen, SuSE Labs.
 *
 *	Much of the core SMP work is based on previous work by Thomas Radke, to
 *	whom a great many thanks are extended.
 *
 *	Thanks to Intel for making available several different Pentium,
 *	Pentium Pro and Pentium-II/Xeon MP machines.
 *	Original development of Linux SMP code supported by Caldera.
 *
 *	This code is released under the GNU General Public License version 2
 *
 *	Fixes
 *		Felix Koop	:	NR_CPUS used properly
 *		Jose Renau	:	Handle single CPU case.
 *		Alan Cox	:	By repeated request 8) - Total BogoMIP report.
 *		Greg Wright	:	Fix for kernel stacks panic.
 *		Erich Boleyn	:	MP v1.4 and additional changes.
 *	Matthias Sattler	:	Changes for 2.1 kernel map.
 *	Michel Lespinasse	:	Changes for 2.1 kernel map.
 *	Michael Chastain	:	Change trampoline.S to gnu as.
 *		Alan Cox	:	Dumb bug: 'B' step PPro's are fine
 *		Ingo Molnar	:	Added APIC timers, based on code
 *					from Jose Renau
 *		Ingo Molnar	:	various cleanups and rewrites
 *		Tigran Aivazian	:	fixed "0.00 in /proc/uptime on SMP" bug.
 *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs
 *	Andi Kleen		:	Changed for SMP boot into long mode.
 *		Rusty Russell	:	Hacked into shape for new "hotplug" boot process.
 *		Andi Kleen	:	Converted to new state machine.
 *					Various cleanups.
 *					Probably mostly hotplug CPU ready now.
 *	Ashok Raj		:	CPU hotplug support
 */

#include <linux/config.h>
#include <linux/init.h>

#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/smp_lock.h>
#include <linux/bootmem.h>
#include <linux/thread_info.h>
#include <linux/module.h>

#include <linux/delay.h>
#include <linux/mc146818rtc.h>
#include <asm/mtrr.h>
#include <asm/pgalloc.h>
#include <asm/desc.h>
#include <asm/kdebug.h>
#include <asm/tlbflush.h>
#include <asm/proto.h>
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/hw_irq.h>
#include <asm/numa.h>

/* Number of siblings per CPU package */
int smp_num_siblings = 1;
/* Package ID of each logical CPU */
u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
/* core ID of each logical CPU */
u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };

/* Bitmask of currently online CPUs */
cpumask_t cpu_online_map __read_mostly;

EXPORT_SYMBOL(cpu_online_map);

/*
 * Private maps to synchronize booting between AP and BP.
 * Probably not needed anymore, but it makes for easier debugging. -AK
 */
cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;

cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);

/* Per CPU bogomips and other parameters */
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;

/* Set when the idlers are all forked */
int smp_threads_ready;

/* representing HT siblings of each logical CPU */
cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;

/* representing HT and core siblings of each logical CPU */
cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(cpu_core_map);

/*
 * Trampoline 80x86 program as an array.
 */

extern unsigned char trampoline_data[];
extern unsigned char trampoline_end[];

/* State of each CPU */
DEFINE_PER_CPU(int, cpu_state) = { 0 };
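/*
 * cpu_state holds the CPU_* hotplug states (CPU_UP_PREPARE, CPU_ONLINE,
 * CPU_DEAD) that __cpu_up(), start_secondary() and __cpu_die() below
 * move a processor through.
 */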

/*
 * Store all idle threads; they can be reused instead of creating new
 * threads.  This also avoids complicated thread-destroy functionality
 * for idle threads.
 */
struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata;

#define get_idle_for_cpu(x)	(idle_thread_array[(x)])
#define set_idle_for_cpu(x,p)	(idle_thread_array[(x)] = (p))

/*
 * Currently trivial. Write the real->protected mode
 * bootstrap into the page concerned. The caller
 * has made sure it's suitably aligned.
 */

static unsigned long __cpuinit setup_trampoline(void)
{
	void *tramp = __va(SMP_TRAMPOLINE_BASE);
	memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
	return virt_to_phys(tramp);
}

/*
 * The bootstrap kernel entry code has set these up. Save them for
 * a given CPU
 */

static void __cpuinit smp_store_cpu_info(int id)
{
	struct cpuinfo_x86 *c = cpu_data + id;

	*c = boot_cpu_data;
	identify_cpu(c);
	print_cpu_info(c);
}

/*
 * New Funky TSC sync algorithm borrowed from IA64.
 * Main advantage is that it doesn't reset the TSCs fully and
 * in general looks more robust and it works better than my earlier
 * attempts. I believe it was written by David Mosberger. Some minor
 * adjustments for x86-64 by me -AK
 *
 * Original comment reproduced below.
 *
 * Synchronize TSC of the current (slave) CPU with the TSC of the
 * MASTER CPU (normally the time-keeper CPU).  We use a closed loop to
 * eliminate the possibility of unaccounted-for errors (such as
 * getting a machine check in the middle of a calibration step).  The
 * basic idea is for the slave to ask the master what itc value it has
 * and to read its own itc before and after the master responds.  Each
 * iteration gives us three timestamps:
 *
 *	slave		master
 *
 *	t0 ---\
 *	       ---\
 *	           --->
 *	               tm
 *	           /---
 *	       /---
 *	t1 <---
 *
 *
 * The goal is to adjust the slave's TSC such that tm falls exactly
 * half-way between t0 and t1.  If we achieve this, the clocks are
 * synchronized provided the interconnect between the slave and the
 * master is symmetric.  Even if the interconnect were asymmetric, we
 * would still know that the synchronization error is smaller than the
 * roundtrip latency (t1 - t0).
 *
 * When the interconnect is quiet and symmetric, this lets us
 * synchronize the TSC to within one or two cycles.  However, we can
 * only *guarantee* that the synchronization is accurate to within a
 * round-trip time, which is typically in the range of several hundred
 * cycles (e.g., ~500 cycles).  In practice, this means that the TSCs
 * are usually almost perfectly synchronized, but we shouldn't assume
 * that the accuracy is much better than half a microsecond or so.
 *
 * [there are other errors like the latency of RDTSC and of the
 * WRMSR.  These can also account for hundreds of cycles.  So it's
 * probably worse.  It claims 153 cycles error on a dual Opteron,
 * but I suspect the numbers are actually somewhat worse -AK]
 */

#define MASTER	0
#define SLAVE	(SMP_CACHE_BYTES/8)
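/*
 * MASTER and SLAVE index the go[] handshake array below.  SLAVE sits
 * SMP_CACHE_BYTES/8 longs past MASTER, i.e. a full cache line away,
 * so the two flags never share a cache line while both CPUs spin on them.
 */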

/* Intentionally don't use cpu_relax() during TSC synchronization
   because we don't want to go into funky power-save modes or cause
   hypervisors to schedule us away.  Going to sleep would likely affect
   latency and low latency is the primary objective here. -AK */
#define no_cpu_relax()	barrier()

static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock);
static volatile __cpuinitdata unsigned long go[SLAVE + 1];
static int notscsync __cpuinitdata;

#undef DEBUG_TSC_SYNC

#define NUM_ROUNDS	64	/* magic value */
#define NUM_ITERS	5	/* likewise */

/* Callback on boot CPU */
static __cpuinit void sync_master(void *arg)
{
	unsigned long flags, i;

	go[MASTER] = 0;

	local_irq_save(flags);
	{
		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
			while (!go[MASTER])
				no_cpu_relax();
			go[MASTER] = 0;
			rdtscll(go[SLAVE]);
		}
	}
	local_irq_restore(flags);
}

/*
 * Return the number of cycles by which our tsc differs from the tsc
 * on the master (time-keeper) CPU.  A positive number indicates our
 * tsc is ahead of the master, negative that it is behind.
 */
static inline long
get_delta(long *rt, long *master)
{
	unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
	unsigned long tcenter, t0, t1, tm;
	int i;

	for (i = 0; i < NUM_ITERS; ++i) {
		rdtscll(t0);
		go[MASTER] = 1;
		while (!(tm = go[SLAVE]))
			no_cpu_relax();
		go[SLAVE] = 0;
		rdtscll(t1);

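		/*
		 * Keep the triple from the iteration with the smallest
		 * round-trip time: it bounds the synchronization error
		 * the tightest.
		 */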
		if (t1 - t0 < best_t1 - best_t0)
			best_t0 = t0, best_t1 = t1, best_tm = tm;
	}

	*rt = best_t1 - best_t0;
	*master = best_tm - best_t0;

	/* average best_t0 and best_t1 without overflow: */
	tcenter = (best_t0/2 + best_t1/2);
	if (best_t0 % 2 + best_t1 % 2 == 2)
		++tcenter;
	return tcenter - best_tm;
}

static __cpuinit void sync_tsc(unsigned int master)
{
	int i, done = 0;
	long delta, adj, adjust_latency = 0;
	unsigned long flags, rt, master_time_stamp, bound;
#ifdef DEBUG_TSC_SYNC
	static struct syncdebug {
		long rt;	/* roundtrip time */
		long master;	/* master's timestamp */
		long diff;	/* difference between midpoint and master's timestamp */
		long lat;	/* estimate of tsc adjustment latency */
	} t[NUM_ROUNDS] __cpuinitdata;
#endif

	printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n",
		smp_processor_id(), master);

	go[MASTER] = 1;

	/* It is dangerous to broadcast IPIs while CPUs are coming up,
	 * as they may not be ready to accept them.  Since we only need
	 * to reach the boot CPU, send the IPI to it directly and avoid
	 * the race.
	 */
	smp_call_function_single(master, sync_master, NULL, 1, 0);

	while (go[MASTER])	/* wait for master to be ready */
		no_cpu_relax();

	spin_lock_irqsave(&tsc_sync_lock, flags);
	{
		for (i = 0; i < NUM_ROUNDS; ++i) {
			delta = get_delta(&rt, &master_time_stamp);
			if (delta == 0) {
				done = 1;	/* let's lock on to this... */
				bound = rt;
			}

			if (!done) {
				unsigned long t;
				if (i > 0) {
					adjust_latency += -delta;
					adj = -delta + adjust_latency/4;
				} else
					adj = -delta;

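				/*
				 * Apply the correction: reread the TSC and
				 * write it back with the adjustment added.
				 * The latency of the write itself shows up
				 * in later deltas and is absorbed by the
				 * adjust_latency term above.
				 */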
				rdtscll(t);
				wrmsrl(MSR_IA32_TSC, t + adj);
			}
#ifdef DEBUG_TSC_SYNC
			t[i].rt = rt;
			t[i].master = master_time_stamp;
			t[i].diff = delta;
			t[i].lat = adjust_latency/4;
#endif
		}
	}
	spin_unlock_irqrestore(&tsc_sync_lock, flags);

#ifdef DEBUG_TSC_SYNC
	for (i = 0; i < NUM_ROUNDS; ++i)
		printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
		       t[i].rt, t[i].master, t[i].diff, t[i].lat);
#endif

	printk(KERN_INFO
	       "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, "
	       "maxerr %lu cycles)\n",
	       smp_processor_id(), master, delta, rt);
}

static void __cpuinit tsc_sync_wait(void)
{
	/*
	 * When the CPU has synchronized TSCs assume the BIOS
	 * or the hardware already synced.  Otherwise we could
	 * mess up a possible perfect synchronization with a
	 * not-quite-perfect algorithm.
	 */
	if (notscsync || !cpu_has_tsc || !unsynchronized_tsc())
		return;
	sync_tsc(0);
}

static __init int notscsync_setup(char *s)
{
	notscsync = 1;
	return 0;
}
__setup("notscsync", notscsync_setup);

static atomic_t init_deasserted __cpuinitdata;

/*
 * Report back to the Boot Processor.
 * Running on AP.
 */
void __cpuinit smp_callin(void)
{
	int cpuid, phys_id;
	unsigned long timeout;

	/*
	 * If woken up by an INIT in an 82489DX configuration
	 * we may get here before an INIT-deassert IPI reaches
	 * our local APIC.  We have to wait for the IPI or we'll
	 * lock up on an APIC access.
	 */
	while (!atomic_read(&init_deasserted))
		cpu_relax();

	/*
	 * (This works even if the APIC is not enabled.)
	 */
	phys_id = GET_APIC_ID(apic_read(APIC_ID));
	cpuid = smp_processor_id();
	if (cpu_isset(cpuid, cpu_callin_map)) {
		panic("smp_callin: phys CPU#%d, CPU#%d already present??\n",
					phys_id, cpuid);
	}
	Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);

	/*
	 * STARTUP IPIs are fragile beasts as they might sometimes
	 * trigger some glue motherboard logic.  Complete APIC bus
	 * silence for 1 second; this overestimates, by a factor of two,
	 * the time the boot CPU spends sending the up to 2 STARTUP IPIs.
	 * This should be enough.
	 */

	/*
	 * Waiting 2s total for startup (udelay is not yet working)
	 */
	timeout = jiffies + 2*HZ;
	while (time_before(jiffies, timeout)) {
		/*
		 * Has the boot CPU finished its STARTUP sequence?
		 */
		if (cpu_isset(cpuid, cpu_callout_map))
			break;
		cpu_relax();
	}

	if (!time_before(jiffies, timeout)) {
		panic("smp_callin: CPU%d started up but did not get a callout!\n",
			cpuid);
	}

	/*
	 * the boot CPU has finished the init stage and is spinning
	 * on callin_map until we finish. We are free to set up this
	 * CPU, first the APIC. (this is probably redundant on most
	 * boards)
	 */

	Dprintk("CALLIN, before setup_local_APIC().\n");
	setup_local_APIC();

	/*
	 * Get our bogomips.
	 *
	 * Need to enable IRQs because it can take longer and then
	 * the NMI watchdog might kill us.
	 */
	local_irq_enable();
	calibrate_delay();
	local_irq_disable();
	Dprintk("Stack at about %p\n", &cpuid);

	disable_APIC_timer();

	/*
	 * Save our processor parameters
	 */
	smp_store_cpu_info(cpuid);

	/*
	 * Allow the master to continue.
	 */
	cpu_set(cpuid, cpu_callin_map);
}

/* representing cpus for which sibling maps can be computed */
static cpumask_t cpu_sibling_setup_map;

static inline void set_cpu_sibling_map(int cpu)
{
	int i;
	struct cpuinfo_x86 *c = cpu_data;

	cpu_set(cpu, cpu_sibling_setup_map);

	if (smp_num_siblings > 1) {
		for_each_cpu_mask(i, cpu_sibling_setup_map) {
			if (phys_proc_id[cpu] == phys_proc_id[i] &&
			    cpu_core_id[cpu] == cpu_core_id[i]) {
				cpu_set(i, cpu_sibling_map[cpu]);
				cpu_set(cpu, cpu_sibling_map[i]);
				cpu_set(i, cpu_core_map[cpu]);
				cpu_set(cpu, cpu_core_map[i]);
			}
		}
	} else {
		cpu_set(cpu, cpu_sibling_map[cpu]);
	}

	if (current_cpu_data.x86_max_cores == 1) {
		cpu_core_map[cpu] = cpu_sibling_map[cpu];
		c[cpu].booted_cores = 1;
		return;
	}

	for_each_cpu_mask(i, cpu_sibling_setup_map) {
		if (phys_proc_id[cpu] == phys_proc_id[i]) {
			cpu_set(i, cpu_core_map[cpu]);
			cpu_set(cpu, cpu_core_map[i]);
			/*
			 * Does this new cpu bring up a new core?
			 */
			if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
				/*
				 * for each core in package, increment
				 * the booted_cores for this new cpu
				 */
				if (first_cpu(cpu_sibling_map[i]) == i)
					c[cpu].booted_cores++;
				/*
				 * increment the core count for all
				 * the other cpus in this package
				 */
				if (i != cpu)
					c[i].booted_cores++;
			} else if (i != cpu && !c[cpu].booted_cores)
				c[cpu].booted_cores = c[i].booted_cores;
		}
	}
}

/*
 * Setup code on secondary processor (after coming out of the trampoline)
 */
void __cpuinit start_secondary(void)
{
	/*
	 * Don't put anything before smp_callin(); SMP booting is so
	 * fragile that we want to limit the things done here to the
	 * most necessary things.
	 */
	cpu_init();
	preempt_disable();
	smp_callin();

	/* otherwise gcc will move up the smp_processor_id before the cpu_init */
	barrier();

	Dprintk("cpu %d: setting up apic clock\n", smp_processor_id());
	setup_secondary_APIC_clock();

	Dprintk("cpu %d: enabling apic timer\n", smp_processor_id());

	if (nmi_watchdog == NMI_IO_APIC) {
		disable_8259A_irq(0);
		enable_NMI_through_LVT0(NULL);
		enable_8259A_irq(0);
	}

	enable_APIC_timer();

	/*
	 * The sibling maps must be set before turning the online map on for
	 * this cpu
	 */
	set_cpu_sibling_map(smp_processor_id());

	/*
	 * Wait for TSC sync here so that nothing gets scheduled before it
	 * is done.  We still process interrupts, which could see an
	 * inconsistent time in that window unfortunately.
	 * Do this here because TSC sync has global unprotected state.
	 */
	tsc_sync_wait();

	/*
	 * We need to hold call_lock, so there is no inconsistency
	 * between the time smp_call_function() determines the number of
	 * IPI recipients, and the time when the determination is made
	 * for which cpus receive the IPI in genapic_flat.c. Holding this
	 * lock helps us to not include this cpu in a currently in progress
	 * smp_call_function().
	 */
	lock_ipi_call_lock();

	/*
	 * Allow the master to continue.
	 */
	cpu_set(smp_processor_id(), cpu_online_map);
	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
	unlock_ipi_call_lock();

	cpu_idle();
}

extern volatile unsigned long init_rsp;
extern void (*initial_code)(void);

#ifdef APIC_DEBUG
static void inquire_remote_apic(int apicid)
{
	unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
	char *names[] = { "ID", "VERSION", "SPIV" };
	int timeout, status;

	printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);

	for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
		printk("... APIC #%d %s: ", apicid, names[i]);

		/*
		 * Wait for idle.
		 */
		apic_wait_icr_idle();

		apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
		apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);

		timeout = 0;
		do {
			udelay(100);
			status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
		} while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);

		switch (status) {
		case APIC_ICR_RR_VALID:
			status = apic_read(APIC_RRR);
			printk("%08x\n", status);
			break;
		default:
			printk("failed\n");
		}
	}
}
#endif

/*
 * Kick the secondary to wake up.
 */
static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip)
{
	unsigned long send_status = 0, accept_status = 0;
	int maxlvt, timeout, num_starts, j;

	Dprintk("Asserting INIT.\n");

	/*
	 * Turn INIT on target chip
	 */
	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

	/*
	 * Send IPI
	 */
	apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
				| APIC_DM_INIT);

	Dprintk("Waiting for send to finish...\n");
	timeout = 0;
	do {
		Dprintk("+");
		udelay(100);
		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
	} while (send_status && (timeout++ < 1000));

	mdelay(10);

	Dprintk("Deasserting INIT.\n");

	/* Target chip */
	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

	/* Send IPI */
	apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);

	Dprintk("Waiting for send to finish...\n");
	timeout = 0;
	do {
		Dprintk("+");
		udelay(100);
		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
	} while (send_status && (timeout++ < 1000));

	mb();
	atomic_set(&init_deasserted, 1);

	num_starts = 2;

	/*
	 * Run STARTUP IPI loop.
	 */
	Dprintk("#startup loops: %d.\n", num_starts);

	maxlvt = get_maxlvt();

	for (j = 1; j <= num_starts; j++) {
		Dprintk("Sending STARTUP #%d.\n", j);
		apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
		Dprintk("After apic_write.\n");

		/*
		 * STARTUP IPI
		 */

		/* Target chip */
		apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

		/* Boot on the stack */
		/* Kick the second */
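		/*
		 * The STARTUP IPI vector field carries the physical page
		 * number of the trampoline, so start_rip must be page
		 * aligned and below 1MB.
		 */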
		apic_write(APIC_ICR, APIC_DM_STARTUP | (start_rip >> 12));

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		udelay(300);

		Dprintk("Startup point 1.\n");

		Dprintk("Waiting for send to finish...\n");
		timeout = 0;
		do {
			Dprintk("+");
			udelay(100);
			send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
		} while (send_status && (timeout++ < 1000));

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		udelay(200);
		/*
		 * Due to the Pentium erratum 3AP.
		 */
		if (maxlvt > 3) {
			apic_write(APIC_ESR, 0);
		}
		accept_status = (apic_read(APIC_ESR) & 0xEF);
		if (send_status || accept_status)
			break;
	}
	Dprintk("After Startup.\n");

	if (send_status)
		printk(KERN_ERR "APIC never delivered???\n");
	if (accept_status)
		printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);

	return (send_status | accept_status);
}

struct create_idle {
	struct task_struct *idle;
	struct completion done;
	int cpu;
};

void do_fork_idle(void *_c_idle)
{
	struct create_idle *c_idle = _c_idle;

	c_idle->idle = fork_idle(c_idle->cpu);
	complete(&c_idle->done);
}

/*
 * Boot one CPU.
 */
static int __cpuinit do_boot_cpu(int cpu, int apicid)
{
	unsigned long boot_error;
	int timeout;
	unsigned long start_rip;
	struct create_idle c_idle = {
		.cpu = cpu,
		.done = COMPLETION_INITIALIZER(c_idle.done),
	};
	DECLARE_WORK(work, do_fork_idle, &c_idle);

	/* allocate memory for gdts of secondary cpus. Hotplug is considered */
	if (!cpu_gdt_descr[cpu].address &&
		!(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
		printk(KERN_ERR "Failed to allocate GDT for CPU %d\n", cpu);
		return -1;
	}

	/* Allocate node local memory for AP pdas */
	if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) {
		struct x8664_pda *newpda, *pda;
		int node = cpu_to_node(cpu);
		pda = cpu_pda(cpu);
		newpda = kmalloc_node(sizeof (struct x8664_pda), GFP_ATOMIC,
				      node);
		if (newpda) {
			memcpy(newpda, pda, sizeof (struct x8664_pda));
			cpu_pda(cpu) = newpda;
		} else
			printk(KERN_ERR
		"Could not allocate node local PDA for CPU %d on node %d\n",
				cpu, node);
	}

	c_idle.idle = get_idle_for_cpu(cpu);

	if (c_idle.idle) {
		c_idle.idle->thread.rsp = (unsigned long) (((struct pt_regs *)
			(THREAD_SIZE + task_stack_page(c_idle.idle))) - 1);
		init_idle(c_idle.idle, cpu);
		goto do_rest;
	}

	/*
	 * During the cold boot process the keventd thread is not spun up
	 * yet.  When we do CPU hot-add we create idle threads on the fly,
	 * and they should not inherit any attributes from the calling
	 * context; hence the clean way to create kernel threads is from
	 * keventd().  We check current_is_keventd() because the ACPI
	 * notifier also queues work to keventd(), and if the caller is
	 * already running in keventd() context we would lock up the
	 * keventd thread.
	 */
	if (!keventd_up() || current_is_keventd())
		work.func(work.data);
	else {
		schedule_work(&work);
		wait_for_completion(&c_idle.done);
	}

	if (IS_ERR(c_idle.idle)) {
		printk("failed fork for CPU %d\n", cpu);
		return PTR_ERR(c_idle.idle);
	}

	set_idle_for_cpu(cpu, c_idle.idle);

do_rest:

	cpu_pda(cpu)->pcurrent = c_idle.idle;

	start_rip = setup_trampoline();

	init_rsp = c_idle.idle->thread.rsp;
	per_cpu(init_tss, cpu).rsp0 = init_rsp;
	initial_code = start_secondary;
	clear_tsk_thread_flag(c_idle.idle, TIF_FORK);

	printk(KERN_INFO "Booting processor %d/%d APIC 0x%x\n", cpu,
		cpus_weight(cpu_present_map),
		apicid);

	/*
	 * This grunge runs the startup process for
	 * the targeted processor.
	 */

	atomic_set(&init_deasserted, 0);

	Dprintk("Setting warm reset code and vector.\n");

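	/*
	 * Shutdown status byte 0xA in CMOS register 0xF makes the BIOS
	 * take the warm-reset path through the vector stored at
	 * 0x467 (offset) / 0x469 (segment), which is pointed at the
	 * trampoline below.
	 */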
	CMOS_WRITE(0xa, 0xf);
	local_flush_tlb();
	Dprintk("1.\n");
	*((volatile unsigned short *) phys_to_virt(0x469)) = start_rip >> 4;
	Dprintk("2.\n");
	*((volatile unsigned short *) phys_to_virt(0x467)) = start_rip & 0xf;
	Dprintk("3.\n");

	/*
	 * Be paranoid about clearing APIC errors.
	 */
	apic_write(APIC_ESR, 0);
	apic_read(APIC_ESR);

	/*
	 * Status is now clean
	 */
	boot_error = 0;

	/*
	 * Starting actual IPI sequence...
	 */
	boot_error = wakeup_secondary_via_INIT(apicid, start_rip);

	if (!boot_error) {
		/*
		 * allow APs to start initializing.
		 */
		Dprintk("Before Callout %d.\n", cpu);
		cpu_set(cpu, cpu_callout_map);
		Dprintk("After Callout %d.\n", cpu);

		/*
		 * Wait 5s total for a response
		 */
		for (timeout = 0; timeout < 50000; timeout++) {
			if (cpu_isset(cpu, cpu_callin_map))
				break;	/* It has booted */
			udelay(100);
		}

		if (cpu_isset(cpu, cpu_callin_map)) {
			/* number CPUs logically, starting from 1 (BSP is 0) */
			Dprintk("CPU has booted.\n");
		} else {
			boot_error = 1;
			if (*((volatile unsigned char *)phys_to_virt(SMP_TRAMPOLINE_BASE))
					== 0xA5)
				/* trampoline started but...? */
				printk("Stuck ??\n");
			else
				/* trampoline code not run */
				printk("Not responding.\n");
#ifdef APIC_DEBUG
			inquire_remote_apic(apicid);
#endif
		}
	}
	if (boot_error) {
		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
		clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
		clear_node_cpumask(cpu); /* was set by numa_add_cpu */
		cpu_clear(cpu, cpu_present_map);
		cpu_clear(cpu, cpu_possible_map);
		x86_cpu_to_apicid[cpu] = BAD_APICID;
		x86_cpu_to_log_apicid[cpu] = BAD_APICID;
		return -EIO;
	}

	return 0;
}

cycles_t cacheflush_time;
unsigned long cache_decay_ticks;

/*
 * Cleanup possible dangling ends...
 */
static __cpuinit void smp_cleanup_boot(void)
{
	/*
	 * Paranoid: Set warm reset code and vector here back
	 * to default values.
	 */
	CMOS_WRITE(0, 0xf);

	/*
	 * Reset trampoline flag
	 */
	*((volatile int *) phys_to_virt(0x467)) = 0;
}

/*
 * Fall back to non SMP mode after errors.
 *
 * RED-PEN audit/test this more. I bet there is more state messed up here.
 */
static __init void disable_smp(void)
{
	cpu_present_map = cpumask_of_cpu(0);
	cpu_possible_map = cpumask_of_cpu(0);
	if (smp_found_config)
		phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
	else
		phys_cpu_present_map = physid_mask_of_physid(0);
	cpu_set(0, cpu_sibling_map[0]);
	cpu_set(0, cpu_core_map[0]);
}

#ifdef CONFIG_HOTPLUG_CPU

int additional_cpus __initdata = -1;

/*
 * cpu_possible_map should be static: it cannot change as CPUs are
 * onlined or offlined, because per-cpu data structures are allocated
 * by some modules at init time and don't expect that allocation to
 * change on CPU arrival/departure.
 * cpu_present_map, on the other hand, can change dynamically.
 * When CPU hotplug is not compiled in, we resort to the current
 * behaviour, which is cpu_possible == cpu_present.
 * - Ashok Raj
 *
 * Three ways to find out the number of additional hotplug CPUs:
 * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
 * - The user can override it with additional_cpus=NUM
 * - Otherwise don't reserve additional CPUs.
 * We do this because additional CPUs waste a lot of memory.
 * -AK
 */
__init void prefill_possible_map(void)
{
	int i;
	int possible;

	if (additional_cpus == -1) {
		if (disabled_cpus > 0)
			additional_cpus = disabled_cpus;
		else
			additional_cpus = 0;
	}
	possible = num_processors + additional_cpus;
	if (possible > NR_CPUS)
		possible = NR_CPUS;

	printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
		possible,
		max_t(int, possible - num_processors, 0));

	for (i = 0; i < possible; i++)
		cpu_set(i, cpu_possible_map);
}
#endif

/*
 * Various sanity checks.
 */
static int __init smp_sanity_check(unsigned max_cpus)
{
	if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
		printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
		       hard_smp_processor_id());
		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
	}

	/*
	 * If we couldn't find an SMP configuration at boot time,
	 * get out of here now!
	 */
	if (!smp_found_config) {
		printk(KERN_NOTICE "SMP motherboard not detected.\n");
		disable_smp();
		if (APIC_init_uniprocessor())
			printk(KERN_NOTICE "Local APIC not detected."
					   " Using dummy APIC emulation.\n");
		return -1;
	}

	/*
	 * Should not be necessary because the MP table should list the boot
	 * CPU too, but we do it for the sake of robustness anyway.
	 */
	if (!physid_isset(boot_cpu_id, phys_cpu_present_map)) {
		printk(KERN_NOTICE "weird, boot CPU (#%d) not listed by the BIOS.\n",
			boot_cpu_id);
		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
	}

	/*
	 * If we couldn't find a local APIC, then get out of here now!
	 */
	if (!cpu_has_apic) {
		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
			boot_cpu_id);
		printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
		nr_ioapics = 0;
		return -1;
	}

	/*
	 * If SMP should be disabled, then really disable it!
	 */
	if (!max_cpus) {
		printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
		nr_ioapics = 0;
		return -1;
	}

	return 0;
}

/*
 * Prepare for SMP bootup. The MP table or ACPI has been read
 * earlier. Just do some sanity checking here and enable APIC mode.
 */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
	nmi_watchdog_default();
	current_cpu_data = boot_cpu_data;
	current_thread_info()->cpu = 0;  /* needed? */
	set_cpu_sibling_map(0);

	if (smp_sanity_check(max_cpus) < 0) {
		printk(KERN_INFO "SMP disabled\n");
		disable_smp();
		return;
	}

	/*
	 * Switch from PIC to APIC mode.
	 */
	connect_bsp_APIC();
	setup_local_APIC();

	if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) {
		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
		      GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
		/* Or can we switch back to PIC here? */
	}

	/*
	 * Now start the IO-APICs
	 */
	if (!skip_ioapic_setup && nr_ioapics)
		setup_IO_APIC();
	else
		nr_ioapics = 0;

	/*
	 * Set up local APIC timer on boot CPU.
	 */

	setup_boot_APIC_clock();
}

/*
 * Early setup to make printk work.
 */
void __init smp_prepare_boot_cpu(void)
{
	int me = smp_processor_id();
	cpu_set(me, cpu_online_map);
	cpu_set(me, cpu_callout_map);
	per_cpu(cpu_state, me) = CPU_ONLINE;
}

/*
 * Entry point to boot a CPU.
 */
int __cpuinit __cpu_up(unsigned int cpu)
{
	int err;
	int apicid = cpu_present_to_apicid(cpu);

	WARN_ON(irqs_disabled());

	Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu);

	if (apicid == BAD_APICID || apicid == boot_cpu_id ||
	    !physid_isset(apicid, phys_cpu_present_map)) {
		printk("__cpu_up: bad cpu %d\n", cpu);
		return -EINVAL;
	}

	/*
	 * Already booted CPU?
	 */
	if (cpu_isset(cpu, cpu_callin_map)) {
		Dprintk("do_boot_cpu %d Already started\n", cpu);
		return -ENOSYS;
	}

	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
	/* Boot it! */
	err = do_boot_cpu(cpu, apicid);
	if (err < 0) {
		Dprintk("do_boot_cpu failed %d\n", err);
		return err;
	}

	/* Unleash the CPU! */
	Dprintk("waiting for cpu %d\n", cpu);

	while (!cpu_isset(cpu, cpu_online_map))
		cpu_relax();
	err = 0;

	return err;
}

/*
 * Finish the SMP boot.
 */
void __init smp_cpus_done(unsigned int max_cpus)
{
	smp_cleanup_boot();

#ifdef CONFIG_X86_IO_APIC
	setup_ioapic_dest();
#endif

	time_init_gtod();

	check_nmi_watchdog();
}

#ifdef CONFIG_HOTPLUG_CPU

static void remove_siblinginfo(int cpu)
{
	int sibling;
	struct cpuinfo_x86 *c = cpu_data;

	for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
		cpu_clear(cpu, cpu_core_map[sibling]);
		/*
		 * last thread sibling in this cpu core going down
		 */
		if (cpus_weight(cpu_sibling_map[cpu]) == 1)
			c[sibling].booted_cores--;
	}

	for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
		cpu_clear(cpu, cpu_sibling_map[sibling]);
	cpus_clear(cpu_sibling_map[cpu]);
	cpus_clear(cpu_core_map[cpu]);
	phys_proc_id[cpu] = BAD_APICID;
	cpu_core_id[cpu] = BAD_APICID;
	cpu_clear(cpu, cpu_sibling_setup_map);
}

void remove_cpu_from_maps(void)
{
	int cpu = smp_processor_id();

	cpu_clear(cpu, cpu_callout_map);
	cpu_clear(cpu, cpu_callin_map);
	clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
	clear_node_cpumask(cpu);
}

int __cpu_disable(void)
{
	int cpu = smp_processor_id();

	/*
	 * Perhaps use cpufreq to drop frequency, but that could go
	 * into generic code.
	 *
	 * We won't take down the boot processor on i386 due to some
	 * interrupts only being able to be serviced by the BSP.
	 * Especially so if we're not using an IOAPIC	-zwane
	 */
	if (cpu == 0)
		return -EBUSY;

	clear_local_APIC();

	/*
	 * HACK:
	 * Allow any queued timer interrupts to get serviced
	 * This is only a temporary solution until we cleanup
	 * fixup_irqs as we do for IA64.
	 */
	local_irq_enable();
	mdelay(1);

	local_irq_disable();
	remove_siblinginfo(cpu);

	/* It's now safe to remove this processor from the online map */
	cpu_clear(cpu, cpu_online_map);
	remove_cpu_from_maps();
	fixup_irqs(cpu_online_map);
	return 0;
}

void __cpu_die(unsigned int cpu)
{
	/* We don't do anything here: idle task is faking death itself. */
	unsigned int i;

	for (i = 0; i < 10; i++) {
		/* They ack this in play_dead by setting CPU_DEAD */
		if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
			printk("CPU %d is now offline\n", cpu);
			return;
		}
		msleep(100);
	}
	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
}

static __init int setup_additional_cpus(char *s)
{
	return get_option(&s, &additional_cpus);
}
__setup("additional_cpus=", setup_additional_cpus);

#else /* ... !CONFIG_HOTPLUG_CPU */

int __cpu_disable(void)
{
	return -ENOSYS;
}

void __cpu_die(unsigned int cpu)
{
	/* We said "no" in __cpu_disable */
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */