Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Intel specific MCE features. |
| 3 | * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca> |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 4 | * Copyright (C) 2008, 2009 Intel Corporation |
| 5 | * Author: Andi Kleen |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 6 | */ |
| 7 | |
Tejun Heo | 5a0e3ad | 2010-03-24 17:04:11 +0900 | [diff] [blame] | 8 | #include <linux/gfp.h> |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 9 | #include <linux/init.h> |
| 10 | #include <linux/interrupt.h> |
| 11 | #include <linux/percpu.h> |
Alexey Dobriyan | d43c36d | 2009-10-07 17:09:06 +0400 | [diff] [blame] | 12 | #include <linux/sched.h> |
H. Peter Anvin | 1bf7b31 | 2009-06-17 08:31:15 -0700 | [diff] [blame] | 13 | #include <asm/apic.h> |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 14 | #include <asm/processor.h> |
| 15 | #include <asm/msr.h> |
| 16 | #include <asm/mce.h> |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 17 | |
/*
 * Support for Intel Corrected Machine Check Interrupts (CMCI). This allows
 * the CPU to raise an interrupt when a corrected machine check happens.
 * Normally we pick those up using a regular polling timer.
 * Also supports reliable discovery of shared banks.
 */
| 24 | |
/*
 * Per-CPU bitmap of the MCE banks this CPU has claimed for CMCI.
 * Bits are set in cmci_discover() when the CMCI enable bit sticks and
 * cleared again in cmci_clear().
 */
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);

/*
 * cmci_discover_lock protects against parallel discovery attempts
 * which could race against each other. It is also held by cmci_clear()
 * while ownership of banks is being given up.
 */
static DEFINE_SPINLOCK(cmci_discover_lock);

/*
 * Value written into the threshold field of MCi_CTL2 when CMCI is
 * enabled for a bank (see cmci_discover()).
 */
#define CMCI_THRESHOLD 1
| 34 | |
H. Peter Anvin | df20e2e | 2009-02-24 13:19:02 -0800 | [diff] [blame] | 35 | static int cmci_supported(int *banks) |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 36 | { |
| 37 | u64 cap; |
| 38 | |
Hidetoshi Seto | 62fdac5 | 2009-06-11 16:06:07 +0900 | [diff] [blame] | 39 | if (mce_cmci_disabled || mce_ignore_ce) |
| 40 | return 0; |
| 41 | |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 42 | /* |
| 43 | * Vendor check is not strictly needed, but the initial |
| 44 | * initialization is vendor keyed and this |
| 45 | * makes sure none of the backdoors are entered otherwise. |
| 46 | */ |
| 47 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) |
| 48 | return 0; |
| 49 | if (!cpu_has_apic || lapic_get_maxlvt() < 6) |
| 50 | return 0; |
| 51 | rdmsrl(MSR_IA32_MCG_CAP, cap); |
| 52 | *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff); |
| 53 | return !!(cap & MCG_CMCI_P); |
| 54 | } |
| 55 | |
| 56 | /* |
| 57 | * The interrupt handler. This is called on every event. |
| 58 | * Just call the poller directly to log any events. |
| 59 | * This could in theory increase the threshold under high load, |
| 60 | * but doesn't for now. |
| 61 | */ |
| 62 | static void intel_threshold_interrupt(void) |
| 63 | { |
| 64 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); |
Andi Kleen | 9ff36ee | 2009-05-27 21:56:58 +0200 | [diff] [blame] | 65 | mce_notify_irq(); |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 66 | } |
| 67 | |
| 68 | static void print_update(char *type, int *hdr, int num) |
| 69 | { |
| 70 | if (*hdr == 0) |
| 71 | printk(KERN_INFO "CPU %d MCA banks", smp_processor_id()); |
| 72 | *hdr = 1; |
| 73 | printk(KERN_CONT " %s:%d", type, num); |
| 74 | } |
| 75 | |
| 76 | /* |
| 77 | * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks |
| 78 | * on this CPU. Use the algorithm recommended in the SDM to discover shared |
| 79 | * banks. |
| 80 | */ |
H. Peter Anvin | df20e2e | 2009-02-24 13:19:02 -0800 | [diff] [blame] | 81 | static void cmci_discover(int banks, int boot) |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 82 | { |
| 83 | unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); |
Hidetoshi Seto | e529992 | 2009-05-08 17:28:40 +0900 | [diff] [blame] | 84 | unsigned long flags; |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 85 | int hdr = 0; |
| 86 | int i; |
| 87 | |
Hidetoshi Seto | e529992 | 2009-05-08 17:28:40 +0900 | [diff] [blame] | 88 | spin_lock_irqsave(&cmci_discover_lock, flags); |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 89 | for (i = 0; i < banks; i++) { |
| 90 | u64 val; |
| 91 | |
| 92 | if (test_bit(i, owned)) |
| 93 | continue; |
| 94 | |
Andi Kleen | a2d32bc | 2009-07-09 00:31:44 +0200 | [diff] [blame] | 95 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 96 | |
| 97 | /* Already owned by someone else? */ |
Huang Ying | 1f9a0bd | 2010-06-08 14:09:08 +0800 | [diff] [blame] | 98 | if (val & MCI_CTL2_CMCI_EN) { |
Mike Travis | 10fb7f1 | 2010-03-05 13:10:36 -0600 | [diff] [blame] | 99 | if (test_and_clear_bit(i, owned) && !boot) |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 100 | print_update("SHD", &hdr, i); |
| 101 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); |
| 102 | continue; |
| 103 | } |
| 104 | |
Huang Ying | 3c41758 | 2010-06-08 14:09:10 +0800 | [diff] [blame] | 105 | val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; |
Huang Ying | 1f9a0bd | 2010-06-08 14:09:08 +0800 | [diff] [blame] | 106 | val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD; |
Andi Kleen | a2d32bc | 2009-07-09 00:31:44 +0200 | [diff] [blame] | 107 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
| 108 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 109 | |
| 110 | /* Did the enable bit stick? -- the bank supports CMCI */ |
Huang Ying | 1f9a0bd | 2010-06-08 14:09:08 +0800 | [diff] [blame] | 111 | if (val & MCI_CTL2_CMCI_EN) { |
Mike Travis | 10fb7f1 | 2010-03-05 13:10:36 -0600 | [diff] [blame] | 112 | if (!test_and_set_bit(i, owned) && !boot) |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 113 | print_update("CMCI", &hdr, i); |
| 114 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); |
| 115 | } else { |
| 116 | WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); |
| 117 | } |
| 118 | } |
Hidetoshi Seto | e529992 | 2009-05-08 17:28:40 +0900 | [diff] [blame] | 119 | spin_unlock_irqrestore(&cmci_discover_lock, flags); |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 120 | if (hdr) |
| 121 | printk(KERN_CONT "\n"); |
| 122 | } |
| 123 | |
| 124 | /* |
| 125 | * Just in case we missed an event during initialization check |
| 126 | * all the CMCI owned banks. |
| 127 | */ |
H. Peter Anvin | df20e2e | 2009-02-24 13:19:02 -0800 | [diff] [blame] | 128 | void cmci_recheck(void) |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 129 | { |
| 130 | unsigned long flags; |
| 131 | int banks; |
| 132 | |
Tejun Heo | 7b543a5 | 2010-12-18 16:30:05 +0100 | [diff] [blame] | 133 | if (!mce_available(__this_cpu_ptr(&cpu_info)) || !cmci_supported(&banks)) |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 134 | return; |
| 135 | local_irq_save(flags); |
| 136 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); |
| 137 | local_irq_restore(flags); |
| 138 | } |
| 139 | |
| 140 | /* |
| 141 | * Disable CMCI on this CPU for all banks it owns when it goes down. |
| 142 | * This allows other CPUs to claim the banks on rediscovery. |
| 143 | */ |
H. Peter Anvin | df20e2e | 2009-02-24 13:19:02 -0800 | [diff] [blame] | 144 | void cmci_clear(void) |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 145 | { |
Hidetoshi Seto | e529992 | 2009-05-08 17:28:40 +0900 | [diff] [blame] | 146 | unsigned long flags; |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 147 | int i; |
| 148 | int banks; |
| 149 | u64 val; |
| 150 | |
| 151 | if (!cmci_supported(&banks)) |
| 152 | return; |
Hidetoshi Seto | e529992 | 2009-05-08 17:28:40 +0900 | [diff] [blame] | 153 | spin_lock_irqsave(&cmci_discover_lock, flags); |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 154 | for (i = 0; i < banks; i++) { |
| 155 | if (!test_bit(i, __get_cpu_var(mce_banks_owned))) |
| 156 | continue; |
| 157 | /* Disable CMCI */ |
Andi Kleen | a2d32bc | 2009-07-09 00:31:44 +0200 | [diff] [blame] | 158 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
Huang Ying | 1f9a0bd | 2010-06-08 14:09:08 +0800 | [diff] [blame] | 159 | val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK); |
Andi Kleen | a2d32bc | 2009-07-09 00:31:44 +0200 | [diff] [blame] | 160 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 161 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); |
| 162 | } |
Hidetoshi Seto | e529992 | 2009-05-08 17:28:40 +0900 | [diff] [blame] | 163 | spin_unlock_irqrestore(&cmci_discover_lock, flags); |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 164 | } |
| 165 | |
| 166 | /* |
| 167 | * After a CPU went down cycle through all the others and rediscover |
| 168 | * Must run in process context. |
| 169 | */ |
H. Peter Anvin | df20e2e | 2009-02-24 13:19:02 -0800 | [diff] [blame] | 170 | void cmci_rediscover(int dying) |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 171 | { |
| 172 | int banks; |
| 173 | int cpu; |
| 174 | cpumask_var_t old; |
| 175 | |
| 176 | if (!cmci_supported(&banks)) |
| 177 | return; |
| 178 | if (!alloc_cpumask_var(&old, GFP_KERNEL)) |
| 179 | return; |
| 180 | cpumask_copy(old, ¤t->cpus_allowed); |
| 181 | |
Hidetoshi Seto | 61a021a | 2009-04-14 17:09:04 +0900 | [diff] [blame] | 182 | for_each_online_cpu(cpu) { |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 183 | if (cpu == dying) |
| 184 | continue; |
Rusty Russell | 4f06289 | 2009-03-13 14:49:54 +1030 | [diff] [blame] | 185 | if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 186 | continue; |
| 187 | /* Recheck banks in case CPUs don't all have the same */ |
| 188 | if (cmci_supported(&banks)) |
| 189 | cmci_discover(banks, 0); |
| 190 | } |
| 191 | |
| 192 | set_cpus_allowed_ptr(current, old); |
| 193 | free_cpumask_var(old); |
| 194 | } |
| 195 | |
/*
 * Reenable CMCI on this CPU in case a CPU down failed: rerun bank
 * discovery (with the non-boot flag) if CMCI is supported.
 */
void cmci_reenable(void)
{
	int nr_banks;

	if (!cmci_supported(&nr_banks))
		return;

	cmci_discover(nr_banks, 0);
}
| 205 | |
Hidetoshi Seto | 514ec49 | 2009-03-16 17:07:33 +0900 | [diff] [blame] | 206 | static void intel_init_cmci(void) |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 207 | { |
| 208 | int banks; |
| 209 | |
| 210 | if (!cmci_supported(&banks)) |
| 211 | return; |
| 212 | |
| 213 | mce_threshold_vector = intel_threshold_interrupt; |
| 214 | cmci_discover(banks, 1); |
| 215 | /* |
| 216 | * For CPU #0 this runs with still disabled APIC, but that's |
| 217 | * ok because only the vector is set up. We still do another |
| 218 | * check for the banks later for CPU #0 just to make sure |
| 219 | * to not miss any events. |
| 220 | */ |
| 221 | apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED); |
| 222 | cmci_recheck(); |
| 223 | } |
| 224 | |
/*
 * Entry point for the Intel-specific MCE features: calls
 * intel_init_thermal(c) and then intel_init_cmci().
 */
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
	intel_init_thermal(c);
	intel_init_cmci();
}