/*
 * ppc64 code to implement the kexec_file_load syscall
 *
 * Copyright (C) 2004  Adam Litke (agl@us.ibm.com)
 * Copyright (C) 2004  IBM Corp.
 * Copyright (C) 2004,2005  Milton D Miller II, IBM Corporation
 * Copyright (C) 2005  R Sharada (sharada@in.ibm.com)
 * Copyright (C) 2006  Mohan Kumar M (mohan@in.ibm.com)
 * Copyright (C) 2016  IBM Corporation
 *
 * Based on kexec-tools' kexec-elf-ppc64.c, fs2dt.c.
 * Heavily modified for the kernel by
 * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation (version 2 of the License).
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
| 24 | |
| 25 | #include <linux/slab.h> |
| 26 | #include <linux/kexec.h> |
| 27 | #include <linux/memblock.h> |
| 28 | #include <linux/of_fdt.h> |
| 29 | #include <linux/libfdt.h> |
Thiago Jung Bauermann | 467d278 | 2016-12-19 16:22:32 -0800 | [diff] [blame] | 30 | #include <asm/ima.h> |
Thiago Jung Bauermann | a045828 | 2016-11-29 23:45:51 +1100 | [diff] [blame] | 31 | |
| 32 | #define SLAVE_CODE_SIZE 256 |
| 33 | |
| 34 | static struct kexec_file_ops *kexec_file_loaders[] = { |
| 35 | &kexec_elf64_ops, |
| 36 | }; |
| 37 | |
| 38 | int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, |
| 39 | unsigned long buf_len) |
| 40 | { |
| 41 | int i, ret = -ENOEXEC; |
| 42 | struct kexec_file_ops *fops; |
| 43 | |
| 44 | /* We don't support crash kernels yet. */ |
| 45 | if (image->type == KEXEC_TYPE_CRASH) |
| 46 | return -ENOTSUPP; |
| 47 | |
| 48 | for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) { |
| 49 | fops = kexec_file_loaders[i]; |
| 50 | if (!fops || !fops->probe) |
| 51 | continue; |
| 52 | |
| 53 | ret = fops->probe(buf, buf_len); |
| 54 | if (!ret) { |
| 55 | image->fops = fops; |
| 56 | return ret; |
| 57 | } |
| 58 | } |
| 59 | |
| 60 | return ret; |
| 61 | } |
| 62 | |
| 63 | void *arch_kexec_kernel_image_load(struct kimage *image) |
| 64 | { |
| 65 | if (!image->fops || !image->fops->load) |
| 66 | return ERR_PTR(-ENOEXEC); |
| 67 | |
| 68 | return image->fops->load(image, image->kernel_buf, |
| 69 | image->kernel_buf_len, image->initrd_buf, |
| 70 | image->initrd_buf_len, image->cmdline_buf, |
| 71 | image->cmdline_buf_len); |
| 72 | } |
| 73 | |
| 74 | int arch_kimage_file_post_load_cleanup(struct kimage *image) |
| 75 | { |
| 76 | if (!image->fops || !image->fops->cleanup) |
| 77 | return 0; |
| 78 | |
| 79 | return image->fops->cleanup(image->image_loader_data); |
| 80 | } |
| 81 | |
| 82 | /** |
| 83 | * arch_kexec_walk_mem - call func(data) for each unreserved memory block |
| 84 | * @kbuf: Context info for the search. Also passed to @func. |
| 85 | * @func: Function to call for each memory block. |
| 86 | * |
| 87 | * This function is used by kexec_add_buffer and kexec_locate_mem_hole |
| 88 | * to find unreserved memory to load kexec segments into. |
| 89 | * |
| 90 | * Return: The memory walk will stop when func returns a non-zero value |
| 91 | * and that value will be returned. If all free regions are visited without |
| 92 | * func returning non-zero, then zero will be returned. |
| 93 | */ |
| 94 | int arch_kexec_walk_mem(struct kexec_buf *kbuf, int (*func)(u64, u64, void *)) |
| 95 | { |
| 96 | int ret = 0; |
| 97 | u64 i; |
| 98 | phys_addr_t mstart, mend; |
| 99 | |
| 100 | if (kbuf->top_down) { |
| 101 | for_each_free_mem_range_reverse(i, NUMA_NO_NODE, 0, |
| 102 | &mstart, &mend, NULL) { |
| 103 | /* |
| 104 | * In memblock, end points to the first byte after the |
| 105 | * range while in kexec, end points to the last byte |
| 106 | * in the range. |
| 107 | */ |
| 108 | ret = func(mstart, mend - 1, kbuf); |
| 109 | if (ret) |
| 110 | break; |
| 111 | } |
| 112 | } else { |
| 113 | for_each_free_mem_range(i, NUMA_NO_NODE, 0, &mstart, &mend, |
| 114 | NULL) { |
| 115 | /* |
| 116 | * In memblock, end points to the first byte after the |
| 117 | * range while in kexec, end points to the last byte |
| 118 | * in the range. |
| 119 | */ |
| 120 | ret = func(mstart, mend - 1, kbuf); |
| 121 | if (ret) |
| 122 | break; |
| 123 | } |
| 124 | } |
| 125 | |
| 126 | return ret; |
| 127 | } |
| 128 | |
| 129 | /** |
| 130 | * setup_purgatory - initialize the purgatory's global variables |
| 131 | * @image: kexec image. |
| 132 | * @slave_code: Slave code for the purgatory. |
| 133 | * @fdt: Flattened device tree for the next kernel. |
| 134 | * @kernel_load_addr: Address where the kernel is loaded. |
| 135 | * @fdt_load_addr: Address where the flattened device tree is loaded. |
| 136 | * |
| 137 | * Return: 0 on success, or negative errno on error. |
| 138 | */ |
| 139 | int setup_purgatory(struct kimage *image, const void *slave_code, |
| 140 | const void *fdt, unsigned long kernel_load_addr, |
| 141 | unsigned long fdt_load_addr) |
| 142 | { |
| 143 | unsigned int *slave_code_buf, master_entry; |
| 144 | int ret; |
| 145 | |
| 146 | slave_code_buf = kmalloc(SLAVE_CODE_SIZE, GFP_KERNEL); |
| 147 | if (!slave_code_buf) |
| 148 | return -ENOMEM; |
| 149 | |
| 150 | /* Get the slave code from the new kernel and put it in purgatory. */ |
| 151 | ret = kexec_purgatory_get_set_symbol(image, "purgatory_start", |
| 152 | slave_code_buf, SLAVE_CODE_SIZE, |
| 153 | true); |
| 154 | if (ret) { |
| 155 | kfree(slave_code_buf); |
| 156 | return ret; |
| 157 | } |
| 158 | |
| 159 | master_entry = slave_code_buf[0]; |
| 160 | memcpy(slave_code_buf, slave_code, SLAVE_CODE_SIZE); |
| 161 | slave_code_buf[0] = master_entry; |
| 162 | ret = kexec_purgatory_get_set_symbol(image, "purgatory_start", |
| 163 | slave_code_buf, SLAVE_CODE_SIZE, |
| 164 | false); |
| 165 | kfree(slave_code_buf); |
| 166 | |
| 167 | ret = kexec_purgatory_get_set_symbol(image, "kernel", &kernel_load_addr, |
| 168 | sizeof(kernel_load_addr), false); |
| 169 | if (ret) |
| 170 | return ret; |
| 171 | ret = kexec_purgatory_get_set_symbol(image, "dt_offset", &fdt_load_addr, |
| 172 | sizeof(fdt_load_addr), false); |
| 173 | if (ret) |
| 174 | return ret; |
| 175 | |
| 176 | return 0; |
| 177 | } |
| 178 | |
| 179 | /** |
| 180 | * delete_fdt_mem_rsv - delete memory reservation with given address and size |
| 181 | * |
| 182 | * Return: 0 on success, or negative errno on error. |
| 183 | */ |
Thiago Jung Bauermann | 467d278 | 2016-12-19 16:22:32 -0800 | [diff] [blame] | 184 | int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size) |
Thiago Jung Bauermann | a045828 | 2016-11-29 23:45:51 +1100 | [diff] [blame] | 185 | { |
| 186 | int i, ret, num_rsvs = fdt_num_mem_rsv(fdt); |
| 187 | |
| 188 | for (i = 0; i < num_rsvs; i++) { |
| 189 | uint64_t rsv_start, rsv_size; |
| 190 | |
| 191 | ret = fdt_get_mem_rsv(fdt, i, &rsv_start, &rsv_size); |
| 192 | if (ret) { |
| 193 | pr_err("Malformed device tree.\n"); |
| 194 | return -EINVAL; |
| 195 | } |
| 196 | |
| 197 | if (rsv_start == start && rsv_size == size) { |
| 198 | ret = fdt_del_mem_rsv(fdt, i); |
| 199 | if (ret) { |
| 200 | pr_err("Error deleting device tree reservation.\n"); |
| 201 | return -EINVAL; |
| 202 | } |
| 203 | |
| 204 | return 0; |
| 205 | } |
| 206 | } |
| 207 | |
| 208 | return -ENOENT; |
| 209 | } |
| 210 | |
| 211 | /* |
| 212 | * setup_new_fdt - modify /chosen and memory reservation for the next kernel |
Thiago Jung Bauermann | ab6b1d1 | 2016-12-19 16:22:45 -0800 | [diff] [blame] | 213 | * @image: kexec image being loaded. |
Thiago Jung Bauermann | a045828 | 2016-11-29 23:45:51 +1100 | [diff] [blame] | 214 | * @fdt: Flattened device tree for the next kernel. |
| 215 | * @initrd_load_addr: Address where the next initrd will be loaded. |
| 216 | * @initrd_len: Size of the next initrd, or 0 if there will be none. |
| 217 | * @cmdline: Command line for the next kernel, or NULL if there will |
| 218 | * be none. |
| 219 | * |
| 220 | * Return: 0 on success, or negative errno on error. |
| 221 | */ |
Thiago Jung Bauermann | ab6b1d1 | 2016-12-19 16:22:45 -0800 | [diff] [blame] | 222 | int setup_new_fdt(const struct kimage *image, void *fdt, |
| 223 | unsigned long initrd_load_addr, unsigned long initrd_len, |
| 224 | const char *cmdline) |
Thiago Jung Bauermann | a045828 | 2016-11-29 23:45:51 +1100 | [diff] [blame] | 225 | { |
| 226 | int ret, chosen_node; |
| 227 | const void *prop; |
| 228 | |
| 229 | /* Remove memory reservation for the current device tree. */ |
| 230 | ret = delete_fdt_mem_rsv(fdt, __pa(initial_boot_params), |
| 231 | fdt_totalsize(initial_boot_params)); |
| 232 | if (ret == 0) |
| 233 | pr_debug("Removed old device tree reservation.\n"); |
| 234 | else if (ret != -ENOENT) |
| 235 | return ret; |
| 236 | |
| 237 | chosen_node = fdt_path_offset(fdt, "/chosen"); |
| 238 | if (chosen_node == -FDT_ERR_NOTFOUND) { |
| 239 | chosen_node = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), |
| 240 | "chosen"); |
| 241 | if (chosen_node < 0) { |
| 242 | pr_err("Error creating /chosen.\n"); |
| 243 | return -EINVAL; |
| 244 | } |
| 245 | } else if (chosen_node < 0) { |
| 246 | pr_err("Malformed device tree: error reading /chosen.\n"); |
| 247 | return -EINVAL; |
| 248 | } |
| 249 | |
| 250 | /* Did we boot using an initrd? */ |
| 251 | prop = fdt_getprop(fdt, chosen_node, "linux,initrd-start", NULL); |
| 252 | if (prop) { |
| 253 | uint64_t tmp_start, tmp_end, tmp_size; |
| 254 | |
| 255 | tmp_start = fdt64_to_cpu(*((const fdt64_t *) prop)); |
| 256 | |
| 257 | prop = fdt_getprop(fdt, chosen_node, "linux,initrd-end", NULL); |
| 258 | if (!prop) { |
| 259 | pr_err("Malformed device tree.\n"); |
| 260 | return -EINVAL; |
| 261 | } |
| 262 | tmp_end = fdt64_to_cpu(*((const fdt64_t *) prop)); |
| 263 | |
| 264 | /* |
| 265 | * kexec reserves exact initrd size, while firmware may |
| 266 | * reserve a multiple of PAGE_SIZE, so check for both. |
| 267 | */ |
| 268 | tmp_size = tmp_end - tmp_start; |
| 269 | ret = delete_fdt_mem_rsv(fdt, tmp_start, tmp_size); |
| 270 | if (ret == -ENOENT) |
| 271 | ret = delete_fdt_mem_rsv(fdt, tmp_start, |
| 272 | round_up(tmp_size, PAGE_SIZE)); |
| 273 | if (ret == 0) |
| 274 | pr_debug("Removed old initrd reservation.\n"); |
| 275 | else if (ret != -ENOENT) |
| 276 | return ret; |
| 277 | |
| 278 | /* If there's no new initrd, delete the old initrd's info. */ |
| 279 | if (initrd_len == 0) { |
| 280 | ret = fdt_delprop(fdt, chosen_node, |
| 281 | "linux,initrd-start"); |
| 282 | if (ret) { |
| 283 | pr_err("Error deleting linux,initrd-start.\n"); |
| 284 | return -EINVAL; |
| 285 | } |
| 286 | |
| 287 | ret = fdt_delprop(fdt, chosen_node, "linux,initrd-end"); |
| 288 | if (ret) { |
| 289 | pr_err("Error deleting linux,initrd-end.\n"); |
| 290 | return -EINVAL; |
| 291 | } |
| 292 | } |
| 293 | } |
| 294 | |
| 295 | if (initrd_len) { |
| 296 | ret = fdt_setprop_u64(fdt, chosen_node, |
| 297 | "linux,initrd-start", |
| 298 | initrd_load_addr); |
| 299 | if (ret < 0) { |
| 300 | pr_err("Error setting up the new device tree.\n"); |
| 301 | return -EINVAL; |
| 302 | } |
| 303 | |
| 304 | /* initrd-end is the first address after the initrd image. */ |
| 305 | ret = fdt_setprop_u64(fdt, chosen_node, "linux,initrd-end", |
| 306 | initrd_load_addr + initrd_len); |
| 307 | if (ret < 0) { |
| 308 | pr_err("Error setting up the new device tree.\n"); |
| 309 | return -EINVAL; |
| 310 | } |
| 311 | |
| 312 | ret = fdt_add_mem_rsv(fdt, initrd_load_addr, initrd_len); |
| 313 | if (ret) { |
| 314 | pr_err("Error reserving initrd memory: %s\n", |
| 315 | fdt_strerror(ret)); |
| 316 | return -EINVAL; |
| 317 | } |
| 318 | } |
| 319 | |
| 320 | if (cmdline != NULL) { |
| 321 | ret = fdt_setprop_string(fdt, chosen_node, "bootargs", cmdline); |
| 322 | if (ret < 0) { |
| 323 | pr_err("Error setting up the new device tree.\n"); |
| 324 | return -EINVAL; |
| 325 | } |
| 326 | } else { |
| 327 | ret = fdt_delprop(fdt, chosen_node, "bootargs"); |
| 328 | if (ret && ret != -FDT_ERR_NOTFOUND) { |
| 329 | pr_err("Error deleting bootargs.\n"); |
| 330 | return -EINVAL; |
| 331 | } |
| 332 | } |
| 333 | |
Thiago Jung Bauermann | ab6b1d1 | 2016-12-19 16:22:45 -0800 | [diff] [blame] | 334 | ret = setup_ima_buffer(image, fdt, chosen_node); |
| 335 | if (ret) { |
| 336 | pr_err("Error setting up the new device tree.\n"); |
| 337 | return ret; |
| 338 | } |
Thiago Jung Bauermann | 467d278 | 2016-12-19 16:22:32 -0800 | [diff] [blame] | 339 | |
Thiago Jung Bauermann | a045828 | 2016-11-29 23:45:51 +1100 | [diff] [blame] | 340 | ret = fdt_setprop(fdt, chosen_node, "linux,booted-from-kexec", NULL, 0); |
| 341 | if (ret) { |
| 342 | pr_err("Error setting up the new device tree.\n"); |
| 343 | return -EINVAL; |
| 344 | } |
| 345 | |
| 346 | return 0; |
| 347 | } |