/*
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Thanks to Ben LaHaise for precious feedback.
 */

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>

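/*
 * cpa_lock serializes all page attribute changes and protects df_list,
 * which collects the page table pages that become unused when a split
 * 2/4MB region is reverted to a large page.  Pages on df_list are only
 * freed in global_flush_tlb(), after the TLBs have been flushed on all
 * CPUs.
 */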
static DEFINE_SPINLOCK(cpa_lock);
static struct list_head df_list = LIST_HEAD_INIT(df_list);

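/*
 * Walk the kernel page tables for a virtual address and return a
 * pointer to its pte.  For a 2/4MB mapping the pmd entry itself is
 * returned, cast to a pte pointer.  Returns NULL if the address is
 * not mapped.
 */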
pte_t *lookup_address(unsigned long address)
{
	pgd_t *pgd = pgd_offset_k(address);
	pud_t *pud;
	pmd_t *pmd;
	if (pgd_none(*pgd))
		return NULL;
	pud = pud_offset(pgd, address);
	if (pud_none(*pud))
		return NULL;
	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		return NULL;
	if (pmd_large(*pmd))
		return (pte_t *)pmd;
	return pte_offset_kernel(pmd, address);
}

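/*
 * Allocate a page table page and fill it with 4k ptes covering the
 * 2/4MB region containing 'address'.  The pte for 'address' itself
 * gets 'prot', all others get 'ref_prot'.  Called with cpa_lock held;
 * the lock is dropped around the allocation because a GFP_KERNEL
 * allocation may sleep.
 */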
static struct page *split_large_page(unsigned long address, pgprot_t prot,
					pgprot_t ref_prot)
{
	int i;
	unsigned long addr;
	struct page *base;
	pte_t *pbase;

	spin_unlock_irq(&cpa_lock);
	base = alloc_pages(GFP_KERNEL, 0);
	spin_lock_irq(&cpa_lock);
	if (!base)
		return NULL;

	/*
	 * page_private is used to track the number of entries in
	 * the page table page that have non-standard attributes.
	 */
	SetPagePrivate(base);
	page_private(base) = 0;

	address = __pa(address);
	addr = address & LARGE_PAGE_MASK;
	pbase = (pte_t *)page_address(base);
	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT,
					   addr == address ? prot : ref_prot));
	}
	return base;
}

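/*
 * Run on every CPU via on_each_cpu().  Changing the caching attributes
 * of a mapping requires the caches to be written back and invalidated
 * (hence wbinvd) as well as a full TLB flush, so no stale translations
 * for the changed pages survive.
 */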
static void flush_kernel_map(void *dummy)
{
	/* Could use CLFLUSH here if the CPU supports it (Hammer, P4) */
	if (boot_cpu_data.x86_model >= 4)
		wbinvd();
	/*
	 * Flush everything to work around errata in early Athlons
	 * regarding large page flushing.
	 */
	__flush_tlb_all();
}

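/*
 * Set a pmd/pte entry in the kernel mapping.  On PAE kernels
 * (PTRS_PER_PMD > 1) the kernel pmds are shared, so updating init_mm
 * is enough.  Without PAE each pgd carries its own copy of the kernel
 * pmd entries, so the change must be propagated to every pgd on
 * pgd_list.
 */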
static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
	struct page *page;
	unsigned long flags;

	set_pte_atomic(kpte, pte);	/* change init_mm */
	if (PTRS_PER_PMD > 1)
		return;

	spin_lock_irqsave(&pgd_lock, flags);
	for (page = pgd_list; page; page = (struct page *)page->index) {
		pgd_t *pgd;
		pud_t *pud;
		pmd_t *pmd;
		pgd = (pgd_t *)page_address(page) + pgd_index(address);
		pud = pud_offset(pgd, address);
		pmd = pmd_offset(pud, address);
		set_pte_atomic((pte_t *)pmd, pte);
	}
	spin_unlock_irqrestore(&pgd_lock, flags);
}

/*
 * No more special protections in this 2/4MB area - revert to a
 * large page again.
 */
static inline void revert_page(struct page *kpte_page, unsigned long address)
{
	pgprot_t ref_prot;
	pte_t *linear;

	ref_prot =
	((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
		? PAGE_KERNEL_LARGE_EXEC : PAGE_KERNEL_LARGE;

	linear = (pte_t *)
		pmd_offset(pud_offset(pgd_offset_k(address), address), address);
	set_pmd_pte(linear, address,
		    pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT,
			    ref_prot));
}

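/*
 * Change the attributes of a single kernel page.  page_private() of
 * the page table page counts how many of its ptes carry non-standard
 * protections: it is incremented when a pte is given special
 * attributes and decremented when the pte is reset to PAGE_KERNEL.
 * When the count drops to zero, and the page table page was created
 * by split_large_page (i.e. is not reserved), the 2/4MB region is
 * reverted to a large page and the pte page is queued on df_list to
 * be freed after the next global flush.
 *
 * Called with cpa_lock held.
 */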
static int
__change_page_attr(struct page *page, pgprot_t prot)
{
	pte_t *kpte;
	unsigned long address;
	struct page *kpte_page;

	BUG_ON(PageHighMem(page));
	address = (unsigned long)page_address(page);

	kpte = lookup_address(address);
	if (!kpte)
		return -EINVAL;
	kpte_page = virt_to_page(kpte);
	if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
		if ((pte_val(*kpte) & _PAGE_PSE) == 0) {
			set_pte_atomic(kpte, mk_pte(page, prot));
		} else {
			pgprot_t ref_prot;
			struct page *split;

			ref_prot =
			((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
				? PAGE_KERNEL_EXEC : PAGE_KERNEL;
			split = split_large_page(address, prot, ref_prot);
			if (!split)
				return -ENOMEM;
			set_pmd_pte(kpte, address, mk_pte(split, ref_prot));
			kpte_page = split;
		}
		page_private(kpte_page)++;
	} else if ((pte_val(*kpte) & _PAGE_PSE) == 0) {
		set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
		BUG_ON(page_private(kpte_page) == 0);
		page_private(kpte_page)--;
	} else
		BUG();

	/*
	 * If the pte was reserved, it means it was created at boot
	 * time (not via split_large_page) and in turn we must not
	 * replace it with a large page.
	 */
	if (!PageReserved(kpte_page)) {
		if (cpu_has_pse && (page_private(kpte_page) == 0)) {
			ClearPagePrivate(kpte_page);
			list_add(&kpte_page->lru, &df_list);
			revert_page(kpte_page, address);
		}
	}
	return 0;
}

static inline void flush_map(void)
{
	on_each_cpu(flush_kernel_map, NULL, 1, 1);
}

/*
 * Change the page attributes of a page in the kernel linear mapping.
 *
 * This should be used when a page is mapped with a different caching policy
 * than write-back somewhere - some CPUs do not like it when mappings with
 * different caching policies exist. This changes the page attributes of the
 * kernel linear mapping too.
 *
 * The caller needs to ensure that there are no conflicting mappings elsewhere.
 * This function only deals with the kernel linear map.
 *
 * Caller must call global_flush_tlb() after this.
 */
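/*
 * Illustrative use by a hypothetical caller (not part of this file):
 * map a kernel page uncached while a device works on it, then restore
 * the default write-back attribute.
 *
 *	struct page *pg = virt_to_page(buf);
 *
 *	change_page_attr(pg, 1, PAGE_KERNEL_NOCACHE);
 *	global_flush_tlb();
 *	...
 *	change_page_attr(pg, 1, PAGE_KERNEL);
 *	global_flush_tlb();
 */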
int change_page_attr(struct page *page, int numpages, pgprot_t prot)
{
	int err = 0;
	int i;
	unsigned long flags;

	spin_lock_irqsave(&cpa_lock, flags);
	for (i = 0; i < numpages; i++, page++) {
		err = __change_page_attr(page, prot);
		if (err)
			break;
	}
	spin_unlock_irqrestore(&cpa_lock, flags);
	return err;
}

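/*
 * Flush the caches and TLBs on all CPUs, then free the page table
 * pages queued on df_list by __change_page_attr().  The list is
 * detached under cpa_lock, but the cross-CPU flush itself runs with
 * the lock dropped and interrupts enabled, as on_each_cpu() requires.
 */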
void global_flush_tlb(void)
{
	struct list_head l;
	struct page *pg, *next;

	BUG_ON(irqs_disabled());

	spin_lock_irq(&cpa_lock);
	list_replace_init(&df_list, &l);
	spin_unlock_irq(&cpa_lock);
	flush_map();
	list_for_each_entry_safe(pg, next, &l, lru)
		__free_page(pg);
}

#ifdef CONFIG_DEBUG_PAGEALLOC
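/*
 * With CONFIG_DEBUG_PAGEALLOC, pages are unmapped from the kernel
 * linear mapping when they are freed (__pgprot(0) leaves the pte
 * non-present) and mapped again on allocation, so use-after-free
 * accesses fault immediately.  Large pages are disabled at boot in
 * this configuration, which is why change_page_attr() cannot fail
 * here.
 */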
void kernel_map_pages(struct page *page, int numpages, int enable)
{
	if (PageHighMem(page))
		return;
	if (!enable)
		debug_check_no_locks_freed(page_address(page),
					   numpages * PAGE_SIZE);

	/*
	 * The return value is ignored - the calls cannot fail because
	 * large pages are disabled at boot time.
	 */
	change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));

	/*
	 * We should send an IPI and flush all TLBs, but that can
	 * deadlock here, so only flush the current CPU's TLB.
	 */
	__flush_tlb_all();
}
#endif

EXPORT_SYMBOL(change_page_attr);
EXPORT_SYMBOL(global_flush_tlb);