[S390] 1K/2K page table pages.

The size of a page table on s390 is 1K for 31 bit kernels and 2K for
64 bit kernels, yet so far every page table occupied a full 4K page.
This patch packs several page tables into a single page: four 1K
tables on 31 bit, two 2K tables on 64 bit. The low bits of
page->flags track which fragments of a page are in use, and each mm
keeps a list of partially filled pages (mm->context.pgtable_list)
that is searched before a fresh page is allocated. The crst tables of
an address space are kept on a second list (mm->context.crst_list) so
that disable_noexec() can release all shadow tables at once when
execute protection is switched off. vmem_pte_alloc() draws kernel
page tables from the same allocator once the slab is available and
clears only PTRS_PER_PTE entries instead of a whole page.
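
To illustrate the fragment bookkeeping, here is a minimal user space
sketch (frag_alloc/frag_free and the demo in main() are hypothetical
and use the 64 bit constants; the kernel code below additionally
handles the noexec shadow bits and the per-mm lists):

  #include <stdio.h>

  #define TABLES_PER_PAGE 2  /* 64 bit: two 2K tables per 4K page */
  #define FRAG_MASK ((1UL << TABLES_PER_PAGE) - 1)

  /* Claim the first free fragment; returns its index, or -1 if full. */
  static int frag_alloc(unsigned long *flags)
  {
          unsigned long bits = 1UL;
          int i;

          for (i = 0; i < TABLES_PER_PAGE; i++, bits <<= 1) {
                  if (!(*flags & bits)) {
                          *flags |= bits;
                          return i;
                  }
          }
          return -1;
  }

  /* Release fragment i again. */
  static void frag_free(unsigned long *flags, int i)
  {
          *flags ^= 1UL << i;
  }

  int main(void)
  {
          unsigned long flags = 0;
          int a = frag_alloc(&flags);  /* 0 */
          int b = frag_alloc(&flags);  /* 1 */

          printf("a=%d b=%d flags=%lx\n", a, b, flags & FRAG_MASK);
          frag_free(&flags, a);
          printf("flags=%lx\n", flags & FRAG_MASK);  /* 2 */
          return 0;
  }

In page_table_free() the fragment index is recovered from the table
address itself: with 256 entries of 8 bytes each, the expression
(__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long) yields
0 or 1 on 64 bit.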

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 983ec6e..01dfe20 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -184,7 +184,7 @@
 		pmd = pmd_offset(pud, address);
 		pte = pte_offset_kernel(pmd, address);
 		if (!enable) {
-			ptep_invalidate(address, pte);
+			ptep_invalidate(&init_mm, address, pte);
 			continue;
 		}
 		*pte = mk_pte_phys(address, __pgprot(_PAGE_TYPE_RW));
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 019f518..809e778 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -26,8 +26,14 @@
 
 #ifndef CONFIG_64BIT
 #define ALLOC_ORDER	1
+#define TABLES_PER_PAGE	4
+#define FRAG_MASK	15UL
+#define SECOND_HALVES	10UL
 #else
 #define ALLOC_ORDER	2
+#define TABLES_PER_PAGE	2
+#define FRAG_MASK	3UL
+#define SECOND_HALVES	2UL
 #endif
 
 unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
@@ -45,13 +51,20 @@
 		}
 		page->index = page_to_phys(shadow);
 	}
+	spin_lock(&mm->page_table_lock);
+	list_add(&page->lru, &mm->context.crst_list);
+	spin_unlock(&mm->page_table_lock);
 	return (unsigned long *) page_to_phys(page);
 }
 
-void crst_table_free(unsigned long *table)
+void crst_table_free(struct mm_struct *mm, unsigned long *table)
 {
 	unsigned long *shadow = get_shadow_table(table);
+	struct page *page = virt_to_page(table);
 
+	spin_lock(&mm->page_table_lock);
+	list_del(&page->lru);
+	spin_unlock(&mm->page_table_lock);
 	if (shadow)
 		free_pages((unsigned long) shadow, ALLOC_ORDER);
 	free_pages((unsigned long) table, ALLOC_ORDER);
@@ -60,37 +73,84 @@
 /*
  * page table entry allocation/free routines.
  */
-unsigned long *page_table_alloc(int noexec)
+unsigned long *page_table_alloc(struct mm_struct *mm)
 {
-	struct page *page = alloc_page(GFP_KERNEL);
+	struct page *page;
 	unsigned long *table;
+	unsigned long bits;
 
-	if (!page)
-		return NULL;
-	page->index = 0;
-	if (noexec) {
-		struct page *shadow = alloc_page(GFP_KERNEL);
-		if (!shadow) {
-			__free_page(page);
-			return NULL;
-		}
-		table = (unsigned long *) page_to_phys(shadow);
-		clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
-		page->index = (addr_t) table;
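+	/* One flag bit per fragment; noexec claims the shadow fragment too. */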
+	bits = mm->context.noexec ? 3UL : 1UL;
+	spin_lock(&mm->page_table_lock);
+	page = NULL;
+	if (!list_empty(&mm->context.pgtable_list)) {
+		page = list_first_entry(&mm->context.pgtable_list,
+					struct page, lru);
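+		/* All fragments already in use? Allocate a fresh page below. */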
+		if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
+			page = NULL;
 	}
-	pgtable_page_ctor(page);
+	if (!page) {
+		spin_unlock(&mm->page_table_lock);
+		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+		if (!page)
+			return NULL;
+		pgtable_page_ctor(page);
+		page->flags &= ~FRAG_MASK;
+		table = (unsigned long *) page_to_phys(page);
+		clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+		spin_lock(&mm->page_table_lock);
+		list_add(&page->lru, &mm->context.pgtable_list);
+	}
 	table = (unsigned long *) page_to_phys(page);
-	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
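+	/* Find the first free fragment in the page. */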
+	while (page->flags & bits) {
+		table += 256;
+		bits <<= 1;
+	}
+	page->flags |= bits;
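+	/* Keep partially free pages in front, move full pages to the tail. */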
+	if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
+		list_move_tail(&page->lru, &mm->context.pgtable_list);
+	spin_unlock(&mm->page_table_lock);
 	return table;
 }
 
-void page_table_free(unsigned long *table)
+void page_table_free(struct mm_struct *mm, unsigned long *table)
 {
-	unsigned long *shadow = get_shadow_pte(table);
+	struct page *page;
+	unsigned long bits;
 
-	pgtable_page_dtor(virt_to_page(table));
-	if (shadow)
-		free_page((unsigned long) shadow);
-	free_page((unsigned long) table);
+	bits = mm->context.noexec ? 3UL : 1UL;
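+	/* The table's offset within the page selects the fragment bit(s). */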
+	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
+	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	spin_lock(&mm->page_table_lock);
+	page->flags ^= bits;
+	if (page->flags & FRAG_MASK) {
+		/* Page now has some free pgtable fragments. */
+		list_move(&page->lru, &mm->context.pgtable_list);
+		page = NULL;
+	} else
+		/* All fragments of the 4K page have been freed. */
+		list_del(&page->lru);
+	spin_unlock(&mm->page_table_lock);
+	if (page) {
+		pgtable_page_dtor(page);
+		__free_page(page);
+	}
+}
 
+void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
+{
+	struct page *page;
+
+	spin_lock(&mm->page_table_lock);
+	/* Free shadow region and segment tables. */
+	list_for_each_entry(page, &mm->context.crst_list, lru)
+		if (page->index) {
+			free_pages((unsigned long) page->index, ALLOC_ORDER);
+			page->index = 0;
+		}
+	/* "Free" second halves of page tables. */
+	list_for_each_entry(page, &mm->context.pgtable_list, lru)
+		page->flags &= ~SECOND_HALVES;
+	spin_unlock(&mm->page_table_lock);
+	mm->context.noexec = 0;
+	update_mm(mm, tsk);
 }
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 7c1287c..434491f 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -84,13 +84,18 @@
 	return pmd;
 }
 
-static inline pte_t *vmem_pte_alloc(void)
+static pte_t __init_refok *vmem_pte_alloc(void)
 {
-	pte_t *pte = vmem_alloc_pages(0);
+	pte_t *pte;
 
+	if (slab_is_available())
+		pte = (pte_t *) page_table_alloc(&init_mm);
+	else
+		pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t));
 	if (!pte)
 		return NULL;
-	clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+	clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY,
+		    PTRS_PER_PTE * sizeof(pte_t));
 	return pte;
 }
 
@@ -360,6 +365,9 @@
 {
 	int i;
 
+	INIT_LIST_HEAD(&init_mm.context.crst_list);
+	INIT_LIST_HEAD(&init_mm.context.pgtable_list);
+	init_mm.context.noexec = 0;
 	NODE_DATA(0)->node_mem_map = VMEM_MAP;
 	for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++)
 		vmem_add_mem(memory_chunk[i].addr, memory_chunk[i].size);