[PATCH] mm: init_mm without ptlock
First step in pushing down the page_table_lock. init_mm.page_table_lock has
been used throughout the architectures (usually for ioremap): not to serialize
kernel address space allocation (that's usually vmlist_lock), but because
pud_alloc,pmd_alloc,pte_alloc_kernel expect caller holds it.
Reverse that: don't lock or unlock init_mm.page_table_lock in any of the
architectures; instead rely on pud_alloc,pmd_alloc,pte_alloc_kernel to take
and drop it when allocating a new one, to check lest a racing task already
did. Similarly no page_table_lock in vmalloc's map_vm_area.
Some temporary ugliness in __pud_alloc and __pmd_alloc: since they also handle
user mms, which are converted only by a later patch, for now they have to lock
differently according to whether or not it's init_mm.
If sources get muddled, there's a danger that an arch source taking
init_mm.page_table_lock will be mixed with common source also taking it (or
neither take it). So break the rules and make another change, which should
break the build for such a mismatch: remove the redundant mm arg from
pte_alloc_kernel (ppc64 scrapped its distinct ioremap_mm in 2.6.13).
Exceptions: arm26 used pte_alloc_kernel on user mm, now pte_alloc_map; ia64
used pte_alloc_map on init_mm, now pte_alloc_kernel; parisc had bad args to
pmd_alloc and pte_alloc_kernel in unused USE_HPPA_IOREMAP code; ppc64
map_io_page forgot to unlock on failure; ppc mmu_mapin_ram and ppc64 im_free
took page_table_lock for no good reason.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/arch/alpha/mm/remap.c b/arch/alpha/mm/remap.c
index 19817ad..a78356c 100644
--- a/arch/alpha/mm/remap.c
+++ b/arch/alpha/mm/remap.c
@@ -2,7 +2,6 @@
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
-/* called with the page_table_lock held */
static inline void
remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
unsigned long phys_addr, unsigned long flags)
@@ -31,7 +30,6 @@
} while (address && (address < end));
}
-/* called with the page_table_lock held */
static inline int
remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
unsigned long phys_addr, unsigned long flags)
@@ -46,7 +44,7 @@
if (address >= end)
BUG();
do {
- pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+ pte_t * pte = pte_alloc_kernel(pmd, address);
if (!pte)
return -ENOMEM;
remap_area_pte(pte, address, end - address,
@@ -70,7 +68,6 @@
flush_cache_all();
if (address >= end)
BUG();
- spin_lock(&init_mm.page_table_lock);
do {
pmd_t *pmd;
pmd = pmd_alloc(&init_mm, dir, address);
@@ -84,7 +81,6 @@
address = (address + PGDIR_SIZE) & PGDIR_MASK;
dir++;
} while (address && (address < end));
- spin_unlock(&init_mm.page_table_lock);
return error;
}
diff --git a/arch/arm/mm/consistent.c b/arch/arm/mm/consistent.c
index 82f4d5e..47b0b76 100644
--- a/arch/arm/mm/consistent.c
+++ b/arch/arm/mm/consistent.c
@@ -397,8 +397,6 @@
pte_t *pte;
int ret = 0;
- spin_lock(&init_mm.page_table_lock);
-
do {
pgd = pgd_offset(&init_mm, CONSISTENT_BASE);
pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE);
@@ -409,7 +407,7 @@
}
WARN_ON(!pmd_none(*pmd));
- pte = pte_alloc_kernel(&init_mm, pmd, CONSISTENT_BASE);
+ pte = pte_alloc_kernel(pmd, CONSISTENT_BASE);
if (!pte) {
printk(KERN_ERR "%s: no pte tables\n", __func__);
ret = -ENOMEM;
@@ -419,8 +417,6 @@
consistent_pte = pte;
} while (0);
- spin_unlock(&init_mm.page_table_lock);
-
return ret;
}
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 6fb1258..0f128c2 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -75,7 +75,7 @@
pgprot = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_WRITE | flags);
do {
- pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+ pte_t * pte = pte_alloc_kernel(pmd, address);
if (!pte)
return -ENOMEM;
remap_area_pte(pte, address, end - address, address + phys_addr, pgprot);
@@ -97,7 +97,6 @@
phys_addr -= address;
dir = pgd_offset(&init_mm, address);
BUG_ON(address >= end);
- spin_lock(&init_mm.page_table_lock);
do {
pmd_t *pmd = pmd_alloc(&init_mm, dir, address);
if (!pmd) {
@@ -114,7 +113,6 @@
dir++;
} while (address && (address < end));
- spin_unlock(&init_mm.page_table_lock);
flush_cache_vmap(start, end);
return err;
}
diff --git a/arch/arm26/mm/memc.c b/arch/arm26/mm/memc.c
index 8e8a2bb2..d6b008b 100644
--- a/arch/arm26/mm/memc.c
+++ b/arch/arm26/mm/memc.c
@@ -92,7 +92,7 @@
if (!new_pmd)
goto no_pmd;
- new_pte = pte_alloc_kernel(mm, new_pmd, 0);
+ new_pte = pte_alloc_map(mm, new_pmd, 0);
if (!new_pte)
goto no_pte;
@@ -101,6 +101,7 @@
init_pte = pte_offset(init_pmd, 0);
set_pte(new_pte, *init_pte);
+ pte_unmap(new_pte);
/*
* the page table entries are zeroed
diff --git a/arch/cris/mm/ioremap.c b/arch/cris/mm/ioremap.c
index ebba11e..a92ac98 100644
--- a/arch/cris/mm/ioremap.c
+++ b/arch/cris/mm/ioremap.c
@@ -52,7 +52,7 @@
if (address >= end)
BUG();
do {
- pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+ pte_t * pte = pte_alloc_kernel(pmd, address);
if (!pte)
return -ENOMEM;
remap_area_pte(pte, address, end - address, address + phys_addr, prot);
@@ -74,7 +74,6 @@
flush_cache_all();
if (address >= end)
BUG();
- spin_lock(&init_mm.page_table_lock);
do {
pud_t *pud;
pmd_t *pmd;
@@ -94,7 +93,6 @@
address = (address + PGDIR_SIZE) & PGDIR_MASK;
dir++;
} while (address && (address < end));
- spin_unlock(&init_mm.page_table_lock);
flush_tlb_all();
return error;
}
diff --git a/arch/frv/mm/dma-alloc.c b/arch/frv/mm/dma-alloc.c
index cfc4f97..342823a 100644
--- a/arch/frv/mm/dma-alloc.c
+++ b/arch/frv/mm/dma-alloc.c
@@ -55,21 +55,18 @@
pte_t *pte;
int err = -ENOMEM;
- spin_lock(&init_mm.page_table_lock);
-
/* Use upper 10 bits of VA to index the first level map */
pge = pgd_offset_k(va);
pue = pud_offset(pge, va);
pme = pmd_offset(pue, va);
/* Use middle 10 bits of VA to index the second-level map */
- pte = pte_alloc_kernel(&init_mm, pme, va);
+ pte = pte_alloc_kernel(pme, va);
if (pte != 0) {
err = 0;
set_pte(pte, mk_pte_phys(pa & PAGE_MASK, prot));
}
- spin_unlock(&init_mm.page_table_lock);
return err;
}
diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c
index f379b8d..5d09de8 100644
--- a/arch/i386/mm/ioremap.c
+++ b/arch/i386/mm/ioremap.c
@@ -28,7 +28,7 @@
unsigned long pfn;
pfn = phys_addr >> PAGE_SHIFT;
- pte = pte_alloc_kernel(&init_mm, pmd, addr);
+ pte = pte_alloc_kernel(pmd, addr);
if (!pte)
return -ENOMEM;
do {
@@ -87,14 +87,12 @@
flush_cache_all();
phys_addr -= addr;
pgd = pgd_offset_k(addr);
- spin_lock(&init_mm.page_table_lock);
do {
next = pgd_addr_end(addr, end);
err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, flags);
if (err)
break;
} while (pgd++, addr = next, addr != end);
- spin_unlock(&init_mm.page_table_lock);
flush_tlb_all();
return err;
}
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 0063b2c..e3215ba 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -275,26 +275,21 @@
pgd = pgd_offset_k(address); /* note: this is NOT pgd_offset()! */
- spin_lock(&init_mm.page_table_lock);
{
pud = pud_alloc(&init_mm, pgd, address);
if (!pud)
goto out;
-
pmd = pmd_alloc(&init_mm, pud, address);
if (!pmd)
goto out;
- pte = pte_alloc_map(&init_mm, pmd, address);
+ pte = pte_alloc_kernel(pmd, address);
if (!pte)
goto out;
- if (!pte_none(*pte)) {
- pte_unmap(pte);
+ if (!pte_none(*pte))
goto out;
- }
set_pte(pte, mk_pte(page, pgprot));
- pte_unmap(pte);
}
- out: spin_unlock(&init_mm.page_table_lock);
+ out:
/* no need for flush_tlb */
return page;
}
diff --git a/arch/m32r/mm/ioremap.c b/arch/m32r/mm/ioremap.c
index 70c5905..a151849 100644
--- a/arch/m32r/mm/ioremap.c
+++ b/arch/m32r/mm/ioremap.c
@@ -67,7 +67,7 @@
if (address >= end)
BUG();
do {
- pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+ pte_t * pte = pte_alloc_kernel(pmd, address);
if (!pte)
return -ENOMEM;
remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -90,7 +90,6 @@
flush_cache_all();
if (address >= end)
BUG();
- spin_lock(&init_mm.page_table_lock);
do {
pmd_t *pmd;
pmd = pmd_alloc(&init_mm, dir, address);
@@ -104,7 +103,6 @@
address = (address + PGDIR_SIZE) & PGDIR_MASK;
dir++;
} while (address && (address < end));
- spin_unlock(&init_mm.page_table_lock);
flush_tlb_all();
return error;
}
diff --git a/arch/m68k/mm/kmap.c b/arch/m68k/mm/kmap.c
index 5dcb3fa..fe2383e 100644
--- a/arch/m68k/mm/kmap.c
+++ b/arch/m68k/mm/kmap.c
@@ -201,7 +201,7 @@
virtaddr += PTRTREESIZE;
size -= PTRTREESIZE;
} else {
- pte_dir = pte_alloc_kernel(&init_mm, pmd_dir, virtaddr);
+ pte_dir = pte_alloc_kernel(pmd_dir, virtaddr);
if (!pte_dir) {
printk("ioremap: no mem for pte_dir\n");
return NULL;
diff --git a/arch/m68k/sun3x/dvma.c b/arch/m68k/sun3x/dvma.c
index 32e55ad..117481e 100644
--- a/arch/m68k/sun3x/dvma.c
+++ b/arch/m68k/sun3x/dvma.c
@@ -116,7 +116,7 @@
pte_t *pte;
unsigned long end3;
- if((pte = pte_alloc_kernel(&init_mm, pmd, vaddr)) == NULL) {
+ if((pte = pte_alloc_kernel(pmd, vaddr)) == NULL) {
ret = -ENOMEM;
goto out;
}
diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c
index 9c44ca70..3101d1d 100644
--- a/arch/mips/mm/ioremap.c
+++ b/arch/mips/mm/ioremap.c
@@ -55,7 +55,7 @@
if (address >= end)
BUG();
do {
- pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+ pte_t * pte = pte_alloc_kernel(pmd, address);
if (!pte)
return -ENOMEM;
remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -77,7 +77,6 @@
flush_cache_all();
if (address >= end)
BUG();
- spin_lock(&init_mm.page_table_lock);
do {
pud_t *pud;
pmd_t *pmd;
@@ -96,7 +95,6 @@
address = (address + PGDIR_SIZE) & PGDIR_MASK;
dir++;
} while (address && (address < end));
- spin_unlock(&init_mm.page_table_lock);
flush_tlb_all();
return error;
}
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index ae6213d..f94a02e 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -114,7 +114,7 @@
if (end > PGDIR_SIZE)
end = PGDIR_SIZE;
do {
- pte_t * pte = pte_alloc_kernel(&init_mm, pmd, vaddr);
+ pte_t * pte = pte_alloc_kernel(pmd, vaddr);
if (!pte)
return -ENOMEM;
if (map_pte_uncached(pte, orig_vaddr, end - vaddr, paddr_ptr))
diff --git a/arch/parisc/mm/ioremap.c b/arch/parisc/mm/ioremap.c
index f2df502..5c7a1b3 100644
--- a/arch/parisc/mm/ioremap.c
+++ b/arch/parisc/mm/ioremap.c
@@ -52,7 +52,7 @@
if (address >= end)
BUG();
do {
- pte_t * pte = pte_alloc_kernel(NULL, pmd, address);
+ pte_t * pte = pte_alloc_kernel(pmd, address);
if (!pte)
return -ENOMEM;
remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -75,10 +75,9 @@
flush_cache_all();
if (address >= end)
BUG();
- spin_lock(&init_mm.page_table_lock);
do {
pmd_t *pmd;
- pmd = pmd_alloc(dir, address);
+ pmd = pmd_alloc(&init_mm, dir, address);
error = -ENOMEM;
if (!pmd)
break;
@@ -89,7 +88,6 @@
address = (address + PGDIR_SIZE) & PGDIR_MASK;
dir++;
} while (address && (address < end));
- spin_unlock(&init_mm.page_table_lock);
flush_tlb_all();
return error;
}
diff --git a/arch/ppc/kernel/dma-mapping.c b/arch/ppc/kernel/dma-mapping.c
index 0f710d2..685fd0d 100644
--- a/arch/ppc/kernel/dma-mapping.c
+++ b/arch/ppc/kernel/dma-mapping.c
@@ -335,8 +335,6 @@
pte_t *pte;
int ret = 0;
- spin_lock(&init_mm.page_table_lock);
-
do {
pgd = pgd_offset(&init_mm, CONSISTENT_BASE);
pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE);
@@ -347,7 +345,7 @@
}
WARN_ON(!pmd_none(*pmd));
- pte = pte_alloc_kernel(&init_mm, pmd, CONSISTENT_BASE);
+ pte = pte_alloc_kernel(pmd, CONSISTENT_BASE);
if (!pte) {
printk(KERN_ERR "%s: no pte tables\n", __func__);
ret = -ENOMEM;
@@ -357,8 +355,6 @@
consistent_pte = pte;
} while (0);
- spin_unlock(&init_mm.page_table_lock);
-
return ret;
}
diff --git a/arch/ppc/mm/4xx_mmu.c b/arch/ppc/mm/4xx_mmu.c
index b7bcbc2..4d006aa 100644
--- a/arch/ppc/mm/4xx_mmu.c
+++ b/arch/ppc/mm/4xx_mmu.c
@@ -110,13 +110,11 @@
pmd_t *pmdp;
unsigned long val = p | _PMD_SIZE_16M | _PAGE_HWEXEC | _PAGE_HWWRITE;
- spin_lock(&init_mm.page_table_lock);
pmdp = pmd_offset(pgd_offset_k(v), v);
pmd_val(*pmdp++) = val;
pmd_val(*pmdp++) = val;
pmd_val(*pmdp++) = val;
pmd_val(*pmdp++) = val;
- spin_unlock(&init_mm.page_table_lock);
v += LARGE_PAGE_SIZE_16M;
p += LARGE_PAGE_SIZE_16M;
@@ -127,10 +125,8 @@
pmd_t *pmdp;
unsigned long val = p | _PMD_SIZE_4M | _PAGE_HWEXEC | _PAGE_HWWRITE;
- spin_lock(&init_mm.page_table_lock);
pmdp = pmd_offset(pgd_offset_k(v), v);
pmd_val(*pmdp) = val;
- spin_unlock(&init_mm.page_table_lock);
v += LARGE_PAGE_SIZE_4M;
p += LARGE_PAGE_SIZE_4M;
diff --git a/arch/ppc/mm/pgtable.c b/arch/ppc/mm/pgtable.c
index 43505b1..6ea9185 100644
--- a/arch/ppc/mm/pgtable.c
+++ b/arch/ppc/mm/pgtable.c
@@ -280,18 +280,16 @@
pte_t *pg;
int err = -ENOMEM;
- spin_lock(&init_mm.page_table_lock);
/* Use upper 10 bits of VA to index the first level map */
pd = pmd_offset(pgd_offset_k(va), va);
/* Use middle 10 bits of VA to index the second-level map */
- pg = pte_alloc_kernel(&init_mm, pd, va);
+ pg = pte_alloc_kernel(pd, va);
if (pg != 0) {
err = 0;
set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags)));
if (mem_init_done)
flush_HPTE(0, va, pmd_val(*pd));
}
- spin_unlock(&init_mm.page_table_lock);
return err;
}
diff --git a/arch/ppc64/mm/imalloc.c b/arch/ppc64/mm/imalloc.c
index c65b87b..f4ca29c 100644
--- a/arch/ppc64/mm/imalloc.c
+++ b/arch/ppc64/mm/imalloc.c
@@ -300,12 +300,7 @@
for (p = &imlist ; (tmp = *p) ; p = &tmp->next) {
if (tmp->addr == addr) {
*p = tmp->next;
-
- /* XXX: do we need the lock? */
- spin_lock(&init_mm.page_table_lock);
unmap_vm_area(tmp);
- spin_unlock(&init_mm.page_table_lock);
-
kfree(tmp);
up(&imlist_sem);
return;
diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c
index be64b15..a45584b 100644
--- a/arch/ppc64/mm/init.c
+++ b/arch/ppc64/mm/init.c
@@ -155,7 +155,6 @@
unsigned long vsid;
if (mem_init_done) {
- spin_lock(&init_mm.page_table_lock);
pgdp = pgd_offset_k(ea);
pudp = pud_alloc(&init_mm, pgdp, ea);
if (!pudp)
@@ -163,12 +162,11 @@
pmdp = pmd_alloc(&init_mm, pudp, ea);
if (!pmdp)
return -ENOMEM;
- ptep = pte_alloc_kernel(&init_mm, pmdp, ea);
+ ptep = pte_alloc_kernel(pmdp, ea);
if (!ptep)
return -ENOMEM;
set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
__pgprot(flags)));
- spin_unlock(&init_mm.page_table_lock);
} else {
unsigned long va, vpn, hash, hpteg;
diff --git a/arch/s390/mm/ioremap.c b/arch/s390/mm/ioremap.c
index c6c39d8..0f6e9ec 100644
--- a/arch/s390/mm/ioremap.c
+++ b/arch/s390/mm/ioremap.c
@@ -58,7 +58,7 @@
if (address >= end)
BUG();
do {
- pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+ pte_t * pte = pte_alloc_kernel(pmd, address);
if (!pte)
return -ENOMEM;
remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -80,7 +80,6 @@
flush_cache_all();
if (address >= end)
BUG();
- spin_lock(&init_mm.page_table_lock);
do {
pmd_t *pmd;
pmd = pmd_alloc(&init_mm, dir, address);
@@ -94,7 +93,6 @@
address = (address + PGDIR_SIZE) & PGDIR_MASK;
dir++;
} while (address && (address < end));
- spin_unlock(&init_mm.page_table_lock);
flush_tlb_all();
return 0;
}
diff --git a/arch/sh/mm/ioremap.c b/arch/sh/mm/ioremap.c
index 9f490c2..e794e27 100644
--- a/arch/sh/mm/ioremap.c
+++ b/arch/sh/mm/ioremap.c
@@ -57,7 +57,7 @@
if (address >= end)
BUG();
do {
- pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+ pte_t * pte = pte_alloc_kernel(pmd, address);
if (!pte)
return -ENOMEM;
remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -79,7 +79,6 @@
flush_cache_all();
if (address >= end)
BUG();
- spin_lock(&init_mm.page_table_lock);
do {
pmd_t *pmd;
pmd = pmd_alloc(&init_mm, dir, address);
@@ -93,7 +92,6 @@
address = (address + PGDIR_SIZE) & PGDIR_MASK;
dir++;
} while (address && (address < end));
- spin_unlock(&init_mm.page_table_lock);
flush_tlb_all();
return error;
}
diff --git a/arch/sh64/mm/ioremap.c b/arch/sh64/mm/ioremap.c
index f4003da..fb1866f 100644
--- a/arch/sh64/mm/ioremap.c
+++ b/arch/sh64/mm/ioremap.c
@@ -79,7 +79,7 @@
BUG();
do {
- pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+ pte_t * pte = pte_alloc_kernel(pmd, address);
if (!pte)
return -ENOMEM;
remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -101,7 +101,6 @@
flush_cache_all();
if (address >= end)
BUG();
- spin_lock(&init_mm.page_table_lock);
do {
pmd_t *pmd = pmd_alloc(&init_mm, dir, address);
error = -ENOMEM;
@@ -115,7 +114,6 @@
address = (address + PGDIR_SIZE) & PGDIR_MASK;
dir++;
} while (address && (address < end));
- spin_unlock(&init_mm.page_table_lock);
flush_tlb_all();
return 0;
}
diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c
index 6972df4..ecf7acb 100644
--- a/arch/x86_64/mm/ioremap.c
+++ b/arch/x86_64/mm/ioremap.c
@@ -60,7 +60,7 @@
if (address >= end)
BUG();
do {
- pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+ pte_t * pte = pte_alloc_kernel(pmd, address);
if (!pte)
return -ENOMEM;
remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -105,7 +105,6 @@
flush_cache_all();
if (address >= end)
BUG();
- spin_lock(&init_mm.page_table_lock);
do {
pud_t *pud;
pud = pud_alloc(&init_mm, pgd, address);
@@ -119,7 +118,6 @@
address = (address + PGDIR_SIZE) & PGDIR_MASK;
pgd++;
} while (address && (address < end));
- spin_unlock(&init_mm.page_table_lock);
flush_tlb_all();
return error;
}