m68k: discontinuous memory support

Fix support for discontinuous memory

Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index b8536c7..85cdd23 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -355,8 +355,9 @@
 	  adventurous.
 
 config SINGLE_MEMORY_CHUNK
-	bool "Use one physical chunk of memory only"
-	depends on ADVANCED && !SUN3
+	bool "Use one physical chunk of memory only" if ADVANCED && !SUN3
+	default y if SUN3
+	select NEED_MULTIPLE_NODES
 	help
 	  Ignore all but the first contiguous chunk of physical memory for VM
 	  purposes.  This will save a few bytes kernel size and may speed up
@@ -377,6 +378,14 @@
 	  is hardwired on.  The 53c710 SCSI driver is known to suffer from
 	  this problem.
 
+config ARCH_DISCONTIGMEM_ENABLE
+	def_bool !SINGLE_MEMORY_CHUNK
+
+config NODES_SHIFT
+	int
+	default "3"
+	depends on !SINGLE_MEMORY_CHUNK
+
 source "mm/Kconfig"
 
 endmenu
diff --git a/arch/m68k/kernel/module.c b/arch/m68k/kernel/module.c
index 32969d0..774862b 100644
--- a/arch/m68k/kernel/module.c
+++ b/arch/m68k/kernel/module.c
@@ -149,6 +149,9 @@
 		case m68k_fixup_memoffset:
 			*(u32 *)fixup->addr = m68k_memoffset;
 			break;
+		case m68k_fixup_vnode_shift:
+			*(u16 *)fixup->addr += m68k_virt_to_node_shift;
+			break;
 		}
 	}
 }
diff --git a/arch/m68k/kernel/setup.c b/arch/m68k/kernel/setup.c
index 6103193..215c7bd 100644
--- a/arch/m68k/kernel/setup.c
+++ b/arch/m68k/kernel/setup.c
@@ -60,14 +60,12 @@
 int m68k_num_memory;
 int m68k_realnum_memory;
 EXPORT_SYMBOL(m68k_realnum_memory);
-#ifdef CONFIG_SINGLE_MEMORY_CHUNK
 unsigned long m68k_memoffset;
 EXPORT_SYMBOL(m68k_memoffset);
-#endif
 struct mem_info m68k_memory[NUM_MEMINFO];
 EXPORT_SYMBOL(m68k_memory);
 
-static struct mem_info m68k_ramdisk;
+struct mem_info m68k_ramdisk;
 
 static char m68k_command_line[CL_SIZE];
 
@@ -208,9 +206,6 @@
 void __init setup_arch(char **cmdline_p)
 {
 	extern int _etext, _edata, _end;
-#ifndef CONFIG_SUN3
-	unsigned long endmem, startmem;
-#endif
 	int i;
 
 	/* The bootinfo is located right after the kernel bss */
@@ -320,30 +315,16 @@
 		panic("No configuration setup");
 	}
 
+	paging_init();
+
 #ifndef CONFIG_SUN3
-	startmem= m68k_memory[0].addr;
-	endmem = startmem + m68k_memory[0].size;
-	high_memory = (void *)PAGE_OFFSET;
-	for (i = 0; i < m68k_num_memory; i++) {
-		m68k_memory[i].size &= MASK_256K;
-		if (m68k_memory[i].addr < startmem)
-			startmem = m68k_memory[i].addr;
-		if (m68k_memory[i].addr+m68k_memory[i].size > endmem)
-			endmem = m68k_memory[i].addr+m68k_memory[i].size;
-		high_memory += m68k_memory[i].size;
-	}
-
-	availmem += init_bootmem_node(NODE_DATA(0), availmem >> PAGE_SHIFT,
-				      startmem >> PAGE_SHIFT, endmem >> PAGE_SHIFT);
-
-	for (i = 0; i < m68k_num_memory; i++)
-		free_bootmem(m68k_memory[i].addr, m68k_memory[i].size);
-
-	reserve_bootmem(m68k_memory[0].addr, availmem - m68k_memory[0].addr);
-
+	for (i = 1; i < m68k_num_memory; i++)
+		free_bootmem_node(NODE_DATA(i), m68k_memory[i].addr,
+				  m68k_memory[i].size);
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (m68k_ramdisk.size) {
-		reserve_bootmem(m68k_ramdisk.addr, m68k_ramdisk.size);
+		reserve_bootmem_node(__virt_to_node(phys_to_virt(m68k_ramdisk.addr)),
+				     m68k_ramdisk.addr, m68k_ramdisk.size);
 		initrd_start = (unsigned long)phys_to_virt(m68k_ramdisk.addr);
 		initrd_end = initrd_start + m68k_ramdisk.size;
 		printk("initrd: %08lx - %08lx\n", initrd_start, initrd_end);
@@ -362,8 +343,6 @@
 
 #endif /* !CONFIG_SUN3 */
 
-	paging_init();
-
 /* set ISA defs early as possible */
 #if defined(CONFIG_ISA) && defined(MULTI_ISA)
 #if defined(CONFIG_Q40)
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index ab90213..f1de19e 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -7,6 +7,7 @@
  *  to motorola.c and sun3mmu.c
  */
 
+#include <linux/module.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
@@ -31,6 +32,37 @@
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
+static bootmem_data_t __initdata bootmem_data[MAX_NUMNODES];
+
+pg_data_t pg_data_map[MAX_NUMNODES];
+EXPORT_SYMBOL(pg_data_map);
+
+int m68k_virt_to_node_shift;
+
+#ifndef CONFIG_SINGLE_MEMORY_CHUNK
+pg_data_t *pg_data_table[65];
+EXPORT_SYMBOL(pg_data_table);
+#endif
+
+void m68k_setup_node(int node)
+{
+#ifndef CONFIG_SINGLE_MEMORY_CHUNK
+	struct mem_info *info = m68k_memory + node;
+	int i, end;
+
+	i = (unsigned long)phys_to_virt(info->addr) >> __virt_to_node_shift();
+	end = (unsigned long)phys_to_virt(info->addr + info->size - 1) >> __virt_to_node_shift();
+	for (; i <= end; i++) {
+		if (pg_data_table[i])
+			printk("overlap at %u for chunk %u\n", i, node);
+		pg_data_table[i] = pg_data_map + node;
+	}
+#endif
+	pg_data_map[node].bdata = bootmem_data + node;
+	node_set_online(node);
+}
+
+
 /*
  * ZERO_PAGE is a special page that is used for zero-initialized
  * data and COW.
@@ -40,52 +72,51 @@
 
 void show_mem(void)
 {
-    unsigned long i;
-    int free = 0, total = 0, reserved = 0, shared = 0;
-    int cached = 0;
+	pg_data_t *pgdat;
+	int free = 0, total = 0, reserved = 0, shared = 0;
+	int cached = 0;
+	int i;
 
-    printk("\nMem-info:\n");
-    show_free_areas();
-    printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
-    i = max_mapnr;
-    while (i-- > 0) {
-	total++;
-	if (PageReserved(mem_map+i))
-	    reserved++;
-	else if (PageSwapCache(mem_map+i))
-	    cached++;
-	else if (!page_count(mem_map+i))
-	    free++;
-	else
-	    shared += page_count(mem_map+i) - 1;
-    }
-    printk("%d pages of RAM\n",total);
-    printk("%d free pages\n",free);
-    printk("%d reserved pages\n",reserved);
-    printk("%d pages shared\n",shared);
-    printk("%d pages swap cached\n",cached);
+	printk("\nMem-info:\n");
+	show_free_areas();
+	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+	for_each_online_pgdat(pgdat) {
+		for (i = 0; i < pgdat->node_spanned_pages; i++) {
+			struct page *page = pgdat->node_mem_map + i;
+			total++;
+			if (PageReserved(page))
+				reserved++;
+			else if (PageSwapCache(page))
+				cached++;
+			else if (!page_count(page))
+				free++;
+			else
+				shared += page_count(page) - 1;
+		}
+	}
+	printk("%d pages of RAM\n",total);
+	printk("%d free pages\n",free);
+	printk("%d reserved pages\n",reserved);
+	printk("%d pages shared\n",shared);
+	printk("%d pages swap cached\n",cached);
 }
 
 extern void init_pointer_table(unsigned long ptable);
 
 /* References to section boundaries */
 
-extern char _text, _etext, _edata, __bss_start, _end;
-extern char __init_begin, __init_end;
+extern char _text[], _etext[];
+extern char __init_begin[], __init_end[];
 
 extern pmd_t *zero_pgtable;
 
 void __init mem_init(void)
 {
+	pg_data_t *pgdat;
 	int codepages = 0;
 	int datapages = 0;
 	int initpages = 0;
-	unsigned long tmp;
-#ifndef CONFIG_SUN3
 	int i;
-#endif
-
-	max_mapnr = num_physpages = (((unsigned long)high_memory - PAGE_OFFSET) >> PAGE_SHIFT);
 
 #ifdef CONFIG_ATARI
 	if (MACH_IS_ATARI)
@@ -93,19 +124,25 @@
 #endif
 
 	/* this will put all memory onto the freelists */
-	totalram_pages = free_all_bootmem();
+	totalram_pages = num_physpages = 0;
+	for_each_online_pgdat(pgdat) {
+		num_physpages += pgdat->node_present_pages;
 
-	for (tmp = PAGE_OFFSET ; tmp < (unsigned long)high_memory; tmp += PAGE_SIZE) {
-		if (PageReserved(virt_to_page(tmp))) {
-			if (tmp >= (unsigned long)&_text
-			    && tmp < (unsigned long)&_etext)
+		totalram_pages += free_all_bootmem_node(pgdat);
+		for (i = 0; i < pgdat->node_spanned_pages; i++) {
+			struct page *page = pgdat->node_mem_map + i;
+			char *addr = page_to_virt(page);
+
+			if (!PageReserved(page))
+				continue;
+			if (addr >= _text &&
+			    addr < _etext)
 				codepages++;
-			else if (tmp >= (unsigned long) &__init_begin
-				 && tmp < (unsigned long) &__init_end)
+			else if (addr >= __init_begin &&
+				 addr < __init_end)
 				initpages++;
 			else
 				datapages++;
-			continue;
 		}
 	}
 
@@ -124,7 +161,7 @@
 
 	printk("Memory: %luk/%luk available (%dk kernel code, %dk data, %dk init)\n",
 	       (unsigned long)nr_free_pages() << (PAGE_SHIFT-10),
-	       max_mapnr << (PAGE_SHIFT-10),
+	       totalram_pages << (PAGE_SHIFT-10),
 	       codepages << (PAGE_SHIFT-10),
 	       datapages << (PAGE_SHIFT-10),
 	       initpages << (PAGE_SHIFT-10));
diff --git a/arch/m68k/mm/memory.c b/arch/m68k/mm/memory.c
index 13c0b4a..b747352 100644
--- a/arch/m68k/mm/memory.c
+++ b/arch/m68k/mm/memory.c
@@ -127,67 +127,6 @@
 	return 0;
 }
 
-#ifdef DEBUG_INVALID_PTOV
-int mm_inv_cnt = 5;
-#endif
-
-#ifndef CONFIG_SINGLE_MEMORY_CHUNK
-/*
- * The following two routines map from a physical address to a kernel
- * virtual address and vice versa.
- */
-unsigned long mm_vtop(unsigned long vaddr)
-{
-	int i=0;
-	unsigned long voff = (unsigned long)vaddr - PAGE_OFFSET;
-
-	do {
-		if (voff < m68k_memory[i].size) {
-#ifdef DEBUGPV
-			printk ("VTOP(%p)=%lx\n", vaddr,
-				m68k_memory[i].addr + voff);
-#endif
-			return m68k_memory[i].addr + voff;
-		}
-		voff -= m68k_memory[i].size;
-	} while (++i < m68k_num_memory);
-
-	/* As a special case allow `__pa(high_memory)'.  */
-	if (voff == 0)
-		return m68k_memory[i-1].addr + m68k_memory[i-1].size;
-
-	return -1;
-}
-EXPORT_SYMBOL(mm_vtop);
-
-unsigned long mm_ptov (unsigned long paddr)
-{
-	int i = 0;
-	unsigned long poff, voff = PAGE_OFFSET;
-
-	do {
-		poff = paddr - m68k_memory[i].addr;
-		if (poff < m68k_memory[i].size) {
-#ifdef DEBUGPV
-			printk ("PTOV(%lx)=%lx\n", paddr, poff + voff);
-#endif
-			return poff + voff;
-		}
-		voff += m68k_memory[i].size;
-	} while (++i < m68k_num_memory);
-
-#ifdef DEBUG_INVALID_PTOV
-	if (mm_inv_cnt > 0) {
-		mm_inv_cnt--;
-		printk("Invalid use of phys_to_virt(0x%lx) at 0x%p!\n",
-			paddr, __builtin_return_address(0));
-	}
-#endif
-	return -1;
-}
-EXPORT_SYMBOL(mm_ptov);
-#endif
-
 /* invalidate page in both caches */
 static inline void clear040(unsigned long paddr)
 {
@@ -354,15 +293,3 @@
 }
 EXPORT_SYMBOL(cache_push);
 
-#ifndef CONFIG_SINGLE_MEMORY_CHUNK
-int mm_end_of_chunk (unsigned long addr, int len)
-{
-	int i;
-
-	for (i = 0; i < m68k_num_memory; i++)
-		if (m68k_memory[i].addr + m68k_memory[i].size == addr + len)
-			return 1;
-	return 0;
-}
-EXPORT_SYMBOL(mm_end_of_chunk);
-#endif
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 98ef005..7d571a2 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -43,6 +43,11 @@
 EXPORT_SYMBOL(mm_cachebits);
 #endif
 
+/* size of memory already mapped in head.S */
+#define INIT_MAPPED_SIZE	(4UL<<20)
+
+extern unsigned long availmem;
+
 static pte_t * __init kernel_page_table(void)
 {
 	pte_t *ptablep;
@@ -98,19 +103,20 @@
 	return last_pgtable;
 }
 
-static unsigned long __init
-map_chunk (unsigned long addr, long size)
+static void __init map_node(int node)
 {
 #define PTRTREESIZE (256*1024)
 #define ROOTTREESIZE (32*1024*1024)
-	static unsigned long virtaddr = PAGE_OFFSET;
-	unsigned long physaddr;
+	unsigned long physaddr, virtaddr, size;
 	pgd_t *pgd_dir;
 	pmd_t *pmd_dir;
 	pte_t *pte_dir;
 
-	physaddr = (addr | m68k_supervisor_cachemode |
-		    _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY);
+	size = m68k_memory[node].size;
+	physaddr = m68k_memory[node].addr;
+	virtaddr = (unsigned long)phys_to_virt(physaddr);
+	physaddr |= m68k_supervisor_cachemode |
+		    _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY;
 	if (CPU_IS_040_OR_060)
 		physaddr |= _PAGE_GLOBAL040;
 
@@ -190,8 +196,6 @@
 #ifdef DEBUG
 	printk("\n");
 #endif
-
-	return virtaddr;
 }
 
 /*
@@ -200,15 +204,16 @@
  */
 void __init paging_init(void)
 {
-	int chunk;
-	unsigned long mem_avail = 0;
 	unsigned long zones_size[MAX_NR_ZONES] = { 0, };
+	unsigned long min_addr, max_addr;
+	unsigned long addr, size, end;
+	int i;
 
 #ifdef DEBUG
 	{
 		extern unsigned long availmem;
-		printk ("start of paging_init (%p, %lx, %lx, %lx)\n",
-			kernel_pg_dir, availmem, start_mem, end_mem);
+		printk ("start of paging_init (%p, %lx)\n",
+			kernel_pg_dir, availmem);
 	}
 #endif
 
@@ -222,27 +227,62 @@
 			pgprot_val(protection_map[i]) |= _PAGE_CACHE040;
 	}
 
+	min_addr = m68k_memory[0].addr;
+	max_addr = min_addr + m68k_memory[0].size;
+	for (i = 1; i < m68k_num_memory;) {
+		if (m68k_memory[i].addr < min_addr) {
+			printk("Ignoring memory chunk at 0x%lx:0x%lx before the first chunk\n",
+				m68k_memory[i].addr, m68k_memory[i].size);
+			printk("Fix your bootloader or use a memfile to make use of this area!\n");
+			m68k_num_memory--;
+			memmove(m68k_memory + i, m68k_memory + i + 1,
+				(m68k_num_memory - i) * sizeof(struct mem_info));
+			continue;
+		}
+		addr = m68k_memory[i].addr + m68k_memory[i].size;
+		if (addr > max_addr)
+			max_addr = addr;
+		i++;
+	}
+	m68k_memoffset = min_addr - PAGE_OFFSET;
+	m68k_virt_to_node_shift = fls(max_addr - min_addr - 1) - 6;
+
 	module_fixup(NULL, __start_fixup, __stop_fixup);
 	flush_icache();
 
-	/*
-	 * Map the physical memory available into the kernel virtual
-	 * address space.  It may allocate some memory for page
-	 * tables and thus modify availmem.
-	 */
+	high_memory = phys_to_virt(max_addr);
 
-	for (chunk = 0; chunk < m68k_num_memory; chunk++) {
-		mem_avail = map_chunk (m68k_memory[chunk].addr,
-				       m68k_memory[chunk].size);
+	min_low_pfn = availmem >> PAGE_SHIFT;
+	max_low_pfn = max_addr >> PAGE_SHIFT;
 
+	for (i = 0; i < m68k_num_memory; i++) {
+		addr = m68k_memory[i].addr;
+		end = addr + m68k_memory[i].size;
+		m68k_setup_node(i);
+		availmem = PAGE_ALIGN(availmem);
+		availmem += init_bootmem_node(NODE_DATA(i),
+					      availmem >> PAGE_SHIFT,
+					      addr >> PAGE_SHIFT,
+					      end >> PAGE_SHIFT);
 	}
 
+	/*
+	 * Map the physical memory available into the kernel virtual
+	 * address space. First initialize the bootmem allocator with
+	 * the memory we already mapped, so map_node() has something
+	 * to allocate.
+	 */
+	addr = m68k_memory[0].addr;
+	size = m68k_memory[0].size;
+	free_bootmem_node(NODE_DATA(0), availmem, min(INIT_MAPPED_SIZE, size) - (availmem - addr));
+	map_node(0);
+	if (size > INIT_MAPPED_SIZE)
+		free_bootmem_node(NODE_DATA(0), addr + INIT_MAPPED_SIZE, size - INIT_MAPPED_SIZE);
+
+	for (i = 1; i < m68k_num_memory; i++)
+		map_node(i);
+
 	flush_tlb_all();
-#ifdef DEBUG
-	printk ("memory available is %ldKB\n", mem_avail >> 10);
-	printk ("start_mem is %#lx\nvirtual_end is %#lx\n",
-		start_mem, end_mem);
-#endif
 
 	/*
 	 * initialize the bad page table and bad page to point
@@ -259,14 +299,11 @@
 #ifdef DEBUG
 	printk ("before free_area_init\n");
 #endif
-	zones_size[ZONE_DMA] = (mach_max_dma_address < (unsigned long)high_memory ?
-				(mach_max_dma_address+1) : (unsigned long)high_memory);
-	zones_size[ZONE_NORMAL] = (unsigned long)high_memory - zones_size[0];
-
-	zones_size[ZONE_DMA] = (zones_size[ZONE_DMA] - PAGE_OFFSET) >> PAGE_SHIFT;
-	zones_size[ZONE_NORMAL] >>= PAGE_SHIFT;
-
-	free_area_init(zones_size);
+	for (i = 0; i < m68k_num_memory; i++) {
+		zones_size[ZONE_DMA] = m68k_memory[i].size >> PAGE_SHIFT;
+		free_area_init_node(i, pg_data_map + i, zones_size,
+				    m68k_memory[i].addr >> PAGE_SHIFT, NULL);
+	}
 }
 
 extern char __init_begin, __init_end;
diff --git a/arch/m68k/sun3/config.c b/arch/m68k/sun3/config.c
index 4851b84..c0fbd27 100644
--- a/arch/m68k/sun3/config.c
+++ b/arch/m68k/sun3/config.c
@@ -21,6 +21,7 @@
 #include <asm/contregs.h>
 #include <asm/movs.h>
 #include <asm/pgtable.h>
+#include <asm/pgalloc.h>
 #include <asm/sun3-head.h>
 #include <asm/sun3mmu.h>
 #include <asm/rtc.h>
@@ -127,6 +128,7 @@
 	high_memory = (void *)memory_end;
 	availmem = memory_start;
 
+	m68k_setup_node(0);
 	availmem += init_bootmem_node(NODE_DATA(0), start_page, 0, num_pages);
 	availmem = (availmem + (PAGE_SIZE-1)) & PAGE_MASK;