parisc: Switch from DISCONTIGMEM to SPARSEMEM
author Helge Deller <deller@gmx.de>
Tue, 9 Apr 2019 19:52:35 +0000 (21:52 +0200)
committer Helge Deller <deller@gmx.de>
Fri, 3 May 2019 21:47:40 +0000 (23:47 +0200)
The commit 1c30844d2dfe ("mm: reclaim small amounts of memory when an
external fragmentation event occurs") breaks memory management on a
parisc c8000 workstation with this memory layout:

0) Start 0x0000000000000000 End 0x000000003fffffff Size   1024 MB
1) Start 0x0000000100000000 End 0x00000001bfdfffff Size   3070 MB
2) Start 0x0000004040000000 End 0x00000040ffffffff Size   3072 MB

With the patch 1c30844d2dfe, the kernel will incorrectly reclaim the
first zone when it fills up, ignoring the fact that there are two
completely free zones. Basically, it limits cache size to 1GiB.

The parisc kernel is currently using the DISCONTIGMEM implementation,
but isn't NUMA. Avoid this issue or strange work-arounds by switching to
the more commonly used SPARSEMEM implementation.

Reported-by: Mikulas Patocka <mpatocka@redhat.com>
Fixes: 1c30844d2dfe ("mm: reclaim small amounts of memory when an external fragmentation event occurs")
Signed-off-by: Helge Deller <deller@gmx.de>
arch/parisc/Kconfig
arch/parisc/include/asm/mmzone.h
arch/parisc/include/asm/page.h
arch/parisc/include/asm/sparsemem.h [new file with mode: 0644]
arch/parisc/kernel/parisc_ksyms.c
arch/parisc/mm/init.c

index c8038165b81fd203d2fe219a643bc5a462fa3444..26c215570adfb8ee8a6ab98b65012337eb554b1b 100644 (file)
@@ -36,6 +36,7 @@ config PARISC
        select GENERIC_STRNCPY_FROM_USER
        select SYSCTL_ARCH_UNALIGN_ALLOW
        select SYSCTL_EXCEPTION_TRACE
+       select ARCH_DISCARD_MEMBLOCK
        select HAVE_MOD_ARCH_SPECIFIC
        select VIRT_TO_BUS
        select MODULES_USE_ELF_RELA
@@ -314,21 +315,16 @@ config ARCH_SELECT_MEMORY_MODEL
        def_bool y
        depends on 64BIT
 
-config ARCH_DISCONTIGMEM_ENABLE
+config ARCH_SPARSEMEM_ENABLE
        def_bool y
        depends on 64BIT
 
 config ARCH_FLATMEM_ENABLE
        def_bool y
 
-config ARCH_DISCONTIGMEM_DEFAULT
+config ARCH_SPARSEMEM_DEFAULT
        def_bool y
-       depends on ARCH_DISCONTIGMEM_ENABLE
-
-config NODES_SHIFT
-       int
-       default "3"
-       depends on NEED_MULTIPLE_NODES
+       depends on ARCH_SPARSEMEM_ENABLE
 
 source "kernel/Kconfig.hz"
 
index fafa3893fd70cda02c05f5e3dc9198efff09e739..8d390406d8620a8563db331409b4457eab51546e 100644 (file)
@@ -2,62 +2,6 @@
 #ifndef _PARISC_MMZONE_H
 #define _PARISC_MMZONE_H
 
-#define MAX_PHYSMEM_RANGES 8 /* Fix the size for now (current known max is 3) */
+#define MAX_PHYSMEM_RANGES 4 /* Fix the size for now (current known max is 3) */
 
-#ifdef CONFIG_DISCONTIGMEM
-
-extern int npmem_ranges;
-
-struct node_map_data {
-    pg_data_t pg_data;
-};
-
-extern struct node_map_data node_data[];
-
-#define NODE_DATA(nid)          (&node_data[nid].pg_data)
-
-/* We have these possible memory map layouts:
- * Astro: 0-3.75, 67.75-68, 4-64
- * zx1: 0-1, 257-260, 4-256
- * Stretch (N-class): 0-2, 4-32, 34-xxx
- */
-
-/* Since each 1GB can only belong to one region (node), we can create
- * an index table for pfn to nid lookup; each entry in pfnnid_map 
- * represents 1GB, and contains the node that the memory belongs to. */
-
-#define PFNNID_SHIFT (30 - PAGE_SHIFT)
-#define PFNNID_MAP_MAX  512     /* support 512GB */
-extern signed char pfnnid_map[PFNNID_MAP_MAX];
-
-#ifndef CONFIG_64BIT
-#define pfn_is_io(pfn) ((pfn & (0xf0000000UL >> PAGE_SHIFT)) == (0xf0000000UL >> PAGE_SHIFT))
-#else
-/* io can be 0xf0f0f0f0f0xxxxxx or 0xfffffffff0000000 */
-#define pfn_is_io(pfn) ((pfn & (0xf000000000000000UL >> PAGE_SHIFT)) == (0xf000000000000000UL >> PAGE_SHIFT))
-#endif
-
-static inline int pfn_to_nid(unsigned long pfn)
-{
-       unsigned int i;
-
-       if (unlikely(pfn_is_io(pfn)))
-               return 0;
-
-       i = pfn >> PFNNID_SHIFT;
-       BUG_ON(i >= ARRAY_SIZE(pfnnid_map));
-
-       return pfnnid_map[i];
-}
-
-static inline int pfn_valid(int pfn)
-{
-       int nid = pfn_to_nid(pfn);
-
-       if (nid >= 0)
-               return (pfn < node_end_pfn(nid));
-       return 0;
-}
-
-#endif
 #endif /* _PARISC_MMZONE_H */
index b77f49ce6220dd1568284414e9333585ecf39b30..93caf17ac5e2ff20268cd47e538f691ee5edd721 100644 (file)
@@ -147,9 +147,9 @@ extern int npmem_ranges;
 #define __pa(x)                        ((unsigned long)(x)-PAGE_OFFSET)
 #define __va(x)                        ((void *)((unsigned long)(x)+PAGE_OFFSET))
 
-#ifndef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_SPARSEMEM
 #define pfn_valid(pfn)         ((pfn) < max_mapnr)
-#endif /* CONFIG_DISCONTIGMEM */
+#endif
 
 #ifdef CONFIG_HUGETLB_PAGE
 #define HPAGE_SHIFT            PMD_SHIFT /* fixed for transparent huge pages */
diff --git a/arch/parisc/include/asm/sparsemem.h b/arch/parisc/include/asm/sparsemem.h
new file mode 100644 (file)
index 0000000..b5c3a79
--- /dev/null
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ASM_PARISC_SPARSEMEM_H
+#define ASM_PARISC_SPARSEMEM_H
+
+/* We have these possible memory map layouts:
+ * Astro: 0-3.75, 67.75-68, 4-64
+ * zx1: 0-1, 257-260, 4-256
+ * Stretch (N-class): 0-2, 4-32, 34-xxx
+ */
+
+#define MAX_PHYSMEM_BITS       39      /* 512 GB */
+#define SECTION_SIZE_BITS      27      /* 128 MB */
+
+#endif
index 7baa2265d43927fd7e5a24269e627486c60c6b35..174213b1716ea4e45d0206e2e5f5e07d88ca6706 100644 (file)
@@ -138,12 +138,6 @@ extern void $$dyncall(void);
 EXPORT_SYMBOL($$dyncall);
 #endif
 
-#ifdef CONFIG_DISCONTIGMEM
-#include <asm/mmzone.h>
-EXPORT_SYMBOL(node_data);
-EXPORT_SYMBOL(pfnnid_map);
-#endif
-
 #ifdef CONFIG_FUNCTION_TRACER
 extern void _mcount(void);
 EXPORT_SYMBOL(_mcount);
index b2b52de2b82b4c843737cab2a74db4dbd4941803..513f747b0d9da4e484dcc6b4029b46ec8047947d 100644 (file)
@@ -32,6 +32,7 @@
 #include <asm/mmzone.h>
 #include <asm/sections.h>
 #include <asm/msgbuf.h>
+#include <asm/sparsemem.h>
 
 extern int  data_start;
 extern void parisc_kernel_start(void); /* Kernel entry point in head.S */
@@ -48,11 +49,6 @@ pmd_t pmd0[PTRS_PER_PMD] __attribute__ ((__section__ (".data..vm0.pmd"), aligned
 pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__ ((__section__ (".data..vm0.pgd"), aligned(PAGE_SIZE)));
 pte_t pg0[PT_INITIAL * PTRS_PER_PTE] __attribute__ ((__section__ (".data..vm0.pte"), aligned(PAGE_SIZE)));
 
-#ifdef CONFIG_DISCONTIGMEM
-struct node_map_data node_data[MAX_NUMNODES] __read_mostly;
-signed char pfnnid_map[PFNNID_MAP_MAX] __read_mostly;
-#endif
-
 static struct resource data_resource = {
        .name   = "Kernel data",
        .flags  = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
@@ -76,11 +72,11 @@ static struct resource sysram_resources[MAX_PHYSMEM_RANGES] __read_mostly;
  * information retrieved in kernel/inventory.c.
  */
 
-physmem_range_t pmem_ranges[MAX_PHYSMEM_RANGES] __read_mostly;
-int npmem_ranges __read_mostly;
+physmem_range_t pmem_ranges[MAX_PHYSMEM_RANGES] __initdata;
+int npmem_ranges __initdata;
 
 #ifdef CONFIG_64BIT
-#define MAX_MEM         (~0UL)
+#define MAX_MEM         (1UL << MAX_PHYSMEM_BITS)
 #else /* !CONFIG_64BIT */
 #define MAX_MEM         (3584U*1024U*1024U)
 #endif /* !CONFIG_64BIT */
@@ -119,7 +115,7 @@ static void __init mem_limit_func(void)
 static void __init setup_bootmem(void)
 {
        unsigned long mem_max;
-#ifndef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_SPARSEMEM
        physmem_range_t pmem_holes[MAX_PHYSMEM_RANGES - 1];
        int npmem_holes;
 #endif
@@ -137,23 +133,20 @@ static void __init setup_bootmem(void)
                int j;
 
                for (j = i; j > 0; j--) {
-                       unsigned long tmp;
+                       physmem_range_t tmp;
 
                        if (pmem_ranges[j-1].start_pfn <
                            pmem_ranges[j].start_pfn) {
 
                                break;
                        }
-                       tmp = pmem_ranges[j-1].start_pfn;
-                       pmem_ranges[j-1].start_pfn = pmem_ranges[j].start_pfn;
-                       pmem_ranges[j].start_pfn = tmp;
-                       tmp = pmem_ranges[j-1].pages;
-                       pmem_ranges[j-1].pages = pmem_ranges[j].pages;
-                       pmem_ranges[j].pages = tmp;
+                       tmp = pmem_ranges[j-1];
+                       pmem_ranges[j-1] = pmem_ranges[j];
+                       pmem_ranges[j] = tmp;
                }
        }
 
-#ifndef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_SPARSEMEM
        /*
         * Throw out ranges that are too far apart (controlled by
         * MAX_GAP).
@@ -165,7 +158,7 @@ static void __init setup_bootmem(void)
                         pmem_ranges[i-1].pages) > MAX_GAP) {
                        npmem_ranges = i;
                        printk("Large gap in memory detected (%ld pages). "
-                              "Consider turning on CONFIG_DISCONTIGMEM\n",
+                              "Consider turning on CONFIG_SPARSEMEM\n",
                               pmem_ranges[i].start_pfn -
                               (pmem_ranges[i-1].start_pfn +
                                pmem_ranges[i-1].pages));
@@ -230,9 +223,8 @@ static void __init setup_bootmem(void)
 
        printk(KERN_INFO "Total Memory: %ld MB\n",mem_max >> 20);
 
-#ifndef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_SPARSEMEM
        /* Merge the ranges, keeping track of the holes */
-
        {
                unsigned long end_pfn;
                unsigned long hole_pages;
@@ -255,18 +247,6 @@ static void __init setup_bootmem(void)
        }
 #endif
 
-#ifdef CONFIG_DISCONTIGMEM
-       for (i = 0; i < MAX_PHYSMEM_RANGES; i++) {
-               memset(NODE_DATA(i), 0, sizeof(pg_data_t));
-       }
-       memset(pfnnid_map, 0xff, sizeof(pfnnid_map));
-
-       for (i = 0; i < npmem_ranges; i++) {
-               node_set_state(i, N_NORMAL_MEMORY);
-               node_set_online(i);
-       }
-#endif
-
        /*
         * Initialize and free the full range of memory in each range.
         */
@@ -314,7 +294,7 @@ static void __init setup_bootmem(void)
        memblock_reserve(__pa(KERNEL_BINARY_TEXT_START),
                        (unsigned long)(_end - KERNEL_BINARY_TEXT_START));
 
-#ifndef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_SPARSEMEM
 
        /* reserve the holes */
 
@@ -360,6 +340,9 @@ static void __init setup_bootmem(void)
 
        /* Initialize Page Deallocation Table (PDT) and check for bad memory. */
        pdc_pdt_init();
+
+       memblock_allow_resize();
+       memblock_dump_all();
 }
 
 static int __init parisc_text_address(unsigned long vaddr)
@@ -713,37 +696,46 @@ static void __init gateway_init(void)
                  PAGE_SIZE, PAGE_GATEWAY, 1);
 }
 
-void __init paging_init(void)
+static void __init parisc_bootmem_free(void)
 {
+       unsigned long zones_size[MAX_NR_ZONES] = { 0, };
+       unsigned long holes_size[MAX_NR_ZONES] = { 0, };
+       unsigned long mem_start_pfn = ~0UL, mem_end_pfn = 0, mem_size_pfn = 0;
        int i;
 
+       for (i = 0; i < npmem_ranges; i++) {
+               unsigned long start = pmem_ranges[i].start_pfn;
+               unsigned long size = pmem_ranges[i].pages;
+               unsigned long end = start + size;
+
+               if (mem_start_pfn > start)
+                       mem_start_pfn = start;
+               if (mem_end_pfn < end)
+                       mem_end_pfn = end;
+               mem_size_pfn += size;
+       }
+
+       zones_size[0] = mem_end_pfn - mem_start_pfn;
+       holes_size[0] = zones_size[0] - mem_size_pfn;
+
+       free_area_init_node(0, zones_size, mem_start_pfn, holes_size);
+}
+
+void __init paging_init(void)
+{
        setup_bootmem();
        pagetable_init();
        gateway_init();
        flush_cache_all_local(); /* start with known state */
        flush_tlb_all_local(NULL);
 
-       for (i = 0; i < npmem_ranges; i++) {
-               unsigned long zones_size[MAX_NR_ZONES] = { 0, };
-
-               zones_size[ZONE_NORMAL] = pmem_ranges[i].pages;
-
-#ifdef CONFIG_DISCONTIGMEM
-               /* Need to initialize the pfnnid_map before we can initialize
-                  the zone */
-               {
-                   int j;
-                   for (j = (pmem_ranges[i].start_pfn >> PFNNID_SHIFT);
-                        j <= ((pmem_ranges[i].start_pfn + pmem_ranges[i].pages) >> PFNNID_SHIFT);
-                        j++) {
-                       pfnnid_map[j] = i;
-                   }
-               }
-#endif
-
-               free_area_init_node(i, zones_size,
-                               pmem_ranges[i].start_pfn, NULL);
-       }
+       /*
+        * Mark all memblocks as present for sparsemem using
+        * memory_present() and then initialize sparsemem.
+        */
+       memblocks_present();
+       sparse_init();
+       parisc_bootmem_free();
 }
 
 #ifdef CONFIG_PA20