From c57e182b560e4c93377270d470600095c2b580fe Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Wed, 6 Jun 2018 21:01:32 +0200
Subject: [PATCH] ath79: lzma-loader: sync with ar71xx target

Import all improvements made in the lzma-loader since development on the
ath79 target started.

This also reverts fe594bf90d09 ("ath79: fix loader-okli, lzma-loader"), as
it is obsoleted by 2ad60168b6af ("ar71xx: lzma-loader: move padding
workaround to gzip step").

Likely, many of the changes should be ported to the ramips lzma-loader as
well, but I don't have a device to test this.

Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
---
 target/linux/ath79/image/Makefile                   |  3 ++-
 target/linux/ath79/image/common-tp-link.mk          |  2 +-
 target/linux/ath79/image/lzma-loader/Makefile       |  6 +++++-
 target/linux/ath79/image/lzma-loader/src/Makefile   | 13 +++++++------
 target/linux/ath79/image/lzma-loader/src/head.S     | 13 +++++++++++++
 target/linux/ath79/image/lzma-loader/src/loader.c   |  2 +-
 target/linux/ath79/image/lzma-loader/src/loader.lds |  1 -
 7 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/target/linux/ath79/image/Makefile b/target/linux/ath79/image/Makefile
index cd136b23b9..34dd819610 100644
--- a/target/linux/ath79/image/Makefile
+++ b/target/linux/ath79/image/Makefile
@@ -29,8 +29,9 @@ define Build/loader-okli-compile
 	$(call Build/loader-common,FLASH_OFFS=$(LOADER_FLASH_OFFS) FLASH_MAX=0)
 endef
 
+# Arguments: <output name> <kernel offset>
 define Build/loader-okli
-	dd if=$(KDIR)/loader-$(1).gz bs=7680 conv=sync of="$@.new"
+	dd if=$(KDIR)/loader-$(word 1,$(1)).$(LOADER_TYPE) bs=$(word 2,$(1)) conv=sync of="$@.new"
 	cat "$@" >> "$@.new"
 	mv "$@.new" "$@"
 endef
diff --git a/target/linux/ath79/image/common-tp-link.mk b/target/linux/ath79/image/common-tp-link.mk
index 1dd5a289f2..dcd651722d 100644
--- a/target/linux/ath79/image/common-tp-link.mk
+++ b/target/linux/ath79/image/common-tp-link.mk
@@ -55,7 +55,7 @@ define Device/tplink-nolzma
   LOADER_FLASH_OFFS := 0x22000
   COMPILE := loader-$(1).gz
   COMPILE/loader-$(1).gz := loader-okli-compile
-  KERNEL := kernel-bin | append-dtb | lzma | uImage lzma -M 0x4f4b4c49 | loader-okli $(1)
+  KERNEL := kernel-bin | append-dtb | lzma | uImage lzma -M 0x4f4b4c49 | loader-okli $(1) 7680
   KERNEL_INITRAMFS := kernel-bin | append-dtb | gzip | tplink-v1-header
 endef
 
diff --git a/target/linux/ath79/image/lzma-loader/Makefile b/target/linux/ath79/image/lzma-loader/Makefile
index 9b81e87306..738093a958 100644
--- a/target/linux/ath79/image/lzma-loader/Makefile
+++ b/target/linux/ath79/image/lzma-loader/Makefile
@@ -47,7 +47,11 @@ loader-compile: $(PKG_BUILD_DIR)/.prepared
 		clean all
 
 loader.gz: $(PKG_BUILD_DIR)/loader.bin
-	gzip -nc9 $< > $(LOADER_GZ)
+	# Workaround for buggy bootloaders: Some devices
+	# (TP-Link TL-WR1043ND v1) don't work correctly when
+	# the uncompressed loader is too small (probably a cache
+	# invalidation issue)
+	dd if=$< bs=512K conv=sync | gzip -nc9 > $(LOADER_GZ)
 
 loader.elf: $(PKG_BUILD_DIR)/loader.elf
 	$(CP) $< $(LOADER_ELF)
diff --git a/target/linux/ath79/image/lzma-loader/src/Makefile b/target/linux/ath79/image/lzma-loader/src/Makefile
index 133bf6ee8f..7773f027a2 100644
--- a/target/linux/ath79/image/lzma-loader/src/Makefile
+++ b/target/linux/ath79/image/lzma-loader/src/Makefile
@@ -21,7 +21,6 @@ LOADER_DATA	:=
 BOARD		:=
 FLASH_OFFS	:=
 FLASH_MAX	:=
-KERNEL_CMDLINE	:= rootfstype=squashfs
 
 CC		:= $(CROSS_COMPILE)gcc
 LD		:= $(CROSS_COMPILE)ld
@@ -34,15 +33,17 @@ BIN_FLAGS	:= -O binary -R .reginfo -R .note -R .comment -R .mdebug \
 CFLAGS		= -D__KERNEL__ -Wall -Wstrict-prototypes -Wno-trigraphs -Os \
 		  -fno-strict-aliasing -fno-common -fomit-frame-pointer -G 0 \
 		  -mno-abicalls -fno-pic -ffunction-sections -pipe -mlong-calls \
-		  -fno-common -ffreestanding -fhonour-copts \
+		  -fno-common -ffreestanding -fhonour-copts -nostartfiles \
 		  -mabi=32 -march=mips32r2 \
 		  -Wa,-32 -Wa,-march=mips32r2 -Wa,-mips32r2 -Wa,--trap
 CFLAGS		+= -D_LZMA_PROB32
+CFLAGS		+= -flto
 
 ASFLAGS		= $(CFLAGS) -D__ASSEMBLY__
 
-LDFLAGS		= -static --gc-sections -no-warn-mismatch
-LDFLAGS		+= -e startup -T loader.lds -Ttext $(LZMA_TEXT_START)
+LDFLAGS		= -static -Wl,--gc-sections -Wl,-no-warn-mismatch
+LDFLAGS		+= -Wl,-e,startup -T loader.lds -Wl,-Ttext,$(LZMA_TEXT_START)
+LDFLAGS		+= -flto -fwhole-program
 
 O_FORMAT 	= $(shell $(OBJDUMP) -i | head -2 | grep elf32)
 
@@ -87,7 +88,7 @@ data.o: $(LOADER_DATA)
 	$(LD) -r -b binary --oformat $(O_FORMAT) -T lzma-data.lds -o $@ $<
 
 loader: $(OBJECTS)
-	$(LD) $(LDFLAGS) -o $@ $(OBJECTS)
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS)
 
 loader.bin: loader
 	$(OBJCOPY) $(BIN_FLAGS) $< $@
@@ -96,7 +97,7 @@ loader2.o: loader.bin
 	$(LD) -r -b binary --oformat $(O_FORMAT) -o $@ $<
 
 loader.elf: loader2.o
-	$(LD) -e startup -T loader2.lds -Ttext $(LOADADDR) -o $@ $<
+	$(LD) -z max-page-size=0x1000 -e startup -T loader2.lds -Ttext $(LOADADDR) -o $@ $<
 
 mrproper: clean
 
diff --git a/target/linux/ath79/image/lzma-loader/src/head.S b/target/linux/ath79/image/lzma-loader/src/head.S
index 47a7c9bd63..d414b14d11 100644
--- a/target/linux/ath79/image/lzma-loader/src/head.S
+++ b/target/linux/ath79/image/lzma-loader/src/head.S
@@ -42,6 +42,19 @@ LEAF(startup)
 	mtc0	t0, CP0_STATUS
 	ehb
 
+	/*
+	 * Some bootloaders set the 'Kseg0 coherency algorithm' to
+	 * 'Cacheable, noncoherent, write-through, no write allocate'
+	 * and this causes performance issues. Let's go and change it to
+	 * 'Cacheable, noncoherent, write-back, write allocate'
+	 */
+	mfc0	t0, CP0_CONFIG
+	li	t1, ~7			#~CONF_CM_CMASK
+	and	t0, t1
+	ori	t0, 3			#CONF_CM_CACHABLE_NONCOHERENT
+	mtc0	t0, CP0_CONFIG
+	nop
+
 	mtc0	zero, CP0_COUNT
 	mtc0	zero, CP0_COMPARE
 	ehb
diff --git a/target/linux/ath79/image/lzma-loader/src/loader.c b/target/linux/ath79/image/lzma-loader/src/loader.c
index cc73eb1721..794c4b6285 100644
--- a/target/linux/ath79/image/lzma-loader/src/loader.c
+++ b/target/linux/ath79/image/lzma-loader/src/loader.c
@@ -75,7 +75,7 @@ static unsigned long kernel_la;
 #ifdef CONFIG_KERNEL_CMDLINE
 #define kernel_argc	2
 static const char kernel_cmdline[] = CONFIG_KERNEL_CMDLINE;
-static const char *kernel_argv[] = {
+static const char *const kernel_argv[] = {
 	NULL,
 	kernel_cmdline,
 	NULL,
diff --git a/target/linux/ath79/image/lzma-loader/src/loader.lds b/target/linux/ath79/image/lzma-loader/src/loader.lds
index 80cc7ca3ec..01ff852361 100644
--- a/target/linux/ath79/image/lzma-loader/src/loader.lds
+++ b/target/linux/ath79/image/lzma-loader/src/loader.lds
@@ -13,7 +13,6 @@ SECTIONS {
 	.data : {
 		*(.data)
 		*(.data.*)
-		. = . + 524288;		/* workaround for buggy bootloaders */
 	}
 
 	. = ALIGN(32);
-- 
2.30.2