From: Akira Takeuchi Date: Wed, 27 Oct 2010 16:28:53 +0000 (+0100) Subject: MN10300: Optimise do_csum() X-Git-Url: http://git.cdn.openwrt.org/?a=commitdiff_plain;h=ab244c1a08a4e234cd3761a8aba3cb5a7bbe525a;p=openwrt%2Fstaging%2Fblogic.git MN10300: Optimise do_csum() Optimise do_csum() to gang up the loads so they're less likely to get interruptions between. Signed-off-by: Akira Takeuchi Signed-off-by: Kiyoshi Owada Signed-off-by: David Howells --- diff --git a/arch/mn10300/lib/do_csum.S b/arch/mn10300/lib/do_csum.S index e138994e1667..1d27bba0cd8f 100644 --- a/arch/mn10300/lib/do_csum.S +++ b/arch/mn10300/lib/do_csum.S @@ -10,26 +10,25 @@ */ #include - .section .text - .balign L1_CACHE_BYTES + .section .text + .balign L1_CACHE_BYTES ############################################################################### # -# unsigned int do_csum(const unsigned char *buff, size_t len) +# unsigned int do_csum(const unsigned char *buff, int len) # ############################################################################### .globl do_csum - .type do_csum,@function + .type do_csum,@function do_csum: movm [d2,d3],(sp) - mov d0,(12,sp) - mov d1,(16,sp) mov d1,d2 # count mov d0,a0 # buff + mov a0,a1 clr d1 # accumulator cmp +0,d2 - beq do_csum_done # return if zero-length buffer + ble do_csum_done # check for zero length or negative # 4-byte align the buffer pointer btst +3,a0 @@ -41,17 +40,15 @@ do_csum: inc a0 asl +8,d0 add d0,d1 - addc +0,d1 add -1,d2 -do_csum_addr_not_odd: +do_csum_addr_not_odd: cmp +2,d2 bcs do_csum_fewer_than_4 btst +2,a0 beq do_csum_now_4b_aligned movhu (a0+),d0 add d0,d1 - addc +0,d1 add -2,d2 cmp +4,d2 bcs do_csum_fewer_than_4 @@ -66,20 +63,20 @@ do_csum_now_4b_aligned: do_csum_loop: mov (a0+),d0 - add d0,d1 mov (a0+),e0 - addc e0,d1 mov (a0+),e1 - addc e1,d1 mov (a0+),e3 + add d0,d1 + addc e0,d1 + addc e1,d1 addc e3,d1 mov (a0+),d0 - addc d0,d1 mov (a0+),e0 - addc e0,d1 mov (a0+),e1 - addc e1,d1 mov (a0+),e3 + addc d0,d1 + addc e0,d1 + addc e1,d1 addc e3,d1 addc +0,d1 @@ -94,12 +91,12 @@ do_csum_remainder: cmp +16,d2 bcs do_csum_fewer_than_16 mov (a0+),d0 - add d0,d1 mov (a0+),e0 - addc e0,d1 mov (a0+),e1 - addc e1,d1 mov (a0+),e3 + add d0,d1 + addc e0,d1 + addc e1,d1 addc e3,d1 addc +0,d1 add -16,d2 @@ -131,9 +128,9 @@ do_csum_fewer_than_4: xor_cmp d0,d0,+2,d2 bcs do_csum_fewer_than_2 movhu (a0+),d0 -do_csum_fewer_than_2: and +1,d2 beq do_csum_add_last_bit +do_csum_fewer_than_2: movbu (a0),d3 add d3,d0 do_csum_add_last_bit: @@ -142,21 +139,19 @@ do_csum_add_last_bit: do_csum_done: # compress the checksum down to 16 bits - mov +0xffff0000,d2 - and d1,d2 + mov +0xffff0000,d0 + and d1,d0 asl +16,d1 - add d2,d1,d0 + add d1,d0 addc +0xffff,d0 lsr +16,d0 # flip the halves of the word result if the buffer was oddly aligned - mov (12,sp),d1 - and +1,d1 + and +1,a1 beq do_csum_not_oddly_aligned swaph d0,d0 # exchange bits 15:8 with 7:0 do_csum_not_oddly_aligned: ret [d2,d3],8 -do_csum_end: - .size do_csum, do_csum_end-do_csum + .size do_csum, .-do_csum