diff options
Diffstat (limited to 'arch/cris/arch-v32/lib')
-rw-r--r-- | arch/cris/arch-v32/lib/Makefile | 3 | ||||
-rw-r--r-- | arch/cris/arch-v32/lib/checksum.S | 72 | ||||
-rw-r--r-- | arch/cris/arch-v32/lib/checksumcopy.S | 69 | ||||
-rw-r--r-- | arch/cris/arch-v32/lib/delay.c | 28 | ||||
-rw-r--r-- | arch/cris/arch-v32/lib/dram_init.S | 119 | ||||
-rw-r--r-- | arch/cris/arch-v32/lib/hw_settings.S | 72 | ||||
-rw-r--r-- | arch/cris/arch-v32/lib/spinlock.S | 10 |
7 files changed, 78 insertions, 295 deletions
diff --git a/arch/cris/arch-v32/lib/Makefile b/arch/cris/arch-v32/lib/Makefile index 05b3ec6978d6..eb4aad1f1158 100644 --- a/arch/cris/arch-v32/lib/Makefile +++ b/arch/cris/arch-v32/lib/Makefile @@ -2,5 +2,6 @@ # Makefile for Etrax-specific library files.. # -lib-y = checksum.o checksumcopy.o string.o usercopy.o memset.o csumcpfruser.o spinlock.o +lib-y = checksum.o checksumcopy.o string.o usercopy.o memset.o \ + csumcpfruser.o spinlock.o delay.o diff --git a/arch/cris/arch-v32/lib/checksum.S b/arch/cris/arch-v32/lib/checksum.S index 32e66181b826..87f3fd71ab10 100644 --- a/arch/cris/arch-v32/lib/checksum.S +++ b/arch/cris/arch-v32/lib/checksum.S @@ -1,6 +1,6 @@ /* * A fast checksum routine using movem - * Copyright (c) 1998-2001, 2003 Axis Communications AB + * Copyright (c) 1998-2007 Axis Communications AB * * csum_partial(const unsigned char * buff, int len, unsigned int sum) */ @@ -12,30 +12,23 @@ csum_partial: ;; r11 - length ;; r12 - checksum - ;; check for breakeven length between movem and normal word looping versions - ;; we also do _NOT_ want to compute a checksum over more than the - ;; actual length when length < 40 - - cmpu.w 80,$r11 - blo _word_loop - nop - - ;; need to save the registers we use below in the movem loop - ;; this overhead is why we have a check above for breakeven length - ;; only r0 - r8 have to be saved, the other ones are clobber-able - ;; according to the ABI + ;; Optimized for large packets + subq 10*4, $r11 + blt _word_loop + move.d $r11, $acr subq 9*4,$sp - subq 10*4,$r11 ; update length for the first loop + clearf c movem $r8,[$sp] ;; do a movem checksum _mloop: movem [$r10+],$r9 ; read 10 longwords - + ;; Loop count without touching the c flag. + addoq -10*4, $acr, $acr ;; perform dword checksumming on the 10 longwords - add.d $r0,$r12 + addc $r0,$r12 addc $r1,$r12 addc $r2,$r12 addc $r3,$r12 @@ -46,60 +39,41 @@ _mloop: movem [$r10+],$r9 ; read 10 longwords addc $r8,$r12 addc $r9,$r12 - ;; fold the carry into the checksum, to avoid having to loop the carry - ;; back into the top - - addc 0,$r12 - addc 0,$r12 ; do it again, since we might have generated a carry - - subq 10*4,$r11 - bge _mloop - nop - - addq 10*4,$r11 ; compensate for last loop underflowing length + ;; test $acr without trashing carry. + move.d $acr, $acr + bpl _mloop + ;; r11 <= acr is not really needed in the mloop, just using the dslot + ;; to prepare for what is needed after mloop. + move.d $acr, $r11 + ;; fold the last carry into r13 + addc 0, $r12 movem [$sp+],$r8 ; restore regs _word_loop: - ;; only fold if there is anything to fold. - - cmpq 0,$r12 - beq _no_fold - - ;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below. - ;; r9 and r13 can be used as temporaries. + addq 10*4,$r11 ; compensate for last loop underflowing length moveq -1,$r9 ; put 0xffff in r9, faster than move.d 0xffff,r9 lsrq 16,$r9 move.d $r12,$r13 lsrq 16,$r13 ; r13 = checksum >> 16 - and.d $r9,$r12 ; checksum = checksum & 0xffff - add.d $r13,$r12 ; checksum += r13 - move.d $r12,$r13 ; do the same again, maybe we got a carry last add - lsrq 16,$r13 - and.d $r9,$r12 - add.d $r13,$r12 + and.d $r9,$r12 ; checksum = checksum & 0xffff _no_fold: - cmpq 2,$r11 + subq 2,$r11 blt _no_words - nop + add.d $r13,$r12 ; checksum += r13 ;; checksum the rest of the words - - subq 2,$r11 - _wloop: subq 2,$r11 bge _wloop addu.w [$r10+],$r12 - addq 2,$r11 - _no_words: + addq 2,$r11 ;; see if we have one odd byte more - cmpq 1,$r11 - beq _do_byte + bne _do_byte nop ret move.d $r12,$r10 diff --git a/arch/cris/arch-v32/lib/checksumcopy.S b/arch/cris/arch-v32/lib/checksumcopy.S index 9303ccbadc6d..21aabe91489b 100644 --- a/arch/cris/arch-v32/lib/checksumcopy.S +++ b/arch/cris/arch-v32/lib/checksumcopy.S @@ -1,6 +1,6 @@ /* * A fast checksum+copy routine using movem - * Copyright (c) 1998, 2001, 2003 Axis Communications AB + * Copyright (c) 1998-2007 Axis Communications AB * * Authors: Bjorn Wesen * @@ -16,32 +16,23 @@ csum_partial_copy_nocheck: ;; r12 - length ;; r13 - checksum - ;; check for breakeven length between movem and normal word looping versions - ;; we also do _NOT_ want to compute a checksum over more than the - ;; actual length when length < 40 - - cmpu.w 80,$r12 - blo _word_loop - nop - - ;; need to save the registers we use below in the movem loop - ;; this overhead is why we have a check above for breakeven length - ;; only r0 - r8 have to be saved, the other ones are clobber-able - ;; according to the ABI + ;; Optimized for large packets + subq 10*4, $r12 + blt _word_loop + move.d $r12, $acr subq 9*4,$sp - subq 10*4,$r12 ; update length for the first loop + clearf c movem $r8,[$sp] ;; do a movem copy and checksum - 1: ;; A failing userspace access (the read) will have this as PC. _mloop: movem [$r10+],$r9 ; read 10 longwords + addoq -10*4, $acr, $acr ; loop counter in latency cycle movem $r9,[$r11+] ; write 10 longwords ;; perform dword checksumming on the 10 longwords - - add.d $r0,$r13 + addc $r0,$r13 addc $r1,$r13 addc $r2,$r13 addc $r3,$r13 @@ -52,47 +43,30 @@ _mloop: movem [$r10+],$r9 ; read 10 longwords addc $r8,$r13 addc $r9,$r13 - ;; fold the carry into the checksum, to avoid having to loop the carry - ;; back into the top - - addc 0,$r13 - addc 0,$r13 ; do it again, since we might have generated a carry - - subq 10*4,$r12 - bge _mloop - nop - - addq 10*4,$r12 ; compensate for last loop underflowing length + ;; test $acr, without trashing carry. + move.d $acr, $acr + bpl _mloop + ;; r12 <= acr is needed after mloop and in the exception handlers. + move.d $acr, $r12 + ;; fold the last carry into r13 + addc 0, $r13 movem [$sp+],$r8 ; restore regs _word_loop: - ;; only fold if there is anything to fold. - - cmpq 0,$r13 - beq _no_fold + addq 10*4,$r12 ; compensate for last loop underflowing length ;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below ;; r9 can be used as temporary. - move.d $r13,$r9 lsrq 16,$r9 ; r0 = checksum >> 16 and.d 0xffff,$r13 ; checksum = checksum & 0xffff - add.d $r9,$r13 ; checksum += r0 - move.d $r13,$r9 ; do the same again, maybe we got a carry last add - lsrq 16,$r9 - and.d 0xffff,$r13 - add.d $r9,$r13 -_no_fold: - cmpq 2,$r12 + subq 2, $r12 blt _no_words - nop + add.d $r9,$r13 ; checksum += r0 ;; copy and checksum the rest of the words - - subq 2,$r12 - 2: ;; A failing userspace access for the read below will have this as PC. _wloop: move.w [$r10+],$r9 addu.w $r9,$r13 @@ -100,12 +74,9 @@ _wloop: move.w [$r10+],$r9 bge _wloop move.w $r9,[$r11+] - addq 2,$r12 - _no_words: - ;; see if we have one odd byte more - cmpq 1,$r12 - beq _do_byte + addq 2,$r12 + bne _do_byte nop ret move.d $r13,$r10 diff --git a/arch/cris/arch-v32/lib/delay.c b/arch/cris/arch-v32/lib/delay.c new file mode 100644 index 000000000000..39f1ac9995b4 --- /dev/null +++ b/arch/cris/arch-v32/lib/delay.c @@ -0,0 +1,28 @@ +/* + * Precise Delay Loops for ETRAX FS + * + * Copyright (C) 2006 Axis Communications AB. + * + */ + +#include <hwregs/reg_map.h> +#include <hwregs/reg_rdwr.h> +#include <hwregs/timer_defs.h> +#include <linux/types.h> +#include <linux/delay.h> +#include <linux/module.h> + +/* + * On ETRAX FS, we can check the free-running read-only 100MHz timer + * getting 32-bit 10ns precision, theoretically good for 42.94967295 + * seconds. Unsigned arithmetic and careful expression handles + * wrapping. + */ + +void cris_delay10ns(u32 n10ns) +{ + u32 t0 = REG_RD(timer, regi_timer0, r_time); + while (REG_RD(timer, regi_timer0, r_time) - t0 < n10ns) + ; +} +EXPORT_SYMBOL(cris_delay10ns); diff --git a/arch/cris/arch-v32/lib/dram_init.S b/arch/cris/arch-v32/lib/dram_init.S deleted file mode 100644 index 218fbe259ee5..000000000000 --- a/arch/cris/arch-v32/lib/dram_init.S +++ /dev/null @@ -1,119 +0,0 @@ -/* $Id: dram_init.S,v 1.4 2005/04/24 18:48:32 starvik Exp $ - * - * DRAM/SDRAM initialization - alter with care - * This file is intended to be included from other assembler files - * - * Note: This file may not modify r8 or r9 because they are used to - * carry information from the decompresser to the kernel - * - * Copyright (C) 2000-2003 Axis Communications AB - * - * Authors: Mikael Starvik (starvik@axis.com) - */ - -/* Just to be certain the config file is included, we include it here - * explicitly instead of depending on it being included in the file that - * uses this code. - */ - -#include <asm/arch/hwregs/asm/reg_map_asm.h> -#include <asm/arch/hwregs/asm/bif_core_defs_asm.h> - - ;; WARNING! The registers r8 and r9 are used as parameters carrying - ;; information from the decompressor (if the kernel was compressed). - ;; They should not be used in the code below. - - ; Refer to BIF MDS for a description of SDRAM initialization - - ; Bank configuration - move.d REG_ADDR(bif_core, regi_bif_core, rw_sdram_cfg_grp0), $r0 - move.d CONFIG_ETRAX_SDRAM_GRP0_CONFIG, $r1 - move.d $r1, [$r0] - move.d REG_ADDR(bif_core, regi_bif_core, rw_sdram_cfg_grp1), $r0 - move.d CONFIG_ETRAX_SDRAM_GRP1_CONFIG, $r1 - move.d $r1, [$r0] - - ; Calculate value of mrs_data - ; CAS latency = 2 && bus_width = 32 => 0x40 - ; CAS latency = 3 && bus_width = 32 => 0x60 - ; CAS latency = 2 && bus_width = 16 => 0x20 - ; CAS latency = 3 && bus_width = 16 => 0x30 - - ; Check if value is already supplied in kernel config - move.d CONFIG_ETRAX_SDRAM_COMMAND, $r2 - bne _set_timing - nop - - move.d 0x40, $r4 ; Assume 32 bits and CAS latency = 2 - move.d CONFIG_ETRAX_SDRAM_TIMING, $r1 - and.d 0x07, $r1 ; Get CAS latency - cmpq 2, $r1 ; CL = 2 ? - beq _bw_check - nop - move.d 0x60, $r4 - -_bw_check: - ; Assume that group 0 width is equal to group 1. This assumption - ; is wrong for a group 1 only hardware (such as the grand old - ; StorPoint+). - move.d CONFIG_ETRAX_SDRAM_GRP0_CONFIG, $r1 - and.d 0x200, $r1 ; DRAM width is bit 9 - beq _set_timing - lslq 2, $r4 ; mrs_data starts at bit 2 - lsrq 1, $r4 ; 16 bits. Shift down value. - - ; Set timing parameters (refresh off to avoid Guinness TR 83) -_set_timing: - move.d CONFIG_ETRAX_SDRAM_TIMING, $r1 - and.d ~(3 << reg_bif_core_rw_sdram_timing___ref___lsb), $r1 - move.d REG_ADDR(bif_core, regi_bif_core, rw_sdram_timing), $r0 - move.d $r1, [$r0] - - ; Issue NOP command - move.d REG_ADDR(bif_core, regi_bif_core, rw_sdram_cmd), $r5 - moveq regk_bif_core_nop, $r1 - move.d $r1, [$r5] - - ; Wait 200us - move.d 10000, $r2 -1: bne 1b - subq 1, $r2 - - ; Issue initialization command sequence - move.d _sdram_commands_start, $r2 - and.d 0x000fffff, $r2 ; Make sure commands are read from flash - move.d _sdram_commands_end, $r3 - and.d 0x000fffff, $r3 -1: clear.d $r6 - move.b [$r2+], $r6 ; Load command - or.d $r4, $r6 ; Add calculated mrs - move.d $r6, [$r5] ; Write rw_sdram_cmd - ; Wait 80 ns between each command - move.d 4000, $r7 -2: bne 2b - subq 1, $r7 - cmp.d $r2, $r3 ; Last command? - bne 1b - nop - - ; Start refresh - move.d CONFIG_ETRAX_SDRAM_TIMING, $r1 - move.d REG_ADDR(bif_core, regi_bif_core, rw_sdram_timing), $r0 - move.d $r1, [$r0] - - ; Initialization finished - ba _sdram_commands_end - nop - -_sdram_commands_start: - .byte regk_bif_core_pre ; Precharge - .byte regk_bif_core_ref ; refresh - .byte regk_bif_core_ref ; refresh - .byte regk_bif_core_ref ; refresh - .byte regk_bif_core_ref ; refresh - .byte regk_bif_core_ref ; refresh - .byte regk_bif_core_ref ; refresh - .byte regk_bif_core_ref ; refresh - .byte regk_bif_core_ref ; refresh - .byte regk_bif_core_mrs ; mrs -_sdram_commands_end: diff --git a/arch/cris/arch-v32/lib/hw_settings.S b/arch/cris/arch-v32/lib/hw_settings.S deleted file mode 100644 index fff9443513d1..000000000000 --- a/arch/cris/arch-v32/lib/hw_settings.S +++ /dev/null @@ -1,72 +0,0 @@ -/* - * $Id: hw_settings.S,v 1.3 2005/04/24 18:36:57 starvik Exp $ - * - * This table is used by some tools to extract hardware parameters. - * The table should be included in the kernel and the decompressor. - * Don't forget to update the tools if you change this table. - * - * Copyright (C) 2001 Axis Communications AB - * - * Authors: Mikael Starvik (starvik@axis.com) - */ - -#include <asm/arch/hwregs/asm/reg_map_asm.h> -#include <asm/arch/hwregs/asm/bif_core_defs_asm.h> -#include <asm/arch/hwregs/asm/gio_defs_asm.h> - - .ascii "HW_PARAM_MAGIC" ; Magic number - .dword 0xc0004000 ; Kernel start address - - ; Debug port -#ifdef CONFIG_ETRAX_DEBUG_PORT0 - .dword 0 -#elif defined(CONFIG_ETRAX_DEBUG_PORT1) - .dword 1 -#elif defined(CONFIG_ETRAX_DEBUG_PORT2) - .dword 2 -#elif defined(CONFIG_ETRAX_DEBUG_PORT3) - .dword 3 -#else - .dword 4 ; No debug -#endif - - ; Register values - .dword REG_ADDR(bif_core, regi_bif_core, rw_grp1_cfg) - .dword CONFIG_ETRAX_MEM_GRP1_CONFIG - .dword REG_ADDR(bif_core, regi_bif_core, rw_grp2_cfg) - .dword CONFIG_ETRAX_MEM_GRP2_CONFIG - .dword REG_ADDR(bif_core, regi_bif_core, rw_grp3_cfg) - .dword CONFIG_ETRAX_MEM_GRP3_CONFIG - .dword REG_ADDR(bif_core, regi_bif_core, rw_grp4_cfg) - .dword CONFIG_ETRAX_MEM_GRP4_CONFIG - .dword REG_ADDR(bif_core, regi_bif_core, rw_sdram_cfg_grp0) - .dword CONFIG_ETRAX_SDRAM_GRP0_CONFIG - .dword REG_ADDR(bif_core, regi_bif_core, rw_sdram_cfg_grp1) - .dword CONFIG_ETRAX_SDRAM_GRP1_CONFIG - .dword REG_ADDR(bif_core, regi_bif_core, rw_sdram_timing) - .dword CONFIG_ETRAX_SDRAM_TIMING - .dword REG_ADDR(bif_core, regi_bif_core, rw_sdram_cmd) - .dword CONFIG_ETRAX_SDRAM_COMMAND - - .dword REG_ADDR(gio, regi_gio, rw_pa_dout) - .dword CONFIG_ETRAX_DEF_GIO_PA_OUT - .dword REG_ADDR(gio, regi_gio, rw_pa_oe) - .dword CONFIG_ETRAX_DEF_GIO_PA_OE - .dword REG_ADDR(gio, regi_gio, rw_pb_dout) - .dword CONFIG_ETRAX_DEF_GIO_PB_OUT - .dword REG_ADDR(gio, regi_gio, rw_pb_oe) - .dword CONFIG_ETRAX_DEF_GIO_PB_OE - .dword REG_ADDR(gio, regi_gio, rw_pc_dout) - .dword CONFIG_ETRAX_DEF_GIO_PC_OUT - .dword REG_ADDR(gio, regi_gio, rw_pc_oe) - .dword CONFIG_ETRAX_DEF_GIO_PC_OE - .dword REG_ADDR(gio, regi_gio, rw_pd_dout) - .dword CONFIG_ETRAX_DEF_GIO_PD_OUT - .dword REG_ADDR(gio, regi_gio, rw_pd_oe) - .dword CONFIG_ETRAX_DEF_GIO_PD_OE - .dword REG_ADDR(gio, regi_gio, rw_pe_dout) - .dword CONFIG_ETRAX_DEF_GIO_PE_OUT - .dword REG_ADDR(gio, regi_gio, rw_pe_oe) - .dword CONFIG_ETRAX_DEF_GIO_PE_OE - - .dword 0 ; No more register values diff --git a/arch/cris/arch-v32/lib/spinlock.S b/arch/cris/arch-v32/lib/spinlock.S index 2437ae7f6ed2..79087ef59a1c 100644 --- a/arch/cris/arch-v32/lib/spinlock.S +++ b/arch/cris/arch-v32/lib/spinlock.S @@ -12,11 +12,11 @@ cris_spin_lock: clearf p -1: test.d [$r10] +1: test.b [$r10] beq 1b clearf p ax - clear.d [$r10] + clear.b [$r10] bcs 1b clearf p ret @@ -24,10 +24,10 @@ cris_spin_lock: cris_spin_trylock: clearf p -1: move.d [$r10], $r11 +1: move.b [$r10], $r11 ax - clear.d [$r10] + clear.b [$r10] bcs 1b clearf p ret - move.d $r11,$r10 + movu.b $r11,$r10 |