diff options
Diffstat (limited to 'include/asm-xtensa/xtensa/coreasm.h')
-rw-r--r-- | include/asm-xtensa/xtensa/coreasm.h | 526 |
1 files changed, 526 insertions, 0 deletions
diff --git a/include/asm-xtensa/xtensa/coreasm.h b/include/asm-xtensa/xtensa/coreasm.h new file mode 100644 index 000000000000..a8cfb54c20a1 --- /dev/null +++ b/include/asm-xtensa/xtensa/coreasm.h @@ -0,0 +1,526 @@ +#ifndef XTENSA_COREASM_H +#define XTENSA_COREASM_H + +/* + * THIS FILE IS GENERATED -- DO NOT MODIFY BY HAND + * + * include/asm-xtensa/xtensa/coreasm.h -- assembler-specific + * definitions that depend on CORE configuration. + * + * Source for configuration-independent binaries (which link in a + * configuration-specific HAL library) must NEVER include this file. + * It is perfectly normal, however, for the HAL itself to include this + * file. + * + * This file must NOT include xtensa/config/system.h. Any assembler + * header file that depends on system information should likely go in + * a new systemasm.h (or sysasm.h) header file. + * + * NOTE: macro beqi32 is NOT configuration-dependent, and is placed + * here til we will have configuration-independent header file. + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file "COPYING" in the main directory of + * this archive for more details. + * + * Copyright (C) 2002 Tensilica Inc. + */ + + +#include <xtensa/config/core.h> +#include <xtensa/config/specreg.h> + +/* + * Assembly-language specific definitions (assembly macros, etc.). + */ + +/*---------------------------------------------------------------------- + * find_ms_setbit + * + * This macro finds the most significant bit that is set in <as> + * and return its index + <base> in <ad>, or <base> - 1 if <as> is zero. + * The index counts starting at zero for the lsbit, so the return + * value ranges from <base>-1 (no bit set) to <base>+31 (msbit set). + * + * Parameters: + * <ad> destination address register (any register) + * <as> source address register + * <at> temporary address register (must be different than <as>) + * <base> constant value added to result (usually 0 or 1) + * On entry: + * <ad> = undefined if different than <as> + * <as> = value whose most significant set bit is to be found + * <at> = undefined + * no other registers are used by this macro. + * On exit: + * <ad> = <base> + index of msbit set in original <as>, + * = <base> - 1 if original <as> was zero. + * <as> clobbered (if not <ad>) + * <at> clobbered (if not <ad>) + * Example: + * find_ms_setbit a0, a4, a0, 0 -- return in a0 index of msbit set in a4 + */ + + .macro find_ms_setbit ad, as, at, base +#if XCHAL_HAVE_NSA + movi \at, 31+\base + nsau \as, \as // get index of \as, numbered from msbit (32 if absent) + sub \ad, \at, \as // get numbering from lsbit (0..31, -1 if absent) +#else /* XCHAL_HAVE_NSA */ + movi \at, \base // start with result of 0 (point to lsbit of 32) + + beqz \as, 2f // special case for zero argument: return -1 + bltui \as, 0x10000, 1f // is it one of the 16 lsbits? (if so, check lower 16 bits) + addi \at, \at, 16 // no, increment result to upper 16 bits (of 32) + //srli \as, \as, 16 // check upper half (shift right 16 bits) + extui \as, \as, 16, 16 // check upper half (shift right 16 bits) +1: bltui \as, 0x100, 1f // is it one of the 8 lsbits? (if so, check lower 8 bits) + addi \at, \at, 8 // no, increment result to upper 8 bits (of 16) + srli \as, \as, 8 // shift right to check upper 8 bits +1: bltui \as, 0x10, 1f // is it one of the 4 lsbits? (if so, check lower 4 bits) + addi \at, \at, 4 // no, increment result to upper 4 bits (of 8) + srli \as, \as, 4 // shift right 4 bits to check upper half +1: bltui \as, 0x4, 1f // is it one of the 2 lsbits? (if so, check lower 2 bits) + addi \at, \at, 2 // no, increment result to upper 2 bits (of 4) + srli \as, \as, 2 // shift right 2 bits to check upper half +1: bltui \as, 0x2, 1f // is it the lsbit? + addi \at, \at, 2 // no, increment result to upper bit (of 2) +2: addi \at, \at, -1 // (from just above: add 1; from beqz: return -1) + //srli \as, \as, 1 +1: // done! \at contains index of msbit set (or -1 if none set) + .if 0x\ad - 0x\at // destination different than \at ? (works because regs are a0-a15) + mov \ad, \at // then move result to \ad + .endif +#endif /* XCHAL_HAVE_NSA */ + .endm // find_ms_setbit + +/*---------------------------------------------------------------------- + * find_ls_setbit + * + * This macro finds the least significant bit that is set in <as>, + * and return its index in <ad>. + * Usage is the same as for the find_ms_setbit macro. + * Example: + * find_ls_setbit a0, a4, a0, 0 -- return in a0 index of lsbit set in a4 + */ + + .macro find_ls_setbit ad, as, at, base + neg \at, \as // keep only the least-significant bit that is set... + and \as, \at, \as // ... in \as + find_ms_setbit \ad, \as, \at, \base + .endm // find_ls_setbit + +/*---------------------------------------------------------------------- + * find_ls_one + * + * Same as find_ls_setbit with base zero. + * Source (as) and destination (ad) registers must be different. + * Provided for backward compatibility. + */ + + .macro find_ls_one ad, as + find_ls_setbit \ad, \as, \ad, 0 + .endm // find_ls_one + +/*---------------------------------------------------------------------- + * floop, floopnez, floopgtz, floopend + * + * These macros are used for fast inner loops that + * work whether or not the Loops options is configured. + * If the Loops option is configured, they simply use + * the zero-overhead LOOP instructions; otherwise + * they use explicit decrement and branch instructions. + * + * They are used in pairs, with floop, floopnez or floopgtz + * at the beginning of the loop, and floopend at the end. + * + * Each pair of loop macro calls must be given the loop count + * address register and a unique label for that loop. + * + * Example: + * + * movi a3, 16 // loop 16 times + * floop a3, myloop1 + * : + * bnez a7, end1 // exit loop if a7 != 0 + * : + * floopend a3, myloop1 + * end1: + * + * Like the LOOP instructions, these macros cannot be + * nested, must include at least one instruction, + * cannot call functions inside the loop, etc. + * The loop can be exited by jumping to the instruction + * following floopend (or elsewhere outside the loop), + * or continued by jumping to a NOP instruction placed + * immediately before floopend. + * + * Unlike LOOP instructions, the register passed to floop* + * cannot be used inside the loop, because it is used as + * the loop counter if the Loops option is not configured. + * And its value is undefined after exiting the loop. + * And because the loop counter register is active inside + * the loop, you can't easily use this construct to loop + * across a register file using ROTW as you might with LOOP + * instructions, unless you copy the loop register along. + */ + + /* Named label version of the macros: */ + + .macro floop ar, endlabel + floop_ \ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel + .endm + + .macro floopnez ar, endlabel + floopnez_ \ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel + .endm + + .macro floopgtz ar, endlabel + floopgtz_ \ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel + .endm + + .macro floopend ar, endlabel + floopend_ \ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel + .endm + + /* Numbered local label version of the macros: */ +#if 0 /*UNTESTED*/ + .macro floop89 ar + floop_ \ar, 8, 9f + .endm + + .macro floopnez89 ar + floopnez_ \ar, 8, 9f + .endm + + .macro floopgtz89 ar + floopgtz_ \ar, 8, 9f + .endm + + .macro floopend89 ar + floopend_ \ar, 8b, 9 + .endm +#endif /*0*/ + + /* Underlying version of the macros: */ + + .macro floop_ ar, startlabel, endlabelref + .ifdef _infloop_ + .if _infloop_ + .err // Error: floop cannot be nested + .endif + .endif + .set _infloop_, 1 +#if XCHAL_HAVE_LOOPS + loop \ar, \endlabelref +#else /* XCHAL_HAVE_LOOPS */ +\startlabel: + addi \ar, \ar, -1 +#endif /* XCHAL_HAVE_LOOPS */ + .endm // floop_ + + .macro floopnez_ ar, startlabel, endlabelref + .ifdef _infloop_ + .if _infloop_ + .err // Error: floopnez cannot be nested + .endif + .endif + .set _infloop_, 1 +#if XCHAL_HAVE_LOOPS + loopnez \ar, \endlabelref +#else /* XCHAL_HAVE_LOOPS */ + beqz \ar, \endlabelref +\startlabel: + addi \ar, \ar, -1 +#endif /* XCHAL_HAVE_LOOPS */ + .endm // floopnez_ + + .macro floopgtz_ ar, startlabel, endlabelref + .ifdef _infloop_ + .if _infloop_ + .err // Error: floopgtz cannot be nested + .endif + .endif + .set _infloop_, 1 +#if XCHAL_HAVE_LOOPS + loopgtz \ar, \endlabelref +#else /* XCHAL_HAVE_LOOPS */ + bltz \ar, \endlabelref + beqz \ar, \endlabelref +\startlabel: + addi \ar, \ar, -1 +#endif /* XCHAL_HAVE_LOOPS */ + .endm // floopgtz_ + + + .macro floopend_ ar, startlabelref, endlabel + .ifndef _infloop_ + .err // Error: floopend without matching floopXXX + .endif + .ifeq _infloop_ + .err // Error: floopend without matching floopXXX + .endif + .set _infloop_, 0 +#if ! XCHAL_HAVE_LOOPS + bnez \ar, \startlabelref +#endif /* XCHAL_HAVE_LOOPS */ +\endlabel: + .endm // floopend_ + +/*---------------------------------------------------------------------- + * crsil -- conditional RSIL (read/set interrupt level) + * + * Executes the RSIL instruction if it exists, else just reads PS. + * The RSIL instruction does not exist in the new exception architecture + * if the interrupt option is not selected. + */ + + .macro crsil ar, newlevel +#if XCHAL_HAVE_OLD_EXC_ARCH || XCHAL_HAVE_INTERRUPTS + rsil \ar, \newlevel +#else + rsr \ar, PS +#endif + .endm // crsil + +/*---------------------------------------------------------------------- + * window_spill{4,8,12} + * + * These macros spill callers' register windows to the stack. + * They work for both privileged and non-privileged tasks. + * Must be called from a windowed ABI context, eg. within + * a windowed ABI function (ie. valid stack frame, window + * exceptions enabled, not in exception mode, etc). + * + * This macro requires a single invocation of the window_spill_common + * macro in the same assembly unit and section. + * + * Note that using window_spill{4,8,12} macros is more efficient + * than calling a function implemented using window_spill_function, + * because the latter needs extra code to figure out the size of + * the call to the spilling function. + * + * Example usage: + * + * .text + * .align 4 + * .global some_function + * .type some_function,@function + * some_function: + * entry a1, 16 + * : + * : + * + * window_spill4 // spill windows of some_function's callers; preserves a0..a3 only; + * // to use window_spill{8,12} in this example function we'd have + * // to increase space allocated by the entry instruction, because + * // 16 bytes only allows call4; 32 or 48 bytes (+locals) are needed + * // for call8/window_spill8 or call12/window_spill12 respectively. + * : + * + * retw + * + * window_spill_common // instantiates code used by window_spill4 + * + * + * On entry: + * none (if window_spill4) + * stack frame has enough space allocated for call8 (if window_spill8) + * stack frame has enough space allocated for call12 (if window_spill12) + * On exit: + * a4..a15 clobbered (if window_spill4) + * a8..a15 clobbered (if window_spill8) + * a12..a15 clobbered (if window_spill12) + * no caller windows are in live registers + */ + + .macro window_spill4 +#if XCHAL_HAVE_WINDOWED +# if XCHAL_NUM_AREGS == 16 + movi a15, 0 // for 16-register files, no need to call to reach the end +# elif XCHAL_NUM_AREGS == 32 + call4 .L__wdwspill_assist28 // call deep enough to clear out any live callers +# elif XCHAL_NUM_AREGS == 64 + call4 .L__wdwspill_assist60 // call deep enough to clear out any live callers +# endif +#endif + .endm // window_spill4 + + .macro window_spill8 +#if XCHAL_HAVE_WINDOWED +# if XCHAL_NUM_AREGS == 16 + movi a15, 0 // for 16-register files, no need to call to reach the end +# elif XCHAL_NUM_AREGS == 32 + call8 .L__wdwspill_assist24 // call deep enough to clear out any live callers +# elif XCHAL_NUM_AREGS == 64 + call8 .L__wdwspill_assist56 // call deep enough to clear out any live callers +# endif +#endif + .endm // window_spill8 + + .macro window_spill12 +#if XCHAL_HAVE_WINDOWED +# if XCHAL_NUM_AREGS == 16 + movi a15, 0 // for 16-register files, no need to call to reach the end +# elif XCHAL_NUM_AREGS == 32 + call12 .L__wdwspill_assist20 // call deep enough to clear out any live callers +# elif XCHAL_NUM_AREGS == 64 + call12 .L__wdwspill_assist52 // call deep enough to clear out any live callers +# endif +#endif + .endm // window_spill12 + +/*---------------------------------------------------------------------- + * window_spill_function + * + * This macro outputs a function that will spill its caller's callers' + * register windows to the stack. Eg. it could be used to implement + * a version of xthal_window_spill() that works in non-privileged tasks. + * This works for both privileged and non-privileged tasks. + * + * Typical usage: + * + * .text + * .align 4 + * .global my_spill_function + * .type my_spill_function,@function + * my_spill_function: + * window_spill_function + * + * On entry to resulting function: + * none + * On exit from resulting function: + * none (no caller windows are in live registers) + */ + + .macro window_spill_function +#if XCHAL_HAVE_WINDOWED +# if XCHAL_NUM_AREGS == 32 + entry sp, 48 + bbci.l a0, 31, 1f // branch if called with call4 + bbsi.l a0, 30, 2f // branch if called with call12 + call8 .L__wdwspill_assist16 // called with call8, only need another 8 + retw +1: call12 .L__wdwspill_assist16 // called with call4, only need another 12 + retw +2: call4 .L__wdwspill_assist16 // called with call12, only need another 4 + retw +# elif XCHAL_NUM_AREGS == 64 + entry sp, 48 + bbci.l a0, 31, 1f // branch if called with call4 + bbsi.l a0, 30, 2f // branch if called with call12 + call4 .L__wdwspill_assist52 // called with call8, only need a call4 + retw +1: call8 .L__wdwspill_assist52 // called with call4, only need a call8 + retw +2: call12 .L__wdwspill_assist40 // called with call12, can skip a call12 + retw +# elif XCHAL_NUM_AREGS == 16 + entry sp, 16 + bbci.l a0, 31, 1f // branch if called with call4 + bbsi.l a0, 30, 2f // branch if called with call12 + movi a7, 0 // called with call8 + retw +1: movi a11, 0 // called with call4 +2: retw // if called with call12, everything already spilled + +// movi a15, 0 // trick to spill all but the direct caller +// j 1f +// // The entry instruction is magical in the assembler (gets auto-aligned) +// // so we have to jump to it to avoid falling through the padding. +// // We need entry/retw to know where to return. +//1: entry sp, 16 +// retw +# else +# error "unrecognized address register file size" +# endif +#endif /* XCHAL_HAVE_WINDOWED */ + window_spill_common + .endm // window_spill_function + +/*---------------------------------------------------------------------- + * window_spill_common + * + * Common code used by any number of invocations of the window_spill## + * and window_spill_function macros. + * + * Must be instantiated exactly once within a given assembly unit, + * within call/j range of and same section as window_spill## + * macro invocations for that assembly unit. + * (Is automatically instantiated by the window_spill_function macro.) + */ + + .macro window_spill_common +#if XCHAL_HAVE_WINDOWED && (XCHAL_NUM_AREGS == 32 || XCHAL_NUM_AREGS == 64) + .ifndef .L__wdwspill_defined +# if XCHAL_NUM_AREGS >= 64 +.L__wdwspill_assist60: + entry sp, 32 + call8 .L__wdwspill_assist52 + retw +.L__wdwspill_assist56: + entry sp, 16 + call4 .L__wdwspill_assist52 + retw +.L__wdwspill_assist52: + entry sp, 48 + call12 .L__wdwspill_assist40 + retw +.L__wdwspill_assist40: + entry sp, 48 + call12 .L__wdwspill_assist28 + retw +# endif +.L__wdwspill_assist28: + entry sp, 48 + call12 .L__wdwspill_assist16 + retw +.L__wdwspill_assist24: + entry sp, 32 + call8 .L__wdwspill_assist16 + retw +.L__wdwspill_assist20: + entry sp, 16 + call4 .L__wdwspill_assist16 + retw +.L__wdwspill_assist16: + entry sp, 16 + movi a15, 0 + retw + .set .L__wdwspill_defined, 1 + .endif +#endif /* XCHAL_HAVE_WINDOWED with 32 or 64 aregs */ + .endm // window_spill_common + +/*---------------------------------------------------------------------- + * beqi32 + * + * macro implements version of beqi for arbitrary 32-bit immidiate value + * + * beqi32 ax, ay, imm32, label + * + * Compares value in register ax with imm32 value and jumps to label if + * equal. Clobberes register ay if needed + * + */ + .macro beqi32 ax, ay, imm, label + .ifeq ((\imm-1) & ~7) // 1..8 ? + beqi \ax, \imm, \label + .else + .ifeq (\imm+1) // -1 ? + beqi \ax, \imm, \label + .else + .ifeq (\imm) // 0 ? + beqz \ax, \label + .else + // We could also handle immediates 10,12,16,32,64,128,256 + // but it would be a long macro... + movi \ay, \imm + beq \ax, \ay, \label + .endif + .endif + .endif + .endm // beqi32 + +#endif /*XTENSA_COREASM_H*/ + |