| .file "reg_round.S" |
| /*---------------------------------------------------------------------------+ |
| | reg_round.S | |
| | | |
| | Rounding/truncation/etc for FPU basic arithmetic functions. | |
| | | |
| | Copyright (C) 1993,1995,1997 | |
| | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| | Australia. E-mail billm@suburbia.net | |
| | | |
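| | This code has three entry points. | |
| | The following must be entered by a jmp instruction: | |
| | fpu_reg_round and fpu_Arith_exit. | |
| | (An fpu_reg_round_sqrt entry point is not defined in this file.) | |
| | | |
| | The FPU_round entry point is intended to be used by C code. | |
| | From C, call as: | |
| | int FPU_round(FPU_REG *arg, unsigned int extent, int dummy, | |
| | unsigned int control_w, u_char sign) | |
| | (The code reads the control word from PARAM4 and the sign from PARAM5; | |
| | the third argument is not read. Check the C declaration for the | |
| | exact parameter names and types.) | |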
| | | |
| | Return value is the tag of the answer, or-ed with FPU_Exception if | |
| | one was raised, or -1 on internal error. | |
| | | |
| | For correct "up" and "down" rounding, the argument must have the correct | |
| | sign. | |
| | | |
| +---------------------------------------------------------------------------*/ |
| |
| /*---------------------------------------------------------------------------+ |
| | Four entry points. | |
| | | |
| | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points: | |
| | %eax:%ebx 64 bit significand | |
| | %edx 32 bit extension of the significand | |
| | %edi pointer to an FPU_REG for the result to be stored | |
| | stack calling function must have set up a C stack frame and | |
| | pushed %esi, %edi, and %ebx | |
| | | |
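| | Needed by every entry point that performs rounding: | |
| | PARAM4 A control word in the same format as the FPU control word. | |
| | (The fpu_reg_round_sqrt entry point, which took it in %cx, | |
| | is not defined in this file.) | |
| | PARAM5 The sign of the result; it is compared against SIGN_POS. | |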
| | | |
| | | |
| | The significand and its extension are assumed to be exact in the | |
| | following sense: | |
| | If the significand by itself is the exact result then the significand | |
| | extension (%edx) must contain 0, otherwise the significand extension | |
| | must be non-zero. | |
| | If the significand extension is non-zero then the significand is | |
| | smaller than the magnitude of the correct exact result by an amount | |
| | greater than zero and less than one ls bit of the significand. | |
| | The significand extension is only required to have three possible | |
| | non-zero values: | |
| | less than 0x80000000 <=> the significand is less than 1/2 an ls | |
| | bit smaller than the magnitude of the | |
| | true exact result. | |
| | exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit | |
| | smaller than the magnitude of the true | |
| | exact result. | |
| | greater than 0x80000000 <=> the significand is more than 1/2 an ls | |
| | bit smaller than the magnitude of the | |
| | true exact result. | |
| | | |
| +---------------------------------------------------------------------------*/ |
| |
| /*---------------------------------------------------------------------------+ |
| | The code in this module has become quite complex, but it should handle | |
| | all of the FPU flags which are set at this stage of the basic arithmetic | |
| | computations. | |
| | There are a few rare cases where the results are not set identically to | |
| | a real FPU. These require a bit more thought because at this stage the | |
| | results of the code here appear to be more consistent... | |
| | This may be changed in a future version. | |
| +---------------------------------------------------------------------------*/ |
| |
| |
| #include "fpu_emu.h" |
| #include "exception.h" |
| #include "control_w.h" |
| |
| /* Flags for FPU_bits_lost: records whether rounding discarded bits and in |
| which direction. LOST_DOWN is stored when the significand is truncated, |
| LOST_UP when it is incremented; zero means nothing was lost. */ |
| #define LOST_DOWN $1 |
| #define LOST_UP $2 |
| |
| /* Flags for FPU_denormal: DENORMAL is stored when the significand has been |
| shifted right for a masked underflow, UNMASKED_UNDERFLOW when the |
| exponent is too small but the underflow exception is unmasked, so no |
| shift is done. Zero means the exponent was above EXP_UNDER on entry. */ |
| #define DENORMAL $1 |
| #define UNMASKED_UNDERFLOW $2 |
| |
| |
| #ifndef NON_REENTRANT_FPU |
| /* Make the code re-entrant by putting |
| local storage on the stack: the two bytes occupy the dword pushed at |
| fpu_reg_round and popped at fpu_reg_round_special_exit, so they may |
| only be referenced while that dword is on top of the stack. */ |
| #define FPU_bits_lost (%esp) |
| #define FPU_denormal 1(%esp) |
| |
| #else |
| /* Not re-entrant, so we can gain speed by putting |
| local storage in a static area: */ |
| .data |
| .align 4,0 |
| FPU_bits_lost: |
| .byte 0 |
| FPU_denormal: |
| .byte 0 |
| #endif /* NON_REENTRANT_FPU */ |
| |
| |
| .text |
| .globl fpu_reg_round |
| .globl fpu_Arith_exit |
| |
| /* Entry point when called from C. |
| Builds the stack frame that fpu_Arith_exit tears down (saved %ebp, then |
| %esi, %edi, %ebx), loads the registers that fpu_reg_round expects and |
| falls through into it. The PARAMn macros come from fpu_emu.h (not |
| shown here). */ |
| ENTRY(FPU_round) |
| pushl %ebp |
| movl %esp,%ebp |
| pushl %esi |
| pushl %edi |
| pushl %ebx |
| |
| movl PARAM1,%edi /* %edi = FPU_REG holding the operand and the result */ |
| movl SIGH(%edi),%eax /* %eax:%ebx = its 64 bit significand */ |
| movl SIGL(%edi),%ebx |
| movl PARAM2,%edx /* %edx = 32 bit extension of the significand */ |
| |
| fpu_reg_round: /* Normal entry point: entered by jmp, or by falling through |
| from FPU_round. Expects %eax:%ebx = significand, %edx = extension, |
| %edi = FPU_REG pointer, and the stack frame already set up. */ |
| movl PARAM4,%ecx /* %ecx = control word */ |
| |
| #ifndef NON_REENTRANT_FPU |
| pushl %ebx /* adjust the stack pointer: reserves the dword that holds |
| FPU_bits_lost and FPU_denormal; the value pushed is not significant */ |
| #endif /* NON_REENTRANT_FPU */ |
| |
| #ifdef PARANOID |
| /* Cannot use this here yet */ |
| /* orl %eax,%eax */ |
| /* jns L_entry_bugged */ |
| #endif /* PARANOID */ |
| |
| cmpw EXP_UNDER,EXP(%edi) /* signed compare of the 16 bit exponent */ |
| jle L_Make_denorm /* The number is a de-normal */ |
| |
| movb $0,FPU_denormal /* 0 -> not a de-normal */ |
| |
| Denorm_done: /* Also reached from L_Make_denorm and Unmasked_underflow, which |
| store their own value in FPU_denormal and leave the control word in |
| %ecx. Dispatches on the precision control field of the control word. */ |
| movb $0,FPU_bits_lost /* No bits yet lost in rounding */ |
| |
| movl %ecx,%esi /* keep the whole control word; %ecx is masked next */ |
| andl CW_PC,%ecx |
| cmpl PR_64_BITS,%ecx |
| je LRound_To_64 |
| |
| cmpl PR_53_BITS,%ecx |
| je LRound_To_53 |
| |
| cmpl PR_24_BITS,%ecx |
| je LRound_To_24 /* if nothing below jumps away, execution also falls |
| through into LRound_To_24, which follows this block */ |
| |
| #ifdef PECULIAR_486 |
| /* With the precision control bits set to 01 "(reserved)", a real 80486 |
| behaves as if the precision control bits were set to 11 "64 bits" */ |
| cmpl PR_RESERVED_BITS,%ecx |
| je LRound_To_64 |
| #ifdef PARANOID |
| jmp L_bugged_denorm_486 |
| #endif /* PARANOID */ |
| #else |
| #ifdef PARANOID |
| jmp L_bugged_denorm /* There is no bug, just a bad control word */ |
| #endif /* PARANOID */ |
| #endif /* PECULIAR_486 */ |
| |
| |
| /* Round etc to 24 bit precision. |
| The result keeps the upper 24 bits of %eax; the low 8 bits of %eax, all |
| of %ebx and the extension in %edx are the bits to be rounded away. */ |
| LRound_To_24: |
| movl %esi,%ecx /* %esi = control word saved at Denorm_done */ |
| andl CW_RC,%ecx /* isolate the rounding control field */ |
| cmpl RC_RND,%ecx |
| je LRound_nearest_24 |
| |
| cmpl RC_CHOP,%ecx |
| je LCheck_truncate_24 |
| |
| cmpl RC_UP,%ecx /* Towards +infinity */ |
| je LUp_24 |
| |
| cmpl RC_DOWN,%ecx /* Towards -infinity */ |
| je LDown_24 |
| |
| #ifdef PARANOID |
| jmp L_bugged_round24 |
| #endif /* PARANOID */ |
| |
| LUp_24: /* PARAM5 holds the sign of the result */ |
| cmpb SIGN_POS,PARAM5 |
| jne LCheck_truncate_24 /* If negative then up==truncate */ |
| |
| jmp LCheck_24_round_up |
| |
| LDown_24: |
| cmpb SIGN_POS,PARAM5 |
| je LCheck_truncate_24 /* If positive then down==truncate */ |
| |
| LCheck_24_round_up: /* Directed rounding away from zero: increment if any |
| discarded bit is set, otherwise the value is already exact. */ |
| movl %eax,%ecx |
| andl $0x000000ff,%ecx |
| orl %ebx,%ecx |
| orl %edx,%ecx |
| jnz LDo_24_round_up |
| jmp L_Re_normalise |
| |
| LRound_nearest_24: |
| /* Do rounding of the 24th bit if needed (nearest or even) */ |
| movl %eax,%ecx |
| andl $0x000000ff,%ecx |
| cmpl $0x00000080,%ecx /* compare the discarded byte with one half */ |
| jc LCheck_truncate_24 /* less than half, no increment needed */ |
| |
| jne LGreater_Half_24 /* greater than half, increment needed */ |
| |
| /* Possibly half, we need to check the ls bits */ |
| orl %ebx,%ebx |
| jnz LGreater_Half_24 /* greater than half, increment needed */ |
| |
| orl %edx,%edx |
| jnz LGreater_Half_24 /* greater than half, increment needed */ |
| |
| /* Exactly half, increment only if 24th bit is 1 (round to even) */ |
| testl $0x00000100,%eax |
| jz LDo_truncate_24 |
| |
| LGreater_Half_24: /* Rounding: increment at the 24th bit */ |
| LDo_24_round_up: |
| andl $0xffffff00,%eax /* Truncate to 24 bits */ |
| xorl %ebx,%ebx |
| movb LOST_UP,FPU_bits_lost |
| addl $0x00000100,%eax /* the carry out of %eax is tested at |
| LCheck_Round_Overflow */ |
| jmp LCheck_Round_Overflow |
| |
| LCheck_truncate_24: /* Truncation requested: only flag a loss if some |
| discarded bit is actually set. */ |
| movl %eax,%ecx |
| andl $0x000000ff,%ecx |
| orl %ebx,%ecx |
| orl %edx,%ecx |
| jz L_Re_normalise /* No truncation needed */ |
| |
| LDo_truncate_24: |
| andl $0xffffff00,%eax /* Truncate to 24 bits */ |
| xorl %ebx,%ebx |
| movb LOST_DOWN,FPU_bits_lost |
| jmp L_Re_normalise |
| |
| |
| /* Round etc to 53 bit precision. |
| The result keeps %eax and the upper 21 bits of %ebx; the low 11 bits of |
| %ebx and the extension in %edx are the bits to be rounded away. */ |
| LRound_To_53: |
| movl %esi,%ecx /* %esi = control word saved at Denorm_done */ |
| andl CW_RC,%ecx /* isolate the rounding control field */ |
| cmpl RC_RND,%ecx |
| je LRound_nearest_53 |
| |
| cmpl RC_CHOP,%ecx |
| je LCheck_truncate_53 |
| |
| cmpl RC_UP,%ecx /* Towards +infinity */ |
| je LUp_53 |
| |
| cmpl RC_DOWN,%ecx /* Towards -infinity */ |
| je LDown_53 |
| |
| #ifdef PARANOID |
| jmp L_bugged_round53 |
| #endif /* PARANOID */ |
| |
| LUp_53: /* PARAM5 holds the sign of the result */ |
| cmpb SIGN_POS,PARAM5 |
| jne LCheck_truncate_53 /* If negative then up==truncate */ |
| |
| jmp LCheck_53_round_up |
| |
| LDown_53: |
| cmpb SIGN_POS,PARAM5 |
| je LCheck_truncate_53 /* If positive then down==truncate */ |
| |
| LCheck_53_round_up: /* Directed rounding away from zero: increment if any |
| discarded bit is set, otherwise the value is already exact. */ |
| movl %ebx,%ecx |
| andl $0x000007ff,%ecx |
| orl %edx,%ecx |
| jnz LDo_53_round_up |
| jmp L_Re_normalise |
| |
| LRound_nearest_53: |
| /* Do rounding of the 53rd bit if needed (nearest or even) */ |
| movl %ebx,%ecx |
| andl $0x000007ff,%ecx |
| cmpl $0x00000400,%ecx /* compare the discarded 11 bits with one half */ |
| jc LCheck_truncate_53 /* less than half, no increment needed */ |
| |
| jnz LGreater_Half_53 /* greater than half, increment needed */ |
| |
| /* Possibly half, we need to check the ls bits */ |
| orl %edx,%edx |
| jnz LGreater_Half_53 /* greater than half, increment needed */ |
| |
| /* Exactly half, increment only if 53rd bit is 1 (round to even) */ |
| testl $0x00000800,%ebx |
| jz LTruncate_53 |
| |
| LGreater_Half_53: /* Rounding: increment at the 53rd bit */ |
| LDo_53_round_up: |
| movb LOST_UP,FPU_bits_lost |
| andl $0xfffff800,%ebx /* Truncate to 53 bits */ |
| addl $0x00000800,%ebx |
| adcl $0,%eax /* propagate the carry; the carry out of %eax is tested at |
| LCheck_Round_Overflow */ |
| jmp LCheck_Round_Overflow |
| |
| LCheck_truncate_53: /* Truncation requested: only flag a loss if some |
| discarded bit is actually set. */ |
| movl %ebx,%ecx |
| andl $0x000007ff,%ecx |
| orl %edx,%ecx |
| jz L_Re_normalise |
| |
| LTruncate_53: |
| movb LOST_DOWN,FPU_bits_lost |
| andl $0xfffff800,%ebx /* Truncate to 53 bits */ |
| jmp L_Re_normalise |
| |
| |
| /* Round etc to 64 bit precision. |
| The whole of %eax:%ebx is kept; only the extension in %edx is rounded |
| away. */ |
| LRound_To_64: |
| movl %esi,%ecx /* %esi = control word saved at Denorm_done */ |
| andl CW_RC,%ecx /* isolate the rounding control field */ |
| cmpl RC_RND,%ecx |
| je LRound_nearest_64 |
| |
| cmpl RC_CHOP,%ecx |
| je LCheck_truncate_64 |
| |
| cmpl RC_UP,%ecx /* Towards +infinity */ |
| je LUp_64 |
| |
| cmpl RC_DOWN,%ecx /* Towards -infinity */ |
| je LDown_64 |
| |
| #ifdef PARANOID |
| jmp L_bugged_round64 |
| #endif /* PARANOID */ |
| |
| LUp_64: /* PARAM5 holds the sign of the result */ |
| cmpb SIGN_POS,PARAM5 |
| jne LCheck_truncate_64 /* If negative then up==truncate */ |
| |
| orl %edx,%edx /* any extension bits set? */ |
| jnz LDo_64_round_up |
| jmp L_Re_normalise |
| |
| LDown_64: |
| cmpb SIGN_POS,PARAM5 |
| je LCheck_truncate_64 /* If positive then down==truncate */ |
| |
| orl %edx,%edx /* any extension bits set? */ |
| jnz LDo_64_round_up |
| jmp L_Re_normalise |
| |
| LRound_nearest_64: |
| cmpl $0x80000000,%edx /* compare the extension with one half */ |
| jc LCheck_truncate_64 /* less than half */ |
| |
| jne LDo_64_round_up /* more than half */ |
| |
| /* Now test for round-to-even */ |
| testb $1,%bl |
| jz LCheck_truncate_64 |
| |
| LDo_64_round_up: |
| movb LOST_UP,FPU_bits_lost |
| addl $1,%ebx |
| adcl $0,%eax |
| |
| LCheck_Round_Overflow: /* Shared by the 24, 53 and 64 bit round-up paths. |
| Entered with the carry flag left by the increment of %eax. */ |
| jnc L_Re_normalise |
| |
| /* Overflow, adjust the result (significand to 1.0) */ |
| rcrl $1,%eax /* rotate the carry back in as the ms bit */ |
| rcrl $1,%ebx |
| incw EXP(%edi) |
| jmp L_Re_normalise |
| |
| LCheck_truncate_64: |
| orl %edx,%edx |
| jz L_Re_normalise /* extension is zero: the result is exact */ |
| |
| LTruncate_64: /* Nothing to clear in %eax:%ebx; just note the loss and fall |
| through into L_Re_normalise. */ |
| movb LOST_DOWN,FPU_bits_lost |
| |
| L_Re_normalise: /* Common continuation of every rounding path. |
| %eax:%ebx = rounded significand, %edi = FPU_REG pointer. */ |
| testb $0xff,FPU_denormal |
| jnz Normalise_result /* underflow handling is still pending */ |
| |
| L_Normalised: |
| movl TAG_Valid,%edx /* %edx = tag that will be returned */ |
| |
| L_deNormalised: /* Entered with the tag already in %edx */ |
| cmpb LOST_UP,FPU_bits_lost |
| je L_precision_lost_up |
| |
| cmpb LOST_DOWN,FPU_bits_lost |
| je L_precision_lost_down |
| |
| L_no_precision_loss: /* The precision-loss handlers jump back to here */ |
| /* store the result */ |
| |
| L_Store_significand: /* Also entered from L_underflow_to_zero */ |
| movl %eax,SIGH(%edi) |
| movl %ebx,SIGL(%edi) |
| |
| cmpw EXP_OVER,EXP(%edi) /* signed compare of the 16 bit exponent */ |
| jge L_overflow |
| |
| movl %edx,%eax /* return value = tag */ |
| |
| /* Convert the exponent to 80x87 form. */ |
| addw EXTENDED_Ebias,EXP(%edi) |
| andw $0x7fff,EXP(%edi) /* leaves bit 15 clear for the sign */ |
| |
| fpu_reg_round_signed_special_exit: /* Also the return path from L_overflow, |
| with %eax already holding the value to return. */ |
| |
| cmpb SIGN_POS,PARAM5 |
| je fpu_reg_round_special_exit |
| |
| orw $0x8000,EXP(%edi) /* Negative sign for the result. */ |
| |
| fpu_reg_round_special_exit: /* Also the return path from L_exception_exit */ |
| |
| #ifndef NON_REENTRANT_FPU |
| popl %ebx /* adjust the stack pointer: drops the local storage dword */ |
| #endif /* NON_REENTRANT_FPU */ |
| |
| fpu_Arith_exit: /* Restore the saved registers, drop the frame and return |
| with the result in %eax. */ |
| popl %ebx |
| popl %edi |
| popl %esi |
| leave |
| ret |
| |
| |
| /* |
| * Set the FPU status flags to represent precision loss due to |
| * round-up. |
| * %edx (tag) and %eax (ms word of the significand) are saved around the |
| * call; presumably set_precision_flag_up is a C function that may |
| * clobber them - confirm against its definition. |
| */ |
| L_precision_lost_up: |
| push %edx |
| push %eax |
| call set_precision_flag_up |
| popl %eax |
| popl %edx |
| jmp L_no_precision_loss |
| |
| /* |
| * Set the FPU status flags to represent precision loss due to |
| * truncation. |
| * %edx (tag) and %eax (ms word of the significand) are saved around the |
| * call; presumably set_precision_flag_down is a C function that may |
| * clobber them - confirm against its definition. |
| */ |
| L_precision_lost_down: |
| push %edx |
| push %eax |
| call set_precision_flag_down |
| popl %eax |
| popl %edx |
| jmp L_no_precision_loss |
| |
| |
| /* |
| * The number is a denormal (which might get rounded up to a normal) |
| * Shift the number right the required number of bits, which will |
| * have to be undone later... |
| * On entry %ecx holds the control word. Every variant leaves the exponent |
| * at EXP_UNDER+1, leaves %edx non-zero whenever any discarded bit (or the |
| * old extension) was non-zero, restores %ecx and rejoins at Denorm_done. |
| */ |
| L_Make_denorm: |
| /* The action to be taken depends upon whether the underflow |
| exception is masked */ |
| testb CW_Underflow,%cl /* Underflow mask. */ |
| jz Unmasked_underflow /* Do not make a denormal. */ |
| |
| movb DENORMAL,FPU_denormal /* must precede the pushl below: in the |
| re-entrant build FPU_denormal is addressed relative to %esp */ |
| |
| pushl %ecx /* Save */ |
| movw EXP_UNDER+1,%cx |
| subw EXP(%edi),%cx /* %cx = shift count = (EXP_UNDER+1) - exponent */ |
| |
| cmpw $64,%cx /* 64 or more: every significand bit is shifted out */ |
| jnc Denorm_shift_more_than_63 |
| |
| cmpw $32,%cx /* shrd only works for 0..31 bits */ |
| jnc Denorm_shift_more_than_32 |
| |
| /* |
| * We got here without jumps by assuming that the most common requirement |
| * is for a small de-normalising shift. |
| * Shift by [1..31] bits |
| */ |
| addw %cx,EXP(%edi) /* exponent becomes EXP_UNDER+1 */ |
| orl %edx,%edx /* extension */ |
| setne %ch /* Save whether %edx is non-zero */ |
| xorl %edx,%edx |
| shrd %cl,%ebx,%edx /* bits leaving %ebx enter the top of %edx */ |
| shrd %cl,%eax,%ebx |
| shr %cl,%eax |
| orb %ch,%dl /* fold the old extension in as a sticky bit */ |
| popl %ecx /* restore the control word */ |
| jmp Denorm_done |
| |
| /* Shift by [32..63] bits */ |
| Denorm_shift_more_than_32: |
| addw %cx,EXP(%edi) /* exponent becomes EXP_UNDER+1 */ |
| subb $32,%cl /* remaining count, 0..31, after the 32 bit move below */ |
| orl %edx,%edx |
| setne %ch /* %ch = 1 if the old extension was non-zero */ |
| orb %ch,%bl |
| xorl %edx,%edx |
| shrd %cl,%ebx,%edx |
| shrd %cl,%eax,%ebx |
| shr %cl,%eax |
| orl %edx,%edx /* test these 32 bits */ |
| setne %cl |
| orb %ch,%bl /* sticky bits go into the ls bit of the new extension */ |
| orb %cl,%bl |
| movl %ebx,%edx /* now move everything down by one 32 bit word */ |
| movl %eax,%ebx |
| xorl %eax,%eax |
| popl %ecx /* restore the control word */ |
| jmp Denorm_done |
| |
| /* Shift by [64..) bits */ |
| Denorm_shift_more_than_63: |
| cmpw $64,%cx |
| jne Denorm_shift_more_than_64 |
| |
| /* Exactly 64 bit shift */ |
| addw %cx,EXP(%edi) /* exponent becomes EXP_UNDER+1 */ |
| xorl %ecx,%ecx |
| orl %edx,%edx |
| setne %cl /* %cl = 1 if the old extension was non-zero */ |
| orl %ebx,%ebx |
| setne %ch /* %ch = 1 if the ls significand word was non-zero */ |
| orb %ch,%cl |
| orb %cl,%al /* sticky bit into the word that becomes the extension */ |
| movl %eax,%edx /* the ms word becomes the extension */ |
| xorl %eax,%eax |
| xorl %ebx,%ebx |
| popl %ecx /* restore the control word */ |
| jmp Denorm_done |
| |
| Denorm_shift_more_than_64: |
| movw EXP_UNDER+1,EXP(%edi) |
| /* This is easy, %eax must be non-zero, so.. */ |
| movl $1,%edx /* non-zero extension below one half; significand is zero */ |
| xorl %eax,%eax |
| xorl %ebx,%ebx |
| popl %ecx /* restore the control word */ |
| jmp Denorm_done |
| |
| |
| Unmasked_underflow: /* No shift is done; the significand is rounded as it |
| stands and Signal_underflow deals with the exponent afterwards. */ |
| movb UNMASKED_UNDERFLOW,FPU_denormal |
| jmp Denorm_done |
| |
| |
| /* Undo the de-normalisation. |
| Reached from L_Re_normalise when FPU_denormal is non-zero, with the |
| rounded significand in %eax:%ebx. */ |
| Normalise_result: |
| cmpb UNMASKED_UNDERFLOW,FPU_denormal |
| je Signal_underflow |
| |
| /* The number must be a denormal if we got here. */ |
| #ifdef PARANOID |
| /* But check it... just in case. */ |
| cmpw EXP_UNDER+1,EXP(%edi) |
| jne L_norm_bugged |
| #endif /* PARANOID */ |
| |
| #ifdef PECULIAR_486 |
| /* |
| * This implements a special feature of 80486 behaviour. |
| * Underflow will be signalled even if the number is |
| * not a denormal after rounding. |
| * This difference occurs only for masked underflow, and not |
| * in the unmasked case. |
| * Actual 80486 behaviour differs from this in some circumstances. |
| */ |
| orl %eax,%eax /* ms bits */ |
| js LPseudoDenormal /* Will be masked underflow */ |
| #else |
| orl %eax,%eax /* ms bits */ |
| js L_Normalised /* No longer a denormal */ |
| #endif /* PECULIAR_486 */ |
| |
| jnz LDenormal_adj_exponent /* still uses the flags from the orl above */ |
| |
| orl %ebx,%ebx |
| jz L_underflow_to_zero /* The contents are zero */ |
| |
| LDenormal_adj_exponent: |
| decw EXP(%edi) |
| |
| LPseudoDenormal: |
| testb $0xff,FPU_bits_lost /* bits lost == underflow */ |
| movl TAG_Special,%edx /* movl leaves the flags from testb intact */ |
| jz L_deNormalised |
| |
| /* There must be a masked underflow */ |
| push %eax /* save the ms word of the significand */ |
| pushl EX_Underflow /* argument for EXCEPTION */ |
| call EXCEPTION |
| popl %eax /* discard the argument */ |
| popl %eax /* restore the ms word of the significand */ |
| movl TAG_Special,%edx /* reload the tag after the call */ |
| jmp L_deNormalised |
| |
| |
| /* |
| * The operations resulted in a number too small to represent. |
| * Masked response. |
| * Reached from Normalise_result when %eax and %ebx are both zero. Flags |
| * precision loss and underflow, then stores a zero with tag TAG_Zero. |
| */ |
| L_underflow_to_zero: |
| push %eax /* %eax is saved around each call */ |
| call set_precision_flag_down |
| popl %eax |
| |
| push %eax |
| pushl EX_Underflow /* argument for EXCEPTION */ |
| call EXCEPTION |
| popl %eax /* discard the argument */ |
| popl %eax /* restore %eax */ |
| |
| /* Reduce the exponent to EXP_UNDER */ |
| movw EXP_UNDER,EXP(%edi) |
| movl TAG_Zero,%edx |
| jmp L_Store_significand /* skips the precision flag checks */ |
| |
| |
| /* The operations resulted in a number too large to represent. |
| Reached from L_Store_significand after the significand has been stored. |
| arith_overflow is called with the FPU_REG pointer as its only argument; |
| whatever it leaves in %eax becomes the return value, because the exit |
| path taken from here does not load %eax again. */ |
| L_overflow: |
| addw EXTENDED_Ebias,EXP(%edi) /* Set for unmasked response. */ |
| push %edi /* argument for arith_overflow */ |
| call arith_overflow |
| pop %edi /* remove the argument, restoring %edi */ |
| jmp fpu_reg_round_signed_special_exit |
| |
| |
| Signal_underflow: /* Reached from Normalise_result when FPU_denormal is |
| UNMASKED_UNDERFLOW, i.e. the significand was rounded without being |
| shifted. */ |
| /* The number may have been changed to a non-denormal */ |
| /* by the rounding operations. */ |
| cmpw EXP_UNDER,EXP(%edi) /* signed compare, as at the entry point */ |
| jle Do_unmasked_underflow |
| |
| jmp L_Normalised |
| |
| Do_unmasked_underflow: |
| /* Increase the exponent by the magic number */ |
| addw $(3*(1<<13)),EXP(%edi) |
| push %eax /* save the ms word of the significand */ |
| pushl EX_Underflow /* argument for EXCEPTION */ |
| call EXCEPTION |
| popl %eax /* discard the argument */ |
| popl %eax /* restore the ms word of the significand */ |
| jmp L_Normalised |
| |
| |
| #ifdef PARANOID |
| #ifdef PECULIAR_486 |
| L_bugged_denorm_486: /* Internal error stubs, built only with PARANOID. |
| Each one passes EX_INTERNAL or-ed with its own code to EXCEPTION and |
| then returns -1 through L_exception_exit. */ |
| pushl EX_INTERNAL|0x236 |
| call EXCEPTION |
| popl %ebx /* discard the argument; %ebx is reloaded at fpu_Arith_exit */ |
| jmp L_exception_exit |
| #else |
| L_bugged_denorm: /* Internal error stubs, built only with PARANOID. |
| Each one passes EX_INTERNAL or-ed with its own code to EXCEPTION and |
| then returns -1 through L_exception_exit. */ |
| pushl EX_INTERNAL|0x230 |
| call EXCEPTION |
| popl %ebx /* discard the argument; %ebx is reloaded at fpu_Arith_exit */ |
| jmp L_exception_exit |
| #endif /* PECULIAR_486 */ |
| |
| L_bugged_round24: /* no rounding mode matched in LRound_To_24 */ |
| pushl EX_INTERNAL|0x231 |
| call EXCEPTION |
| popl %ebx |
| jmp L_exception_exit |
| |
| L_bugged_round53: /* no rounding mode matched in LRound_To_53 */ |
| pushl EX_INTERNAL|0x232 |
| call EXCEPTION |
| popl %ebx |
| jmp L_exception_exit |
| |
| L_bugged_round64: /* no rounding mode matched in LRound_To_64 */ |
| pushl EX_INTERNAL|0x233 |
| call EXCEPTION |
| popl %ebx |
| jmp L_exception_exit |
| |
| L_norm_bugged: /* exponent was not EXP_UNDER+1 in Normalise_result */ |
| pushl EX_INTERNAL|0x234 |
| call EXCEPTION |
| popl %ebx |
| jmp L_exception_exit |
| |
| L_entry_bugged: /* only referenced by the commented-out entry check in |
| fpu_reg_round */ |
| pushl EX_INTERNAL|0x235 |
| call EXCEPTION |
| popl %ebx |
| L_exception_exit: |
| mov $-1,%eax /* return value for an internal error */ |
| jmp fpu_reg_round_special_exit |
| #endif /* PARANOID */ |
| |
| ENDPROC(FPU_round) |