| /* |
| * Copyright (c) 2012-2013, 2017-2018 ARM Limited |
| * All rights reserved |
| * |
| * The license below extends only to copyright in the software and shall |
| * not be construed as granting a license to any other intellectual |
| * property including but not limited to intellectual property relating |
| * to a hardware implementation of the functionality of the software |
| * licensed hereunder. You may use the software subject to the license |
| * terms below provided that you ensure that this notice is replicated |
| * unmodified and in its entirety in all distributions of the software, |
| * modified or unmodified, in source code or in binary form. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are |
| * met: redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer; |
| * redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution; |
| * neither the name of the copyright holders nor the names of its |
| * contributors may be used to endorse or promote products derived from |
| * this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| * |
| * Authors: Edmund Grimley Evans |
| * Thomas Grocutt |
| */ |
| |
| #include <stdint.h> |
| |
| #include <cassert> |
| |
| #include "base/logging.hh" |
| #include "fplib.hh" |
| |
| namespace ArmISA |
| { |
| |
| #define FPLIB_RN 0 |
| #define FPLIB_RP 1 |
| #define FPLIB_RM 2 |
| #define FPLIB_RZ 3 |
| #define FPLIB_FZ 4 |
| #define FPLIB_DN 8 |
| #define FPLIB_AHP 16 |
| #define FPLIB_FZ16 32 |
| |
| #define FPLIB_IDC 128 // Input Denormal |
| #define FPLIB_IXC 16 // Inexact |
| #define FPLIB_UFC 8 // Underflow |
| #define FPLIB_OFC 4 // Overflow |
| #define FPLIB_DZC 2 // Division by Zero |
| #define FPLIB_IOC 1 // Invalid Operation |
| |
| #define FP16_BITS 16 |
| #define FP32_BITS 32 |
| #define FP64_BITS 64 |
| |
| #define FP16_EXP_BITS 5 |
| #define FP32_EXP_BITS 8 |
| #define FP64_EXP_BITS 11 |
| |
| #define FP16_EXP_BIAS 15 |
| #define FP32_EXP_BIAS 127 |
| #define FP64_EXP_BIAS 1023 |
| |
| #define FP16_EXP_INF ((1ULL << FP16_EXP_BITS) - 1) |
| #define FP32_EXP_INF ((1ULL << FP32_EXP_BITS) - 1) |
| #define FP64_EXP_INF ((1ULL << FP64_EXP_BITS) - 1) |
| |
| #define FP16_MANT_BITS (FP16_BITS - FP16_EXP_BITS - 1) |
| #define FP32_MANT_BITS (FP32_BITS - FP32_EXP_BITS - 1) |
| #define FP64_MANT_BITS (FP64_BITS - FP64_EXP_BITS - 1) |
| |
| #define FP16_EXP(x) ((x) >> FP16_MANT_BITS & ((1ULL << FP16_EXP_BITS) - 1)) |
| #define FP32_EXP(x) ((x) >> FP32_MANT_BITS & ((1ULL << FP32_EXP_BITS) - 1)) |
| #define FP64_EXP(x) ((x) >> FP64_MANT_BITS & ((1ULL << FP64_EXP_BITS) - 1)) |
| |
| #define FP16_MANT(x) ((x) & ((1ULL << FP16_MANT_BITS) - 1)) |
| #define FP32_MANT(x) ((x) & ((1ULL << FP32_MANT_BITS) - 1)) |
| #define FP64_MANT(x) ((x) & ((1ULL << FP64_MANT_BITS) - 1)) |
| |
| static inline uint16_t |
| lsl16(uint16_t x, uint32_t shift) |
| { |
| return shift < 16 ? x << shift : 0; |
| } |
| |
| static inline uint16_t |
| lsr16(uint16_t x, uint32_t shift) |
| { |
| return shift < 16 ? x >> shift : 0; |
| } |
| |
| static inline uint32_t |
| lsl32(uint32_t x, uint32_t shift) |
| { |
| return shift < 32 ? x << shift : 0; |
| } |
| |
| static inline uint32_t |
| lsr32(uint32_t x, uint32_t shift) |
| { |
| return shift < 32 ? x >> shift : 0; |
| } |
| |
| static inline uint64_t |
| lsl64(uint64_t x, uint32_t shift) |
| { |
| return shift < 64 ? x << shift : 0; |
| } |
| |
| static inline uint64_t |
| lsr64(uint64_t x, uint32_t shift) |
| { |
| return shift < 64 ? x >> shift : 0; |
| } |
| |
| static inline void |
| lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift) |
| { |
| if (shift == 0) { |
| *r1 = x1; |
| *r0 = x0; |
| } else if (shift < 64) { |
| *r1 = x1 << shift | x0 >> (64 - shift); |
| *r0 = x0 << shift; |
| } else if (shift < 128) { |
| *r1 = x0 << (shift - 64); |
| *r0 = 0; |
| } else { |
| *r1 = 0; |
| *r0 = 0; |
| } |
| } |
| |
| static inline void |
| lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift) |
| { |
| if (shift == 0) { |
| *r1 = x1; |
| *r0 = x0; |
| } else if (shift < 64) { |
| *r0 = x0 >> shift | x1 << (64 - shift); |
| *r1 = x1 >> shift; |
| } else if (shift < 128) { |
| *r0 = x1 >> (shift - 64); |
| *r1 = 0; |
| } else { |
| *r0 = 0; |
| *r1 = 0; |
| } |
| } |
| |
| static inline void |
| mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b) |
| { |
| uint32_t mask = ((uint32_t)1 << 31) - 1; |
| uint64_t a0 = a & mask; |
| uint64_t a1 = a >> 31 & mask; |
| uint64_t b0 = b & mask; |
| uint64_t b1 = b >> 31 & mask; |
| uint64_t p0 = a0 * b0; |
| uint64_t p2 = a1 * b1; |
| uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2; |
| uint64_t s0 = p0; |
| uint64_t s1 = (s0 >> 31) + p1; |
| uint64_t s2 = (s1 >> 31) + p2; |
| *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62; |
| *x1 = s2 >> 2; |
| } |
| |
| static inline |
| void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b) |
| { |
| uint64_t t0 = (uint64_t)(uint32_t)a * b; |
| uint64_t t1 = (t0 >> 32) + (a >> 32) * b; |
| *x0 = t1 << 32 | (uint32_t)t0; |
| *x1 = t1 >> 32; |
| } |
| |
| static inline void |
| add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, |
| uint64_t b1) |
| { |
| *x0 = a0 + b0; |
| *x1 = a1 + b1 + (*x0 < a0); |
| } |
| |
| static inline void |
| sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, |
| uint64_t b1) |
| { |
| *x0 = a0 - b0; |
| *x1 = a1 - b1 - (*x0 > a0); |
| } |
| |
| static inline int |
| cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1) |
| { |
| return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0); |
| } |
| |
| static inline uint16_t |
| fp16_normalise(uint16_t mnt, int *exp) |
| { |
| int shift; |
| |
| if (!mnt) { |
| return 0; |
| } |
| |
| for (shift = 8; shift; shift >>= 1) { |
| if (!(mnt >> (16 - shift))) { |
| mnt <<= shift; |
| *exp -= shift; |
| } |
| } |
| return mnt; |
| } |
| |
| static inline uint32_t |
| fp32_normalise(uint32_t mnt, int *exp) |
| { |
| int shift; |
| |
| if (!mnt) { |
| return 0; |
| } |
| |
| for (shift = 16; shift; shift >>= 1) { |
| if (!(mnt >> (32 - shift))) { |
| mnt <<= shift; |
| *exp -= shift; |
| } |
| } |
| return mnt; |
| } |
| |
| static inline uint64_t |
| fp64_normalise(uint64_t mnt, int *exp) |
| { |
| int shift; |
| |
| if (!mnt) { |
| return 0; |
| } |
| |
| for (shift = 32; shift; shift >>= 1) { |
| if (!(mnt >> (64 - shift))) { |
| mnt <<= shift; |
| *exp -= shift; |
| } |
| } |
| return mnt; |
| } |
| |
| static inline void |
| fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp) |
| { |
| uint64_t x0 = *mnt0; |
| uint64_t x1 = *mnt1; |
| int shift; |
| |
| if (!x0 && !x1) { |
| return; |
| } |
| |
| if (!x1) { |
| x1 = x0; |
| x0 = 0; |
| *exp -= 64; |
| } |
| |
| for (shift = 32; shift; shift >>= 1) { |
| if (!(x1 >> (64 - shift))) { |
| x1 = x1 << shift | x0 >> (64 - shift); |
| x0 <<= shift; |
| *exp -= shift; |
| } |
| } |
| |
| *mnt0 = x0; |
| *mnt1 = x1; |
| } |
| |
| static inline uint16_t |
| fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt) |
| { |
| return sgn << (FP16_BITS - 1) | exp << FP16_MANT_BITS | FP16_MANT(mnt); |
| } |
| |
| static inline uint32_t |
| fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt) |
| { |
| return sgn << (FP32_BITS - 1) | exp << FP32_MANT_BITS | FP32_MANT(mnt); |
| } |
| |
| static inline uint64_t |
| fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt) |
| { |
| return sgn << (FP64_BITS - 1) | exp << FP64_MANT_BITS | FP64_MANT(mnt); |
| } |
| |
| static inline uint16_t |
| fp16_zero(int sgn) |
| { |
| return fp16_pack(sgn, 0, 0); |
| } |
| |
| static inline uint32_t |
| fp32_zero(int sgn) |
| { |
| return fp32_pack(sgn, 0, 0); |
| } |
| |
| static inline uint64_t |
| fp64_zero(int sgn) |
| { |
| return fp64_pack(sgn, 0, 0); |
| } |
| |
| static inline uint16_t |
| fp16_max_normal(int sgn) |
| { |
| return fp16_pack(sgn, FP16_EXP_INF - 1, -1); |
| } |
| |
| static inline uint32_t |
| fp32_max_normal(int sgn) |
| { |
| return fp32_pack(sgn, FP32_EXP_INF - 1, -1); |
| } |
| |
| static inline uint64_t |
| fp64_max_normal(int sgn) |
| { |
| return fp64_pack(sgn, FP64_EXP_INF - 1, -1); |
| } |
| |
| static inline uint16_t |
| fp16_infinity(int sgn) |
| { |
| return fp16_pack(sgn, FP16_EXP_INF, 0); |
| } |
| |
| static inline uint32_t |
| fp32_infinity(int sgn) |
| { |
| return fp32_pack(sgn, FP32_EXP_INF, 0); |
| } |
| |
| static inline uint64_t |
| fp64_infinity(int sgn) |
| { |
| return fp64_pack(sgn, FP64_EXP_INF, 0); |
| } |
| |
| static inline uint16_t |
| fp16_defaultNaN() |
| { |
| return fp16_pack(0, FP16_EXP_INF, 1ULL << (FP16_MANT_BITS - 1)); |
| } |
| |
| static inline uint32_t |
| fp32_defaultNaN() |
| { |
| return fp32_pack(0, FP32_EXP_INF, 1ULL << (FP32_MANT_BITS - 1)); |
| } |
| |
| static inline uint64_t |
| fp64_defaultNaN() |
| { |
| return fp64_pack(0, FP64_EXP_INF, 1ULL << (FP64_MANT_BITS - 1)); |
| } |
| |
| static inline void |
| fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode, |
| int *flags) |
| { |
| *sgn = x >> (FP16_BITS - 1); |
| *exp = FP16_EXP(x); |
| *mnt = FP16_MANT(x); |
| |
| // Handle subnormals: |
| if (*exp) { |
| *mnt |= 1ULL << FP16_MANT_BITS; |
| } else { |
| ++*exp; |
| // IDC (Input Denormal) is not set in this case. |
| if (mode & FPLIB_FZ16) |
| *mnt = 0; |
| } |
| } |
| |
| static inline void |
| fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode, |
| int *flags) |
| { |
| *sgn = x >> (FP32_BITS - 1); |
| *exp = FP32_EXP(x); |
| *mnt = FP32_MANT(x); |
| |
| // Handle subnormals: |
| if (*exp) { |
| *mnt |= 1ULL << FP32_MANT_BITS; |
| } else { |
| ++*exp; |
| if ((mode & FPLIB_FZ) && *mnt) { |
| *flags |= FPLIB_IDC; |
| *mnt = 0; |
| } |
| } |
| } |
| |
| static inline void |
| fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode, |
| int *flags) |
| { |
| *sgn = x >> (FP64_BITS - 1); |
| *exp = FP64_EXP(x); |
| *mnt = FP64_MANT(x); |
| |
| // Handle subnormals: |
| if (*exp) { |
| *mnt |= 1ULL << FP64_MANT_BITS; |
| } else { |
| ++*exp; |
| if ((mode & FPLIB_FZ) && *mnt) { |
| *flags |= FPLIB_IDC; |
| *mnt = 0; |
| } |
| } |
| } |
| |
| static inline int |
| fp16_is_NaN(int exp, uint16_t mnt) |
| { |
| return exp == FP16_EXP_INF && FP16_MANT(mnt); |
| } |
| |
| static inline int |
| fp32_is_NaN(int exp, uint32_t mnt) |
| { |
| return exp == FP32_EXP_INF && FP32_MANT(mnt); |
| } |
| |
| static inline int |
| fp64_is_NaN(int exp, uint64_t mnt) |
| { |
| return exp == FP64_EXP_INF && FP64_MANT(mnt); |
| } |
| |
| static inline int |
| fp16_is_signalling_NaN(int exp, uint16_t mnt) |
| { |
| return fp16_is_NaN(exp, mnt) && !(mnt >> (FP16_MANT_BITS - 1) & 1); |
| } |
| |
| static inline int |
| fp32_is_signalling_NaN(int exp, uint32_t mnt) |
| { |
| return fp32_is_NaN(exp, mnt) && !(mnt >> (FP32_MANT_BITS - 1) & 1); |
| } |
| |
| static inline int |
| fp64_is_signalling_NaN(int exp, uint64_t mnt) |
| { |
| return fp64_is_NaN(exp, mnt) && !(mnt >> (FP64_MANT_BITS - 1) & 1); |
| } |
| |
| static inline int |
| fp16_is_quiet_NaN(int exp, uint16_t mnt) |
| { |
| return exp == FP16_EXP_INF && (mnt >> (FP16_MANT_BITS - 1) & 1); |
| } |
| |
| static inline int |
| fp32_is_quiet_NaN(int exp, uint32_t mnt) |
| { |
| return exp == FP32_EXP_INF && (mnt >> (FP32_MANT_BITS - 1) & 1); |
| } |
| |
| static inline int |
| fp64_is_quiet_NaN(int exp, uint64_t mnt) |
| { |
| return exp == FP64_EXP_INF && (mnt >> (FP64_MANT_BITS - 1) & 1); |
| } |
| |
| static inline int |
| fp16_is_infinity(int exp, uint16_t mnt) |
| { |
| return exp == FP16_EXP_INF && !FP16_MANT(mnt); |
| } |
| |
| static inline int |
| fp32_is_infinity(int exp, uint32_t mnt) |
| { |
| return exp == FP32_EXP_INF && !FP32_MANT(mnt); |
| } |
| |
| static inline int |
| fp64_is_infinity(int exp, uint64_t mnt) |
| { |
| return exp == FP64_EXP_INF && !FP64_MANT(mnt); |
| } |
| |
| static inline uint16_t |
| fp16_process_NaN(uint16_t a, int mode, int *flags) |
| { |
| if (!(a >> (FP16_MANT_BITS - 1) & 1)) { |
| *flags |= FPLIB_IOC; |
| a |= 1ULL << (FP16_MANT_BITS - 1); |
| } |
| return mode & FPLIB_DN ? fp16_defaultNaN() : a; |
| } |
| |
| static inline uint32_t |
| fp32_process_NaN(uint32_t a, int mode, int *flags) |
| { |
| if (!(a >> (FP32_MANT_BITS - 1) & 1)) { |
| *flags |= FPLIB_IOC; |
| a |= 1ULL << (FP32_MANT_BITS - 1); |
| } |
| return mode & FPLIB_DN ? fp32_defaultNaN() : a; |
| } |
| |
| static inline uint64_t |
| fp64_process_NaN(uint64_t a, int mode, int *flags) |
| { |
| if (!(a >> (FP64_MANT_BITS - 1) & 1)) { |
| *flags |= FPLIB_IOC; |
| a |= 1ULL << (FP64_MANT_BITS - 1); |
| } |
| return mode & FPLIB_DN ? fp64_defaultNaN() : a; |
| } |
| |
| static uint16_t |
| fp16_process_NaNs(uint16_t a, uint16_t b, int mode, int *flags) |
| { |
| int a_exp = FP16_EXP(a); |
| uint16_t a_mnt = FP16_MANT(a); |
| int b_exp = FP16_EXP(b); |
| uint16_t b_mnt = FP16_MANT(b); |
| |
| // Handle signalling NaNs: |
| if (fp16_is_signalling_NaN(a_exp, a_mnt)) |
| return fp16_process_NaN(a, mode, flags); |
| if (fp16_is_signalling_NaN(b_exp, b_mnt)) |
| return fp16_process_NaN(b, mode, flags); |
| |
| // Handle quiet NaNs: |
| if (fp16_is_NaN(a_exp, a_mnt)) |
| return fp16_process_NaN(a, mode, flags); |
| if (fp16_is_NaN(b_exp, b_mnt)) |
| return fp16_process_NaN(b, mode, flags); |
| |
| return 0; |
| } |
| |
| static uint32_t |
| fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags) |
| { |
| int a_exp = FP32_EXP(a); |
| uint32_t a_mnt = FP32_MANT(a); |
| int b_exp = FP32_EXP(b); |
| uint32_t b_mnt = FP32_MANT(b); |
| |
| // Handle signalling NaNs: |
| if (fp32_is_signalling_NaN(a_exp, a_mnt)) |
| return fp32_process_NaN(a, mode, flags); |
| if (fp32_is_signalling_NaN(b_exp, b_mnt)) |
| return fp32_process_NaN(b, mode, flags); |
| |
| // Handle quiet NaNs: |
| if (fp32_is_NaN(a_exp, a_mnt)) |
| return fp32_process_NaN(a, mode, flags); |
| if (fp32_is_NaN(b_exp, b_mnt)) |
| return fp32_process_NaN(b, mode, flags); |
| |
| return 0; |
| } |
| |
| static uint64_t |
| fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags) |
| { |
| int a_exp = FP64_EXP(a); |
| uint64_t a_mnt = FP64_MANT(a); |
| int b_exp = FP64_EXP(b); |
| uint64_t b_mnt = FP64_MANT(b); |
| |
| // Handle signalling NaNs: |
| if (fp64_is_signalling_NaN(a_exp, a_mnt)) |
| return fp64_process_NaN(a, mode, flags); |
| if (fp64_is_signalling_NaN(b_exp, b_mnt)) |
| return fp64_process_NaN(b, mode, flags); |
| |
| // Handle quiet NaNs: |
| if (fp64_is_NaN(a_exp, a_mnt)) |
| return fp64_process_NaN(a, mode, flags); |
| if (fp64_is_NaN(b_exp, b_mnt)) |
| return fp64_process_NaN(b, mode, flags); |
| |
| return 0; |
| } |
| |
| static uint16_t |
| fp16_process_NaNs3(uint16_t a, uint16_t b, uint16_t c, int mode, int *flags) |
| { |
| int a_exp = FP16_EXP(a); |
| uint16_t a_mnt = FP16_MANT(a); |
| int b_exp = FP16_EXP(b); |
| uint16_t b_mnt = FP16_MANT(b); |
| int c_exp = FP16_EXP(c); |
| uint16_t c_mnt = FP16_MANT(c); |
| |
| // Handle signalling NaNs: |
| if (fp16_is_signalling_NaN(a_exp, a_mnt)) |
| return fp16_process_NaN(a, mode, flags); |
| if (fp16_is_signalling_NaN(b_exp, b_mnt)) |
| return fp16_process_NaN(b, mode, flags); |
| if (fp16_is_signalling_NaN(c_exp, c_mnt)) |
| return fp16_process_NaN(c, mode, flags); |
| |
| // Handle quiet NaNs: |
| if (fp16_is_NaN(a_exp, a_mnt)) |
| return fp16_process_NaN(a, mode, flags); |
| if (fp16_is_NaN(b_exp, b_mnt)) |
| return fp16_process_NaN(b, mode, flags); |
| if (fp16_is_NaN(c_exp, c_mnt)) |
| return fp16_process_NaN(c, mode, flags); |
| |
| return 0; |
| } |
| |
| static uint32_t |
| fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags) |
| { |
| int a_exp = FP32_EXP(a); |
| uint32_t a_mnt = FP32_MANT(a); |
| int b_exp = FP32_EXP(b); |
| uint32_t b_mnt = FP32_MANT(b); |
| int c_exp = FP32_EXP(c); |
| uint32_t c_mnt = FP32_MANT(c); |
| |
| // Handle signalling NaNs: |
| if (fp32_is_signalling_NaN(a_exp, a_mnt)) |
| return fp32_process_NaN(a, mode, flags); |
| if (fp32_is_signalling_NaN(b_exp, b_mnt)) |
| return fp32_process_NaN(b, mode, flags); |
| if (fp32_is_signalling_NaN(c_exp, c_mnt)) |
| return fp32_process_NaN(c, mode, flags); |
| |
| // Handle quiet NaNs: |
| if (fp32_is_NaN(a_exp, a_mnt)) |
| return fp32_process_NaN(a, mode, flags); |
| if (fp32_is_NaN(b_exp, b_mnt)) |
| return fp32_process_NaN(b, mode, flags); |
| if (fp32_is_NaN(c_exp, c_mnt)) |
| return fp32_process_NaN(c, mode, flags); |
| |
| return 0; |
| } |
| |
| static uint64_t |
| fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags) |
| { |
| int a_exp = FP64_EXP(a); |
| uint64_t a_mnt = FP64_MANT(a); |
| int b_exp = FP64_EXP(b); |
| uint64_t b_mnt = FP64_MANT(b); |
| int c_exp = FP64_EXP(c); |
| uint64_t c_mnt = FP64_MANT(c); |
| |
| // Handle signalling NaNs: |
| if (fp64_is_signalling_NaN(a_exp, a_mnt)) |
| return fp64_process_NaN(a, mode, flags); |
| if (fp64_is_signalling_NaN(b_exp, b_mnt)) |
| return fp64_process_NaN(b, mode, flags); |
| if (fp64_is_signalling_NaN(c_exp, c_mnt)) |
| return fp64_process_NaN(c, mode, flags); |
| |
| // Handle quiet NaNs: |
| if (fp64_is_NaN(a_exp, a_mnt)) |
| return fp64_process_NaN(a, mode, flags); |
| if (fp64_is_NaN(b_exp, b_mnt)) |
| return fp64_process_NaN(b, mode, flags); |
| if (fp64_is_NaN(c_exp, c_mnt)) |
| return fp64_process_NaN(c, mode, flags); |
| |
| return 0; |
| } |
| |
| static uint16_t |
| fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags) |
| { |
| int biased_exp; // non-negative exponent value for result |
| uint16_t int_mant; // mantissa for result, less than (2 << FP16_MANT_BITS) |
| int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 |
| |
| assert(rm != FPRounding_TIEAWAY); |
| |
| // Flush to zero: |
| if ((mode & FPLIB_FZ16) && exp < 1) { |
| *flags |= FPLIB_UFC; |
| return fp16_zero(sgn); |
| } |
| |
| // The bottom FP16_EXP_BITS bits of mnt are orred together: |
| mnt = (4ULL << FP16_MANT_BITS | mnt >> (FP16_EXP_BITS - 1) | |
| ((mnt & ((1ULL << FP16_EXP_BITS) - 1)) != 0)); |
| |
| if (exp > 0) { |
| biased_exp = exp; |
| int_mant = mnt >> 2; |
| error = mnt & 3; |
| } else { |
| biased_exp = 0; |
| int_mant = lsr16(mnt, 3 - exp); |
| error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1)); |
| } |
| |
| if (!biased_exp && error) { // xx should also check fpscr_val<11> |
| *flags |= FPLIB_UFC; |
| } |
| |
| // Round up: |
| if ((rm == FPLIB_RN && (error == 3 || |
| (error == 2 && (int_mant & 1)))) || |
| (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { |
| ++int_mant; |
| if (int_mant == 1ULL << FP16_MANT_BITS) { |
| // Rounded up from denormalized to normalized |
| biased_exp = 1; |
| } |
| if (int_mant == 2ULL << FP16_MANT_BITS) { |
| // Rounded up to next exponent |
| ++biased_exp; |
| int_mant >>= 1; |
| } |
| } |
| |
| // Handle rounding to odd aka Von Neumann rounding: |
| if (error && rm == FPRounding_ODD) |
| int_mant |= 1; |
| |
| // Handle overflow: |
| if (!(mode & FPLIB_AHP)) { |
| if (biased_exp >= (int)FP16_EXP_INF) { |
| *flags |= FPLIB_OFC | FPLIB_IXC; |
| if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || |
| (rm == FPLIB_RM && sgn)) { |
| return fp16_infinity(sgn); |
| } else { |
| return fp16_max_normal(sgn); |
| } |
| } |
| } else { |
| if (biased_exp >= (int)FP16_EXP_INF + 1) { |
| *flags |= FPLIB_IOC; |
| return fp16_pack(sgn, FP16_EXP_INF, -1); |
| } |
| } |
| |
| if (error) { |
| *flags |= FPLIB_IXC; |
| } |
| |
| return fp16_pack(sgn, biased_exp, int_mant); |
| } |
| |
| static uint16_t |
| fp16_round(int sgn, int exp, uint16_t mnt, int mode, int *flags) |
| { |
| return fp16_round_(sgn, exp, mnt, mode & 3, mode, flags); |
| } |
| |
| static uint32_t |
| fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags) |
| { |
| int biased_exp; // non-negative exponent value for result |
| uint32_t int_mant; // mantissa for result, less than (2 << FP32_MANT_BITS) |
| int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 |
| |
| assert(rm != FPRounding_TIEAWAY); |
| |
| // Flush to zero: |
| if ((mode & FPLIB_FZ) && exp < 1) { |
| *flags |= FPLIB_UFC; |
| return fp32_zero(sgn); |
| } |
| |
| // The bottom FP32_EXP_BITS bits of mnt are orred together: |
| mnt = (4ULL << FP32_MANT_BITS | mnt >> (FP32_EXP_BITS - 1) | |
| ((mnt & ((1ULL << FP32_EXP_BITS) - 1)) != 0)); |
| |
| if (exp > 0) { |
| biased_exp = exp; |
| int_mant = mnt >> 2; |
| error = mnt & 3; |
| } else { |
| biased_exp = 0; |
| int_mant = lsr32(mnt, 3 - exp); |
| error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1)); |
| } |
| |
| if (!biased_exp && error) { // xx should also check fpscr_val<11> |
| *flags |= FPLIB_UFC; |
| } |
| |
| // Round up: |
| if ((rm == FPLIB_RN && (error == 3 || |
| (error == 2 && (int_mant & 1)))) || |
| (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { |
| ++int_mant; |
| if (int_mant == 1ULL << FP32_MANT_BITS) { |
| // Rounded up from denormalized to normalized |
| biased_exp = 1; |
| } |
| if (int_mant == 2ULL << FP32_MANT_BITS) { |
| // Rounded up to next exponent |
| ++biased_exp; |
| int_mant >>= 1; |
| } |
| } |
| |
| // Handle rounding to odd aka Von Neumann rounding: |
| if (error && rm == FPRounding_ODD) |
| int_mant |= 1; |
| |
| // Handle overflow: |
| if (biased_exp >= (int)FP32_EXP_INF) { |
| *flags |= FPLIB_OFC | FPLIB_IXC; |
| if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || |
| (rm == FPLIB_RM && sgn)) { |
| return fp32_infinity(sgn); |
| } else { |
| return fp32_max_normal(sgn); |
| } |
| } |
| |
| if (error) { |
| *flags |= FPLIB_IXC; |
| } |
| |
| return fp32_pack(sgn, biased_exp, int_mant); |
| } |
| |
| static uint32_t |
| fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags) |
| { |
| return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags); |
| } |
| |
| static uint64_t |
| fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags) |
| { |
| int biased_exp; // non-negative exponent value for result |
| uint64_t int_mant; // mantissa for result, less than (2 << FP64_MANT_BITS) |
| int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 |
| |
| assert(rm != FPRounding_TIEAWAY); |
| |
| // Flush to zero: |
| if ((mode & FPLIB_FZ) && exp < 1) { |
| *flags |= FPLIB_UFC; |
| return fp64_zero(sgn); |
| } |
| |
| // The bottom FP64_EXP_BITS bits of mnt are orred together: |
| mnt = (4ULL << FP64_MANT_BITS | mnt >> (FP64_EXP_BITS - 1) | |
| ((mnt & ((1ULL << FP64_EXP_BITS) - 1)) != 0)); |
| |
| if (exp > 0) { |
| biased_exp = exp; |
| int_mant = mnt >> 2; |
| error = mnt & 3; |
| } else { |
| biased_exp = 0; |
| int_mant = lsr64(mnt, 3 - exp); |
| error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1)); |
| } |
| |
| if (!biased_exp && error) { // xx should also check fpscr_val<11> |
| *flags |= FPLIB_UFC; |
| } |
| |
| // Round up: |
| if ((rm == FPLIB_RN && (error == 3 || |
| (error == 2 && (int_mant & 1)))) || |
| (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { |
| ++int_mant; |
| if (int_mant == 1ULL << FP64_MANT_BITS) { |
| // Rounded up from denormalized to normalized |
| biased_exp = 1; |
| } |
| if (int_mant == 2ULL << FP64_MANT_BITS) { |
| // Rounded up to next exponent |
| ++biased_exp; |
| int_mant >>= 1; |
| } |
| } |
| |
| // Handle rounding to odd aka Von Neumann rounding: |
| if (error && rm == FPRounding_ODD) |
| int_mant |= 1; |
| |
| // Handle overflow: |
| if (biased_exp >= (int)FP64_EXP_INF) { |
| *flags |= FPLIB_OFC | FPLIB_IXC; |
| if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || |
| (rm == FPLIB_RM && sgn)) { |
| return fp64_infinity(sgn); |
| } else { |
| return fp64_max_normal(sgn); |
| } |
| } |
| |
| if (error) { |
| *flags |= FPLIB_IXC; |
| } |
| |
| return fp64_pack(sgn, biased_exp, int_mant); |
| } |
| |
| static uint64_t |
| fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags) |
| { |
| return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags); |
| } |
| |
| static int |
| fp16_compare_eq(uint16_t a, uint16_t b, int mode, int *flags) |
| { |
| int a_sgn, a_exp, b_sgn, b_exp; |
| uint16_t a_mnt, b_mnt; |
| |
| fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); |
| |
| if (fp16_is_NaN(a_exp, a_mnt) || |
| fp16_is_NaN(b_exp, b_mnt)) { |
| if (fp16_is_signalling_NaN(a_exp, a_mnt) || |
| fp16_is_signalling_NaN(b_exp, b_mnt)) |
| *flags |= FPLIB_IOC; |
| return 0; |
| } |
| return a == b || (!a_mnt && !b_mnt); |
| } |
| |
| static int |
| fp16_compare_ge(uint16_t a, uint16_t b, int mode, int *flags) |
| { |
| int a_sgn, a_exp, b_sgn, b_exp; |
| uint16_t a_mnt, b_mnt; |
| |
| fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); |
| |
| if (fp16_is_NaN(a_exp, a_mnt) || |
| fp16_is_NaN(b_exp, b_mnt)) { |
| *flags |= FPLIB_IOC; |
| return 0; |
| } |
| if (!a_mnt && !b_mnt) |
| return 1; |
| if (a_sgn != b_sgn) |
| return b_sgn; |
| if (a_exp != b_exp) |
| return a_sgn ^ (a_exp > b_exp); |
| if (a_mnt != b_mnt) |
| return a_sgn ^ (a_mnt > b_mnt); |
| return 1; |
| } |
| |
| static int |
| fp16_compare_gt(uint16_t a, uint16_t b, int mode, int *flags) |
| { |
| int a_sgn, a_exp, b_sgn, b_exp; |
| uint16_t a_mnt, b_mnt; |
| |
| fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); |
| |
| if (fp16_is_NaN(a_exp, a_mnt) || |
| fp16_is_NaN(b_exp, b_mnt)) { |
| *flags |= FPLIB_IOC; |
| return 0; |
| } |
| if (!a_mnt && !b_mnt) |
| return 0; |
| if (a_sgn != b_sgn) |
| return b_sgn; |
| if (a_exp != b_exp) |
| return a_sgn ^ (a_exp > b_exp); |
| if (a_mnt != b_mnt) |
| return a_sgn ^ (a_mnt > b_mnt); |
| return 0; |
| } |
| |
| static int |
| fp16_compare_un(uint16_t a, uint16_t b, int mode, int *flags) |
| { |
| int a_sgn, a_exp, b_sgn, b_exp; |
| uint16_t a_mnt, b_mnt; |
| |
| fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); |
| |
| if (fp16_is_NaN(a_exp, a_mnt) || |
| fp16_is_NaN(b_exp, b_mnt)) { |
| if (fp16_is_signalling_NaN(a_exp, a_mnt) || |
| fp16_is_signalling_NaN(b_exp, b_mnt)) |
| *flags |= FPLIB_IOC; |
| return 1; |
| } |
| return 0; |
| } |
| |
| static int |
| fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags) |
| { |
| int a_sgn, a_exp, b_sgn, b_exp; |
| uint32_t a_mnt, b_mnt; |
| |
| fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); |
| |
| if (fp32_is_NaN(a_exp, a_mnt) || |
| fp32_is_NaN(b_exp, b_mnt)) { |
| if (fp32_is_signalling_NaN(a_exp, a_mnt) || |
| fp32_is_signalling_NaN(b_exp, b_mnt)) |
| *flags |= FPLIB_IOC; |
| return 0; |
| } |
| return a == b || (!a_mnt && !b_mnt); |
| } |
| |
| static int |
| fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags) |
| { |
| int a_sgn, a_exp, b_sgn, b_exp; |
| uint32_t a_mnt, b_mnt; |
| |
| fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); |
| |
| if (fp32_is_NaN(a_exp, a_mnt) || |
| fp32_is_NaN(b_exp, b_mnt)) { |
| *flags |= FPLIB_IOC; |
| return 0; |
| } |
| if (!a_mnt && !b_mnt) |
| return 1; |
| if (a_sgn != b_sgn) |
| return b_sgn; |
| if (a_exp != b_exp) |
| return a_sgn ^ (a_exp > b_exp); |
| if (a_mnt != b_mnt) |
| return a_sgn ^ (a_mnt > b_mnt); |
| return 1; |
| } |
| |
| static int |
| fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags) |
| { |
| int a_sgn, a_exp, b_sgn, b_exp; |
| uint32_t a_mnt, b_mnt; |
| |
| fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); |
| |
| if (fp32_is_NaN(a_exp, a_mnt) || |
| fp32_is_NaN(b_exp, b_mnt)) { |
| *flags |= FPLIB_IOC; |
| return 0; |
| } |
| if (!a_mnt && !b_mnt) |
| return 0; |
| if (a_sgn != b_sgn) |
| return b_sgn; |
| if (a_exp != b_exp) |
| return a_sgn ^ (a_exp > b_exp); |
| if (a_mnt != b_mnt) |
| return a_sgn ^ (a_mnt > b_mnt); |
| return 0; |
| } |
| |
| static int |
| fp32_compare_un(uint32_t a, uint32_t b, int mode, int *flags) |
| { |
| int a_sgn, a_exp, b_sgn, b_exp; |
| uint32_t a_mnt, b_mnt; |
| |
| fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); |
| |
| if (fp32_is_NaN(a_exp, a_mnt) || |
| fp32_is_NaN(b_exp, b_mnt)) { |
| if (fp32_is_signalling_NaN(a_exp, a_mnt) || |
| fp32_is_signalling_NaN(b_exp, b_mnt)) |
| *flags |= FPLIB_IOC; |
| return 1; |
| } |
| return 0; |
| } |
| |
| static int |
| fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags) |
| { |
| int a_sgn, a_exp, b_sgn, b_exp; |
| uint64_t a_mnt, b_mnt; |
| |
| fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); |
| |
| if (fp64_is_NaN(a_exp, a_mnt) || |
| fp64_is_NaN(b_exp, b_mnt)) { |
| if (fp64_is_signalling_NaN(a_exp, a_mnt) || |
| fp64_is_signalling_NaN(b_exp, b_mnt)) |
| *flags |= FPLIB_IOC; |
| return 0; |
| } |
| return a == b || (!a_mnt && !b_mnt); |
| } |
| |
| static int |
| fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags) |
| { |
| int a_sgn, a_exp, b_sgn, b_exp; |
| uint64_t a_mnt, b_mnt; |
| |
| fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); |
| |
| if (fp64_is_NaN(a_exp, a_mnt) || |
| fp64_is_NaN(b_exp, b_mnt)) { |
| *flags |= FPLIB_IOC; |
| return 0; |
| } |
| if (!a_mnt && !b_mnt) |
| return 1; |
| if (a_sgn != b_sgn) |
| return b_sgn; |
| if (a_exp != b_exp) |
| return a_sgn ^ (a_exp > b_exp); |
| if (a_mnt != b_mnt) |
| return a_sgn ^ (a_mnt > b_mnt); |
| return 1; |
| } |
| |
| static int |
| fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags) |
| { |
| int a_sgn, a_exp, b_sgn, b_exp; |
| uint64_t a_mnt, b_mnt; |
| |
| fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); |
| |
| if (fp64_is_NaN(a_exp, a_mnt) || |
| fp64_is_NaN(b_exp, b_mnt)) { |
| *flags |= FPLIB_IOC; |
| return 0; |
| } |
| if (!a_mnt && !b_mnt) |
| return 0; |
| if (a_sgn != b_sgn) |
| return b_sgn; |
| if (a_exp != b_exp) |
| return a_sgn ^ (a_exp > b_exp); |
| if (a_mnt != b_mnt) |
| return a_sgn ^ (a_mnt > b_mnt); |
| return 0; |
| } |
| |
| static int |
| fp64_compare_un(uint64_t a, uint64_t b, int mode, int *flags) |
| { |
| int a_sgn, a_exp, b_sgn, b_exp; |
| uint64_t a_mnt, b_mnt; |
| |
| fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); |
| |
| if (fp64_is_NaN(a_exp, a_mnt) || |
| fp64_is_NaN(b_exp, b_mnt)) { |
| if (fp64_is_signalling_NaN(a_exp, a_mnt) || |
| fp64_is_signalling_NaN(b_exp, b_mnt)) |
| *flags |= FPLIB_IOC; |
| return 1; |
| } |
| return 0; |
| } |
| |
| static uint16_t |
| fp16_add(uint16_t a, uint16_t b, int neg, int mode, int *flags) |
| { |
| int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; |
| uint16_t a_mnt, b_mnt, x, x_mnt; |
| |
| fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); |
| |
| if ((x = fp16_process_NaNs(a, b, mode, flags))) { |
| return x; |
| } |
| |
| b_sgn ^= neg; |
| |
| // Handle infinities and zeroes: |
| if (a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF && a_sgn != b_sgn) { |
| *flags |= FPLIB_IOC; |
| return fp16_defaultNaN(); |
| } else if (a_exp == FP16_EXP_INF) { |
| return fp16_infinity(a_sgn); |
| } else if (b_exp == FP16_EXP_INF) { |
| return fp16_infinity(b_sgn); |
| } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { |
| return fp16_zero(a_sgn); |
| } |
| |
| a_mnt <<= 3; |
| b_mnt <<= 3; |
| if (a_exp >= b_exp) { |
| b_mnt = (lsr16(b_mnt, a_exp - b_exp) | |
| !!(b_mnt & (lsl16(1, a_exp - b_exp) - 1))); |
| b_exp = a_exp; |
| } else { |
| a_mnt = (lsr16(a_mnt, b_exp - a_exp) | |
| !!(a_mnt & (lsl16(1, b_exp - a_exp) - 1))); |
| a_exp = b_exp; |
| } |
| x_sgn = a_sgn; |
| x_exp = a_exp; |
| if (a_sgn == b_sgn) { |
| x_mnt = a_mnt + b_mnt; |
| } else if (a_mnt >= b_mnt) { |
| x_mnt = a_mnt - b_mnt; |
| } else { |
| x_sgn ^= 1; |
| x_mnt = b_mnt - a_mnt; |
| } |
| |
| if (!x_mnt) { |
| // Sign of exact zero result depends on rounding mode |
| return fp16_zero((mode & 3) == 2); |
| } |
| |
| x_mnt = fp16_normalise(x_mnt, &x_exp); |
| |
| return fp16_round(x_sgn, x_exp + FP16_EXP_BITS - 3, x_mnt << 1, |
| mode, flags); |
| } |
| |
| static uint32_t |
| fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags) |
| { |
| int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; |
| uint32_t a_mnt, b_mnt, x, x_mnt; |
| |
| fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); |
| |
| if ((x = fp32_process_NaNs(a, b, mode, flags))) { |
| return x; |
| } |
| |
| b_sgn ^= neg; |
| |
| // Handle infinities and zeroes: |
| if (a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF && a_sgn != b_sgn) { |
| *flags |= FPLIB_IOC; |
| return fp32_defaultNaN(); |
| } else if (a_exp == FP32_EXP_INF) { |
| return fp32_infinity(a_sgn); |
| } else if (b_exp == FP32_EXP_INF) { |
| return fp32_infinity(b_sgn); |
| } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { |
| return fp32_zero(a_sgn); |
| } |
| |
| a_mnt <<= 3; |
| b_mnt <<= 3; |
| if (a_exp >= b_exp) { |
| b_mnt = (lsr32(b_mnt, a_exp - b_exp) | |
| !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1))); |
| b_exp = a_exp; |
| } else { |
| a_mnt = (lsr32(a_mnt, b_exp - a_exp) | |
| !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1))); |
| a_exp = b_exp; |
| } |
| x_sgn = a_sgn; |
| x_exp = a_exp; |
| if (a_sgn == b_sgn) { |
| x_mnt = a_mnt + b_mnt; |
| } else if (a_mnt >= b_mnt) { |
| x_mnt = a_mnt - b_mnt; |
| } else { |
| x_sgn ^= 1; |
| x_mnt = b_mnt - a_mnt; |
| } |
| |
| if (!x_mnt) { |
| // Sign of exact zero result depends on rounding mode |
| return fp32_zero((mode & 3) == 2); |
| } |
| |
| x_mnt = fp32_normalise(x_mnt, &x_exp); |
| |
| return fp32_round(x_sgn, x_exp + FP32_EXP_BITS - 3, x_mnt << 1, |
| mode, flags); |
| } |
| |
| static uint64_t |
| fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags) |
| { |
| int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; |
| uint64_t a_mnt, b_mnt, x, x_mnt; |
| |
| fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); |
| |
| if ((x = fp64_process_NaNs(a, b, mode, flags))) { |
| return x; |
| } |
| |
| b_sgn ^= neg; |
| |
| // Handle infinities and zeroes: |
| if (a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF && a_sgn != b_sgn) { |
| *flags |= FPLIB_IOC; |
| return fp64_defaultNaN(); |
| } else if (a_exp == FP64_EXP_INF) { |
| return fp64_infinity(a_sgn); |
| } else if (b_exp == FP64_EXP_INF) { |
| return fp64_infinity(b_sgn); |
| } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { |
| return fp64_zero(a_sgn); |
| } |
| |
| a_mnt <<= 3; |
| b_mnt <<= 3; |
| if (a_exp >= b_exp) { |
| b_mnt = (lsr64(b_mnt, a_exp - b_exp) | |
| !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1))); |
| b_exp = a_exp; |
| } else { |
| a_mnt = (lsr64(a_mnt, b_exp - a_exp) | |
| !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1))); |
| a_exp = b_exp; |
| } |
| x_sgn = a_sgn; |
| x_exp = a_exp; |
| if (a_sgn == b_sgn) { |
| x_mnt = a_mnt + b_mnt; |
| } else if (a_mnt >= b_mnt) { |
| x_mnt = a_mnt - b_mnt; |
| } else { |
| x_sgn ^= 1; |
| x_mnt = b_mnt - a_mnt; |
| } |
| |
| if (!x_mnt) { |
| // Sign of exact zero result depends on rounding mode |
| return fp64_zero((mode & 3) == 2); |
| } |
| |
| x_mnt = fp64_normalise(x_mnt, &x_exp); |
| |
| return fp64_round(x_sgn, x_exp + FP64_EXP_BITS - 3, x_mnt << 1, |
| mode, flags); |
| } |
| |
| static uint16_t |
| fp16_mul(uint16_t a, uint16_t b, int mode, int *flags) |
| { |
| int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; |
| uint16_t a_mnt, b_mnt, x; |
| uint32_t x_mnt; |
| |
| fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); |
| |
| if ((x = fp16_process_NaNs(a, b, mode, flags))) { |
| return x; |
| } |
| |
| // Handle infinities and zeroes: |
| if ((a_exp == FP16_EXP_INF && !b_mnt) || |
| (b_exp == FP16_EXP_INF && !a_mnt)) { |
| *flags |= FPLIB_IOC; |
| return fp16_defaultNaN(); |
| } else if (a_exp == FP16_EXP_INF || b_exp == FP16_EXP_INF) { |
| return fp16_infinity(a_sgn ^ b_sgn); |
| } else if (!a_mnt || !b_mnt) { |
| return fp16_zero(a_sgn ^ b_sgn); |
| } |
| |
| // Multiply and normalise: |
| x_sgn = a_sgn ^ b_sgn; |
| x_exp = a_exp + b_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1; |
| x_mnt = (uint32_t)a_mnt * b_mnt; |
| x_mnt = fp32_normalise(x_mnt, &x_exp); |
| |
| // Convert to FP16_BITS bits, collapsing error into bottom bit: |
| x_mnt = lsr32(x_mnt, FP16_BITS - 1) | !!lsl32(x_mnt, FP16_BITS + 1); |
| |
| return fp16_round(x_sgn, x_exp, x_mnt, mode, flags); |
| } |
| |
| static uint32_t |
| fp32_mul(uint32_t a, uint32_t b, int mode, int *flags) |
| { |
| int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; |
| uint32_t a_mnt, b_mnt, x; |
| uint64_t x_mnt; |
| |
| fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); |
| |
| if ((x = fp32_process_NaNs(a, b, mode, flags))) { |
| return x; |
| } |
| |
| // Handle infinities and zeroes: |
| if ((a_exp == FP32_EXP_INF && !b_mnt) || |
| (b_exp == FP32_EXP_INF && !a_mnt)) { |
| *flags |= FPLIB_IOC; |
| return fp32_defaultNaN(); |
| } else if (a_exp == FP32_EXP_INF || b_exp == FP32_EXP_INF) { |
| return fp32_infinity(a_sgn ^ b_sgn); |
| } else if (!a_mnt || !b_mnt) { |
| return fp32_zero(a_sgn ^ b_sgn); |
| } |
| |
| // Multiply and normalise: |
| x_sgn = a_sgn ^ b_sgn; |
| x_exp = a_exp + b_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1; |
| x_mnt = (uint64_t)a_mnt * b_mnt; |
| x_mnt = fp64_normalise(x_mnt, &x_exp); |
| |
| // Convert to FP32_BITS bits, collapsing error into bottom bit: |
| x_mnt = lsr64(x_mnt, FP32_BITS - 1) | !!lsl64(x_mnt, FP32_BITS + 1); |
| |
| return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); |
| } |
| |
| static uint64_t |
| fp64_mul(uint64_t a, uint64_t b, int mode, int *flags) |
| { |
| int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; |
| uint64_t a_mnt, b_mnt, x; |
| uint64_t x0_mnt, x1_mnt; |
| |
| fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); |
| |
| if ((x = fp64_process_NaNs(a, b, mode, flags))) { |
| return x; |
| } |
| |
| // Handle infinities and zeroes: |
| if ((a_exp == FP64_EXP_INF && !b_mnt) || |
| (b_exp == FP64_EXP_INF && !a_mnt)) { |
| *flags |= FPLIB_IOC; |
| return fp64_defaultNaN(); |
| } else if (a_exp == FP64_EXP_INF || b_exp == FP64_EXP_INF) { |
| return fp64_infinity(a_sgn ^ b_sgn); |
| } else if (!a_mnt || !b_mnt) { |
| return fp64_zero(a_sgn ^ b_sgn); |
| } |
| |
| // Multiply and normalise: |
| x_sgn = a_sgn ^ b_sgn; |
| x_exp = a_exp + b_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1; |
| mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt); |
| fp128_normalise(&x0_mnt, &x1_mnt, &x_exp); |
| |
| // Convert to FP64_BITS bits, collapsing error into bottom bit: |
| x0_mnt = x1_mnt << 1 | !!x0_mnt; |
| |
| return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags); |
| } |
| |
| static uint16_t |
| fp16_muladd(uint16_t a, uint16_t b, uint16_t c, int scale, |
| int mode, int *flags) |
| { |
| int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp; |
| uint16_t a_mnt, b_mnt, c_mnt, x; |
| uint32_t x_mnt, y_mnt; |
| |
| fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); |
| fp16_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags); |
| |
| x = fp16_process_NaNs3(a, b, c, mode, flags); |
| |
| // Quiet NaN added to product of zero and infinity: |
| if (fp16_is_quiet_NaN(a_exp, a_mnt) && |
| ((!b_mnt && fp16_is_infinity(c_exp, c_mnt)) || |
| (!c_mnt && fp16_is_infinity(b_exp, b_mnt)))) { |
| x = fp16_defaultNaN(); |
| *flags |= FPLIB_IOC; |
| } |
| |
| if (x) { |
| return x; |
| } |
| |
| // Handle infinities and zeroes: |
| if ((b_exp == FP16_EXP_INF && !c_mnt) || |
| (c_exp == FP16_EXP_INF && !b_mnt) || |
| (a_exp == FP16_EXP_INF && |
| (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF) && |
| (a_sgn != (b_sgn ^ c_sgn)))) { |
| *flags |= FPLIB_IOC; |
| return fp16_defaultNaN(); |
| } |
| if (a_exp == FP16_EXP_INF) |
| return fp16_infinity(a_sgn); |
| if (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF) |
| return fp16_infinity(b_sgn ^ c_sgn); |
| if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn)) |
| return fp16_zero(a_sgn); |
| |
| x_sgn = a_sgn; |
| x_exp = a_exp + 2 * FP16_EXP_BITS - 3; |
| x_mnt = (uint32_t)a_mnt << (FP16_MANT_BITS + 4); |
| |
| // Multiply: |
| y_sgn = b_sgn ^ c_sgn; |
| y_exp = b_exp + c_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1 - 3; |
| y_mnt = (uint32_t)b_mnt * c_mnt << 3; |
| if (!y_mnt) { |
| y_exp = x_exp; |
| } |
| |
| // Add: |
| if (x_exp >= y_exp) { |
| y_mnt = (lsr32(y_mnt, x_exp - y_exp) | |
| !!(y_mnt & (lsl32(1, x_exp - y_exp) - 1))); |
| y_exp = x_exp; |
| } else { |
| x_mnt = (lsr32(x_mnt, y_exp - x_exp) | |
| !!(x_mnt & (lsl32(1, y_exp - x_exp) - 1))); |
| x_exp = y_exp; |
| } |
| if (x_sgn == y_sgn) { |
| x_mnt = x_mnt + y_mnt; |
| } else if (x_mnt >= y_mnt) { |
| x_mnt = x_mnt - y_mnt; |
| } else { |
| x_sgn ^= 1; |
| x_mnt = y_mnt - x_mnt; |
| } |
| |
| if (!x_mnt) { |
| // Sign of exact zero result depends on rounding mode |
| return fp16_zero((mode & 3) == 2); |
| } |
| |
| // Normalise into FP16_BITS bits, collapsing error into bottom bit: |
| x_mnt = fp32_normalise(x_mnt, &x_exp); |
| x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1); |
| |
| return fp16_round(x_sgn, x_exp + scale, x_mnt, mode, flags); |
| } |
| |
| static uint32_t |
| fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale, |
| int mode, int *flags) |
| { |
| int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp; |
| uint32_t a_mnt, b_mnt, c_mnt, x; |
| uint64_t x_mnt, y_mnt; |
| |
| fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); |
| fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags); |
| |
| x = fp32_process_NaNs3(a, b, c, mode, flags); |
| |
| // Quiet NaN added to product of zero and infinity: |
| if (fp32_is_quiet_NaN(a_exp, a_mnt) && |
| ((!b_mnt && fp32_is_infinity(c_exp, c_mnt)) || |
| (!c_mnt && fp32_is_infinity(b_exp, b_mnt)))) { |
| x = fp32_defaultNaN(); |
| *flags |= FPLIB_IOC; |
| } |
| |
| if (x) { |
| return x; |
| } |
| |
| // Handle infinities and zeroes: |
| if ((b_exp == FP32_EXP_INF && !c_mnt) || |
| (c_exp == FP32_EXP_INF && !b_mnt) || |
| (a_exp == FP32_EXP_INF && |
| (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF) && |
| (a_sgn != (b_sgn ^ c_sgn)))) { |
| *flags |= FPLIB_IOC; |
| return fp32_defaultNaN(); |
| } |
| if (a_exp == FP32_EXP_INF) |
| return fp32_infinity(a_sgn); |
| if (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF) |
| return fp32_infinity(b_sgn ^ c_sgn); |
| if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn)) |
| return fp32_zero(a_sgn); |
| |
| x_sgn = a_sgn; |
| x_exp = a_exp + 2 * FP32_EXP_BITS - 3; |
| x_mnt = (uint64_t)a_mnt << (FP32_MANT_BITS + 4); |
| |
| // Multiply: |
| y_sgn = b_sgn ^ c_sgn; |
| y_exp = b_exp + c_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1 - 3; |
| y_mnt = (uint64_t)b_mnt * c_mnt << 3; |
| if (!y_mnt) { |
| y_exp = x_exp; |
| } |
| |
| // Add: |
| if (x_exp >= y_exp) { |
| y_mnt = (lsr64(y_mnt, x_exp - y_exp) | |
| !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1))); |
| y_exp = x_exp; |
| } else { |
| x_mnt = (lsr64(x_mnt, y_exp - x_exp) | |
| !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1))); |
| x_exp = y_exp; |
| } |
| if (x_sgn == y_sgn) { |
| x_mnt = x_mnt + y_mnt; |
| } else if (x_mnt >= y_mnt) { |
| x_mnt = x_mnt - y_mnt; |
| } else { |
| x_sgn ^= 1; |
| x_mnt = y_mnt - x_mnt; |
| } |
| |
| if (!x_mnt) { |
| // Sign of exact zero result depends on rounding mode |
| return fp32_zero((mode & 3) == 2); |
| } |
| |
| // Normalise into FP32_BITS bits, collapsing error into bottom bit: |
| x_mnt = fp64_normalise(x_mnt, &x_exp); |
| x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1); |
| |
| return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags); |
| } |
| |
| static uint64_t |
| fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale, |
| int mode, int *flags) |
| { |
| int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp; |
| uint64_t a_mnt, b_mnt, c_mnt, x; |
| uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt; |
| |
| fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); |
| fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags); |
| |
| x = fp64_process_NaNs3(a, b, c, mode, flags); |
| |
| // Quiet NaN added to product of zero and infinity: |
| if (fp64_is_quiet_NaN(a_exp, a_mnt) && |
| ((!b_mnt && fp64_is_infinity(c_exp, c_mnt)) || |
| (!c_mnt && fp64_is_infinity(b_exp, b_mnt)))) { |
| x = fp64_defaultNaN(); |
| *flags |= FPLIB_IOC; |
| } |
| |
| if (x) { |
| return x; |
| } |
| |
| // Handle infinities and zeroes: |
| if ((b_exp == FP64_EXP_INF && !c_mnt) || |
| (c_exp == FP64_EXP_INF && !b_mnt) || |
| (a_exp == FP64_EXP_INF && |
| (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF) && |
| (a_sgn != (b_sgn ^ c_sgn)))) { |
| *flags |= FPLIB_IOC; |
| return fp64_defaultNaN(); |
| } |
| if (a_exp == FP64_EXP_INF) |
| return fp64_infinity(a_sgn); |
| if (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF) |
| return fp64_infinity(b_sgn ^ c_sgn); |
| if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn)) |
| return fp64_zero(a_sgn); |
| |
| x_sgn = a_sgn; |
| x_exp = a_exp + FP64_EXP_BITS; |
| x0_mnt = 0; |
| x1_mnt = a_mnt; |
| |
| // Multiply: |
| y_sgn = b_sgn ^ c_sgn; |
| y_exp = b_exp + c_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1 - 3; |
| mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3); |
| if (!y0_mnt && !y1_mnt) { |
| y_exp = x_exp; |
| } |
| |
| // Add: |
| if (x_exp >= y_exp) { |
| uint64_t t0, t1; |
| lsl128(&t0, &t1, y0_mnt, y1_mnt, |
| x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0); |
| lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp); |
| y0_mnt |= !!(t0 | t1); |
| y_exp = x_exp; |
| } else { |
| uint64_t t0, t1; |
| lsl128(&t0, &t1, x0_mnt, x1_mnt, |
| y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0); |
| lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp); |
| x0_mnt |= !!(t0 | t1); |
| x_exp = y_exp; |
| } |
| if (x_sgn == y_sgn) { |
| add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt); |
| } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) { |
| sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt); |
| } else { |
| x_sgn ^= 1; |
| sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt); |
| } |
| |
| if (!x0_mnt && !x1_mnt) { |
| // Sign of exact zero result depends on rounding mode |
| return fp64_zero((mode & 3) == 2); |
| } |
| |
| // Normalise into FP64_BITS bits, collapsing error into bottom bit: |
| fp128_normalise(&x0_mnt, &x1_mnt, &x_exp); |
| x0_mnt = x1_mnt << 1 | !!x0_mnt; |
| |
| return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags); |
| } |
| |
| static uint16_t |
| fp16_div(uint16_t a, uint16_t b, int mode, int *flags) |
| { |
| int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; |
| uint16_t a_mnt, b_mnt, x; |
| uint32_t x_mnt; |
| |
| fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); |
| |
| if ((x = fp16_process_NaNs(a, b, mode, flags))) |
| return x; |
| |
| // Handle infinities and zeroes: |
| if ((a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF) || |
| (!a_mnt && !b_mnt)) { |
| *flags |= FPLIB_IOC; |
| return fp16_defaultNaN(); |
| } |
| if (a_exp == FP16_EXP_INF || !b_mnt) { |
| if (a_exp != FP16_EXP_INF) |
| *flags |= FPLIB_DZC; |
| return fp16_infinity(a_sgn ^ b_sgn); |
| } |
| if (!a_mnt || b_exp == FP16_EXP_INF) |
| return fp16_zero(a_sgn ^ b_sgn); |
| |
| // Divide, setting bottom bit if inexact: |
| a_mnt = fp16_normalise(a_mnt, &a_exp); |
| x_sgn = a_sgn ^ b_sgn; |
| x_exp = a_exp - b_exp + (FP16_EXP_BIAS + FP16_BITS + 2 * FP16_EXP_BITS - 3); |
| x_mnt = ((uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3)) / b_mnt; |
| x_mnt |= (x_mnt * b_mnt != |
| (uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3)); |
| |
| // Normalise into FP16_BITS bits, collapsing error into bottom bit: |
| x_mnt = fp32_normalise(x_mnt, &x_exp); |
| x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1); |
| |
| return fp16_round(x_sgn, x_exp, x_mnt, mode, flags); |
| } |
| |
| static uint32_t |
| fp32_div(uint32_t a, uint32_t b, int mode, int *flags) |
| { |
| int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; |
| uint32_t a_mnt, b_mnt, x; |
| uint64_t x_mnt; |
| |
| fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); |
| |
| if ((x = fp32_process_NaNs(a, b, mode, flags))) |
| return x; |
| |
| // Handle infinities and zeroes: |
| if ((a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF) || |
| (!a_mnt && !b_mnt)) { |
| *flags |= FPLIB_IOC; |
| return fp32_defaultNaN(); |
| } |
| if (a_exp == FP32_EXP_INF || !b_mnt) { |
| if (a_exp != FP32_EXP_INF) |
| *flags |= FPLIB_DZC; |
| return fp32_infinity(a_sgn ^ b_sgn); |
| } |
| if (!a_mnt || b_exp == FP32_EXP_INF) |
| return fp32_zero(a_sgn ^ b_sgn); |
| |
| // Divide, setting bottom bit if inexact: |
| a_mnt = fp32_normalise(a_mnt, &a_exp); |
| x_sgn = a_sgn ^ b_sgn; |
| x_exp = a_exp - b_exp + (FP32_EXP_BIAS + FP32_BITS + 2 * FP32_EXP_BITS - 3); |
| x_mnt = ((uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3)) / b_mnt; |
| x_mnt |= (x_mnt * b_mnt != |
| (uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3)); |
| |
| // Normalise into FP32_BITS bits, collapsing error into bottom bit: |
| x_mnt = fp64_normalise(x_mnt, &x_exp); |
| x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1); |
| |
| return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); |
| } |
| |
| static uint64_t |
| fp64_div(uint64_t a, uint64_t b, int mode, int *flags) |
| { |
| int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c; |
| uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt; |
| |
| fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); |
| |
| if ((x = fp64_process_NaNs(a, b, mode, flags))) |
| return x; |
| |
| // Handle infinities and zeroes: |
| if ((a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF) || |
| (!a_mnt && !b_mnt)) { |
| *flags |= FPLIB_IOC; |
| return fp64_defaultNaN(); |
| } |
| if (a_exp == FP64_EXP_INF || !b_mnt) { |
| if (a_exp != FP64_EXP_INF) |
| *flags |= FPLIB_DZC; |
| return fp64_infinity(a_sgn ^ b_sgn); |
| } |
| if (!a_mnt || b_exp == FP64_EXP_INF) |
| return fp64_zero(a_sgn ^ b_sgn); |
| |
| // Find reciprocal of divisor with Newton-Raphson: |
| a_mnt = fp64_normalise(a_mnt, &a_exp); |
| b_mnt = fp64_normalise(b_mnt, &b_exp); |
| x_mnt = ~(uint64_t)0 / (b_mnt >> 31); |
| mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt); |
| sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt); |
| lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32); |
| mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt); |
| lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33); |
| |
| // Multiply by dividend: |
| x_sgn = a_sgn ^ b_sgn; |
| x_exp = a_exp - b_exp + FP64_EXP_BIAS + 8; |
| mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2); |
| lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4); |
| x_mnt = x1_mnt; |
| |
| // This is an underestimate, so try adding one: |
| mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1); |
| c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11); |
| if (c <= 0) { |
| ++x_mnt; |
| } |
| |
| x_mnt = fp64_normalise(x_mnt, &x_exp); |
| |
| return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags); |
| } |
| |
| static void |
| set_fpscr0(FPSCR &fpscr, int flags) |
| { |
| if (flags & FPLIB_IDC) { |
| fpscr.idc = 1; |
| } |
| if (flags & FPLIB_IOC) { |
| fpscr.ioc = 1; |
| } |
| if (flags & FPLIB_DZC) { |
| fpscr.dzc = 1; |
| } |
| if (flags & FPLIB_OFC) { |
| fpscr.ofc = 1; |
| } |
| if (flags & FPLIB_UFC) { |
| fpscr.ufc = 1; |
| } |
| if (flags & FPLIB_IXC) { |
| fpscr.ixc = 1; |
| } |
| } |
| |
| static uint16_t |
| fp16_scale(uint16_t a, int16_t b, int mode, int *flags) |
| { |
| int a_sgn, a_exp; |
| uint16_t a_mnt; |
| |
| fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| |
| // Handle NaNs: |
| if (fp16_is_NaN(a_exp, a_mnt)) { |
| return fp16_process_NaN(a, mode, flags); |
| } |
| |
| // Handle zeroes: |
| if (!a_mnt) { |
| return fp16_zero(a_sgn); |
| } |
| |
| // Handle infinities: |
| if (a_exp == FP16_EXP_INF) { |
| return fp16_infinity(a_sgn); |
| } |
| |
| b = b < -300 ? -300 : b; |
| b = b > 300 ? 300 : b; |
| a_exp += b; |
| a_mnt <<= 3; |
| |
| a_mnt = fp16_normalise(a_mnt, &a_exp); |
| |
| return fp16_round(a_sgn, a_exp + FP16_EXP_BITS - 3, a_mnt << 1, |
| mode, flags); |
| } |
| |
| static uint32_t |
| fp32_scale(uint32_t a, int32_t b, int mode, int *flags) |
| { |
| int a_sgn, a_exp; |
| uint32_t a_mnt; |
| |
| fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| |
| // Handle NaNs: |
| if (fp32_is_NaN(a_exp, a_mnt)) { |
| return fp32_process_NaN(a, mode, flags); |
| } |
| |
| // Handle zeroes: |
| if (!a_mnt) { |
| return fp32_zero(a_sgn); |
| } |
| |
| // Handle infinities: |
| if (a_exp == FP32_EXP_INF) { |
| return fp32_infinity(a_sgn); |
| } |
| |
| b = b < -300 ? -300 : b; |
| b = b > 300 ? 300 : b; |
| a_exp += b; |
| a_mnt <<= 3; |
| |
| a_mnt = fp32_normalise(a_mnt, &a_exp); |
| |
| return fp32_round(a_sgn, a_exp + FP32_EXP_BITS - 3, a_mnt << 1, |
| mode, flags); |
| } |
| |
| static uint64_t |
| fp64_scale(uint64_t a, int64_t b, int mode, int *flags) |
| { |
| int a_sgn, a_exp; |
| uint64_t a_mnt; |
| |
| fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| |
| // Handle NaNs: |
| if (fp64_is_NaN(a_exp, a_mnt)) { |
| return fp64_process_NaN(a, mode, flags); |
| } |
| |
| // Handle zeroes: |
| if (!a_mnt) { |
| return fp64_zero(a_sgn); |
| } |
| |
| // Handle infinities: |
| if (a_exp == FP64_EXP_INF) { |
| return fp64_infinity(a_sgn); |
| } |
| |
| b = b < -3000 ? -3000 : b; |
| b = b > 3000 ? 3000 : b; |
| a_exp += b; |
| a_mnt <<= 3; |
| |
| a_mnt = fp64_normalise(a_mnt, &a_exp); |
| |
| return fp64_round(a_sgn, a_exp + FP64_EXP_BITS - 3, a_mnt << 1, |
| mode, flags); |
| } |
| |
| static uint16_t |
| fp16_sqrt(uint16_t a, int mode, int *flags) |
| { |
| int a_sgn, a_exp, x_sgn, x_exp; |
| uint16_t a_mnt, x_mnt; |
| uint32_t x, t0, t1; |
| |
| fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| |
| // Handle NaNs: |
| if (fp16_is_NaN(a_exp, a_mnt)) |
| return fp16_process_NaN(a, mode, flags); |
| |
| // Handle infinities and zeroes: |
| if (!a_mnt) |
| return fp16_zero(a_sgn); |
| if (a_exp == FP16_EXP_INF && !a_sgn) |
| return fp16_infinity(a_sgn); |
| if (a_sgn) { |
| *flags |= FPLIB_IOC; |
| return fp16_defaultNaN(); |
| } |
| |
| a_mnt = fp16_normalise(a_mnt, &a_exp); |
| if (a_exp & 1) { |
| ++a_exp; |
| a_mnt >>= 1; |
| } |
| |
| // x = (a * 3 + 5) / 8 |
| x = ((uint32_t)a_mnt << 14) + ((uint32_t)a_mnt << 13) + ((uint32_t)5 << 28); |
| |
| // x = (a / x + x) / 2; // 8-bit accuracy |
| x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15; |
| |
| // x = (a / x + x) / 2; // 16-bit accuracy |
| x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15; |
| |
| x_sgn = 0; |
| x_exp = (a_exp + 27) >> 1; |
| x_mnt = ((x - (1 << 18)) >> 19) + 1; |
| t1 = (uint32_t)x_mnt * x_mnt; |
| t0 = (uint32_t)a_mnt << 9; |
| if (t1 > t0) { |
| --x_mnt; |
| } |
| |
| x_mnt = fp16_normalise(x_mnt, &x_exp); |
| |
| return fp16_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags); |
| } |
| |
| static uint32_t |
| fp32_sqrt(uint32_t a, int mode, int *flags) |
| { |
| int a_sgn, a_exp, x_sgn, x_exp; |
| uint32_t a_mnt, x, x_mnt; |
| uint64_t t0, t1; |
| |
| fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| |
| // Handle NaNs: |
| if (fp32_is_NaN(a_exp, a_mnt)) |
| return fp32_process_NaN(a, mode, flags); |
| |
| // Handle infinities and zeroes: |
| if (!a_mnt) |
| return fp32_zero(a_sgn); |
| if (a_exp == FP32_EXP_INF && !a_sgn) |
| return fp32_infinity(a_sgn); |
| if (a_sgn) { |
| *flags |= FPLIB_IOC; |
| return fp32_defaultNaN(); |
| } |
| |
| a_mnt = fp32_normalise(a_mnt, &a_exp); |
| if (!(a_exp & 1)) { |
| ++a_exp; |
| a_mnt >>= 1; |
| } |
| |
| // x = (a * 3 + 5) / 8 |
| x = (a_mnt >> 2) + (a_mnt >> 3) + ((uint32_t)5 << 28); |
| |
| // x = (a / x + x) / 2; // 8-bit accuracy |
| x = (a_mnt / (x >> 15) + (x >> 16)) << 15; |
| |
| // x = (a / x + x) / 2; // 16-bit accuracy |
| x = (a_mnt / (x >> 15) + (x >> 16)) << 15; |
| |
| // x = (a / x + x) / 2; // 32-bit accuracy |
| x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1); |
| |
| x_sgn = 0; |
| x_exp = (a_exp + 147) >> 1; |
| x_mnt = ((x - (1 << 5)) >> 6) + 1; |
| t1 = (uint64_t)x_mnt * x_mnt; |
| t0 = (uint64_t)a_mnt << 19; |
| if (t1 > t0) { |
| --x_mnt; |
| } |
| |
| x_mnt = fp32_normalise(x_mnt, &x_exp); |
| |
| return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags); |
| } |
| |
| static uint64_t |
| fp64_sqrt(uint64_t a, int mode, int *flags) |
| { |
| int a_sgn, a_exp, x_sgn, x_exp, c; |
| uint64_t a_mnt, x_mnt, r, x0, x1; |
| uint32_t x; |
| |
| fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); |
| |
| // Handle NaNs: |
| if (fp64_is_NaN(a_exp, a_mnt)) |
| return fp64_process_NaN(a, mode, flags); |
| |
| // Handle infinities and zeroes: |
| if (!a_mnt) |
| return fp64_zero(a_sgn); |
| if (a_exp == FP64_EXP_INF && !a_sgn) |
| return fp64_infinity(a_sgn); |
| if (a_sgn) { |
| *flags |= FPLIB_IOC; |
| return fp64_defaultNaN(); |
| } |
| |
| a_mnt = fp64_normalise(a_mnt, &a_exp); |
| if (a_exp & 1) { |
| ++a_exp; |
| a_mnt >>= 1; |
| } |
| |
| // x = (a * 3 + 5) / 8 |
| x = (a_mnt >> 34) + (a_mnt >> 35) + ((uint32_t)5 << 28); |
| |
| // x = (a / x + x) / 2; // 8-bit accuracy |
| x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15; |
| |
| // x = (a / x + x) / 2; // 16-bit accuracy |
| x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15; |
| |
| // x = (a / x + x) / 2; // 32-bit accuracy |
| x = ((a_mnt / x) >> 2) + (x >> 1); |
| |
| // r = 1 / x; // 32-bit accuracy |
| r = ((uint64_t)1 << 62) / x; |
| |
| // r = r * (2 - x * r); // 64-bit accuracy |
| mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r); |
| lsr128(&x0, &x1, x0, x1, 31); |
| |
| // x = (x + a * r) / 2; // 64-bit accuracy |
| mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2); |
| lsl128(&x0, &x1, x0, x1, 5); |
| lsr128(&x0, &x1, x0, x1, 56); |
| |
| x0 = ((uint64_t)x << 31) + (x0 >> 1); |
| |
| x_sgn = 0; |
| x_exp = (a_exp + 1053) >> 1; |
| x_mnt = x0; |
| x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1; |
| mul62x62(&x0, &x1, x_mnt, x_mnt); |
| lsl128(&x0, &x1, x0, x1, 19); |
| c = cmp128(x0, x1, 0, a_mnt); |
| if (c > 0) |
| --x_mnt; |
| |
| x_mnt = fp64_normalise(x_mnt, &x_exp); |
| |
| return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags); |
| } |
| |
| static int |
| modeConv(FPSCR fpscr) |
| { |
| uint32_t x = (uint32_t)fpscr; |
| return (x >> 22 & 0xf) | (x >> 19 & 1 ? FPLIB_FZ16 : 0); |
| // AHP bit is ignored. Only fplibConvert uses AHP. |
| } |
| |
| static void |
| set_fpscr(FPSCR &fpscr, int flags) |
| { |
| // translate back to FPSCR |
| bool underflow = false; |
| if (flags & FPLIB_IDC) { |
| fpscr.idc = 1; |
| } |
| if (flags & FPLIB_IOC) { |
| fpscr.ioc = 1; |
| } |
| if (flags & FPLIB_DZC) { |
| fpscr.dzc = 1; |
| } |
| if (flags & FPLIB_OFC) { |
| fpscr.ofc = 1; |
| } |
| if (flags & FPLIB_UFC) { |
| underflow = true; //xx Why is this required? |
| fpscr.ufc = 1; |
| } |
| if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) { |
| fpscr.ixc = 1; |
| } |
| } |
| |
| template <> |
| bool |
| fplibCompareEQ(uint16_t a, uint16_t b, FPSCR &fpscr) |
| { |
| int flags = 0; |
| int x = fp16_compare_eq(a, b, modeConv(fpscr), &flags); |
| set_fpscr(fpscr, flags); |
| return x; |
| } |
| |
| template <> |
| bool |
| fplibCompareGE(uint16_t a, uint16_t b, FPSCR &fpscr) |
| { |
| int flags = 0; |
| int x = fp16_compare_ge(a, b, modeConv(fpscr), &flags); |
| set_fpscr(fpscr, flags); |
| return x; |
| } |
| |
| template <> |
| bool |
| fplibCompareGT(uint16_t a, uint16_t b, FPSCR &fpscr) |
| { |
| int flags = 0; |
| int x = fp16_compare_gt(a, b, modeConv(fpscr), &flags); |
| set_fpscr(fpscr, flags); |
| return x; |
| } |
| |
| template <> |
| bool |
| fplibCompareUN(uint16_t a, uint16_t b, FPSCR &fpscr) |
| { |
| int flags = 0; |
| int x = fp16_compare_un(a, b, modeConv(fpscr), &flags); |
| set_fpscr(fpscr, flags); |
| return x; |
| } |
| |
| template <> |
| bool |
| fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr) |
| { |
| int flags = 0; |
| int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags); |
| set_fpscr(fpscr, flags); |
| return x; |
| } |
| |
| template <> |
| bool |
| fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr) |
| { |
| int flags = 0; |
| int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags); |
| set_fpscr(fpscr, flags); |
| return x; |
| } |
| |
| template <> |
| bool |
| fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr) |
| { |
| int flags = 0; |
| int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags); |
| set_fpscr(fpscr, flags); |
| return x; |
| } |
| |
| template <> |
| bool |
| fplibCompareUN(uint32_t a, uint32_t b, FPSCR &fpscr) |
| { |
| int flags = 0; |
| int x = fp32_compare_un(a, b, modeConv(fpscr), &flags); |
| set_fpscr(fpscr, flags); |
| return x; |
| } |
| |
| template <> |
| bool |
| fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr) |
| { |
| int flags = 0; |
| int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags); |
| set_fpscr(fpscr, flags); |
| return x; |
| } |
| |
| template <> |
| bool |
| fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr) |
| { |
| int flags = 0; |
| int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags); |
| set_fpscr(fpscr, flags); |
| return x; |
| } |
| |
| template <> |
| bool |
| fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr) |
| { |
| int flags = 0; |
| int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags); |
| set_fpscr(fpscr, flags); |
| return x; |
| } |
| |
| template <> |
| bool |
| fplibCompareUN(uint64_t a, uint64_t b, FPSCR &fpscr) |
| { |
| int flags = 0; |
| int x = fp64_compare_un(a, b, modeConv(fpscr), &flags); |
| set_fpscr(fpscr, flags); |
| return x; |
| } |
| |
| template <> |
| uint16_t |
| fplibAbs(uint16_t op) |
| { |
| return op & ~(1ULL << (FP16_BITS - 1)); |
| } |
| |
| template <> |
| uint32_t |
| fplibAbs(uint32_t op) |
| { |
| return op & ~(1ULL << (FP32_BITS - 1)); |
| } |
| |
| template <> |
| uint64_t |
| fplibAbs(uint64_t op) |
| { |
| return op & ~(1ULL << (FP64_BITS - 1)); |
| } |
| |
| template <> |
| uint16_t |
| fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr) |
| { |
| int flags = 0; |
| uint16_t result = fp16_add(op1, op2, 0, modeConv(fpscr), &flags); |
| set_fpscr0(fpscr, flags); |
| return result; |
| } |
| |
| template <> |
| uint32_t |
| fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr) |
| { |
| int flags = 0; |
| uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags); |
| set_fpscr0(fpscr, flags); |
| return result; |
| } |
| |
| template <> |
| uint64_t |
| fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr) |
| { |
| int flags = 0; |
| uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags); |
| set_fpscr0(fpscr, flags); |
| return result; |
| } |
| |
| template <> |
| int |
| fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr) |
| { |
| int mode = modeConv(fpscr); |
| int flags = 0; |
| int sgn1, exp1, sgn2, exp2, result; |
| uint16_t mnt1, mnt2; |
| |
| fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); |
| fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); |
| |
| if (fp16_is_NaN(exp1, mnt1) || fp16_is_NaN(exp2, mnt2)) { |
| result = 3; |
| if (fp16_is_signalling_NaN(exp1, mnt1) || |
| fp16_is_signalling_NaN(exp2, mnt2) || signal_nans) |
| flags |= FPLIB_IOC; |
| } else { |
| if (op1 == op2 || (!mnt1 && !mnt2)) { |
| result = 6; |
| } else if (sgn1 != sgn2) { |
| result = sgn1 ? 8 : 2; |
| } else if (exp1 != exp2) { |
| result = sgn1 ^ (exp1 < exp2) ? 8 : 2; |
| } else { |
| result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2; |
| } |
| } |
| |
| set_fpscr0(fpscr, flags); |
| |
| return result; |
| } |
| |
| template <> |
| int |
| fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr) |
| { |
| int mode = modeConv(fpscr); |
| int flags = 0; |
| int sgn1, exp1, sgn2, exp2, result; |
| uint32_t mnt1, mnt2; |
| |
| fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); |
| fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); |
| |
| if (fp32_is_NaN(exp1, mnt1) || fp32_is_NaN(exp2, mnt2)) { |
| result = 3; |
| if (fp32_is_signalling_NaN(exp1, mnt1) || |
| fp32_is_signalling_NaN(exp2, mnt2) || signal_nans) |
| flags |= FPLIB_IOC; |
| } else { |
| if (op1 == op2 || (!mnt1 && !mnt2)) { |
| result = 6; |
| } else if (sgn1 != sgn2) { |
| result = sgn1 ? 8 : 2; |
| } else if (exp1 != exp2) { |
| result = sgn1 ^ (exp1 < exp2) ? 8 : 2; |
| } else { |
| result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2; |
| } |
| } |
| |
| set_fpscr0(fpscr, flags); |
| |
| return result; |
| } |
| |
| template <> |
| int |
| fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr) |
| { |
| int mode = modeConv(fpscr); |
| int flags = 0; |
| int sgn1, exp1, sgn2, exp2, result; |
| uint64_t mnt1, mnt2; |
| |
| fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); |
| fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); |
| |
| if (fp64_is_NaN(exp1, mnt1) || fp64_is_NaN(exp2, mnt2)) { |
| result = 3; |
| if (fp64_is_signalling_NaN(exp1, mnt1) || |
| fp64_is_signalling_NaN(exp2, mnt2) || signal_nans) |
| flags |= FPLIB_IOC; |
| } else { |
| if (op1 == op2 || (!mnt1 && !mnt2)) { |
| result = 6; |
| } else if (sgn1 != sgn2) { |
| result = sgn1 ? 8 : 2; |
| } else if (exp1 != exp2) { |
| result = sgn1 ^ (exp1 < exp2) ? 8 : 2; |
| } else { |
| result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2; |
| } |
| } |
| |
| set_fpscr0(fpscr, flags); |
| |
| return result; |
| } |
| |
| static uint16_t |
| fp16_FPConvertNaN_32(uint32_t op) |
| { |
| return fp16_pack(op >> (FP32_BITS - 1), FP16_EXP_INF, |
| 1ULL << (FP16_MANT_BITS - 1) | |
| op >> (FP32_MANT_BITS - FP16_MANT_BITS)); |
| } |
| |
| static uint16_t |
| fp16_FPConvertNaN_64(uint64_t op) |
| { |
| return fp16_pack(op >> (FP64_BITS - 1), FP16_EXP_INF, |
| 1ULL << (FP16_MANT_BITS - 1) | |
| op >> (FP64_MANT_BITS - FP16_MANT_BITS)); |
| } |
| |
| static uint32_t |
| fp32_FPConvertNaN_16(uint16_t op) |
| { |
| return fp32_pack(op >> (FP16_BITS - 1), FP32_EXP_INF, |
| 1ULL << (FP32_MANT_BITS - 1) | |
| (uint32_t)op << (FP32_MANT_BITS - FP16_MANT_BITS)); |
| } |
| |
| static uint32_t |
| fp32_FPConvertNaN_64(uint64_t op) |
| { |
| return fp32_pack(op >> (FP64_BITS - 1), FP32_EXP_INF, |
| 1ULL << (FP32_MANT_BITS - 1) | |
| op >> (FP64_MANT_BITS - FP32_MANT_BITS)); |
| } |
| |
| static uint64_t |
| fp64_FPConvertNaN_16(uint16_t op) |
| { |
| return fp64_pack(op >> (FP16_BITS - 1), FP64_EXP_INF, |
| 1ULL << (FP64_MANT_BITS - 1) | |
| (uint64_t)op << (FP64_MANT_BITS - FP16_MANT_BITS)); |
| } |
| |
| static uint64_t |
| fp64_FPConvertNaN_32(uint32_t op) |
| { |
| return fp64_pack(op >> (FP32_BITS - 1), FP64_EXP_INF, |
| 1ULL << (FP64_MANT_BITS - 1) | |
| (uint64_t)op << (FP64_MANT_BITS - FP32_MANT_BITS)); |
| } |
| |
| static uint16_t |
| fp16_FPOnePointFive(int sgn) |
| { |
| return fp16_pack(sgn, FP16_EXP_BIAS, 1ULL << (FP16_MANT_BITS - 1)); |
| } |
| |
| static uint32_t |
| fp32_FPOnePointFive(int sgn) |
| { |
| return fp32_pack(sgn, FP32_EXP_BIAS, 1ULL << (FP32_MANT_BITS - 1)); |
| } |
| |
| static uint64_t |
| fp64_FPOnePointFive(int sgn) |
| { |
| return fp64_pack(sgn, FP64_EXP_BIAS, 1ULL << (FP64_MANT_BITS - 1)); |
| } |
| |
| static uint16_t |
| fp16_FPThree(int sgn) |
| { |
| return fp16_pack(sgn, FP16_EXP_BIAS + 1, 1ULL << (FP16_MANT_BITS - 1)); |
| } |
| |
| static uint32_t |
| fp32_FPThree(int sgn) |
| { |
| return fp32_pack(sgn, FP32_EXP_BIAS + 1, 1ULL << (FP32_MANT_BITS - 1)); |
| } |
| |
| static uint64_t |
| fp64_FPThree(int sgn) |
| { |
| return fp64_pack(sgn, FP64_EXP_BIAS + 1, 1ULL << (FP64_MANT_BITS - 1)); |
| } |
| |
| static uint16_t |
| fp16_FPTwo(int sgn) |
| { |
| return fp16_pack(sgn, FP16_EXP_BIAS + 1, 0); |
| } |
| |
| static uint32_t |
| fp32_FPTwo(int sgn) |
| { |
| return fp32_pack(sgn, FP32_EXP_BIAS + 1, 0); |
| } |
| |
| static uint64_t |
| fp64_FPTwo(int sgn) |
| { |
| return fp64_pack(sgn, FP64_EXP_BIAS + 1, 0); |
| } |
| |
| template <> |
| uint16_t |
| fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr) |
| { |
| int mode = modeConv(fpscr); |
| int flags = 0; |
| int sgn, exp; |
| uint32_t mnt; |
| uint16_t result; |
| |
| // Unpack floating-point operand optionally with flush-to-zero: |
| fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); |
| |
| bool alt_hp = fpscr.ahp; |
| |
| if (fp32_is_NaN(exp, mnt)) { |
| if (alt_hp) { |
| result = fp16_zero(sgn); |
| } else if (fpscr.dn) { |
| result = fp16_defaultNaN(); |
| } else { |
| result = fp16_FPConvertNaN_32(op); |
| } |
| if (!(mnt >> (FP32_MANT_BITS - 1) & 1) || alt_hp) { |
| flags |= FPLIB_IOC; |
| } |
| } else if (exp == FP32_EXP_INF) { |
| if (alt_hp) { |
| result = ((uint16_t)sgn << (FP16_BITS - 1) | |
| ((1ULL << (FP16_BITS - 1)) - 1)); |
| flags |= FPLIB_IOC; |
| } else { |
| result = fp16_infinity(sgn); |
| } |
| } else if (!mnt) { |
| result = fp16_zero(sgn); |
| } else { |
| result = |
| fp16_round_(sgn, exp - FP32_EXP_BIAS + FP16_EXP_BIAS, |
| mnt >> (FP32_MANT_BITS - FP16_BITS) | |
| !!(mnt & ((1ULL << (FP32_MANT_BITS - FP16_BITS)) - 1)), |
| rounding, (mode & 0xf) | alt_hp << 4, &flags); |
| } |
| |
| set_fpscr0(fpscr, flags); |
| |
| return result; |
| } |
| |
| template <> |
| uint16_t |
| fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr) |
| { |
| int mode = modeConv(fpscr); |
| int flags = 0; |
| int sgn, exp; |
| uint64_t mnt; |
| uint16_t result; |
| |
| // Unpack floating-point operand optionally with flush-to-zero: |
| fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); |
| |
| bool alt_hp = fpscr.ahp; |
| |
| if (fp64_is_NaN(exp, mnt)) { |
| if (alt_hp) { |
| result = fp16_zero(sgn); |
| } else if (fpscr.dn) { |
| result = fp16_defaultNaN(); |
| } else { |
| result = fp16_FPConvertNaN_64(op); |
| } |
| if (!(mnt >> (FP64_MANT_BITS - 1) & 1) || alt_hp) { |
| flags |= FPLIB_IOC; |
| } |
| } else if (exp == FP64_EXP_INF) { |
| if (alt_hp) { |
| result = ((uint16_t)sgn << (FP16_BITS - 1) | |
| ((1ULL << (FP16_BITS - 1)) - 1)); |
| flags |= FPLIB_IOC; |
| } else { |
| result = fp16_infinity(sgn); |
| } |
| } else if (!mnt) { |
| result = fp16_zero(sgn); |
| } else { |
| result = |
| fp16_round_(sgn, exp - FP64_EXP_BIAS + FP16_EXP_BIAS, |
| mnt >> (FP64_MANT_BITS - FP16_BITS) | |
| !!(mnt & ((1ULL << (FP64_MANT_BITS - FP16_BITS)) - 1)), |
| rounding, (mode & 0xf) | alt_hp << 4, &flags); |
| } |
| |
| set_fpscr0(fpscr, flags); |
| |
| return result; |
| } |
| |
| template <> |
| uint32_t |
| fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr) |
| { |
| int mode = modeConv(fpscr); |
| int flags = 0; |
| int sgn, exp; |
| uint16_t mnt; |
| uint32_t result; |
| |
| // Unpack floating-point operand optionally with flush-to-zero: |
| fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags); |
| |
| if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) { |
| if (fpscr.dn) { |
| result = fp32_defaultNaN(); |
| } else |