=============================================================================*/
+/* FIXME: Flush-To-Zero only effects results. Denormal inputs should also
+ be flushed to zero. */
#include "softfloat.h"
/*----------------------------------------------------------------------------
return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
}
if ( zExp < 0 ) {
+ if ( STATUS(flush_to_zero) ) return packFloat32( zSign, 0, 0 );
isTiny =
( STATUS(float_detect_tininess) == float_tininess_before_rounding )
|| ( zExp < -1 )
return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
}
if ( zExp < 0 ) {
+ if ( STATUS(flush_to_zero) ) return packFloat64( zSign, 0, 0 );
isTiny =
( STATUS(float_detect_tininess) == float_tininess_before_rounding )
|| ( zExp < -1 )
goto overflow;
}
if ( zExp <= 0 ) {
+ if ( STATUS(flush_to_zero) ) return packFloatx80( zSign, 0, 0 );
isTiny =
( STATUS(float_detect_tininess) == float_tininess_before_rounding )
|| ( zExp < 0 )
return packFloat128( zSign, 0x7FFF, 0, 0 );
}
if ( zExp < 0 ) {
+ if ( STATUS(flush_to_zero) ) return packFloat128( zSign, 0, 0, 0 );
isTiny =
( STATUS(float_detect_tininess) == float_tininess_before_rounding )
|| ( zExp < -1 )
if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
return a;
}
- if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
+ if ( aExp == 0 ) {
+ if ( STATUS(flush_to_zero) ) return packFloat32( zSign, 0, 0 );
+ return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
+ }
zSig = 0x40000000 + aSig + bSig;
zExp = aExp;
goto roundAndPack;
}
/*----------------------------------------------------------------------------
+| Returns the binary log of the single-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+float32 float32_log2( float32 a STATUS_PARAM )
+{
+ flag aSign, zSign;
+ int16 aExp;
+ bits32 aSig, zSig, i;
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
+ normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+ }
+ if ( aSign ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float32_default_nan;
+ }
+ if ( aExp == 0xFF ) {
+ if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
+ return a;
+ }
+
+ aExp -= 0x7F;
+ aSig |= 0x00800000;
+ zSign = aExp < 0;
+ zSig = aExp << 23;
+
+ for (i = 1 << 22; i > 0; i >>= 1) {
+ aSig = ( (bits64)aSig * aSig ) >> 23;
+ if ( aSig & 0x01000000 ) {
+ aSig >>= 1;
+ zSig |= i;
+ }
+ }
+
+ if ( zSign )
+ zSig = -zSig;
+
+ return normalizeRoundAndPackFloat32( zSign, 0x85, zSig STATUS_VAR );
+}
+
+/*----------------------------------------------------------------------------
| Returns 1 if the single-precision floating-point value `a' is equal to
| the corresponding value `b', and 0 otherwise. The comparison is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
return a;
}
- if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
+ if ( aExp == 0 ) {
+ if ( STATUS(flush_to_zero) ) return packFloat64( zSign, 0, 0 );
+ return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
+ }
zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
zExp = aExp;
goto roundAndPack;
}
/*----------------------------------------------------------------------------
+| Returns the binary log of the double-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+float64 float64_log2( float64 a STATUS_PARAM )
+{
+ flag aSign, zSign;
+ int16 aExp;
+ bits64 aSig, aSig0, aSig1, zSig, i;
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
+ normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+ }
+ if ( aSign ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float64_default_nan;
+ }
+ if ( aExp == 0x7FF ) {
+ if ( aSig ) return propagateFloat64NaN( a, float64_zero STATUS_VAR );
+ return a;
+ }
+
+ aExp -= 0x3FF;
+ aSig |= LIT64( 0x0010000000000000 );
+ zSign = aExp < 0;
+ zSig = (bits64)aExp << 52;
+ for (i = 1LL << 51; i > 0; i >>= 1) {
+ mul64To128( aSig, aSig, &aSig0, &aSig1 );
+ aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
+ if ( aSig & LIT64( 0x0020000000000000 ) ) {
+ aSig >>= 1;
+ zSig |= i;
+ }
+ }
+
+ if ( zSign )
+ zSig = -zSig;
+ return normalizeRoundAndPackFloat64( zSign, 0x408, zSig STATUS_VAR );
+}
+
+/*----------------------------------------------------------------------------
| Returns 1 if the double-precision floating-point value `a' is equal to the
| corresponding value `b', and 0 otherwise. The comparison is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
return a;
}
add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
- if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
+ if ( aExp == 0 ) {
+ if ( STATUS(flush_to_zero) ) return packFloat128( zSign, 0, 0, 0 );
+ return packFloat128( zSign, 0, zSig0, zSig1 );
+ }
zSig2 = 0;
zSig0 |= LIT64( 0x0002000000000000 );
zExp = aExp;
if ( aExp == 0xFF ) {
return a;
}
- aExp += n;
- return roundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
+ if ( aExp != 0 )
+ aSig |= 0x00800000;
+ else if ( aSig == 0 )
+ return a;
+
+ aExp += n - 1;
+ aSig <<= 7;
+ return normalizeRoundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
}
float64 float64_scalbn( float64 a, int n STATUS_PARAM )
if ( aExp == 0x7FF ) {
return a;
}
- aExp += n;
- return roundAndPackFloat64( aSign, aExp, aSig STATUS_VAR );
+ if ( aExp != 0 )
+ aSig |= LIT64( 0x0010000000000000 );
+ else if ( aSig == 0 )
+ return a;
+
+ aExp += n - 1;
+ aSig <<= 10;
+ return normalizeRoundAndPackFloat64( aSign, aExp, aSig STATUS_VAR );
}
#ifdef FLOATX80
if ( aExp == 0x7FF ) {
return a;
}
+ if (aExp == 0 && aSig == 0)
+ return a;
+
aExp += n;
- return roundAndPackFloatx80( STATUS(floatx80_rounding_precision),
- aSign, aExp, aSig, 0 STATUS_VAR );
+ return normalizeRoundAndPackFloatx80( STATUS(floatx80_rounding_precision),
+ aSign, aExp, aSig, 0 STATUS_VAR );
}
#endif
if ( aExp == 0x7FFF ) {
return a;
}
- aExp += n;
- return roundAndPackFloat128( aSign, aExp, aSig0, aSig1, 0 STATUS_VAR );
+ if ( aExp != 0 )
+ aSig0 |= LIT64( 0x0001000000000000 );
+ else if ( aSig0 == 0 && aSig1 == 0 )
+ return a;
+
+ aExp += n - 1;
+ return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
+ STATUS_VAR );
}
#endif