Return to Revisions

2 of 3

Fixed minor grammar issues

edit approved Sep 7, 2019 at 3:27

user avatar

edit approved Sep 7, 2019 at 3:27

user avatar

Software floating-point multiplication

I wrote a floating-point multiplication function as an excercise. Thr program compares its result to the usual hardware multiplication result and for this purpose i use unspecified behavior, but the function itself should be fine. I do get warnings about XOR-ing 1-bit ints but I have no idea why.

I noticed there aren't that many comments in the multiplication function so I wonder what i could put there.

#include <math.h>
#include <assert.h>
#include <memory.h>
#include <limits.h>
#include <fenv.h>
#include <float.h>
#include <thread>
#include <mutex>
#include <iostream>
#include <random>
int msb( uint64_t v )
{
 if(v == 0) return 0;
 return 63-__builtin_clzll(v);
}
void roundedShift(uint64_t* a, unsigned int n) { 
 if(n >= sizeof(uint64_t)*CHAR_BIT) {
 // we don't care about rounding if n == 64 because mantissa product is always less than 2^46
 *a = 0;
 }
 uint64_t roundoff = *a - ((*a >> n) << n);
 *a >>= n;
 if(roundoff*2 > (1ull<<n)
 ||
 (roundoff*2 == (1ull<<n) && (*a)%2 == 1)) {
 ++(*a);
 }
}
struct SoftFloat {
 unsigned int mantissa : 23; 
 unsigned int exponent : 8;
 unsigned int sign : 1;
 
 static const SoftFloat nan;
 SoftFloat operator *(SoftFloat right) const {
 if(exponent == 255 || right.exponent == 255) {
 return specialMultiplication(right);
 }
 uint64_t fullLeftMantissa = (mantissa)+(exponent!=0?(1<<23):0);
 uint64_t fullRightMantissa = (right.mantissa)+(right.exponent!=0?(1<<23):0);
 uint64_t fullNewMantissa = fullLeftMantissa * fullRightMantissa;
 // experiment have shown that operating on biased exponents is faster than 
 // computing unbiased exponent and then adding bias
 short leftNormalBiasedExponent = exponent != 0 ? exponent: 1; 
 short rightNormalBiasedExponent = right.exponent != 0 ? right.exponent : 1;
 short newNormalBiasedExponent = leftNormalBiasedExponent + 
 rightNormalBiasedExponent - 127 - 23;
 int totalShift = 0;
 if(newNormalBiasedExponent < 1) {
 int diff = -newNormalBiasedExponent + 1;
 newNormalBiasedExponent = 1;
 totalShift += diff;
 }
 int implicitBit = msb(fullNewMantissa);
 int shift = implicitBit - totalShift - 23;
 if(shift >= 0) {
 newNormalBiasedExponent += shift;
 totalShift += shift;
 fullNewMantissa &= ~(1ll << implicitBit);
 } else {
 newNormalBiasedExponent = 0;
 }
 roundedShift(&fullNewMantissa, totalShift);
 if(fullNewMantissa == (1 << 23)) {
 ++newNormalBiasedExponent;
 fullNewMantissa = 0;
 }
 if(newNormalBiasedExponent >= 255) {
 newNormalBiasedExponent = 255;
 fullNewMantissa = 0;
 }
 return SoftFloat{(uint)fullNewMantissa, (uint)newNormalBiasedExponent, sign ^ right.sign};
 // i don't really understan why this return expression gives warning
 // narrowing conversion of ‘(((int)((const SoftFloat*)this)->SoftFloat::sign) ^ ((int)right.SoftFloat::sign))’ from ‘int’ to ‘unsigned int’
 }
 bool isNan() const {
 return exponent == 255 && mantissa != 0;
 // This is same as (*reinterpret_cast<const unsigned int*>(this) << 1) > 0b11111111000000000000000000000000u, 
 // but the experiments have shown it doesn't make any difference for speed on my machine.
 // Current version does not invoke any UB 
 }
 bool isZero() const {
 return exponent == 0 && mantissa == 0;
 }
 
 bool isRepresentationEqual(const SoftFloat& right) {
 return !memcmp(this, &right, sizeof(SoftFloat));
 }
 // Correctness testing relies on specific layout of floats and of fields inside SoftFloat, but the multiplication function itself does not
 float toHardFloat() const {
 float res;
 memcpy(&res, this, sizeof(SoftFloat));
 return res;
 }
 static SoftFloat fromOrdinalNumber(const unsigned int& a) {
 SoftFloat res;
 memcpy(&res, &a, sizeof(SoftFloat));
 return res;
 }
 static SoftFloat fromHardFloat(const float& a) {
 SoftFloat res;
 memcpy(&res, &a, sizeof(SoftFloat));
 return res;
 }
private:
 SoftFloat specialMultiplication(SoftFloat right) const {
 // precondition - at least one of *this, right is either inf, -inf or nan
 if(isNan() || right.isNan()) {
 return nan;
 }
 if(isZero() || right.isZero()) {
 return nan;
 }
 return {0, 255, sign ^ right.sign};
 }
};
const SoftFloat SoftFloat::nan = {1, 255, 0};
std::mutex coutMutex;
void checkOneMultiplication(SoftFloat left, SoftFloat right) {
 SoftFloat softProduct = left*right;
 SoftFloat hardProduct = SoftFloat::fromHardFloat(
 left.toHardFloat() *
 right.toHardFloat());
 bool equalRepresentation = softProduct.isRepresentationEqual(hardProduct);
 bool bothNan = softProduct.isNan() && 
 hardProduct.isNan();
 if(!(equalRepresentation || bothNan)) {
 std::lock_guard<std::mutex> lock(coutMutex);
 std::cerr << "failed\n";
 std::cerr << "left operand\t" << left.mantissa << " " << left.exponent << " " << left.sign << " " << left.toHardFloat() << std::endl;
 std::cerr << "right operand\t" << right.mantissa << " " << right.exponent << " " << right.sign << " " << right.toHardFloat() << std::endl;
 std::cerr << "soft product\t" << softProduct.mantissa << " " << softProduct.exponent << " " << softProduct.sign << " " << softProduct.toHardFloat() << std::endl;
 std::cerr << "hard product\t" << hardProduct.mantissa << " " << hardProduct.exponent << " " << hardProduct.sign << " " << hardProduct.toHardFloat() << std::endl;
 abort();
 }
}
void checkRange(unsigned int start, unsigned int end, int threadNumber) {
 // this loop checks multiplication with every one of 2^28 possible floats in given range of representaions. Right operand is pseudorandom but deterministic
 std::uniform_int_distribution<unsigned int> distr;
 std::mt19937 gen(threadNumber);
 for(unsigned int representationNumber = start; representationNumber != end; representationNumber++) {
 // can't use < in condition because last block ends with 0
 int rigntOperandRepresentationNumber = distr(gen);
 SoftFloat leftOperand = SoftFloat::fromOrdinalNumber(representationNumber);
 SoftFloat rightOperand = SoftFloat::fromOrdinalNumber(rigntOperandRepresentationNumber);
 checkOneMultiplication(leftOperand, rightOperand);
 if(representationNumber%0x10000000 == 0 && representationNumber > start) {
 std::lock_guard<std::mutex> lock(coutMutex);
 std::cout << "thread " << threadNumber << " checked 0x" << std::hex << representationNumber-start << " multiplications" << std::endl;
 }
 }
}
int main(int argc, char *argv[])
{
 static_assert(sizeof(SoftFloat) == sizeof(float));
 static_assert(alignof(SoftFloat) == alignof(float), "");
 static_assert(sizeof(SoftFloat) == sizeof(int), "");
 static_assert(__BYTE_ORDER__ == LITTLE_ENDIAN, "");
 // Unfortunately we can't statically check further details of layout of structures
 
 assert(SoftFloat::fromHardFloat(std::numeric_limits<float>::denorm_min()).isRepresentationEqual(SoftFloat{1, 0, 0}));
 assert(SoftFloat::fromHardFloat(FLT_MIN).isRepresentationEqual(SoftFloat{0, 1, 0}));
 
 float nonConstant = -1;
 assert(SoftFloat::fromHardFloat(nonConstant*0).isRepresentationEqual(SoftFloat{0, 0, 1}));
 // Test can't run if any of the above asserts fail
 const int threadCount = 4;
 fesetround(FE_TONEAREST); // assuming mantissa is rounded to even when there are two nearest
 std::thread threads[threadCount-1];
 const unsigned int blockSize = UINT32_MAX/4+1;
 for(int i = 1; i < threadCount; i++) {
 threads[i-1] = std::thread(checkRange, blockSize*i, blockSize*(i+1), i);
 }
 checkRange(0, blockSize, 0);
 SoftFloat specialCases[] = {{0, 0, 0}, // 0
 {0, 0, 1}, // -0
 {0, 255, 0}, //inf
 {0, 255, 1}, //-inf
 SoftFloat::nan,
 SoftFloat::fromHardFloat(std::nanf("")),
 SoftFloat::fromHardFloat(FLT_MIN),
 SoftFloat::fromHardFloat(1),
 SoftFloat::fromHardFloat(FLT_MAX),
 SoftFloat::fromHardFloat(FLT_EPSILON),
 SoftFloat::fromHardFloat(std::numeric_limits<float>::denorm_min()),
 SoftFloat::fromHardFloat(-FLT_MAX),
 SoftFloat::fromHardFloat(FLT_MIN_EXP),
 SoftFloat::fromHardFloat(FLT_MIN_10_EXP),
 SoftFloat::fromHardFloat(FLT_MAX_EXP),
 SoftFloat::fromHardFloat(FLT_MAX_10_EXP)};
 int numberOfSpecialCases = sizeof(specialCases)/sizeof(SoftFloat);
 for(int i = 0; i < numberOfSpecialCases; i++) {
 for(int j = 0; j < numberOfSpecialCases; j++) {
 checkOneMultiplication(specialCases[i], specialCases[j]);
 }
 }
 for(int i = 1; i < threadCount; i++) {
 threads[i-1].join();
 }
 std::cout << "checked 0x" << UINT32_MAX + numberOfSpecialCases*numberOfSpecialCases << " multiplications";
 return 0;
}

The program tests multiplication of every possible floatin-point number with a random number. This runs a long time so i made it threaded. I also used fancy c++11 RNG stuff to make it deterministic.

The function uses __builtin_clzll which is available on gcc and clang but not on msvc I beleive.

c++ floating-point

asked Sep 7, 2019 at 2:10

Nikita

lang-cpp