Coverage Report

Created: 2024-04-30 09:35

test/zserio/FloatUtilTest.cpp
Line
Count
Source
1
#include <array>
2
3
#include "gtest/gtest.h"
4
#include "zserio/FloatUtil.h"
5
6
namespace zserio
7
{
8
9
// Test fixture shared by all tests below. It provides:
//  - builders that pack sign / exponent / significand fields into 16-, 32- and 64-bit patterns,
//  - assertion helpers for the float16 <-> float32 conversions,
//  - element types and tables of bit-field / expected-value pairs for the 32- and 64-bit tests.
// The helpers use ASSERT_EQ inside void member functions; GoogleTest documents ASSERT_* as aborting
// only the current function, so a failure returns from the helper, not from the calling test body.
class FloatUtilTest : public ::testing::Test
10
{
11
protected:
12
    // Packs the three fields into a 16-bit pattern: sign and exponent are shifted to their bit
    // positions and OR-ed with the significand. The operands are widened to uint32_t for the
    // shifts and the result is narrowed back to uint16_t. Inputs are not masked, so each argument
    // must already fit its field.
    uint16_t createFloat16Value(uint16_t sign, uint16_t exponent, uint16_t significand)
13
23
    {
14
23
        return static_cast<uint16_t>((static_cast<uint32_t>(sign) << FLOAT16_SIGN_BIT_POSITION) |
15
23
                (static_cast<uint32_t>(exponent) << FLOAT16_EXPONENT_BIT_POSITION) | significand);
16
23
    }
17
18
    // Packs the three fields into a 32-bit pattern (shift sign and exponent, OR in the significand).
    // Inputs are not masked.
    uint32_t createFloat32Value(uint32_t sign, uint32_t exponent, uint32_t significand)
19
35
    {
20
35
        return (sign << FLOAT32_SIGN_BIT_POSITION) | (exponent << FLOAT32_EXPONENT_BIT_POSITION) | significand;
21
35
    }
22
23
    // Packs the three fields into a 64-bit pattern (shift sign and exponent, OR in the significand).
    // Inputs are not masked.
    uint64_t createFloat64Value(uint64_t sign, uint64_t exponent, uint64_t significand)
24
16
    {
25
16
        return (sign << FLOAT64_SIGN_BIT_POSITION) | (exponent << FLOAT64_EXPONENT_BIT_POSITION) | significand;
26
16
    }
27
28
    // Converts float16Value with convertUInt16ToFloat, maps the result back through
    // convertFloatToUInt32 and asserts it equals expectedFloat32Value. The comparison is done on
    // uint32_t values rather than on floats; the tests use this overload for infinities, NaNs and
    // other values specified as bit fields.
    void checkFloat16ToFloat32Conversion(uint16_t float16Value, uint32_t expectedFloat32Value)
29
7
    {
30
7
        const float float32 = convertUInt16ToFloat(float16Value);
31
7
        ASSERT_EQ(expectedFloat32Value, convertFloatToUInt32(float32));
32
7
    }
33
34
    // Asserts that convertUInt16ToFloat(float16Value) compares equal to expectedFloat32 as a float.
    void checkFloat16ToFloat32Conversion(uint16_t float16Value, float expectedFloat32)
35
4
    {
36
4
        ASSERT_EQ(expectedFloat32, convertUInt16ToFloat(float16Value));
37
4
    }
38
39
    // Turns float32Value into a float with convertUInt32ToFloat and asserts that
    // convertFloatToUInt16 of that float equals expectedFloat16Value.
    void checkFloat32ToFloat16Conversion(uint32_t float32Value, uint16_t expectedFloat16Value)
40
12
    {
41
12
        const float float32 = convertUInt32ToFloat(float32Value);
42
12
        ASSERT_EQ(expectedFloat16Value, convertFloatToUInt16(float32));
43
12
    }
44
45
    // Asserts that convertFloatToUInt16(float32) equals expectedFloat16Value.
    void checkFloat32ToFloat16Conversion(float float32, uint16_t expectedFloat16Value)
46
4
    {
47
4
        ASSERT_EQ(expectedFloat16Value, convertFloatToUInt16(float32));
48
4
    }
49
50
    // One row of the 32-bit table: the three fields fed to createFloat32Value and the float value
    // that bit pattern is expected to correspond to.
    struct TestFloat32Element
51
    {
52
        uint32_t sign;
53
        uint32_t exponent;
54
        uint32_t significand;
55
        float expectedFloat;
56
    };
57
58
    // One row of the 64-bit table: the three fields fed to createFloat64Value and the double value
    // that bit pattern is expected to correspond to.
    struct TestFloat64Element
59
    {
60
        uint64_t sign;
61
        uint64_t exponent;
62
        uint64_t significand;
63
        double expectedDouble;
64
    };
65
66
    // Tables iterated by the 32-bit and 64-bit conversion tests; eight rows each.
    static const std::array<TestFloat32Element, 8> TEST_FLOAT32_DATA;
67
    static const std::array<TestFloat64Element, 8> TEST_FLOAT64_DATA;
68
69
private:
70
    // Shift amounts used by the create*Value builders for the sign and exponent fields.
    static const uint16_t FLOAT16_SIGN_BIT_POSITION;
71
    static const uint16_t FLOAT16_EXPONENT_BIT_POSITION;
72
73
    static const uint32_t FLOAT32_SIGN_BIT_POSITION;
74
    static const uint32_t FLOAT32_EXPONENT_BIT_POSITION;
75
76
    static const uint64_t FLOAT64_SIGN_BIT_POSITION;
77
    static const uint64_t FLOAT64_EXPONENT_BIT_POSITION;
78
};
79
80
// 32-bit test table. Each row is {sign, exponent field, significand field, expected float}.
// The rows pair exponent field 127 with +/-1.0F, i.e. the exponent is stored with an offset of 127.
const std::array<FloatUtilTest::TestFloat32Element, 8> FloatUtilTest::TEST_FLOAT32_DATA = {
81
        TestFloat32Element{0, 0, UINT32_C(0), 0.0F},
82
        TestFloat32Element{1, 0, UINT32_C(0), -0.0F},
83
        TestFloat32Element{0, 127, UINT32_C(0), +1.0F},
84
        TestFloat32Element{1, 127, UINT32_C(0), -1.0F},
85
        // 2^1 (1 + 2^-1 + 2^-2)
86
        TestFloat32Element{0, 128, UINT32_C(0x600000), 3.5F},
87
        // 2^-1 (1 + 2^-1 + 2^-2)
88
        TestFloat32Element{0, 126, UINT32_C(0x600000), 0.875F},
89
        // 2^3 (1 + 2^-3 + 2^-4 + 2^-5 + 2^-6)
90
        TestFloat32Element{0, 130, UINT32_C(0x1E0000), 9.875F},
91
        // 2^-1 (1 + 2^-3 + 2^-4 + 2^-5 + 2^-6): exponent field 126 is 2^-1, and 0.5 * 1.234375 = 0.6171875
92
        TestFloat32Element{0, 126, UINT32_C(0x1E0000), 0.6171875F},
93
};
94
95
// 64-bit test table. Each row is {sign, exponent field, significand field, expected double}.
// The rows pair exponent field 1023 with +/-1.0, i.e. the exponent is stored with an offset of 1023.
const std::array<FloatUtilTest::TestFloat64Element, 8> FloatUtilTest::TEST_FLOAT64_DATA = {
96
        TestFloat64Element{0, 0, UINT64_C(0), 0.0},
97
        TestFloat64Element{1, 0, UINT64_C(0), -0.0},
98
        TestFloat64Element{0, 1023, UINT64_C(0), +1.0},
99
        TestFloat64Element{1, 1023, UINT64_C(0), -1.0},
100
        // 2^1 (1 + 2^-1 + 2^-2)
101
        TestFloat64Element{0, 1024, UINT64_C(0xC000000000000), 3.5},
102
        // 2^-1 (1 + 2^-1 + 2^-2)
103
        TestFloat64Element{0, 1022, UINT64_C(0xC000000000000), 0.875},
104
        // 2^3 (1 + 2^-3 + 2^-4 + 2^-5 + 2^-6)
105
        TestFloat64Element{0, 1026, UINT64_C(0x3C00000000000), 9.875},
106
        // 2^-1 (1 + 2^-3 + 2^-4 + 2^-5 + 2^-6): exponent field 1022 is 2^-1, and 0.5 * 1.234375 = 0.6171875
107
        TestFloat64Element{0, 1022, UINT64_C(0x3C00000000000), 0.6171875},
108
};
109
110
// Out-of-class definitions of the fixture's shift amounts: for each width, the position of the sign
// bit and the position of the lowest exponent bit.
const uint16_t FloatUtilTest::FLOAT16_SIGN_BIT_POSITION = UINT16_C(15);
111
const uint16_t FloatUtilTest::FLOAT16_EXPONENT_BIT_POSITION = UINT16_C(10);
112
113
// NOTE(review): the 32- and 64-bit constants below are initialised with UINT16_C although they are
// declared uint32_t / uint64_t. The values convert without loss, but UINT32_C / UINT64_C would match
// the declared types and the macros used in the data tables.
const uint32_t FloatUtilTest::FLOAT32_SIGN_BIT_POSITION = UINT16_C(31);
114
const uint32_t FloatUtilTest::FLOAT32_EXPONENT_BIT_POSITION = UINT16_C(23);
115
116
const uint64_t FloatUtilTest::FLOAT64_SIGN_BIT_POSITION = UINT16_C(63);
117
const uint64_t FloatUtilTest::FLOAT64_EXPONENT_BIT_POSITION = UINT16_C(52);
118
119
// Exercises convertUInt16ToFloat on float16 bit patterns for signed zeros, infinities, NaNs, normal
// numbers and subnormal numbers. Expected results are given either as float literals or as float32
// bit patterns, depending on which checkFloat16ToFloat32Conversion overload is selected.
TEST_F(FloatUtilTest, convertUInt16ToFloat)
120
1
{
121
    // plus zero
122
1
    const uint16_t float16ValuePlusZero = createFloat16Value(0, 0, 0); // +0.0
123
1
    checkFloat16ToFloat32Conversion(float16ValuePlusZero, 0.0F);
124
125
    // minus zero
126
1
    const uint16_t float16ValueMinusZero = createFloat16Value(1, 0, 0); // -0.0
127
1
    checkFloat16ToFloat32Conversion(float16ValueMinusZero, -0.0F);
128
129
    // plus infinity
130
1
    const uint16_t float16ValuePlusInfinity = createFloat16Value(0, 0x1F, 0); // +INF
131
1
    const uint32_t float32ValuePlusInfinity = createFloat32Value(0, 0xFF, 0); // +INF
132
1
    checkFloat16ToFloat32Conversion(float16ValuePlusInfinity, float32ValuePlusInfinity);
133
134
    // minus infinity
135
1
    const uint16_t float16ValueMinusInfinity = createFloat16Value(1, 0x1F, 0); // -INF
136
1
    const uint32_t float32ValueMinusInfinity = createFloat32Value(1, 0xFF, 0); // -INF
137
1
    checkFloat16ToFloat32Conversion(float16ValueMinusInfinity, float32ValueMinusInfinity);
138
139
    // quiet NaN
140
1
    const uint16_t float16ValueQuietNan = createFloat16Value(0, 0x1F, 0x3FF); // +NaN
141
1
    const uint32_t float32ValueQuietNan = createFloat32Value(0, 0xFF, 0x7FE000); // +NaN
142
1
    checkFloat16ToFloat32Conversion(float16ValueQuietNan, float32ValueQuietNan);
143
144
    // signaling NaN
    // NOTE(review): these patterns differ from the "quiet NaN" case above only in the sign bit (the
    // significands are identical), so this looks like a negative NaN rather than a signaling one;
    // confirm the intended label.
145
1
    const uint16_t float16ValueSignalingNan = createFloat16Value(1, 0x1F, 0x3FF); // -NaN
146
1
    const uint32_t float32ValueSignalingNan = createFloat32Value(1, 0xFF, 0x7FE000); // -NaN
147
1
    checkFloat16ToFloat32Conversion(float16ValueSignalingNan, float32ValueSignalingNan);
148
149
    // normal numbers
150
1
    const uint16_t float16ValueOne = createFloat16Value(0, 15, 0); // 1.0
151
1
    checkFloat16ToFloat32Conversion(float16ValueOne, 1.0F);
152
153
1
    const uint16_t float16ValueOnePlus = createFloat16Value(0, 15, 0x01); // 1.0 + 2^-10
154
1
    const uint32_t float32ValueOnePlus = createFloat32Value(0, 127, 0x2000); // 1.0 + 2^-10
155
1
    checkFloat16ToFloat32Conversion(float16ValueOnePlus, float32ValueOnePlus);
156
157
1
    const uint16_t float16ValueMax = createFloat16Value(0, 30, 0x3FF); // 2^15 (1 + 2^-1 + ... + 2^-10)
158
1
    checkFloat16ToFloat32Conversion(float16ValueMax, 65504.0F);
159
160
    // subnormal numbers
    // (float16 exponent field 0 with a non-zero significand; the expected float32 patterns are
    // normal numbers, with exponent fields 103 and 112)
161
1
    const uint16_t float16ValueMinSubnormal = createFloat16Value(0, 0, 1); // 2^-14 (2^-10)
162
1
    const uint32_t float32ValueMinSubnormal = createFloat32Value(0, 103, 0); // 2^-24
163
1
    checkFloat16ToFloat32Conversion(float16ValueMinSubnormal, float32ValueMinSubnormal);
164
165
1
    const uint16_t float16ValueMaxSubnormal = createFloat16Value(0, 0, 0x3FF); // 2^-14 (2^-1 + ... + 2^-10)
166
                                                                               // 2^-15 (1 + 2^-1 + ... + 2^-9)
167
1
    const uint32_t float32ValueMaxSubnormal = createFloat32Value(0, 112, 0x7FC000);
168
1
    checkFloat16ToFloat32Conversion(float16ValueMaxSubnormal, float32ValueMaxSubnormal);
169
1
}
170
171
// Exercises convertFloatToUInt16 on float32 inputs: signed zeros, infinities, NaNs, normal numbers
// that fit float16 exactly, and inputs that must underflow to zero, become float16 subnormals,
// overflow to infinity or be rounded. Inputs are given either as float literals or as float32 bit
// patterns, depending on which checkFloat32ToFloat16Conversion overload is selected.
TEST_F(FloatUtilTest, convertFloatToUInt16)
172
1
{
173
    // plus zero
174
1
    const uint16_t float16ValuePlusZero = createFloat16Value(0, 0, 0); // +0.0
175
1
    checkFloat32ToFloat16Conversion(0.0F, float16ValuePlusZero);
176
177
    // minus zero
178
1
    const uint16_t float16ValueMinusZero = createFloat16Value(1, 0, 0); // -0.0
179
1
    checkFloat32ToFloat16Conversion(-0.0F, float16ValueMinusZero);
180
181
    // plus infinity
182
1
    const uint32_t float32ValuePlusInfinity = createFloat32Value(0, 0xFF, 0); // +INF
183
1
    const uint16_t float16ValuePlusInfinity = createFloat16Value(0, 0x1F, 0); // +INF
184
1
    checkFloat32ToFloat16Conversion(float32ValuePlusInfinity, float16ValuePlusInfinity);
185
186
    // minus infinity
187
1
    const uint32_t float32ValueMinusInfinity = createFloat32Value(1, 0xFF, 0); // -INF
188
1
    const uint16_t float16ValueMinusInfinity = createFloat16Value(1, 0x1F, 0); // -INF
189
1
    checkFloat32ToFloat16Conversion(float32ValueMinusInfinity, float16ValueMinusInfinity);
190
191
    // quiet NaN
192
1
    const uint32_t float32ValueQuietNan = createFloat32Value(0, 0xFF, 0x7FE000); // +NaN
193
1
    const uint16_t float16ValueQuietNan = createFloat16Value(0, 0x1F, 0x3FF); // +NaN
194
1
    checkFloat32ToFloat16Conversion(float32ValueQuietNan, float16ValueQuietNan);
195
196
    // signaling NaN
    // NOTE(review): these patterns differ from the "quiet NaN" case above only in the sign bit (the
    // significands are identical), so this looks like a negative NaN rather than a signaling one;
    // confirm the intended label.
197
1
    const uint32_t float32ValueSignalingNan = createFloat32Value(1, 0xFF, 0x7FE000); // -NaN
198
1
    const uint16_t float16ValueSignalingNan = createFloat16Value(1, 0x1F, 0x3FF); // -NaN
199
1
    checkFloat32ToFloat16Conversion(float32ValueSignalingNan, float16ValueSignalingNan);
200
201
    // normal numbers
202
1
    const uint16_t float16ValueOne = createFloat16Value(0, 15, 0); // 1.0
203
1
    checkFloat32ToFloat16Conversion(1.0F, float16ValueOne);
204
205
1
    const uint32_t float32ValueOnePlus = createFloat32Value(0, 127, 0x2000); // 1.0 + 2^-10
206
1
    const uint16_t float16ValueOnePlus = createFloat16Value(0, 15, 0x01); // 1.0 + 2^-10
207
1
    checkFloat32ToFloat16Conversion(float32ValueOnePlus, float16ValueOnePlus);
208
209
1
    const uint16_t float16ValueMax = createFloat16Value(0, 30, 0x3FF); // 2^15 (1 + 2^-1 + ... + 2^-10)
210
1
    checkFloat32ToFloat16Conversion(65504.0F, float16ValueMax);
211
212
    // normal numbers converted to zero
213
1
    const uint32_t float32ValueUnderflow = createFloat32Value(0, 102, 0); // 2^-25
214
1
    checkFloat32ToFloat16Conversion(float32ValueUnderflow, float16ValuePlusZero);
215
216
    // normal numbers converted to subnormal numbers
    // (the extra 2^-23 term of the input is expected to be dropped, leaving exactly 2^-24)
217
1
    const uint32_t float32ValueMinUnderflow = createFloat32Value(0, 103, 1); // 2^-24 (1 + 2^-23)
218
1
    const uint16_t float16ValueMinSubnormal = createFloat16Value(0, 0, 1); // 2^-24
219
1
    checkFloat32ToFloat16Conversion(float32ValueMinUnderflow, float16ValueMinSubnormal);
220
221
    // normal numbers converted to subnormal numbers with rounding
    // (the input is 2.5 * 2^-24, exactly halfway between two float16 subnormals; the expected
    // result is the upper one, 3 * 2^-24)
222
1
    const uint32_t float32ValueMinUnderflowRounding = createFloat32Value(0, 104, 0x200000); // 2^-23 (1 + 2^-2)
223
1
    const uint16_t float16ValueMinSubnormalRounding = createFloat16Value(0, 0, 0x3); // 2^-14 (2^-9 + 2^-10)
224
1
    checkFloat32ToFloat16Conversion(float32ValueMinUnderflowRounding, float16ValueMinSubnormalRounding);
225
226
    // normal numbers converted to infinity
227
1
    const uint32_t float32ValueOverflow = createFloat32Value(0, 144, 0); // 2^17
228
1
    checkFloat32ToFloat16Conversion(float32ValueOverflow, float16ValuePlusInfinity);
229
230
    // normal numbers converted with rounding
    // (the 2^-11 term is half of the last float16 significand step; the expected result rounds up)
231
1
    const uint32_t float32ValueRounding = createFloat32Value(0, 127, 0x401000); // 1 + 2^-1 + 2^-11
232
1
    const uint16_t float16ValueRounding = createFloat16Value(0, 15, 0x201); // 1 + 2^-1 + 2^-10
233
1
    checkFloat32ToFloat16Conversion(float32ValueRounding, float16ValueRounding);
234
235
    // subnormal numbers
    // (float32 subnormal inputs are expected to convert to +0.0)
236
1
    const uint32_t float32ValueMinSubnormal = createFloat32Value(0, 0, 1); // 2^-126 (2^-23)
237
1
    checkFloat32ToFloat16Conversion(float32ValueMinSubnormal, float16ValuePlusZero);
238
239
    // 2^-126 (2^-1 + ... + 2^-23)
240
1
    const uint32_t float32ValueMaxSubnormal = createFloat32Value(0, 0, 0x007FFFFF);
241
1
    checkFloat32ToFloat16Conversion(float32ValueMaxSubnormal, float16ValuePlusZero);
242
1
}
243
244
// For every row of TEST_FLOAT32_DATA, builds the 32-bit pattern from the row's fields and asserts
// that convertUInt32ToFloat yields the row's expected float.
TEST_F(FloatUtilTest, convertUInt32ToFloat)
245
1
{
246
1
    // each row is copied by value into testElement
    for (TestFloat32Element testElement : TEST_FLOAT32_DATA)
247
8
    {
248
8
        const uint32_t float32Value =
249
8
                createFloat32Value(testElement.sign, testElement.exponent, testElement.significand);
250
8
        const float convertedFloat = convertUInt32ToFloat(float32Value);
251
252
8
        // ASSERT_EQ stops the test at the first mismatching row
        ASSERT_EQ(testElement.expectedFloat, convertedFloat);
253
8
    }
254
1
}
255
256
// Reverse direction of the previous table test: for every row of TEST_FLOAT32_DATA, asserts that
// convertFloatToUInt32 of the row's float equals the 32-bit pattern built from the row's fields.
TEST_F(FloatUtilTest, convertFloatToUInt32)
257
1
{
258
1
    // each row is copied by value into testElement
    for (TestFloat32Element testElement : TEST_FLOAT32_DATA)
259
8
    {
260
8
        const uint32_t convertedFloatValue = convertFloatToUInt32(testElement.expectedFloat);
261
8
        const uint32_t expectedFloatValue =
262
8
                createFloat32Value(testElement.sign, testElement.exponent, testElement.significand);
263
264
8
        // ASSERT_EQ stops the test at the first mismatching row
        ASSERT_EQ(expectedFloatValue, convertedFloatValue);
265
8
    }
266
1
}
267
268
// For every row of TEST_FLOAT64_DATA, builds the 64-bit pattern from the row's fields and asserts
// that convertUInt64ToDouble yields the row's expected double.
TEST_F(FloatUtilTest, convertUInt64ToDouble)
269
1
{
270
1
    // each row is copied by value into testElement
    for (TestFloat64Element testElement : TEST_FLOAT64_DATA)
271
8
    {
272
8
        const uint64_t float64Value =
273
8
                createFloat64Value(testElement.sign, testElement.exponent, testElement.significand);
274
8
        const double convertedDouble = convertUInt64ToDouble(float64Value);
275
276
8
        // ASSERT_EQ stops the test at the first mismatching row
        ASSERT_EQ(testElement.expectedDouble, convertedDouble);
277
8
    }
278
1
}
279
280
// Reverse direction of the previous table test: for every row of TEST_FLOAT64_DATA, asserts that
// convertDoubleToUInt64 of the row's double equals the 64-bit pattern built from the row's fields.
TEST_F(FloatUtilTest, convertDoubleToUInt64)
281
1
{
282
1
    // each row is copied by value into testElement
    for (TestFloat64Element testElement : TEST_FLOAT64_DATA)
283
8
    {
284
8
        const uint64_t convertedDoubleValue = convertDoubleToUInt64(testElement.expectedDouble);
285
8
        const uint64_t expectedDoubleValue =
286
8
                createFloat64Value(testElement.sign, testElement.exponent, testElement.significand);
287
288
8
        // ASSERT_EQ stops the test at the first mismatching row
        ASSERT_EQ(expectedDoubleValue, convertedDoubleValue);
289
8
    }
290
1
}
291
292
} // namespace zserio