GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: test/zserio/FloatUtilTest.cpp Lines: 117 117 100.0 %
Date: 2023-12-13 14:51:09 Branches: 67 174 38.5 %

Line Branch Exec Source
1
#include <array>
2
3
#include "zserio/FloatUtil.h"
4
5
#include "gtest/gtest.h"
6
7
namespace zserio
8
{
9
10
12
class FloatUtilTest : public ::testing::Test
11
{
12
protected:
13
23
    uint16_t createFloat16Value(uint16_t sign, uint16_t exponent, uint16_t significand)
14
    {
15
        return static_cast<uint16_t>(
16
23
                (static_cast<uint32_t>(sign) << FLOAT16_SIGN_BIT_POSITION) |
17
23
                (static_cast<uint32_t>(exponent) << FLOAT16_EXPONENT_BIT_POSITION) |
18
46
                significand);
19
    }
20
21
35
    uint32_t createFloat32Value(uint32_t sign, uint32_t exponent, uint32_t significand)
22
    {
23
35
        return (sign << FLOAT32_SIGN_BIT_POSITION) | (exponent << FLOAT32_EXPONENT_BIT_POSITION) | significand;
24
    }
25
26
16
    uint64_t createFloat64Value(uint64_t sign, uint64_t exponent, uint64_t significand)
27
    {
28
16
        return (sign << FLOAT64_SIGN_BIT_POSITION) | (exponent << FLOAT64_EXPONENT_BIT_POSITION) | significand;
29
    }
30
31
7
    void checkFloat16ToFloat32Conversion(uint16_t float16Value, uint32_t expectedFloat32Value)
32
    {
33
7
        const float float32 = convertUInt16ToFloat(float16Value);
34



7
        ASSERT_EQ(expectedFloat32Value, convertFloatToUInt32(float32));
35
    }
36
37
4
    void checkFloat16ToFloat32Conversion(uint16_t float16Value, float expectedFloat32)
38
    {
39



4
        ASSERT_EQ(expectedFloat32, convertUInt16ToFloat(float16Value));
40
    }
41
42
12
    void checkFloat32ToFloat16Conversion(uint32_t float32Value, uint16_t expectedFloat16Value)
43
    {
44
12
        const float float32 = convertUInt32ToFloat(float32Value);
45



12
        ASSERT_EQ(expectedFloat16Value, convertFloatToUInt16(float32));
46
    }
47
48
4
    void checkFloat32ToFloat16Conversion(float float32, uint16_t expectedFloat16Value)
49
    {
50



4
        ASSERT_EQ(expectedFloat16Value, convertFloatToUInt16(float32));
51
    }
52
53
    struct TestFloat32Element
54
    {
55
        uint32_t sign;
56
        uint32_t exponent;
57
        uint32_t significand;
58
        float    expectedFloat;
59
    };
60
61
    struct TestFloat64Element
62
    {
63
        uint64_t sign;
64
        uint64_t exponent;
65
        uint64_t significand;
66
        double   expectedDouble;
67
    };
68
69
    static const std::array<TestFloat32Element, 8> TEST_FLOAT32_DATA;
70
    static const std::array<TestFloat64Element, 8> TEST_FLOAT64_DATA;
71
72
private:
73
    static const uint16_t FLOAT16_SIGN_BIT_POSITION;
74
    static const uint16_t FLOAT16_EXPONENT_BIT_POSITION;
75
76
    static const uint32_t FLOAT32_SIGN_BIT_POSITION;
77
    static const uint32_t FLOAT32_EXPONENT_BIT_POSITION;
78
79
    static const uint64_t FLOAT64_SIGN_BIT_POSITION;
80
    static const uint64_t FLOAT64_EXPONENT_BIT_POSITION;
81
};
82
83
const std::array<FloatUtilTest::TestFloat32Element, 8> FloatUtilTest::TEST_FLOAT32_DATA =
84
{
85
    TestFloat32Element{0, 0,   UINT32_C(0),        0.0F},
86
    TestFloat32Element{1, 0,   UINT32_C(0),       -0.0F},
87
    TestFloat32Element{0, 127, UINT32_C(0),       +1.0F},
88
    TestFloat32Element{1, 127, UINT32_C(0),       -1.0F},
89
    TestFloat32Element{0, 128, UINT32_C(0x600000), 3.5F},      // 2^1 (1 + 2^-1 + 2^-2)
90
    TestFloat32Element{0, 126, UINT32_C(0x600000), 0.875F},    // 2^-1 (1 + 2^-1 + 2^-2)
91
    TestFloat32Element{0, 130, UINT32_C(0x1E0000), 9.875F},    // 2^3 (1 + 2^-3 + 2^-4 + 2^-5 + 2^-6)
92
    TestFloat32Element{0, 126, UINT32_C(0x1E0000), 0.6171875F} // 2^-1 (1 + 2^-3 + 2^-4 + 2^-5 + 2^-6)
93
};
94
95
const std::array<FloatUtilTest::TestFloat64Element, 8> FloatUtilTest::TEST_FLOAT64_DATA =
96
{
97
    TestFloat64Element{0, 0,    UINT64_C(0),                0.0},
98
    TestFloat64Element{1, 0,    UINT64_C(0),               -0.0},
99
    TestFloat64Element{0, 1023, UINT64_C(0),               +1.0},
100
    TestFloat64Element{1, 1023, UINT64_C(0),               -1.0},
101
    TestFloat64Element{0, 1024, UINT64_C(0xC000000000000), 3.5},      // 2^1 (1 + 2^-1 + 2^-2)
102
    TestFloat64Element{0, 1022, UINT64_C(0xC000000000000), 0.875},    // 2^-1 (1 + 2^-1 + 2^-2)
103
    TestFloat64Element{0, 1026, UINT64_C(0x3C00000000000), 9.875},    // 2^3 (1 + 2^-3 + 2^-4 + 2^-5 + 2^-6)
104
    TestFloat64Element{0, 1022, UINT64_C(0x3C00000000000), 0.6171875} // 2^-1 (1 + 2^-3 + 2^-4 + 2^-5 + 2^-6)
105
};
106
107
const uint16_t FloatUtilTest::FLOAT16_SIGN_BIT_POSITION = UINT16_C(15);
108
const uint16_t FloatUtilTest::FLOAT16_EXPONENT_BIT_POSITION = UINT16_C(10);
109
110
const uint32_t FloatUtilTest::FLOAT32_SIGN_BIT_POSITION = UINT32_C(31); // left-shift applied to the sign when building a 32-bit float pattern
111
const uint32_t FloatUtilTest::FLOAT32_EXPONENT_BIT_POSITION = UINT32_C(23); // left-shift applied to the exponent when building a 32-bit float pattern
112
113
const uint64_t FloatUtilTest::FLOAT64_SIGN_BIT_POSITION = UINT64_C(63); // left-shift applied to the sign when building a 64-bit float pattern
114
const uint64_t FloatUtilTest::FLOAT64_EXPONENT_BIT_POSITION = UINT64_C(52); // left-shift applied to the exponent when building a 64-bit float pattern
115
116


802
TEST_F(FloatUtilTest, convertUInt16ToFloat)
117
{
118
    // plus zero
119
1
    const uint16_t float16ValuePlusZero = createFloat16Value(0, 0, 0); // +0.0
120
1
    checkFloat16ToFloat32Conversion(float16ValuePlusZero, 0.0F);
121
122
    // minus zero
123
1
    const uint16_t float16ValueMinusZero = createFloat16Value(1, 0, 0); // -0.0
124
1
    checkFloat16ToFloat32Conversion(float16ValueMinusZero, -0.0F);
125
126
    // plus infinity
127
1
    const uint16_t float16ValuePlusInfinity = createFloat16Value(0, 0x1F, 0); // +INF
128
1
    const uint32_t float32ValuePlusInfinity = createFloat32Value(0, 0xFF, 0); // +INF
129
1
    checkFloat16ToFloat32Conversion(float16ValuePlusInfinity, float32ValuePlusInfinity);
130
131
    // minus infinity
132
1
    const uint16_t float16ValueMinusInfinity = createFloat16Value(1, 0x1F, 0); // -INF
133
1
    const uint32_t float32ValueMinusInfinity = createFloat32Value(1, 0xFF, 0); // -INF
134
1
    checkFloat16ToFloat32Conversion(float16ValueMinusInfinity, float32ValueMinusInfinity);
135
136
    // quiet NaN
137
1
    const uint16_t float16ValueQuietNan = createFloat16Value(0, 0x1F, 0x3FF);    // +NaN
138
1
    const uint32_t float32ValueQuietNan = createFloat32Value(0, 0xFF, 0x7FE000); // +NaN
139
1
    checkFloat16ToFloat32Conversion(float16ValueQuietNan, float32ValueQuietNan);
140
141
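    // negative NaN (sign bit set; the significand's top bit is set, so in IEEE 754-2008 terms this is still a quiet NaN)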
142
1
    const uint16_t float16ValueSignalingNan = createFloat16Value(1, 0x1F, 0x3FF);    // -NaN
143
1
    const uint32_t float32ValueSignalingNan = createFloat32Value(1, 0xFF, 0x7FE000); // -NaN
144
1
    checkFloat16ToFloat32Conversion(float16ValueSignalingNan, float32ValueSignalingNan);
145
146
    // normal numbers
147
1
    const uint16_t float16ValueOne = createFloat16Value(0, 15, 0); // 1.0
148
1
    checkFloat16ToFloat32Conversion(float16ValueOne, 1.0F);
149
150
1
    const uint16_t float16ValueOnePlus = createFloat16Value(0, 15, 0x01);    // 1.0 + 2^-10
151
1
    const uint32_t float32ValueOnePlus = createFloat32Value(0, 127, 0x2000); // 1.0 + 2^-10
152
1
    checkFloat16ToFloat32Conversion(float16ValueOnePlus, float32ValueOnePlus);
153
154
1
    const uint16_t float16ValueMax = createFloat16Value(0, 30, 0x3FF); // 2^15 (1 + 2^-1 + ... + 2^-10)
155
1
    checkFloat16ToFloat32Conversion(float16ValueMax, 65504.0F);
156
157
    // subnormal numbers
158
1
    const uint16_t float16ValueMinSubnormal = createFloat16Value(0, 0, 1);   // 2^-14 (2^-10)
159
1
    const uint32_t float32ValueMinSubnormal = createFloat32Value(0, 103, 0); // 2^-24
160
1
    checkFloat16ToFloat32Conversion(float16ValueMinSubnormal, float32ValueMinSubnormal);
161
162
1
    const uint16_t float16ValueMaxSubnormal = createFloat16Value(0, 0, 0x3FF); // 2^-14 (2^-1 + ... + 2^-10)
163
                                                                               // 2^-15 (1 + 2^-1 + ... + 2^-9)
164
1
    const uint32_t float32ValueMaxSubnormal = createFloat32Value(0, 112, 0x7FC000);
165
1
    checkFloat16ToFloat32Conversion(float16ValueMaxSubnormal, float32ValueMaxSubnormal);
166
1
}
167
168


802
TEST_F(FloatUtilTest, convertFloatToUInt16)
169
{
170
    // plus zero
171
1
    const uint16_t float16ValuePlusZero = createFloat16Value(0, 0, 0); // +0.0
172
1
    checkFloat32ToFloat16Conversion(0.0F, float16ValuePlusZero);
173
174
    // minus zero
175
1
    const uint16_t float16ValueMinusZero = createFloat16Value(1, 0, 0); // -0.0
176
1
    checkFloat32ToFloat16Conversion(-0.0F, float16ValueMinusZero);
177
178
    // plus infinity
179
1
    const uint32_t float32ValuePlusInfinity = createFloat32Value(0, 0xFF, 0); // +INF
180
1
    const uint16_t float16ValuePlusInfinity = createFloat16Value(0, 0x1F, 0); // +INF
181
1
    checkFloat32ToFloat16Conversion(float32ValuePlusInfinity, float16ValuePlusInfinity);
182
183
    // minus infinity
184
1
    const uint32_t float32ValueMinusInfinity = createFloat32Value(1, 0xFF, 0); // -INF
185
1
    const uint16_t float16ValueMinusInfinity = createFloat16Value(1, 0x1F, 0); // -INF
186
1
    checkFloat32ToFloat16Conversion(float32ValueMinusInfinity, float16ValueMinusInfinity);
187
188
    // quiet NaN
189
1
    const uint32_t float32ValueQuietNan = createFloat32Value(0, 0xFF, 0x7FE000); // +NaN
190
1
    const uint16_t float16ValueQuietNan = createFloat16Value(0, 0x1F, 0x3FF);    // +NaN
191
1
    checkFloat32ToFloat16Conversion(float32ValueQuietNan, float16ValueQuietNan);
192
193
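    // negative NaN (sign bit set; the significand's top bit is set, so in IEEE 754-2008 terms this is still a quiet NaN)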
194
1
    const uint32_t float32ValueSignalingNan = createFloat32Value(1, 0xFF, 0x7FE000); // -NaN
195
1
    const uint16_t float16ValueSignalingNan = createFloat16Value(1, 0x1F, 0x3FF);    // -NaN
196
1
    checkFloat32ToFloat16Conversion(float32ValueSignalingNan, float16ValueSignalingNan);
197
198
    // normal numbers
199
1
    const uint16_t float16ValueOne = createFloat16Value(0, 15, 0); // 1.0
200
1
    checkFloat32ToFloat16Conversion(1.0F, float16ValueOne);
201
202
1
    const uint32_t float32ValueOnePlus = createFloat32Value(0, 127, 0x2000); // 1.0 + 2^-10
203
1
    const uint16_t float16ValueOnePlus = createFloat16Value(0, 15, 0x01);    // 1.0 + 2^-10
204
1
    checkFloat32ToFloat16Conversion(float32ValueOnePlus, float16ValueOnePlus);
205
206
1
    const uint16_t float16ValueMax = createFloat16Value(0, 30, 0x3FF); // 2^15 (1 + 2^-1 + ... + 2^-10)
207
1
    checkFloat32ToFloat16Conversion(65504.0F, float16ValueMax);
208
209
    // normal numbers converted to zero
210
1
    const uint32_t float32ValueUnderflow = createFloat32Value(0, 102, 0); // 2^-25
211
1
    checkFloat32ToFloat16Conversion(float32ValueUnderflow, float16ValuePlusZero);
212
213
    // normal numbers converted to subnormal numbers
214
1
    const uint32_t float32ValueMinUnderflow = createFloat32Value(0, 103, 1);  // 2^-24 (1 + 2^-23)
215
1
    const uint16_t float16ValueMinSubnormal = createFloat16Value(0, 0, 1);    // 2^-24
216
1
    checkFloat32ToFloat16Conversion(float32ValueMinUnderflow, float16ValueMinSubnormal);
217
218
    // normal numbers converted to subnormal numbers with rounding
219
1
    const uint32_t float32ValueMinUnderflowRounding = createFloat32Value(0, 104, 0x200000); // 2^-23 (1 + 2^-2)
220
1
    const uint16_t float16ValueMinSubnormalRounding = createFloat16Value(0, 0, 0x3); // 2^-14 (2^-9 + 2^-10)
221
1
    checkFloat32ToFloat16Conversion(float32ValueMinUnderflowRounding, float16ValueMinSubnormalRounding);
222
223
    // normal numbers converted to infinity
224
1
    const uint32_t float32ValueOverflow = createFloat32Value(0, 144, 0); // 2^17
225
1
    checkFloat32ToFloat16Conversion(float32ValueOverflow, float16ValuePlusInfinity);
226
227
    // normal numbers converted with rounding
228
1
    const uint32_t float32ValueRounding = createFloat32Value(0, 127, 0x401000); // 1 + 2^-1 + 2^-11
229
1
    const uint16_t float16ValueRounding = createFloat16Value(0, 15, 0x201);     // 1 + 2^-1 + 2^-10
230
1
    checkFloat32ToFloat16Conversion(float32ValueRounding, float16ValueRounding);
231
232
    // subnormal numbers
233
1
    const uint32_t float32ValueMinSubnormal = createFloat32Value(0, 0, 1); // 2^-126 (2^-23)
234
1
    checkFloat32ToFloat16Conversion(float32ValueMinSubnormal, float16ValuePlusZero);
235
236
                                                                           // 2^-126 (2^-1 + ... + 2^-23)
237
1
    const uint32_t float32ValueMaxSubnormal = createFloat32Value(0, 0, 0x007FFFFF);
238
1
    checkFloat32ToFloat16Conversion(float32ValueMaxSubnormal, float16ValuePlusZero);
239
1
}
240
241


802
TEST_F(FloatUtilTest, convertUInt32ToFloat)
242
{
243
9
    for (TestFloat32Element testElement : TEST_FLOAT32_DATA)
244
    {
245
8
        const uint32_t float32Value = createFloat32Value(testElement.sign, testElement.exponent,
246
8
                testElement.significand);
247
8
        const float convertedFloat = convertUInt32ToFloat(float32Value);
248
249



8
        ASSERT_EQ(testElement.expectedFloat, convertedFloat);
250
    }
251
}
252
253


802
TEST_F(FloatUtilTest, convertFloatToUInt32)
254
{
255
9
    for (TestFloat32Element testElement : TEST_FLOAT32_DATA)
256
    {
257
8
        const uint32_t convertedFloatValue = convertFloatToUInt32(testElement.expectedFloat);
258
8
        const uint32_t expectedFloatValue = createFloat32Value(testElement.sign, testElement.exponent,
259
8
                testElement.significand);
260
261



8
        ASSERT_EQ(expectedFloatValue, convertedFloatValue);
262
    }
263
}
264
265


802
TEST_F(FloatUtilTest, convertUInt64ToDouble)
266
{
267
9
    for (TestFloat64Element testElement : TEST_FLOAT64_DATA)
268
    {
269
8
        const uint64_t float64Value = createFloat64Value(testElement.sign, testElement.exponent,
270
8
                testElement.significand);
271
8
        const double convertedDouble = convertUInt64ToDouble(float64Value);
272
273



8
        ASSERT_EQ(testElement.expectedDouble, convertedDouble);
274
    }
275
}
276
277


802
TEST_F(FloatUtilTest, convertDoubleToUInt64)
278
{
279
9
    for (TestFloat64Element testElement : TEST_FLOAT64_DATA)
280
    {
281
8
        const uint64_t convertedDoubleValue = convertDoubleToUInt64(testElement.expectedDouble);
282
8
        const uint64_t expectedDoubleValue = createFloat64Value(testElement.sign, testElement.exponent,
283
8
                testElement.significand);
284
285



8
        ASSERT_EQ(expectedDoubleValue, convertedDoubleValue);
286
    }
287
}
288
289

2394
} // namespace zserio