test/zserio/FloatUtilTest.cpp
Line | Count | Source |
1 | | #include <array> |
2 | | |
3 | | #include "zserio/FloatUtil.h" |
4 | | |
5 | | #include "gtest/gtest.h" |
6 | | |
7 | | namespace zserio |
8 | | { |
9 | | |
10 | | class FloatUtilTest : public ::testing::Test |
11 | | { |
12 | | protected: |
13 | | uint16_t createFloat16Value(uint16_t sign, uint16_t exponent, uint16_t significand) |
14 | 23 | { |
15 | 23 | return static_cast<uint16_t>( |
16 | 23 | (static_cast<uint32_t>(sign) << FLOAT16_SIGN_BIT_POSITION) | |
17 | 23 | (static_cast<uint32_t>(exponent) << FLOAT16_EXPONENT_BIT_POSITION) | |
18 | 23 | significand); |
19 | 23 | } |
20 | | |
21 | | uint32_t createFloat32Value(uint32_t sign, uint32_t exponent, uint32_t significand) |
22 | 35 | { |
23 | 35 | return (sign << FLOAT32_SIGN_BIT_POSITION) | (exponent << FLOAT32_EXPONENT_BIT_POSITION) | significand; |
24 | 35 | } |
25 | | |
26 | | uint64_t createFloat64Value(uint64_t sign, uint64_t exponent, uint64_t significand) |
27 | 16 | { |
28 | 16 | return (sign << FLOAT64_SIGN_BIT_POSITION) | (exponent << FLOAT64_EXPONENT_BIT_POSITION) | significand; |
29 | 16 | } |
30 | | |
31 | | void checkFloat16ToFloat32Conversion(uint16_t float16Value, uint32_t expectedFloat32Value) |
32 | 7 | { |
33 | 7 | const float float32 = convertUInt16ToFloat(float16Value); |
34 | 7 | ASSERT_EQ(expectedFloat32Value, convertFloatToUInt32(float32)); |
35 | 7 | } |
36 | | |
37 | | void checkFloat16ToFloat32Conversion(uint16_t float16Value, float expectedFloat32) |
38 | 4 | { |
39 | 4 | ASSERT_EQ(expectedFloat32, convertUInt16ToFloat(float16Value)); |
40 | 4 | } |
41 | | |
42 | | void checkFloat32ToFloat16Conversion(uint32_t float32Value, uint16_t expectedFloat16Value) |
43 | 12 | { |
44 | 12 | const float float32 = convertUInt32ToFloat(float32Value); |
45 | 12 | ASSERT_EQ(expectedFloat16Value, convertFloatToUInt16(float32)); |
46 | 12 | } |
47 | | |
48 | | void checkFloat32ToFloat16Conversion(float float32, uint16_t expectedFloat16Value) |
49 | 4 | { |
50 | 4 | ASSERT_EQ(expectedFloat16Value, convertFloatToUInt16(float32)); |
51 | 4 | } |
52 | | |
53 | | struct TestFloat32Element |
54 | | { |
55 | | uint32_t sign; |
56 | | uint32_t exponent; |
57 | | uint32_t significand; |
58 | | float expectedFloat; |
59 | | }; |
60 | | |
61 | | struct TestFloat64Element |
62 | | { |
63 | | uint64_t sign; |
64 | | uint64_t exponent; |
65 | | uint64_t significand; |
66 | | double expectedDouble; |
67 | | }; |
68 | | |
69 | | static const std::array<TestFloat32Element, 8> TEST_FLOAT32_DATA; |
70 | | static const std::array<TestFloat64Element, 8> TEST_FLOAT64_DATA; |
71 | | |
72 | | private: |
73 | | static const uint16_t FLOAT16_SIGN_BIT_POSITION; |
74 | | static const uint16_t FLOAT16_EXPONENT_BIT_POSITION; |
75 | | |
76 | | static const uint32_t FLOAT32_SIGN_BIT_POSITION; |
77 | | static const uint32_t FLOAT32_EXPONENT_BIT_POSITION; |
78 | | |
79 | | static const uint64_t FLOAT64_SIGN_BIT_POSITION; |
80 | | static const uint64_t FLOAT64_EXPONENT_BIT_POSITION; |
81 | | }; |
82 | | |
83 | | const std::array<FloatUtilTest::TestFloat32Element, 8> FloatUtilTest::TEST_FLOAT32_DATA = |
84 | | { |
85 | | TestFloat32Element{0, 0, UINT32_C(0), 0.0F}, |
86 | | TestFloat32Element{1, 0, UINT32_C(0), -0.0F}, |
87 | | TestFloat32Element{0, 127, UINT32_C(0), +1.0F}, |
88 | | TestFloat32Element{1, 127, UINT32_C(0), -1.0F}, |
89 | | TestFloat32Element{0, 128, UINT32_C(0x600000), 3.5F}, // 2^1 (1 + 2^-1 + 2^-2) |
90 | | TestFloat32Element{0, 126, UINT32_C(0x600000), 0.875F}, // 2^-1 (1 + 2^-1 + 2^-2) |
91 | | TestFloat32Element{0, 130, UINT32_C(0x1E0000), 9.875F}, // 2^3 (1 + 2^-3 + 2^-4 + 2^-5 + 2^-6) |
92 | | TestFloat32Element{0, 126, UINT32_C(0x1E0000), 0.6171875F} // 2^-3 (1 + 2^-3 + 2^-4 + 2^-5 + 2^-6) |
93 | | }; |
94 | | |
95 | | const std::array<FloatUtilTest::TestFloat64Element, 8> FloatUtilTest::TEST_FLOAT64_DATA = |
96 | | { |
97 | | TestFloat64Element{0, 0, UINT64_C(0), 0.0}, |
98 | | TestFloat64Element{1, 0, UINT64_C(0), -0.0}, |
99 | | TestFloat64Element{0, 1023, UINT64_C(0), +1.0}, |
100 | | TestFloat64Element{1, 1023, UINT64_C(0), -1.0}, |
101 | | TestFloat64Element{0, 1024, UINT64_C(0xC000000000000), 3.5}, // 2^1 (1 + 2^-1 + 2^-2) |
102 | | TestFloat64Element{0, 1022, UINT64_C(0xC000000000000), 0.875}, // 2^-1 (1 + 2^-1 + 2^-2) |
103 | | TestFloat64Element{0, 1026, UINT64_C(0x3C00000000000), 9.875}, // 2^3 (1 + 2^-3 + 2^-4 + 2^-5 + 2^-6) |
104 | | TestFloat64Element{0, 1022, UINT64_C(0x3C00000000000), 0.6171875} // 2^-3 (1 + 2^-3 + 2^-4 + 2^-5 + 2^-6) |
105 | | }; |
106 | | |
107 | | const uint16_t FloatUtilTest::FLOAT16_SIGN_BIT_POSITION = UINT16_C(15); |
108 | | const uint16_t FloatUtilTest::FLOAT16_EXPONENT_BIT_POSITION = UINT16_C(10); |
109 | | |
110 | | const uint32_t FloatUtilTest::FLOAT32_SIGN_BIT_POSITION = UINT16_C(31); |
111 | | const uint32_t FloatUtilTest::FLOAT32_EXPONENT_BIT_POSITION = UINT16_C(23); |
112 | | |
113 | | const uint64_t FloatUtilTest::FLOAT64_SIGN_BIT_POSITION = UINT16_C(63); |
114 | | const uint64_t FloatUtilTest::FLOAT64_EXPONENT_BIT_POSITION = UINT16_C(52); |
115 | | |
116 | | TEST_F(FloatUtilTest, convertUInt16ToFloat) |
117 | 1 | { |
118 | | // plus zero |
119 | 1 | const uint16_t float16ValuePlusZero = createFloat16Value(0, 0, 0); // +0.0 |
120 | 1 | checkFloat16ToFloat32Conversion(float16ValuePlusZero, 0.0F); |
121 | | |
122 | | // minus zero |
123 | 1 | const uint16_t float16ValueMinusZero = createFloat16Value(1, 0, 0); // -0.0 |
124 | 1 | checkFloat16ToFloat32Conversion(float16ValueMinusZero, -0.0F); |
125 | | |
126 | | // plus infinity |
127 | 1 | const uint16_t float16ValuePlusInfinity = createFloat16Value(0, 0x1F, 0); // +INF |
128 | 1 | const uint32_t float32ValuePlusInfinity = createFloat32Value(0, 0xFF, 0); // +INF |
129 | 1 | checkFloat16ToFloat32Conversion(float16ValuePlusInfinity, float32ValuePlusInfinity); |
130 | | |
131 | | // minus infinity |
132 | 1 | const uint16_t float16ValueMinusInfinity = createFloat16Value(1, 0x1F, 0); // -INF |
133 | 1 | const uint32_t float32ValueMinusInfinity = createFloat32Value(1, 0xFF, 0); // -INF |
134 | 1 | checkFloat16ToFloat32Conversion(float16ValueMinusInfinity, float32ValueMinusInfinity); |
135 | | |
136 | | // quiet NaN |
137 | 1 | const uint16_t float16ValueQuietNan = createFloat16Value(0, 0x1F, 0x3FF); // +NaN |
138 | 1 | const uint32_t float32ValueQuietNan = createFloat32Value(0, 0xFF, 0x7FE000); // +NaN |
139 | 1 | checkFloat16ToFloat32Conversion(float16ValueQuietNan, float32ValueQuietNan); |
140 | | |
141 | | // signaling NaN |
142 | 1 | const uint16_t float16ValueSignalingNan = createFloat16Value(1, 0x1F, 0x3FF); // -NaN |
143 | 1 | const uint32_t float32ValueSignalingNan = createFloat32Value(1, 0xFF, 0x7FE000); // -NaN |
144 | 1 | checkFloat16ToFloat32Conversion(float16ValueSignalingNan, float32ValueSignalingNan); |
145 | | |
146 | | // normal numbers |
147 | 1 | const uint16_t float16ValueOne = createFloat16Value(0, 15, 0); // 1.0 |
148 | 1 | checkFloat16ToFloat32Conversion(float16ValueOne, 1.0F); |
149 | | |
150 | 1 | const uint16_t float16ValueOnePlus = createFloat16Value(0, 15, 0x01); // 1.0 + 2^-10 |
151 | 1 | const uint32_t float32ValueOnePlus = createFloat32Value(0, 127, 0x2000); // 1.0 + 2^-10 |
152 | 1 | checkFloat16ToFloat32Conversion(float16ValueOnePlus, float32ValueOnePlus); |
153 | | |
154 | 1 | const uint16_t float16ValueMax = createFloat16Value(0, 30, 0x3FF); // 2^15 (1 + 2^-1 + ... + 2^-10) |
155 | 1 | checkFloat16ToFloat32Conversion(float16ValueMax, 65504.0F); |
156 | | |
157 | | // subnormal numbers |
158 | 1 | const uint16_t float16ValueMinSubnormal = createFloat16Value(0, 0, 1); // 2^-14 (2^-10) |
159 | 1 | const uint32_t float32ValueMinSubnormal = createFloat32Value(0, 103, 0); // 2^-24 |
160 | 1 | checkFloat16ToFloat32Conversion(float16ValueMinSubnormal, float32ValueMinSubnormal); |
161 | | |
162 | 1 | const uint16_t float16ValueMaxSubnormal = createFloat16Value(0, 0, 0x3FF); // 2^-14 (2^-1 + ... + 2^-10) |
163 | | // 2^-15 (1 + 2^-1 + ... + 2^-9) |
164 | 1 | const uint32_t float32ValueMaxSubnormal = createFloat32Value(0, 112, 0x7FC000); |
165 | 1 | checkFloat16ToFloat32Conversion(float16ValueMaxSubnormal, float32ValueMaxSubnormal); |
166 | 1 | } |
167 | | |
168 | | TEST_F(FloatUtilTest, convertFloatToUInt16) |
169 | 1 | { |
170 | | // plus zero |
171 | 1 | const uint16_t float16ValuePlusZero = createFloat16Value(0, 0, 0); // +0.0 |
172 | 1 | checkFloat32ToFloat16Conversion(0.0F, float16ValuePlusZero); |
173 | | |
174 | | // minus zero |
175 | 1 | const uint16_t float16ValueMinusZero = createFloat16Value(1, 0, 0); // -0.0 |
176 | 1 | checkFloat32ToFloat16Conversion(-0.0F, float16ValueMinusZero); |
177 | | |
178 | | // plus infinity |
179 | 1 | const uint32_t float32ValuePlusInfinity = createFloat32Value(0, 0xFF, 0); // +INF |
180 | 1 | const uint16_t float16ValuePlusInfinity = createFloat16Value(0, 0x1F, 0); // +INF |
181 | 1 | checkFloat32ToFloat16Conversion(float32ValuePlusInfinity, float16ValuePlusInfinity); |
182 | | |
183 | | // minus infinity |
184 | 1 | const uint32_t float32ValueMinusInfinity = createFloat32Value(1, 0xFF, 0); // -INF |
185 | 1 | const uint16_t float16ValueMinusInfinity = createFloat16Value(1, 0x1F, 0); // -INF |
186 | 1 | checkFloat32ToFloat16Conversion(float32ValueMinusInfinity, float16ValueMinusInfinity); |
187 | | |
188 | | // quiet NaN |
189 | 1 | const uint32_t float32ValueQuietNan = createFloat32Value(0, 0xFF, 0x7FE000); // +NaN |
190 | 1 | const uint16_t float16ValueQuietNan = createFloat16Value(0, 0x1F, 0x3FF); // +NaN |
191 | 1 | checkFloat32ToFloat16Conversion(float32ValueQuietNan, float16ValueQuietNan); |
192 | | |
193 | | // signaling NaN |
194 | 1 | const uint32_t float32ValueSignalingNan = createFloat32Value(1, 0xFF, 0x7FE000); // -NaN |
195 | 1 | const uint16_t float16ValueSignalingNan = createFloat16Value(1, 0x1F, 0x3FF); // -NaN |
196 | 1 | checkFloat32ToFloat16Conversion(float32ValueSignalingNan, float16ValueSignalingNan); |
197 | | |
198 | | // normal numbers |
199 | 1 | const uint16_t float16ValueOne = createFloat16Value(0, 15, 0); // 1.0 |
200 | 1 | checkFloat32ToFloat16Conversion(1.0F, float16ValueOne); |
201 | | |
202 | 1 | const uint32_t float32ValueOnePlus = createFloat32Value(0, 127, 0x2000); // 1.0 + 2^-10 |
203 | 1 | const uint16_t float16ValueOnePlus = createFloat16Value(0, 15, 0x01); // 1.0 + 2^-10 |
204 | 1 | checkFloat32ToFloat16Conversion(float32ValueOnePlus, float16ValueOnePlus); |
205 | | |
206 | 1 | const uint16_t float16ValueMax = createFloat16Value(0, 30, 0x3FF); // 2^15 (1 + 2^-1 + ... + 2^-10) |
207 | 1 | checkFloat32ToFloat16Conversion(65504.0F, float16ValueMax); |
208 | | |
209 | | // normal numbers converted to zero |
210 | 1 | const uint32_t float32ValueUnderflow = createFloat32Value(0, 102, 0); // 2^-25 |
211 | 1 | checkFloat32ToFloat16Conversion(float32ValueUnderflow, float16ValuePlusZero); |
212 | | |
213 | | // normal numbers converted to subnormal numbers |
214 | 1 | const uint32_t float32ValueMinUnderflow = createFloat32Value(0, 103, 1); // 2^-24 (1 + 2^-23) |
215 | 1 | const uint16_t float16ValueMinSubnormal = createFloat16Value(0, 0, 1); // 2^-24 |
216 | 1 | checkFloat32ToFloat16Conversion(float32ValueMinUnderflow, float16ValueMinSubnormal); |
217 | | |
218 | | // normal numbers converted to subnormal numbers with rounding |
219 | 1 | const uint32_t float32ValueMinUnderflowRounding = createFloat32Value(0, 104, 0x200000); // 2^-23 (1 + 2^-2) |
220 | 1 | const uint16_t float16ValueMinSubnormalRounding = createFloat16Value(0, 0, 0x3); // 2^-14 (2^-9 + 2^-10) |
221 | 1 | checkFloat32ToFloat16Conversion(float32ValueMinUnderflowRounding, float16ValueMinSubnormalRounding); |
222 | | |
223 | | // normal numbers converted to infinity |
224 | 1 | const uint32_t float32ValueOverflow = createFloat32Value(0, 144, 0); // 2^17 |
225 | 1 | checkFloat32ToFloat16Conversion(float32ValueOverflow, float16ValuePlusInfinity); |
226 | | |
227 | | // normal numbers converted with rounding |
228 | 1 | const uint32_t float32ValueRounding = createFloat32Value(0, 127, 0x401000); // 1 + 2^-1 + 2^-11 |
229 | 1 | const uint16_t float16ValueRounding = createFloat16Value(0, 15, 0x201); // 1 + 2^-1 + 2^-10 |
230 | 1 | checkFloat32ToFloat16Conversion(float32ValueRounding, float16ValueRounding); |
231 | | |
232 | | // subnormal numbers |
233 | 1 | const uint32_t float32ValueMinSubnormal = createFloat32Value(0, 0, 1); // 2^-126 (2^-23) |
234 | 1 | checkFloat32ToFloat16Conversion(float32ValueMinSubnormal, float16ValuePlusZero); |
235 | | |
236 | | // 2^-126 (2^-1 + ... + 2^-23) |
237 | 1 | const uint32_t float32ValueMaxSubnormal = createFloat32Value(0, 0, 0x007FFFFF); |
238 | 1 | checkFloat32ToFloat16Conversion(float32ValueMaxSubnormal, float16ValuePlusZero); |
239 | 1 | } |
240 | | |
241 | | TEST_F(FloatUtilTest, convertUInt32ToFloat) |
242 | 1 | { |
243 | 1 | for (TestFloat32Element testElement : TEST_FLOAT32_DATA) |
244 | 8 | { |
245 | 8 | const uint32_t float32Value = createFloat32Value(testElement.sign, testElement.exponent, |
246 | 8 | testElement.significand); |
247 | 8 | const float convertedFloat = convertUInt32ToFloat(float32Value); |
248 | | |
249 | 8 | ASSERT_EQ(testElement.expectedFloat, convertedFloat); |
250 | 8 | } |
251 | 1 | } |
252 | | |
253 | | TEST_F(FloatUtilTest, convertFloatToUInt32) |
254 | 1 | { |
255 | 1 | for (TestFloat32Element testElement : TEST_FLOAT32_DATA) |
256 | 8 | { |
257 | 8 | const uint32_t convertedFloatValue = convertFloatToUInt32(testElement.expectedFloat); |
258 | 8 | const uint32_t expectedFloatValue = createFloat32Value(testElement.sign, testElement.exponent, |
259 | 8 | testElement.significand); |
260 | | |
261 | 8 | ASSERT_EQ(expectedFloatValue, convertedFloatValue); |
262 | 8 | } |
263 | 1 | } |
264 | | |
265 | | TEST_F(FloatUtilTest, convertUInt64ToDouble) |
266 | 1 | { |
267 | 1 | for (TestFloat64Element testElement : TEST_FLOAT64_DATA) |
268 | 8 | { |
269 | 8 | const uint64_t float64Value = createFloat64Value(testElement.sign, testElement.exponent, |
270 | 8 | testElement.significand); |
271 | 8 | const double convertedDouble = convertUInt64ToDouble(float64Value); |
272 | | |
273 | 8 | ASSERT_EQ(testElement.expectedDouble, convertedDouble); |
274 | 8 | } |
275 | 1 | } |
276 | | |
277 | | TEST_F(FloatUtilTest, convertDoubleToUInt64) |
278 | 1 | { |
279 | 1 | for (TestFloat64Element testElement : TEST_FLOAT64_DATA) |
280 | 8 | { |
281 | 8 | const uint64_t convertedDoubleValue = convertDoubleToUInt64(testElement.expectedDouble); |
282 | 8 | const uint64_t expectedDoubleValue = createFloat64Value(testElement.sign, testElement.exponent, |
283 | 8 | testElement.significand); |
284 | | |
285 | 8 | ASSERT_EQ(expectedDoubleValue, convertedDoubleValue); |
286 | 8 | } |
287 | 1 | } |
288 | | |
289 | | } // namespace zserio |