Line | Count | Source (jump to first uncovered line) |
1 | | #ifndef ZSERIO_JSON_DECODER_H_INC |
2 | | #define ZSERIO_JSON_DECODER_H_INC |
3 | | |
4 | | #include <utility> |
5 | | #include <cerrno> |
6 | | #include <cmath> |
7 | | #include <cstdlib> |
8 | | #include <cstring> |
9 | | |
10 | | #include "zserio/AllocatorHolder.h" |
11 | | #include "zserio/AnyHolder.h" |
12 | | #include "zserio/CppRuntimeException.h" |
13 | | #include "zserio/String.h" |
14 | | #include "zserio/StringView.h" |
15 | | |
16 | | namespace zserio |
17 | | { |
18 | | |
19 | | /** |
20 | | * JSON value decoder. |
21 | | */ |
22 | | template <typename ALLOC = std::allocator<uint8_t>> |
23 | | class BasicJsonDecoder : public AllocatorHolder<ALLOC> |
24 | | { |
25 | | public: |
26 | | using AllocatorHolder<ALLOC>::get_allocator; |
27 | | |
28 | | /** |
29 | | * Decoder result value. |
30 | | */ |
31 | | struct DecoderResult |
32 | | { |
33 | | /** |
34 | | * Constructor used for decoder failure. |
35 | | * |
36 | | * \param numRead Number of processed characters. |
37 | | * \param allocator Allocator to use. |
38 | | */ |
39 | | DecoderResult(size_t numRead, const ALLOC& allocator) : |
40 | | numReadChars(numRead), value(allocator), integerOverflow(false) |
41 | 132 | {} |
42 | | |
43 | | /** |
44 | | * Constructor for decoder success. |
45 | | * |
46 | | * \param numRead Number of processed characters. |
47 | | * \param decodedValue Value decoded from JSON stream. |
48 | | * \param allocator Allocator to use. |
49 | | */ |
50 | | template <typename T> |
51 | | DecoderResult(size_t numRead, T&& decodedValue, const ALLOC& allocator) : |
52 | | numReadChars(numRead), value(std::forward<T>(decodedValue), allocator), integerOverflow(false) |
53 | 20.3k | {} |
54 | | |
55 | | /** |
56 | | * Constructor used for integer decoder. |
57 | | * |
58 | | * \param numRead Number of processed characters. |
59 | | * \param decodedValue Value decoded from JSON stream. |
60 | | * \param overflow True in case of integer overflow. |
61 | | * \param allocator Allocator to use. |
62 | | */ |
63 | | template <typename T> |
64 | | DecoderResult(size_t numRead, T&& decodedValue, bool overflow, const ALLOC& allocator) : |
65 | | numReadChars(numRead), value(createValue(decodedValue, overflow, allocator)), |
66 | | integerOverflow(overflow) |
67 | 4.10k | {} |
68 | | |
69 | | size_t numReadChars; /**< Number of processed characters. */ |
70 | | AnyHolder<ALLOC> value; /**< Decoded value. Empty on failure. */ |
71 | | bool integerOverflow; /**< True if decoded value was bigger than UINT64_MAX or was not in interval |
72 | | <INT64_MIN, INT64_MAX>. */ |
73 | | |
74 | | private: |
75 | | template <typename T> |
76 | | AnyHolder<ALLOC> createValue(T&& decodedValue, bool overflow, const ALLOC& allocator) |
77 | 4.10k | { |
78 | 4.10k | return overflow ? AnyHolder<ALLOC>(allocator)2 : |
79 | 4.10k | AnyHolder<ALLOC>(std::forward<T>(decodedValue), allocator)4.10k ; |
80 | 4.10k | } |
81 | | }; |
82 | | |
83 | | /** |
84 | | * Empty constructor. |
85 | | */ |
86 | | BasicJsonDecoder() : |
87 | | AllocatorHolder<ALLOC>(ALLOC()) |
88 | 12 | {} |
89 | | |
90 | | /** |
91 | | * Constructor from given allocator. |
92 | | * |
93 | | * \param allocator Allocator to use. |
94 | | */ |
95 | | explicit BasicJsonDecoder(const ALLOC& allocator) : |
96 | | AllocatorHolder<ALLOC>(allocator) |
97 | 91 | {} |
98 | | |
99 | | /** |
100 | | * Decodes the JSON value from the input. |
101 | | * |
102 | | * \param input Input to decode from. |
103 | | * |
104 | | * \return Decoder result. |
105 | | */ |
106 | | DecoderResult decodeValue(StringView input) |
107 | 24.5k | { |
108 | 24.5k | if (input.empty()) |
109 | 1 | return DecoderResult(0, get_allocator()); |
110 | | |
111 | 24.5k | switch (input[0]) |
112 | 24.5k | { |
113 | 9 | case 'n': |
114 | 9 | return decodeLiteral(input, "null"_sv, nullptr); |
115 | 7 | case 't': |
116 | 7 | return decodeLiteral(input, "true"_sv, true); |
117 | 4 | case 'f': |
118 | 4 | return decodeLiteral(input, "false"_sv, false); |
119 | 4 | case 'N': |
120 | 4 | return decodeLiteral(input, "NaN"_sv, static_cast<double>(NAN)); |
121 | 4 | case 'I': |
122 | 4 | return decodeLiteral(input, "Infinity"_sv, static_cast<double>(INFINITY)); |
123 | 16.3k | case '"': |
124 | 16.3k | return decodeString(input); |
125 | 20 | case '-': |
126 | 20 | if (input.size() > 1 && input[1] == 'I'19 ) |
127 | 5 | return decodeLiteral(input, "-Infinity"_sv, -static_cast<double>(INFINITY)); |
128 | 15 | return decodeNumber(input); |
129 | 8.12k | default: |
130 | 8.12k | return decodeNumber(input); |
131 | 24.5k | } |
132 | 24.5k | } |
133 | | |
134 | | private: |
135 | | template <typename T> |
136 | | DecoderResult decodeLiteral(StringView input, StringView literal, T&& value); |
137 | | DecoderResult decodeString(StringView input); |
138 | | static bool decodeUnicodeEscape(StringView input, StringView::const_iterator& inputIt, |
139 | | string<ALLOC>& value); |
140 | | static char decodeHex(char ch); |
141 | | size_t checkNumber(StringView input, bool& isDouble, bool& isSigned); |
142 | | DecoderResult decodeNumber(StringView input); |
143 | | DecoderResult decodeSigned(StringView input); |
144 | | DecoderResult decodeUnsigned(StringView input); |
145 | | DecoderResult decodeDouble(StringView input, size_t numChars); |
146 | | }; |
147 | | |
148 | | template <typename ALLOC> |
149 | | template <typename T> |
150 | | typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeLiteral( |
151 | | StringView input, StringView literal, T&& value) |
152 | 33 | { |
153 | 33 | StringView::const_iterator literalIt = literal.begin(); |
154 | 33 | StringView::const_iterator inputIt = input.begin(); |
155 | 174 | while (inputIt != input.end() && literalIt != literal.end()162 ) |
156 | 148 | { |
157 | 148 | if (*inputIt++ != *literalIt++) |
158 | 7 | { |
159 | | // failure, not decoded |
160 | 7 | return DecoderResult(static_cast<size_t>(inputIt - input.begin()), get_allocator()); |
161 | 7 | } |
162 | 148 | } |
163 | | |
164 | 26 | if (literalIt != literal.end()) |
165 | 6 | { |
166 | | // short input, not decoded |
167 | 6 | return DecoderResult(input.size(), get_allocator()); |
168 | 6 | } |
169 | | |
170 | | // success |
171 | 20 | return DecoderResult(literal.size(), std::forward<T>(value), get_allocator()); |
172 | 26 | } |
173 | | |
174 | | template <typename ALLOC> |
175 | | typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeString(StringView input) |
176 | 16.3k | { |
177 | 16.3k | StringView::const_iterator inputIt = input.begin() + 1; // we know that at the beginning is '"' |
178 | 16.3k | string<ALLOC> value(get_allocator()); |
179 | | |
180 | 82.9k | while (inputIt != input.end()) |
181 | 82.9k | { |
182 | 82.9k | if (*inputIt == '\\') |
183 | 24 | { |
184 | 24 | ++inputIt; |
185 | 24 | if (inputIt == input.end()) |
186 | 1 | { |
187 | | // wrong escape, not decoded |
188 | 1 | return DecoderResult(static_cast<size_t>(inputIt - input.begin()), get_allocator()); |
189 | 1 | } |
190 | | |
191 | 23 | char nextChar = *inputIt; |
192 | 23 | switch (nextChar) |
193 | 23 | { |
194 | 2 | case '\\': |
195 | 4 | case '"': |
196 | 4 | value.push_back(nextChar); |
197 | 4 | ++inputIt; |
198 | 4 | break; |
199 | 1 | case 'b': |
200 | 1 | value.push_back('\b'); |
201 | 1 | ++inputIt; |
202 | 1 | break; |
203 | 1 | case 'f': |
204 | 1 | value.push_back('\f'); |
205 | 1 | ++inputIt; |
206 | 1 | break; |
207 | 2 | case 'n': |
208 | 2 | value.push_back('\n'); |
209 | 2 | ++inputIt; |
210 | 2 | break; |
211 | 1 | case 'r': |
212 | 1 | value.push_back('\r'); |
213 | 1 | ++inputIt; |
214 | 1 | break; |
215 | 2 | case 't': |
216 | 2 | value.push_back('\t'); |
217 | 2 | ++inputIt; |
218 | 2 | break; |
219 | 11 | case 'u': // unicode escape |
220 | 11 | { |
221 | 11 | ++inputIt; |
222 | 11 | if (!decodeUnicodeEscape(input, inputIt, value)) |
223 | 10 | { |
224 | | // unsupported unicode escape, not decoded |
225 | 10 | return DecoderResult(static_cast<size_t>(inputIt - input.begin()), get_allocator()); |
226 | 10 | } |
227 | 1 | break; |
228 | 11 | } |
229 | 1 | default: |
230 | 1 | ++inputIt; |
231 | | // unknown escape, not decoded |
232 | 1 | return DecoderResult(static_cast<size_t>(inputIt - input.begin()), get_allocator()); |
233 | 23 | } |
234 | 23 | } |
235 | 82.8k | else if (*inputIt == '"') |
236 | 16.3k | { |
237 | 16.3k | ++inputIt; |
238 | | // successfully decoded |
239 | 16.3k | return DecoderResult(static_cast<size_t>(inputIt - input.begin()), std::move(value), |
240 | 16.3k | get_allocator()); |
241 | 16.3k | } |
242 | 66.5k | else |
243 | 66.5k | { |
244 | 66.5k | value.push_back(*inputIt++); |
245 | 66.5k | } |
246 | 82.9k | } |
247 | | |
248 | | // unterminated string, not decoded |
249 | 2 | return DecoderResult(input.size(), get_allocator()); |
250 | 16.3k | } |
251 | | |
252 | | template <typename ALLOC> |
253 | | bool BasicJsonDecoder<ALLOC>::decodeUnicodeEscape(StringView input, StringView::const_iterator& inputIt, |
254 | | string<ALLOC>& value) |
255 | 11 | { |
256 | | // TODO[Mi-L@]: Simplified just to decode what zserio encodes, for complex solution we could use |
257 | | // std::wstring_convert but it's deprecated in C++17. |
258 | 11 | if (inputIt == input.end() || *inputIt++ != '0'10 ) |
259 | 2 | return false; |
260 | 9 | if (inputIt == input.end() || *inputIt++ != '0'8 ) |
261 | 2 | return false; |
262 | | |
263 | 7 | if (inputIt == input.end()) |
264 | 1 | return false; |
265 | 6 | const char ch1 = decodeHex(*inputIt++); |
266 | 6 | if (ch1 == -1) |
267 | 2 | return false; |
268 | | |
269 | 4 | if (inputIt == input.end()) |
270 | 1 | return false; |
271 | 3 | const char ch2 = decodeHex(*inputIt++); |
272 | 3 | if (ch2 == -1) |
273 | 2 | return false; |
274 | | |
275 | 1 | value.push_back(static_cast<char>((static_cast<uint32_t>(ch1) << 4U) | static_cast<uint32_t>(ch2))); |
276 | 1 | return true; |
277 | 3 | } |
278 | | |
279 | | template <typename ALLOC> |
280 | | char BasicJsonDecoder<ALLOC>::decodeHex(char ch) |
281 | 9 | { |
282 | 9 | if (ch >= '0' && ch <= '9'8 ) |
283 | 3 | return static_cast<char>(ch - '0'); |
284 | 6 | else if (ch >= 'a' && ch <= 'f'2 ) |
285 | 1 | return static_cast<char>(ch - 'a' + 10); |
286 | 5 | else if (ch >= 'A' && ch <= 'F'4 ) |
287 | 1 | return static_cast<char>(ch - 'A' + 10); |
288 | | |
289 | 4 | return -1; |
290 | 9 | } |
291 | | |
292 | | template <typename ALLOC> |
293 | | size_t BasicJsonDecoder<ALLOC>::checkNumber(StringView input, bool& isDouble, bool& isSigned) |
294 | 8.13k | { |
295 | 8.13k | StringView::const_iterator inputIt = input.begin(); |
296 | 8.13k | bool acceptExpSign = false; |
297 | 8.13k | isDouble = false; |
298 | | |
299 | 8.13k | if (*inputIt == '-') // we know that at the beginning is at least one character |
300 | 15 | { |
301 | 15 | ++inputIt; |
302 | 15 | isSigned = true; |
303 | 15 | } |
304 | 8.12k | else |
305 | 8.12k | { |
306 | 8.12k | isSigned = false; |
307 | 8.12k | } |
308 | | |
309 | 56.5k | while (inputIt != input.end()) |
310 | 56.5k | { |
311 | 56.5k | if (acceptExpSign) |
312 | 4.01k | { |
313 | 4.01k | acceptExpSign = false; |
314 | 4.01k | if (*inputIt == '+' || *inputIt == '-'4.00k ) |
315 | 11 | { |
316 | 11 | ++inputIt; |
317 | 11 | continue; |
318 | 11 | } |
319 | 4.01k | } |
320 | 56.5k | if (*inputIt >= '0' && *inputIt <= '9'48.4k ) |
321 | 44.3k | { |
322 | 44.3k | ++inputIt; |
323 | 44.3k | continue; |
324 | 44.3k | } |
325 | 12.1k | if (!isDouble && (8.12k *inputIt == '.'8.12k || *inputIt == 'e'8.11k || *inputIt == 'E'4.11k )) |
326 | 4.02k | { |
327 | 4.02k | isDouble = true; |
328 | 4.02k | if (*inputIt == 'e' || *inputIt == 'E'16 ) |
329 | 4.01k | acceptExpSign = true; |
330 | 4.02k | ++inputIt; |
331 | 4.02k | continue; |
332 | 4.02k | } |
333 | | |
334 | 8.11k | break; // end of a number |
335 | 12.1k | } |
336 | | |
337 | 8.13k | const size_t numberLen = static_cast<size_t>(inputIt - input.begin()); |
338 | 8.13k | if (isSigned && numberLen == 115 ) |
339 | 3 | return 0; // single minus is not a number |
340 | | |
341 | 8.13k | return numberLen; |
342 | 8.13k | } |
343 | | |
344 | | template <typename ALLOC> |
345 | | typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeNumber(StringView input) |
346 | 8.13k | { |
347 | 8.13k | bool isDouble = false; |
348 | 8.13k | bool isSigned = false; |
349 | 8.13k | const size_t numChars = checkNumber(input, isDouble, isSigned); |
350 | 8.13k | if (numChars == 0) |
351 | 7 | return DecoderResult(1, get_allocator()); |
352 | | |
353 | | // for decodeSigned and decodeUnsigned, we know that all numChars will be processed because checkNumber |
354 | | // already checked this |
355 | 8.13k | if (isDouble) |
356 | 4.02k | return decodeDouble(input, numChars); |
357 | 4.10k | else if (isSigned) |
358 | 7 | return decodeSigned(input); |
359 | 4.10k | else |
360 | 4.10k | return decodeUnsigned(input); |
361 | 8.13k | } |
362 | | |
363 | | template <typename ALLOC> |
364 | | typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeSigned(StringView input) |
365 | 7 | { |
366 | 7 | char* pEnd = nullptr; |
367 | 7 | errno = 0; // no library function sets its value back to zero once changed |
368 | 7 | const int64_t value = std::strtoll(input.begin(), &pEnd, 10); |
369 | | |
370 | 7 | const bool overflow = (errno == ERANGE); |
371 | | |
372 | 7 | return DecoderResult(static_cast<size_t>(pEnd - input.begin()), value, overflow, get_allocator()); |
373 | 7 | } |
374 | | |
375 | | template <typename ALLOC> |
376 | | typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeUnsigned(StringView input) |
377 | 4.10k | { |
378 | 4.10k | char* pEnd = nullptr; |
379 | 4.10k | errno = 0; // no library function sets its value back to zero once changed |
380 | 4.10k | const uint64_t value = std::strtoull(input.begin(), &pEnd, 10); |
381 | | |
382 | 4.10k | const bool overflow = (errno == ERANGE); |
383 | | |
384 | 4.10k | return DecoderResult(static_cast<size_t>(pEnd - input.begin()), value, overflow, get_allocator()); |
385 | 4.10k | } |
386 | | |
387 | | template <typename ALLOC> |
388 | | typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeDouble( |
389 | | StringView input, size_t numChars) |
390 | 4.02k | { |
391 | 4.02k | char* pEnd = nullptr; |
392 | 4.02k | const double value = std::strtod(input.begin(), &pEnd); |
393 | 4.02k | if (static_cast<size_t>(pEnd - input.begin()) != numChars) |
394 | 6 | return DecoderResult(numChars, get_allocator()); |
395 | | |
396 | 4.01k | return DecoderResult(numChars, value, get_allocator()); |
397 | 4.02k | } |
398 | | |
399 | | } // namespace zserio |
400 | | |
401 | | #endif // ZSERIO_JSON_DECODER_H_INC |