Coverage Report

Created: 2024-07-18 11:41

src/zserio/JsonDecoder.h
Line
Count
Source (jump to first uncovered line)
1
#ifndef ZSERIO_JSON_DECODER_H_INC
2
#define ZSERIO_JSON_DECODER_H_INC
3
4
#include <cerrno>
5
#include <cmath>
6
#include <cstdlib>
7
#include <cstring>
8
#include <utility>
9
10
#include "zserio/AllocatorHolder.h"
11
#include "zserio/AnyHolder.h"
12
#include "zserio/CppRuntimeException.h"
13
#include "zserio/String.h"
14
#include "zserio/StringView.h"
15
16
namespace zserio
17
{
18
19
/**
20
 * JSON value decoder.
21
 */
22
template <typename ALLOC = std::allocator<uint8_t>>
23
class BasicJsonDecoder : public AllocatorHolder<ALLOC>
24
{
25
public:
26
    using AllocatorHolder<ALLOC>::get_allocator;
27
28
    /**
29
     * Decoder result value.
30
     */
31
    struct DecoderResult
32
    {
33
        /**
34
         * Constructor used for decoder failure.
35
         *
36
         * \param numRead Number of processed characters.
37
         * \param allocator Allocator to use.
38
         */
39
        DecoderResult(size_t numRead, const ALLOC& allocator) :
40
                numReadChars(numRead),
41
                value(allocator),
42
                integerOverflow(false)
43
136
        {}
44
45
        /**
46
         * Constructor for decoder success.
47
         *
48
         * \param numRead Number of processed characters.
49
         * \param decodedValue Value decoded from JSON stream.
50
         * \param allocator Allocator to use.
51
         */
52
        template <typename T>
53
        DecoderResult(size_t numRead, T&& decodedValue, const ALLOC& allocator) :
54
                numReadChars(numRead),
55
                value(std::forward<T>(decodedValue), allocator),
56
                integerOverflow(false)
57
20.3k
        {}
58
59
        /**
60
         * Constructor used for integer decoder.
61
         *
62
         * \param numRead Number of processed characters.
63
         * \param decodedValue Value decoded from JSON stream.
64
         * \param overflow True in case of integer overflow.
65
         * \param allocator Allocator to use.
66
         */
67
        template <typename T>
68
        DecoderResult(size_t numRead, T&& decodedValue, bool overflow, const ALLOC& allocator) :
69
                numReadChars(numRead),
70
                value(createValue(decodedValue, overflow, allocator)),
71
                integerOverflow(overflow)
72
4.11k
        {}
73
74
        size_t numReadChars; /**< Number of processed characters. */
75
        AnyHolder<ALLOC> value; /**< Decoded value. Empty on failure. */
76
        bool integerOverflow; /**< True if decoded value was bigger than UINT64_MAX or was not in interval
77
                                   <INT64_MIN, INT64_MAX>. */
78
79
    private:
80
        template <typename T>
81
        AnyHolder<ALLOC> createValue(T&& decodedValue, bool overflow, const ALLOC& allocator)
82
4.11k
        {
83
4.11k
            return overflow ? 
AnyHolder<ALLOC>(allocator)2
84
4.11k
                            : 
AnyHolder<ALLOC>(std::forward<T>(decodedValue), allocator)4.11k
;
85
4.11k
        }
86
    };
87
88
    /**
89
     * Empty constructor.
90
     */
91
    BasicJsonDecoder() :
92
            AllocatorHolder<ALLOC>(ALLOC())
93
12
    {}
94
95
    /**
96
     * Constructor from given allocator.
97
     *
98
     * \param allocator Allocator to use.
99
     */
100
    explicit BasicJsonDecoder(const ALLOC& allocator) :
101
            AllocatorHolder<ALLOC>(allocator)
102
93
    {}
103
104
    /**
105
     * Decodes the JSON value from the input.
106
     *
107
     * \param input Input to decode from.
108
     *
109
     * \return Decoder result.
110
     */
111
    DecoderResult decodeValue(StringView input)
112
24.5k
    {
113
24.5k
        if (input.empty())
114
1
        {
115
1
            return DecoderResult(0, get_allocator());
116
1
        }
117
118
24.5k
        switch (input[0])
119
24.5k
        {
120
9
        case 'n':
121
9
            return decodeLiteral(input, "null"_sv, nullptr);
122
7
        case 't':
123
7
            return decodeLiteral(input, "true"_sv, true);
124
4
        case 'f':
125
4
            return decodeLiteral(input, "false"_sv, false);
126
4
        case 'N':
127
4
            return decodeLiteral(input, "NaN"_sv, static_cast<double>(NAN));
128
4
        case 'I':
129
4
            return decodeLiteral(input, "Infinity"_sv, static_cast<double>(INFINITY));
130
16.3k
        case '"':
131
16.3k
            return decodeString(input);
132
24
        case '-':
133
24
            if (input.size() > 1 && 
input[1] == 'I'23
)
134
5
            {
135
5
                return decodeLiteral(input, "-Infinity"_sv, -static_cast<double>(INFINITY));
136
5
            }
137
19
            return decodeNumber(input);
138
8.13k
        default:
139
8.13k
            return decodeNumber(input);
140
24.5k
        }
141
24.5k
    }
142
143
private:
144
    template <typename T>
145
    DecoderResult decodeLiteral(StringView input, StringView literal, T&& value);
146
    DecoderResult decodeString(StringView input);
147
    static bool decodeUnicodeEscape(
148
            StringView input, StringView::const_iterator& inputIt, string<ALLOC>& value);
149
    static char decodeHex(char character);
150
    size_t checkNumber(StringView input, bool& isDouble, bool& isSigned);
151
    DecoderResult decodeNumber(StringView input);
152
    DecoderResult decodeSigned(StringView input);
153
    DecoderResult decodeUnsigned(StringView input);
154
    DecoderResult decodeDouble(StringView input, size_t numChars);
155
};
156
157
template <typename ALLOC>
158
template <typename T>
159
typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeLiteral(
160
        StringView input, StringView literal, T&& value)
161
33
{
162
33
    StringView::const_iterator literalIt = literal.begin();
163
33
    StringView::const_iterator inputIt = input.begin();
164
174
    while (inputIt != input.end() && 
literalIt != literal.end()162
)
165
148
    {
166
148
        if (*inputIt++ != *literalIt++)
167
7
        {
168
            // failure, not decoded
169
7
            return DecoderResult(static_cast<size_t>(inputIt - input.begin()), get_allocator());
170
7
        }
171
148
    }
172
173
26
    if (literalIt != literal.end())
174
6
    {
175
        // short input, not decoded
176
6
        return DecoderResult(input.size(), get_allocator());
177
6
    }
178
179
    // success
180
20
    return DecoderResult(literal.size(), std::forward<T>(value), get_allocator());
181
26
}
182
183
template <typename ALLOC>
184
typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeString(StringView input)
185
16.3k
{
186
16.3k
    StringView::const_iterator inputIt = input.begin() + 1; // we know that at the beginning is '"'
187
16.3k
    string<ALLOC> value(get_allocator());
188
189
83.1k
    while (inputIt != input.end())
190
83.1k
    {
191
83.1k
        if (*inputIt == '\\')
192
24
        {
193
24
            ++inputIt;
194
24
            if (inputIt == input.end())
195
1
            {
196
                // wrong escape, not decoded
197
1
                return DecoderResult(static_cast<size_t>(inputIt - input.begin()), get_allocator());
198
1
            }
199
200
23
            char nextChar = *inputIt;
201
23
            switch (nextChar)
202
23
            {
203
2
            case '\\':
204
4
            case '"':
205
4
                value.push_back(nextChar);
206
4
                ++inputIt;
207
4
                break;
208
1
            case 'b':
209
1
                value.push_back('\b');
210
1
                ++inputIt;
211
1
                break;
212
1
            case 'f':
213
1
                value.push_back('\f');
214
1
                ++inputIt;
215
1
                break;
216
2
            case 'n':
217
2
                value.push_back('\n');
218
2
                ++inputIt;
219
2
                break;
220
1
            case 'r':
221
1
                value.push_back('\r');
222
1
                ++inputIt;
223
1
                break;
224
2
            case 't':
225
2
                value.push_back('\t');
226
2
                ++inputIt;
227
2
                break;
228
11
            case 'u': // unicode escape
229
11
                {
230
11
                    ++inputIt;
231
11
                    if (!decodeUnicodeEscape(input, inputIt, value))
232
10
                    {
233
                        // unsupported unicode escape, not decoded
234
10
                        return DecoderResult(static_cast<size_t>(inputIt - input.begin()), get_allocator());
235
10
                    }
236
1
                    break;
237
11
                }
238
1
            default:
239
1
                ++inputIt;
240
                // unknown escape, not decoded
241
1
                return DecoderResult(static_cast<size_t>(inputIt - input.begin()), get_allocator());
242
23
            }
243
23
        }
244
83.0k
        else if (*inputIt == '"')
245
16.3k
        {
246
16.3k
            ++inputIt;
247
            // successfully decoded
248
16.3k
            return DecoderResult(
249
16.3k
                    static_cast<size_t>(inputIt - input.begin()), std::move(value), get_allocator());
250
16.3k
        }
251
66.7k
        else
252
66.7k
        {
253
66.7k
            value.push_back(*inputIt++);
254
66.7k
        }
255
83.1k
    }
256
257
    // unterminated string, not decoded
258
2
    return DecoderResult(input.size(), get_allocator());
259
16.3k
}
260
261
template <typename ALLOC>
262
bool BasicJsonDecoder<ALLOC>::decodeUnicodeEscape(
263
        StringView input, StringView::const_iterator& inputIt, string<ALLOC>& value)
264
11
{
265
    // TODO[Mi-L@]: Simplified just to decode what zserio encodes, for complex solution we could use
266
    //              std::wstring_convert but it's deprecated in C++17.
267
11
    if (inputIt == input.end() || 
*inputIt++ != '0'10
)
268
2
    {
269
2
        return false;
270
2
    }
271
9
    if (inputIt == input.end() || 
*inputIt++ != '0'8
)
272
2
    {
273
2
        return false;
274
2
    }
275
276
7
    if (inputIt == input.end())
277
1
    {
278
1
        return false;
279
1
    }
280
6
    const char char1 = decodeHex(*inputIt++);
281
6
    if (char1 == -1)
282
2
    {
283
2
        return false;
284
2
    }
285
286
4
    if (inputIt == input.end())
287
1
    {
288
1
        return false;
289
1
    }
290
3
    const char char2 = decodeHex(*inputIt++);
291
3
    if (char2 == -1)
292
2
    {
293
2
        return false;
294
2
    }
295
296
1
    value.push_back(static_cast<char>((static_cast<uint32_t>(char1) << 4U) | static_cast<uint32_t>(char2)));
297
1
    return true;
298
3
}
299
300
template <typename ALLOC>
301
char BasicJsonDecoder<ALLOC>::decodeHex(char character)
302
9
{
303
9
    if (character >= '0' && 
character <= '9'8
)
304
3
    {
305
3
        return static_cast<char>(character - '0');
306
3
    }
307
6
    else if (character >= 'a' && 
character <= 'f'2
)
308
1
    {
309
1
        return static_cast<char>(character - 'a' + 10);
310
1
    }
311
5
    else if (character >= 'A' && 
character <= 'F'4
)
312
1
    {
313
1
        return static_cast<char>(character - 'A' + 10);
314
1
    }
315
316
4
    return -1;
317
9
}
318
319
template <typename ALLOC>
320
size_t BasicJsonDecoder<ALLOC>::checkNumber(StringView input, bool& isDouble, bool& isSigned)
321
8.15k
{
322
8.15k
    StringView::const_iterator inputIt = input.begin();
323
8.15k
    bool acceptExpSign = false;
324
8.15k
    bool isScientificDouble = false;
325
8.15k
    isDouble = false;
326
327
8.15k
    if (*inputIt == '-') // we know that at the beginning is at least one character
328
19
    {
329
19
        ++inputIt;
330
19
        isSigned = true;
331
19
    }
332
8.13k
    else
333
8.13k
    {
334
8.13k
        isSigned = false;
335
8.13k
    }
336
337
56.6k
    while (inputIt != input.end())
338
56.6k
    {
339
56.6k
        if (acceptExpSign)
340
4.01k
        {
341
4.01k
            acceptExpSign = false;
342
4.01k
            if (*inputIt == '+' || 
*inputIt == '-'4.01k
)
343
16
            {
344
16
                ++inputIt;
345
16
                continue;
346
16
            }
347
4.01k
        }
348
349
56.5k
        if (*inputIt >= '0' && 
*inputIt <= '9'48.4k
)
350
44.4k
        {
351
44.4k
            ++inputIt;
352
44.4k
            continue;
353
44.4k
        }
354
355
12.1k
        if ((*inputIt == 'e' || 
*inputIt == 'E'8.15k
) &&
!isScientificDouble4.02k
)
356
4.02k
        {
357
4.02k
            isDouble = true;
358
4.02k
            isScientificDouble = true;
359
4.02k
            acceptExpSign = true;
360
4.02k
            ++inputIt;
361
4.02k
            continue;
362
4.02k
        }
363
364
8.13k
        if (*inputIt == '.' && 
!isDouble14
)
365
13
        {
366
13
            isDouble = true;
367
13
            ++inputIt;
368
13
            continue;
369
13
        }
370
371
8.12k
        break; // end of a number
372
8.13k
    }
373
374
8.15k
    const size_t numberLen = static_cast<size_t>(inputIt - input.begin());
375
8.15k
    if (isSigned && 
numberLen == 119
)
376
3
    {
377
3
        return 0; // single minus is not a number
378
3
    }
379
380
8.15k
    return numberLen;
381
8.15k
}
382
383
template <typename ALLOC>
384
typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeNumber(StringView input)
385
8.15k
{
386
8.15k
    bool isDouble = false;
387
8.15k
    bool isSigned = false;
388
8.15k
    const size_t numChars = checkNumber(input, isDouble, isSigned);
389
8.15k
    if (numChars == 0)
390
7
    {
391
7
        return DecoderResult(1, get_allocator());
392
7
    }
393
394
    // for decodeSigned and decodeUnsigned, we know that all numChars will be processed because checkNumber
395
    // already checked this
396
8.14k
    if (isDouble)
397
4.03k
    {
398
4.03k
        return decodeDouble(input, numChars);
399
4.03k
    }
400
4.11k
    else if (isSigned)
401
9
    {
402
9
        return decodeSigned(input);
403
9
    }
404
4.11k
    else
405
4.11k
    {
406
4.11k
        return decodeUnsigned(input);
407
4.11k
    }
408
8.14k
}
409
410
template <typename ALLOC>
411
typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeSigned(StringView input)
412
9
{
413
9
    char* pEnd = nullptr;
414
9
    errno = 0; // no library function sets its value back to zero once changed
415
9
    const int64_t value = std::strtoll(input.begin(), &pEnd, 10);
416
417
9
    const bool overflow = (errno == ERANGE);
418
419
9
    return DecoderResult(static_cast<size_t>(pEnd - input.begin()), value, overflow, get_allocator());
420
9
}
421
422
template <typename ALLOC>
423
typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeUnsigned(StringView input)
424
4.11k
{
425
4.11k
    char* pEnd = nullptr;
426
4.11k
    errno = 0; // no library function sets its value back to zero once changed
427
4.11k
    const uint64_t value = std::strtoull(input.begin(), &pEnd, 10);
428
429
4.11k
    const bool overflow = (errno == ERANGE);
430
431
4.11k
    return DecoderResult(static_cast<size_t>(pEnd - input.begin()), value, overflow, get_allocator());
432
4.11k
}
433
434
template <typename ALLOC>
435
typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeDouble(
436
        StringView input, size_t numChars)
437
4.03k
{
438
4.03k
    char* pEnd = nullptr;
439
4.03k
    const double value = std::strtod(input.begin(), &pEnd);
440
4.03k
    if (static_cast<size_t>(pEnd - input.begin()) != numChars)
441
8
    {
442
8
        return DecoderResult(numChars, get_allocator());
443
8
    }
444
445
4.02k
    return DecoderResult(numChars, value, get_allocator());
446
4.03k
}
447
448
} // namespace zserio
449
450
#endif // ZSERIO_JSON_DECODER_H_INC