Coverage Report

Created: 2023-12-13 14:58

src/zserio/JsonTokenizer.h
Line
Count
Source (jump to first uncovered line)
1
#ifndef ZSERIO_JSON_TOKENIZER_H_INC
2
#define ZSERIO_JSON_TOKENIZER_H_INC
3
4
#include <memory>
5
#include <istream>
6
#include <array>
7
8
#include "zserio/AnyHolder.h"
9
#include "zserio/CppRuntimeException.h"
10
#include "zserio/JsonDecoder.h"
11
#include "zserio/Types.h"
12
13
namespace zserio
14
{
15
16
/**
17
 * Tokens used by Json Tokenizer.
18
 */
19
enum class JsonToken : int8_t
20
{
21
    UNKNOWN = -1,
22
    BEGIN_OF_FILE,
23
    END_OF_FILE,
24
    BEGIN_OBJECT,
25
    END_OBJECT,
26
    BEGIN_ARRAY,
27
    END_ARRAY,
28
    KEY_SEPARATOR,
29
    ITEM_SEPARATOR,
30
    VALUE
31
};
32
33
34
/**
35
 * Exception used to distinguish exceptions from the JsonParser.
36
 */
37
class JsonParserException : public CppRuntimeException
38
{
39
public:
40
    using CppRuntimeException::CppRuntimeException;
41
};
42
43
/**
44
 * Allows to append JsonToken to CppRuntimeException.
45
 *
46
 * \param exception Exception to modify.
47
 * \param token JSON Token to append.
48
 *
49
 * \return Reference to the exception to allow operator chaining.
50
 */
51
CppRuntimeException& operator<<(CppRuntimeException& exception, JsonToken token);
52
53
/**
54
 * Json Tokenizer used by Json Parser.
55
 */
56
template <typename ALLOC = std::allocator<uint8_t>>
57
class BasicJsonTokenizer
58
{
59
public:
60
    /**
61
     * Constructor.
62
     *
63
     * \param in Input stream to tokenize.
64
     * \param allocator Allocator to use.
65
     */
66
    BasicJsonTokenizer(std::istream& in, const ALLOC& allocator) :
67
            m_buffer(), m_in(in), m_decoder(allocator), m_decoderResult(0, allocator),
68
            m_content(readContent(allocator)), m_value(allocator)
69
91
    {
70
91
        m_token = m_content.empty() ? 
JsonToken::END_OF_FILE1
:
JsonToken::BEGIN_OF_FILE90
;
71
91
    }
72
73
    /**
74
     * Move to the next token.
75
     *
76
     * \return Next token.
77
     * \throw JsonParserException In case that tokenizing fails - i.e. unknown token is reached.
78
     */
79
    JsonToken next();
80
81
    /**
82
     * Gets current token.
83
     *
84
     * \return Current token.
85
     */
86
1.74k
    JsonToken getToken() const { return m_token; }
87
88
    /**
89
     * Gets current value.
90
     *
91
     * Any holder can be either unset - i.e. beginning or end of the input,
92
     * or it can hold one of the types defined in IObserver::visitValue.
93
     *
94
     * \return Current value as an AnyHolder.
95
     */
96
48.4k
    const AnyHolder<ALLOC>& getValue() const { return m_value; }
97
98
    /**
99
     * Gets line number of the current token.
100
     *
101
     * \return Line number.
102
     */
103
48.0k
    size_t getLine() const { return m_lineNumber; }
104
105
    /**
106
     * Gets column number of the current token.
107
     *
108
     * \return Column number.
109
     */
110
48.0k
    size_t getColumn() const { return m_tokenColumnNumber; }
111
112
private:
113
    string<ALLOC> readContent(const ALLOC& allocator);
114
115
    bool decodeNext();
116
    bool skipWhitespaces();
117
118
    template <typename T>
119
    void setToken(JsonToken token, T&& value);
120
    void setToken(JsonToken token, AnyHolder<ALLOC>&& value);
121
    void setToken(JsonToken token);
122
    void setPosition(size_t newPos, size_t newColumnNumber);
123
    void setTokenValue();
124
125
    static constexpr size_t BUFFER_SIZE = 64 * 1024;
126
    std::array<char, BUFFER_SIZE> m_buffer;
127
128
    std::istream& m_in;
129
    BasicJsonDecoder<ALLOC> m_decoder;
130
    typename BasicJsonDecoder<ALLOC>::DecoderResult m_decoderResult;
131
    string<ALLOC> m_content;
132
    size_t m_lineNumber = 1;
133
    size_t m_columnNumber = 1;
134
    size_t m_tokenColumnNumber = 1;
135
    size_t m_pos = 0;
136
    JsonToken m_token;
137
    AnyHolder<ALLOC> m_value;
138
};
139
140
template <typename ALLOC>
141
JsonToken BasicJsonTokenizer<ALLOC>::next()
142
49.1k
{
143
49.1k
    while (!decodeNext())
144
58
    {
145
58
        string<ALLOC> newContent = readContent(m_content.get_allocator());
146
58
        if (newContent.empty())
147
55
        {
148
55
            if (m_token == JsonToken::END_OF_FILE)
149
50
            {
150
50
                m_tokenColumnNumber = m_columnNumber;
151
50
            }
152
5
            else
153
5
            {
154
                // stream is finished but last token is not EOF => value must be at the end
155
5
                setTokenValue();
156
5
            }
157
158
55
            return m_token;
159
55
        }
160
161
3
        m_content = m_content.substr(m_pos) + newContent;
162
3
        m_pos = 0;
163
3
    }
164
165
49.0k
    return m_token;
166
49.1k
}
167
168
template <typename ALLOC>
169
string<ALLOC> BasicJsonTokenizer<ALLOC>::readContent(const ALLOC& allocator)
170
149
{
171
149
    const size_t count = static_cast<size_t>(m_in.rdbuf()->sgetn(m_buffer.data(), BUFFER_SIZE));
172
149
    return string<ALLOC>(m_buffer.data(), count, allocator);
173
149
}
174
175
template <typename ALLOC>
176
bool BasicJsonTokenizer<ALLOC>::decodeNext()
177
49.1k
{
178
49.1k
    if (!skipWhitespaces())
179
50
        return false;
180
181
49.0k
    m_tokenColumnNumber = m_columnNumber;
182
183
49.0k
    const char nextChar = m_content[m_pos];
184
49.0k
    switch (nextChar)
185
49.0k
    {
186
156
    case '{':
187
156
        setToken(JsonToken::BEGIN_OBJECT, nextChar);
188
156
        setPosition(m_pos + 1, m_columnNumber + 1);
189
156
        break;
190
88
    case '}':
191
88
        setToken(JsonToken::END_OBJECT, nextChar);
192
88
        setPosition(m_pos + 1, m_columnNumber + 1);
193
88
        break;
194
40
    case '[':
195
40
        setToken(JsonToken::BEGIN_ARRAY, nextChar);
196
40
        setPosition(m_pos + 1, m_columnNumber + 1);
197
40
        break;
198
34
    case ']':
199
34
        setToken(JsonToken::END_ARRAY, nextChar);
200
34
        setPosition(m_pos + 1, m_columnNumber + 1);
201
34
        break;
202
12.2k
    case ':':
203
12.2k
        setToken(JsonToken::KEY_SEPARATOR, nextChar);
204
12.2k
        setPosition(m_pos + 1, m_columnNumber + 1);
205
12.2k
        break;
206
12.0k
    case ',':
207
12.0k
        setToken(JsonToken::ITEM_SEPARATOR, nextChar);
208
12.0k
        setPosition(m_pos + 1, m_columnNumber + 1);
209
12.0k
        break;
210
24.4k
    default:
211
24.4k
        m_decoderResult = m_decoder.decodeValue(StringView(m_content.data()).substr(m_pos));
212
24.4k
        if (m_pos + m_decoderResult.numReadChars >= m_content.size())
213
8
            return false; // we are at the end of chunk => read more
214
215
24.4k
        setTokenValue();
216
24.4k
        break;
217
49.0k
    }
218
219
49.0k
    return true;
220
49.0k
}
221
222
template <typename ALLOC>
223
bool BasicJsonTokenizer<ALLOC>::skipWhitespaces()
224
49.1k
{
225
124k
    while (true)
226
124k
    {
227
124k
        if (m_pos >= m_content.size())
228
49
        {
229
49
            setToken(JsonToken::END_OF_FILE);
230
49
            return false;
231
49
        }
232
233
124k
        const char nextChar = m_content[m_pos];
234
124k
        switch (nextChar)
235
124k
        {
236
63.1k
        case ' ':
237
63.1k
        case '\t':
238
63.1k
            setPosition(m_pos + 1, m_columnNumber + 1);
239
63.1k
            break;
240
12.3k
        case '\n':
241
12.3k
            m_lineNumber++;
242
12.3k
            setPosition(m_pos + 1, 1);
243
12.3k
            break;
244
3
        case '\r':
245
3
            if (m_pos + 1 >= m_content.size())
246
1
            {
247
1
                setToken(JsonToken::END_OF_FILE);
248
1
                return false;
249
1
            }
250
2
            m_lineNumber++;
251
2
            setPosition(m_pos + (m_content[m_pos + 1] == '\n' ? 
21
:
11
), 1);
252
2
            break;
253
49.0k
        default:
254
49.0k
            return true;
255
124k
        }
256
124k
    }
257
49.1k
}
258
259
template <typename ALLOC>
260
template <typename T>
261
void BasicJsonTokenizer<ALLOC>::setToken(JsonToken token, T&& value)
262
24.6k
{
263
24.6k
    m_token = token;
264
24.6k
    m_value.set(std::forward<T>(value));
265
24.6k
}
266
267
template <typename ALLOC>
268
void BasicJsonTokenizer<ALLOC>::setToken(JsonToken token, AnyHolder<ALLOC>&& value)
269
24.4k
{
270
24.4k
    m_token = token;
271
24.4k
    m_value = std::move(value);
272
24.4k
}
273
274
template <typename ALLOC>
275
void BasicJsonTokenizer<ALLOC>::setToken(JsonToken token)
276
50
{
277
50
    m_token = token;
278
50
    m_value.reset();
279
50
}
280
281
template <typename ALLOC>
282
void BasicJsonTokenizer<ALLOC>::setPosition(size_t newPos, size_t newColumnNumber)
283
124k
{
284
124k
    m_pos = newPos;
285
124k
    m_columnNumber = newColumnNumber;
286
124k
}
287
288
template <typename ALLOC>
289
void BasicJsonTokenizer<ALLOC>::setTokenValue()
290
24.4k
{
291
24.4k
    if (!m_decoderResult.value.hasValue())
292
2
    {
293
2
        throw JsonParserException("JsonTokenizer:") << m_lineNumber << ":" << m_tokenColumnNumber << ": " <<
294
2
                (m_decoderResult.integerOverflow
295
2
                        ? 
"Value is outside of the 64-bit integer range!"1
296
2
                        : 
"Unknown token!"1
);
297
2
    }
298
299
24.4k
    setToken(JsonToken::VALUE, std::move(m_decoderResult.value));
300
24.4k
    setPosition(m_pos + m_decoderResult.numReadChars, m_columnNumber + m_decoderResult.numReadChars);
301
24.4k
}
302
303
} // namespace zserio
304
305
#endif // ZSERIO_JSON_TOKENIZER_H_INC