src/zserio/JsonTokenizer.h
Line | Count | Source (jump to first uncovered line) |
1 | | #ifndef ZSERIO_JSON_TOKENIZER_H_INC |
2 | | #define ZSERIO_JSON_TOKENIZER_H_INC |
3 | | |
4 | | #include <memory> |
5 | | #include <istream> |
6 | | #include <array> |
7 | | |
8 | | #include "zserio/AnyHolder.h" |
9 | | #include "zserio/CppRuntimeException.h" |
10 | | #include "zserio/JsonDecoder.h" |
11 | | #include "zserio/Types.h" |
12 | | |
13 | | namespace zserio |
14 | | { |
15 | | |
/**
 * Kinds of tokens produced by the Json Tokenizer.
 */
enum class JsonToken : int8_t
{
    UNKNOWN = -1, /**< No valid token. */
    BEGIN_OF_FILE = 0, /**< Initial state of a tokenizer created on a non-empty input. */
    END_OF_FILE = 1, /**< The input has been exhausted. */
    BEGIN_OBJECT = 2, /**< The '{' character. */
    END_OBJECT = 3, /**< The '}' character. */
    BEGIN_ARRAY = 4, /**< The '[' character. */
    END_ARRAY = 5, /**< The ']' character. */
    KEY_SEPARATOR = 6, /**< The ':' character. */
    ITEM_SEPARATOR = 7, /**< The ',' character. */
    VALUE = 8 /**< A value decoded by the JSON decoder. */
};
32 | | |
33 | | |
34 | | /** |
35 | | * Exception used to distinguish exceptions from the JsonParser. |
36 | | */ |
37 | | class JsonParserException : public CppRuntimeException |
38 | | { |
39 | | public: |
40 | | using CppRuntimeException::CppRuntimeException; |
41 | | }; |
42 | | |
43 | | /** |
44 | | * Allows to append JsonToken to CppRuntimeException. |
45 | | * |
46 | | * \param exception Exception to modify. |
47 | | * \param token JSON Token to append. |
48 | | * |
49 | | * \return Reference to the exception to allow operator chaining. |
50 | | */ |
51 | | CppRuntimeException& operator<<(CppRuntimeException& exception, JsonToken token); |
52 | | |
53 | | /** |
54 | | * Json Tokenizer used by Json Parser. |
55 | | */ |
56 | | template <typename ALLOC = std::allocator<uint8_t>> |
57 | | class BasicJsonTokenizer |
58 | | { |
59 | | public: |
60 | | /** |
61 | | * Constructor. |
62 | | * |
63 | | * \param in Input stream to tokenize. |
64 | | * \param allocator Allocator to use. |
65 | | */ |
66 | | BasicJsonTokenizer(std::istream& in, const ALLOC& allocator) : |
67 | | m_buffer(), m_in(in), m_decoder(allocator), m_decoderResult(0, allocator), |
68 | | m_content(readContent(allocator)), m_value(allocator) |
69 | 91 | { |
70 | 91 | m_token = m_content.empty() ? JsonToken::END_OF_FILE1 : JsonToken::BEGIN_OF_FILE90 ; |
71 | 91 | } |
72 | | |
73 | | /** |
74 | | * Move to the next token. |
75 | | * |
76 | | * \return Next token. |
77 | | * \throw JsonParserException In case that tokenizing fails - i.e. unknown token is reached. |
78 | | */ |
79 | | JsonToken next(); |
80 | | |
81 | | /** |
82 | | * Gets current token. |
83 | | * |
84 | | * \return Current token. |
85 | | */ |
86 | 1.74k | JsonToken getToken() const { return m_token; } |
87 | | |
88 | | /** |
89 | | * Gets current value. |
90 | | * |
91 | | * Any holder can be either unset - i.e. beginning or end of the input, |
92 | | * or it can hold one of the types defined in IObserver::visitValue. |
93 | | * |
94 | | * \return Current value as an AnyHolder. |
95 | | */ |
96 | 48.4k | const AnyHolder<ALLOC>& getValue() const { return m_value; } |
97 | | |
98 | | /** |
99 | | * Gets line number of the current token. |
100 | | * |
101 | | * \return Line number. |
102 | | */ |
103 | 48.0k | size_t getLine() const { return m_lineNumber; } |
104 | | |
105 | | /** |
106 | | * Gets column number of the current token. |
107 | | * |
108 | | * \return Column number. |
109 | | */ |
110 | 48.0k | size_t getColumn() const { return m_tokenColumnNumber; } |
111 | | |
112 | | private: |
113 | | string<ALLOC> readContent(const ALLOC& allocator); |
114 | | |
115 | | bool decodeNext(); |
116 | | bool skipWhitespaces(); |
117 | | |
118 | | template <typename T> |
119 | | void setToken(JsonToken token, T&& value); |
120 | | void setToken(JsonToken token, AnyHolder<ALLOC>&& value); |
121 | | void setToken(JsonToken token); |
122 | | void setPosition(size_t newPos, size_t newColumnNumber); |
123 | | void setTokenValue(); |
124 | | |
125 | | static constexpr size_t BUFFER_SIZE = 64 * 1024; |
126 | | std::array<char, BUFFER_SIZE> m_buffer; |
127 | | |
128 | | std::istream& m_in; |
129 | | BasicJsonDecoder<ALLOC> m_decoder; |
130 | | typename BasicJsonDecoder<ALLOC>::DecoderResult m_decoderResult; |
131 | | string<ALLOC> m_content; |
132 | | size_t m_lineNumber = 1; |
133 | | size_t m_columnNumber = 1; |
134 | | size_t m_tokenColumnNumber = 1; |
135 | | size_t m_pos = 0; |
136 | | JsonToken m_token; |
137 | | AnyHolder<ALLOC> m_value; |
138 | | }; |
139 | | |
140 | | template <typename ALLOC> |
141 | | JsonToken BasicJsonTokenizer<ALLOC>::next() |
142 | 49.1k | { |
143 | 49.1k | while (!decodeNext()) |
144 | 58 | { |
145 | 58 | string<ALLOC> newContent = readContent(m_content.get_allocator()); |
146 | 58 | if (newContent.empty()) |
147 | 55 | { |
148 | 55 | if (m_token == JsonToken::END_OF_FILE) |
149 | 50 | { |
150 | 50 | m_tokenColumnNumber = m_columnNumber; |
151 | 50 | } |
152 | 5 | else |
153 | 5 | { |
154 | | // stream is finished but last token is not EOF => value must be at the end |
155 | 5 | setTokenValue(); |
156 | 5 | } |
157 | | |
158 | 55 | return m_token; |
159 | 55 | } |
160 | | |
161 | 3 | m_content = m_content.substr(m_pos) + newContent; |
162 | 3 | m_pos = 0; |
163 | 3 | } |
164 | | |
165 | 49.0k | return m_token; |
166 | 49.1k | } |
167 | | |
168 | | template <typename ALLOC> |
169 | | string<ALLOC> BasicJsonTokenizer<ALLOC>::readContent(const ALLOC& allocator) |
170 | 149 | { |
171 | 149 | const size_t count = static_cast<size_t>(m_in.rdbuf()->sgetn(m_buffer.data(), BUFFER_SIZE)); |
172 | 149 | return string<ALLOC>(m_buffer.data(), count, allocator); |
173 | 149 | } |
174 | | |
175 | | template <typename ALLOC> |
176 | | bool BasicJsonTokenizer<ALLOC>::decodeNext() |
177 | 49.1k | { |
178 | 49.1k | if (!skipWhitespaces()) |
179 | 50 | return false; |
180 | | |
181 | 49.0k | m_tokenColumnNumber = m_columnNumber; |
182 | | |
183 | 49.0k | const char nextChar = m_content[m_pos]; |
184 | 49.0k | switch (nextChar) |
185 | 49.0k | { |
186 | 156 | case '{': |
187 | 156 | setToken(JsonToken::BEGIN_OBJECT, nextChar); |
188 | 156 | setPosition(m_pos + 1, m_columnNumber + 1); |
189 | 156 | break; |
190 | 88 | case '}': |
191 | 88 | setToken(JsonToken::END_OBJECT, nextChar); |
192 | 88 | setPosition(m_pos + 1, m_columnNumber + 1); |
193 | 88 | break; |
194 | 40 | case '[': |
195 | 40 | setToken(JsonToken::BEGIN_ARRAY, nextChar); |
196 | 40 | setPosition(m_pos + 1, m_columnNumber + 1); |
197 | 40 | break; |
198 | 34 | case ']': |
199 | 34 | setToken(JsonToken::END_ARRAY, nextChar); |
200 | 34 | setPosition(m_pos + 1, m_columnNumber + 1); |
201 | 34 | break; |
202 | 12.2k | case ':': |
203 | 12.2k | setToken(JsonToken::KEY_SEPARATOR, nextChar); |
204 | 12.2k | setPosition(m_pos + 1, m_columnNumber + 1); |
205 | 12.2k | break; |
206 | 12.0k | case ',': |
207 | 12.0k | setToken(JsonToken::ITEM_SEPARATOR, nextChar); |
208 | 12.0k | setPosition(m_pos + 1, m_columnNumber + 1); |
209 | 12.0k | break; |
210 | 24.4k | default: |
211 | 24.4k | m_decoderResult = m_decoder.decodeValue(StringView(m_content.data()).substr(m_pos)); |
212 | 24.4k | if (m_pos + m_decoderResult.numReadChars >= m_content.size()) |
213 | 8 | return false; // we are at the end of chunk => read more |
214 | | |
215 | 24.4k | setTokenValue(); |
216 | 24.4k | break; |
217 | 49.0k | } |
218 | | |
219 | 49.0k | return true; |
220 | 49.0k | } |
221 | | |
222 | | template <typename ALLOC> |
223 | | bool BasicJsonTokenizer<ALLOC>::skipWhitespaces() |
224 | 49.1k | { |
225 | 124k | while (true) |
226 | 124k | { |
227 | 124k | if (m_pos >= m_content.size()) |
228 | 49 | { |
229 | 49 | setToken(JsonToken::END_OF_FILE); |
230 | 49 | return false; |
231 | 49 | } |
232 | | |
233 | 124k | const char nextChar = m_content[m_pos]; |
234 | 124k | switch (nextChar) |
235 | 124k | { |
236 | 63.1k | case ' ': |
237 | 63.1k | case '\t': |
238 | 63.1k | setPosition(m_pos + 1, m_columnNumber + 1); |
239 | 63.1k | break; |
240 | 12.3k | case '\n': |
241 | 12.3k | m_lineNumber++; |
242 | 12.3k | setPosition(m_pos + 1, 1); |
243 | 12.3k | break; |
244 | 3 | case '\r': |
245 | 3 | if (m_pos + 1 >= m_content.size()) |
246 | 1 | { |
247 | 1 | setToken(JsonToken::END_OF_FILE); |
248 | 1 | return false; |
249 | 1 | } |
250 | 2 | m_lineNumber++; |
251 | 2 | setPosition(m_pos + (m_content[m_pos + 1] == '\n' ? 21 : 11 ), 1); |
252 | 2 | break; |
253 | 49.0k | default: |
254 | 49.0k | return true; |
255 | 124k | } |
256 | 124k | } |
257 | 49.1k | } |
258 | | |
259 | | template <typename ALLOC> |
260 | | template <typename T> |
261 | | void BasicJsonTokenizer<ALLOC>::setToken(JsonToken token, T&& value) |
262 | 24.6k | { |
263 | 24.6k | m_token = token; |
264 | 24.6k | m_value.set(std::forward<T>(value)); |
265 | 24.6k | } |
266 | | |
267 | | template <typename ALLOC> |
268 | | void BasicJsonTokenizer<ALLOC>::setToken(JsonToken token, AnyHolder<ALLOC>&& value) |
269 | 24.4k | { |
270 | 24.4k | m_token = token; |
271 | 24.4k | m_value = std::move(value); |
272 | 24.4k | } |
273 | | |
274 | | template <typename ALLOC> |
275 | | void BasicJsonTokenizer<ALLOC>::setToken(JsonToken token) |
276 | 50 | { |
277 | 50 | m_token = token; |
278 | 50 | m_value.reset(); |
279 | 50 | } |
280 | | |
281 | | template <typename ALLOC> |
282 | | void BasicJsonTokenizer<ALLOC>::setPosition(size_t newPos, size_t newColumnNumber) |
283 | 124k | { |
284 | 124k | m_pos = newPos; |
285 | 124k | m_columnNumber = newColumnNumber; |
286 | 124k | } |
287 | | |
288 | | template <typename ALLOC> |
289 | | void BasicJsonTokenizer<ALLOC>::setTokenValue() |
290 | 24.4k | { |
291 | 24.4k | if (!m_decoderResult.value.hasValue()) |
292 | 2 | { |
293 | 2 | throw JsonParserException("JsonTokenizer:") << m_lineNumber << ":" << m_tokenColumnNumber << ": " << |
294 | 2 | (m_decoderResult.integerOverflow |
295 | 2 | ? "Value is outside of the 64-bit integer range!"1 |
296 | 2 | : "Unknown token!"1 ); |
297 | 2 | } |
298 | | |
299 | 24.4k | setToken(JsonToken::VALUE, std::move(m_decoderResult.value)); |
300 | 24.4k | setPosition(m_pos + m_decoderResult.numReadChars, m_columnNumber + m_decoderResult.numReadChars); |
301 | 24.4k | } |
302 | | |
303 | | } // namespace zserio |
304 | | |
305 | | #endif // ZSERIO_JSON_TOKENIZER_H_INC |