GCC Code Coverage Report | |||||||||||||||||||||
|
|||||||||||||||||||||
Line | Branch | Exec | Source |
1 |
#ifndef ZSERIO_JSON_TOKENIZER_H_INC |
||
2 |
#define ZSERIO_JSON_TOKENIZER_H_INC |
||
3 |
|||
4 |
#include <memory> |
||
5 |
#include <istream> |
||
6 |
#include <array> |
||
7 |
|||
8 |
#include "zserio/AnyHolder.h" |
||
9 |
#include "zserio/CppRuntimeException.h" |
||
10 |
#include "zserio/JsonDecoder.h" |
||
11 |
#include "zserio/Types.h" |
||
12 |
|||
13 |
namespace zserio |
||
14 |
{ |
||
15 |
|||
16 |
/**
 * Tokens used by Json Tokenizer.
 *
 * UNKNOWN is pinned to -1, the remaining enumerators are numbered consecutively starting at 0.
 */
enum class JsonToken : int8_t
{
    UNKNOWN = -1, /**< No valid token. */
    BEGIN_OF_FILE, /**< Initial state of a tokenizer constructed on a non-empty input. */
    END_OF_FILE, /**< No more content is available. */
    BEGIN_OBJECT, /**< Character '{'. */
    END_OBJECT, /**< Character '}'. */
    BEGIN_ARRAY, /**< Character '['. */
    END_ARRAY, /**< Character ']'. */
    KEY_SEPARATOR, /**< Character ':'. */
    ITEM_SEPARATOR, /**< Character ','. */
    VALUE /**< Any value recognized by the JSON decoder. */
};
||
32 |
|||
33 |
|||
34 |
/** |
||
35 |
* Exception used to distinguish exceptions from the JsonParser. |
||
36 |
*/ |
||
37 |
✗✗✓ | 47 |
class JsonParserException : public CppRuntimeException |
38 |
{ |
||
39 |
public: |
||
40 |
9 |
using CppRuntimeException::CppRuntimeException; |
|
41 |
}; |
||
42 |
|||
43 |
/** |
||
44 |
* Allows to append JsonToken to CppRuntimeException. |
||
45 |
* |
||
46 |
* \param exception Exception to modify. |
||
47 |
* \param token JSON Token to append. |
||
48 |
* |
||
49 |
* \return Reference to the exception to allow operator chaining. |
||
50 |
*/ |
||
51 |
CppRuntimeException& operator<<(CppRuntimeException& exception, JsonToken token); |
||
52 |
|||
53 |
/** |
||
54 |
* Json Tokenizer used by Json Parser. |
||
55 |
*/ |
||
56 |
template <typename ALLOC = std::allocator<uint8_t>> |
||
57 |
91 |
class BasicJsonTokenizer |
|
58 |
{ |
||
59 |
public: |
||
60 |
/** |
||
61 |
* Constructor. |
||
62 |
* |
||
63 |
* \param in Input stream to tokenize. |
||
64 |
* \param allocator Allocator to use. |
||
65 |
*/ |
||
66 |
91 |
BasicJsonTokenizer(std::istream& in, const ALLOC& allocator) : |
|
67 |
m_buffer(), m_in(in), m_decoder(allocator), m_decoderResult(0, allocator), |
||
68 |
✗✓✗✗ ✗✗✓✗ ✓✗ |
91 |
m_content(readContent(allocator)), m_value(allocator) |
69 |
{ |
||
70 |
✗✓✓✓ |
91 |
m_token = m_content.empty() ? JsonToken::END_OF_FILE : JsonToken::BEGIN_OF_FILE; |
71 |
91 |
} |
|
72 |
|||
73 |
/** |
||
74 |
* Move to the next token. |
||
75 |
* |
||
76 |
* \return Next token. |
||
77 |
* \throw JsonParserException In case that tokenizing fails - i.e. unknown token is reached. |
||
78 |
*/ |
||
79 |
JsonToken next(); |
||
80 |
|||
81 |
/** |
||
82 |
* Gets current token. |
||
83 |
* |
||
84 |
* \return Current token. |
||
85 |
*/ |
||
86 |
1741 |
JsonToken getToken() const { return m_token; } |
|
87 |
|||
88 |
/** |
||
89 |
* Gets current value. |
||
90 |
* |
||
91 |
* Any holder can be either unset - i.e. beginning or end of the input, |
||
92 |
* or it can hold one of the types defined in IObserver::visitValue. |
||
93 |
* |
||
94 |
* \return Current value as an AnyHolder. |
||
95 |
*/ |
||
96 |
48425 |
const AnyHolder<ALLOC>& getValue() const { return m_value; } |
|
97 |
|||
98 |
/** |
||
99 |
* Gets line number of the current token. |
||
100 |
* |
||
101 |
* \return Line number. |
||
102 |
*/ |
||
103 |
48056 |
size_t getLine() const { return m_lineNumber; } |
|
104 |
|||
105 |
/** |
||
106 |
* Gets column number of the current token. |
||
107 |
* |
||
108 |
* \return Column number. |
||
109 |
*/ |
||
110 |
48056 |
size_t getColumn() const { return m_tokenColumnNumber; } |
|
111 |
|||
112 |
private: |
||
113 |
string<ALLOC> readContent(const ALLOC& allocator); |
||
114 |
|||
115 |
bool decodeNext(); |
||
116 |
bool skipWhitespaces(); |
||
117 |
|||
118 |
template <typename T> |
||
119 |
void setToken(JsonToken token, T&& value); |
||
120 |
void setToken(JsonToken token, AnyHolder<ALLOC>&& value); |
||
121 |
void setToken(JsonToken token); |
||
122 |
void setPosition(size_t newPos, size_t newColumnNumber); |
||
123 |
void setTokenValue(); |
||
124 |
|||
125 |
static constexpr size_t BUFFER_SIZE = 64 * 1024; |
||
126 |
std::array<char, BUFFER_SIZE> m_buffer; |
||
127 |
|||
128 |
std::istream& m_in; |
||
129 |
BasicJsonDecoder<ALLOC> m_decoder; |
||
130 |
typename BasicJsonDecoder<ALLOC>::DecoderResult m_decoderResult; |
||
131 |
string<ALLOC> m_content; |
||
132 |
size_t m_lineNumber = 1; |
||
133 |
size_t m_columnNumber = 1; |
||
134 |
size_t m_tokenColumnNumber = 1; |
||
135 |
size_t m_pos = 0; |
||
136 |
JsonToken m_token; |
||
137 |
AnyHolder<ALLOC> m_value; |
||
138 |
}; |
||
139 |
|||
140 |
template <typename ALLOC> |
||
141 |
49103 |
JsonToken BasicJsonTokenizer<ALLOC>::next() |
|
142 |
{ |
||
143 |
✓✓✓✓ |
49106 |
while (!decodeNext()) |
144 |
{ |
||
145 |
✓✗✓✗ |
61 |
string<ALLOC> newContent = readContent(m_content.get_allocator()); |
146 |
✓✗✓✓ |
58 |
if (newContent.empty()) |
147 |
{ |
||
148 |
✓✗✓✓ |
55 |
if (m_token == JsonToken::END_OF_FILE) |
149 |
{ |
||
150 |
50 |
m_tokenColumnNumber = m_columnNumber; |
|
151 |
} |
||
152 |
else |
||
153 |
{ |
||
154 |
// stream is finished but last token is not EOF => value must be at the end |
||
155 |
✗✗✓✗ |
5 |
setTokenValue(); |
156 |
} |
||
157 |
|||
158 |
55 |
return m_token; |
|
159 |
} |
||
160 |
|||
161 |
✗✗✗✗ ✗✗✓✗ ✓✗✓✗ |
3 |
m_content = m_content.substr(m_pos) + newContent; |
162 |
✗✓✓✓ |
3 |
m_pos = 0; |
163 |
} |
||
164 |
|||
165 |
49043 |
return m_token; |
|
166 |
} |
||
167 |
|||
168 |
template <typename ALLOC> |
||
169 |
149 |
string<ALLOC> BasicJsonTokenizer<ALLOC>::readContent(const ALLOC& allocator) |
|
170 |
{ |
||
171 |
149 |
const size_t count = static_cast<size_t>(m_in.rdbuf()->sgetn(m_buffer.data(), BUFFER_SIZE)); |
|
172 |
✓✗✓✗ |
149 |
return string<ALLOC>(m_buffer.data(), count, allocator); |
173 |
} |
||
174 |
|||
175 |
template <typename ALLOC> |
||
176 |
49103 |
bool BasicJsonTokenizer<ALLOC>::decodeNext() |
|
177 |
{ |
||
178 |
✓✗✓✓ ✓✗✓✓ |
49103 |
if (!skipWhitespaces()) |
179 |
50 |
return false; |
|
180 |
|||
181 |
49053 |
m_tokenColumnNumber = m_columnNumber; |
|
182 |
|||
183 |
✗✓✗ | 49053 |
const char nextChar = m_content[m_pos]; |
184 |
✓✓✗✗ ✓✗✓✓ ✓✓✓✓ ✓✓ |
49053 |
switch (nextChar) |
185 |
{ |
||
186 |
case '{': |
||
187 |
✓✗✓✗ |
156 |
setToken(JsonToken::BEGIN_OBJECT, nextChar); |
188 |
156 |
setPosition(m_pos + 1, m_columnNumber + 1); |
|
189 |
156 |
break; |
|
190 |
case '}': |
||
191 |
✓✗✓✗ |
88 |
setToken(JsonToken::END_OBJECT, nextChar); |
192 |
88 |
setPosition(m_pos + 1, m_columnNumber + 1); |
|
193 |
88 |
break; |
|
194 |
case '[': |
||
195 |
✗✗✓✗ |
40 |
setToken(JsonToken::BEGIN_ARRAY, nextChar); |
196 |
40 |
setPosition(m_pos + 1, m_columnNumber + 1); |
|
197 |
40 |
break; |
|
198 |
case ']': |
||
199 |
✗✗✓✗ |
34 |
setToken(JsonToken::END_ARRAY, nextChar); |
200 |
34 |
setPosition(m_pos + 1, m_columnNumber + 1); |
|
201 |
34 |
break; |
|
202 |
case ':': |
||
203 |
✓✗✓✗ |
12221 |
setToken(JsonToken::KEY_SEPARATOR, nextChar); |
204 |
12221 |
setPosition(m_pos + 1, m_columnNumber + 1); |
|
205 |
12221 |
break; |
|
206 |
case ',': |
||
207 |
✗✗✓✗ |
12098 |
setToken(JsonToken::ITEM_SEPARATOR, nextChar); |
208 |
12098 |
setPosition(m_pos + 1, m_columnNumber + 1); |
|
209 |
12098 |
break; |
|
210 |
default: |
||
211 |
✓✗✓✗ ✓✗✓✗ ✓✗✓✗ |
24416 |
m_decoderResult = m_decoder.decodeValue(StringView(m_content.data()).substr(m_pos)); |
212 |
✗✓✓✓ |
24416 |
if (m_pos + m_decoderResult.numReadChars >= m_content.size()) |
213 |
8 |
return false; // we are at the end of chunk => read more |
|
214 |
|||
215 |
✓✗✓✓ |
24408 |
setTokenValue(); |
216 |
24406 |
break; |
|
217 |
} |
||
218 |
|||
219 |
49043 |
return true; |
|
220 |
} |
||
221 |
|||
222 |
template <typename ALLOC> |
||
223 |
124615 |
bool BasicJsonTokenizer<ALLOC>::skipWhitespaces() |
|
224 |
{ |
||
225 |
75512 |
while (true) |
|
226 |
{ |
||
227 |
✓✓✓✓ |
124615 |
if (m_pos >= m_content.size()) |
228 |
{ |
||
229 |
49 |
setToken(JsonToken::END_OF_FILE); |
|
230 |
49 |
return false; |
|
231 |
} |
||
232 |
|||
233 |
124566 |
const char nextChar = m_content[m_pos]; |
|
234 |
✓✓✗✓ ✓✓✓✓ |
124566 |
switch (nextChar) |
235 |
{ |
||
236 |
case ' ': |
||
237 |
case '\t': |
||
238 |
63114 |
setPosition(m_pos + 1, m_columnNumber + 1); |
|
239 |
63114 |
break; |
|
240 |
case '\n': |
||
241 |
12396 |
m_lineNumber++; |
|
242 |
12396 |
setPosition(m_pos + 1, 1); |
|
243 |
12396 |
break; |
|
244 |
case '\r': |
||
245 |
✗✗✓✓ |
3 |
if (m_pos + 1 >= m_content.size()) |
246 |
{ |
||
247 |
1 |
setToken(JsonToken::END_OF_FILE); |
|
248 |
1 |
return false; |
|
249 |
} |
||
250 |
2 |
m_lineNumber++; |
|
251 |
✗✗✓✓ |
2 |
setPosition(m_pos + (m_content[m_pos + 1] == '\n' ? 2 : 1), 1); |
252 |
2 |
break; |
|
253 |
default: |
||
254 |
49053 |
return true; |
|
255 |
} |
||
256 |
} |
||
257 |
} |
||
258 |
|||
259 |
template <typename ALLOC> |
||
260 |
template <typename T> |
||
261 |
24637 |
void BasicJsonTokenizer<ALLOC>::setToken(JsonToken token, T&& value) |
|
262 |
{ |
||
263 |
24637 |
m_token = token; |
|
264 |
24637 |
m_value.set(std::forward<T>(value)); |
|
265 |
24637 |
} |
|
266 |
|||
267 |
template <typename ALLOC> |
||
268 |
24411 |
void BasicJsonTokenizer<ALLOC>::setToken(JsonToken token, AnyHolder<ALLOC>&& value) |
|
269 |
{ |
||
270 |
24411 |
m_token = token; |
|
271 |
24411 |
m_value = std::move(value); |
|
272 |
24411 |
} |
|
273 |
|||
274 |
template <typename ALLOC> |
||
275 |
50 |
void BasicJsonTokenizer<ALLOC>::setToken(JsonToken token) |
|
276 |
{ |
||
277 |
50 |
m_token = token; |
|
278 |
50 |
m_value.reset(); |
|
279 |
50 |
} |
|
280 |
|||
281 |
template <typename ALLOC> |
||
282 |
124560 |
void BasicJsonTokenizer<ALLOC>::setPosition(size_t newPos, size_t newColumnNumber) |
|
283 |
{ |
||
284 |
124560 |
m_pos = newPos; |
|
285 |
124560 |
m_columnNumber = newColumnNumber; |
|
286 |
124560 |
} |
|
287 |
|||
288 |
template <typename ALLOC> |
||
289 |
24413 |
void BasicJsonTokenizer<ALLOC>::setTokenValue() |
|
290 |
{ |
||
291 |
✗✓✓✓ |
24413 |
if (!m_decoderResult.value.hasValue()) |
292 |
{ |
||
293 |
✗✗✗✗ ✗✗✗✗ ✗✗✗✗ ✗✗✓✓ ✓✗✓✗ ✓✗✓✗ ✓✗✓✗ |
6 |
throw JsonParserException("JsonTokenizer:") << m_lineNumber << ":" << m_tokenColumnNumber << ": " << |
294 |
(m_decoderResult.integerOverflow |
||
295 |
2 |
? "Value is outside of the 64-bit integer range!" |
|
296 |
4 |
: "Unknown token!"); |
|
297 |
} |
||
298 |
|||
299 |
24411 |
setToken(JsonToken::VALUE, std::move(m_decoderResult.value)); |
|
300 |
24411 |
setPosition(m_pos + m_decoderResult.numReadChars, m_columnNumber + m_decoderResult.numReadChars); |
|
301 |
24411 |
} |
|
302 |
|||
303 |
} // namespace zserio |
||
304 |
|||
305 |
#endif // ZSERIO_JSON_TOKENIZER_H_INC |
Generated by: GCOVR (Version 4.2) |