Bitcoin ABC 0.30.9
P2P Digital Currency
univalue_read.cpp
Go to the documentation of this file.
1// Copyright 2014 BitPay Inc.
2// Distributed under the MIT software license, see the accompanying
3// file COPYING or https://opensource.org/licenses/mit-license.php.
4
5#include <univalue.h>
7
8#include <cstdint>
9#include <cstdio>
10#include <cstring>
11#include <string>
12#include <string_view>
13#include <vector>
14
/*
 * Maximum number of simultaneously open arrays/objects the parser accepts.
 *
 * According to stackexchange, the original JSON test suite wanted to limit
 * depth to 22. Widely-deployed PHP bails at depth 512, so we follow PHP's
 * lead, which should be more than sufficient (further stackexchange comments
 * indicate depth > 32 rarely occurs).
 */
static constexpr size_t MAX_JSON_DEPTH{512};
22
// Locale-independent check for an ASCII decimal digit ('0' through '9').
static bool json_isdigit(int ch) {
    return !(ch < '0' || ch > '9');
}
26
// Convert a run of hexadecimal digits (0-9, a-f, A-F) in [first, last) to an
// unsigned integer. Scanning stops at `last` or at the first character that
// is not a hex digit. The accumulated value (0 if no digit was read) is
// stored in `out`; the return value points to where scanning stopped.
static const char *hatoui(const char *first, const char *last,
                          unsigned int &out) {
    unsigned int value = 0;
    const char *cur = first;

    while (cur != last) {
        const char c = *cur;
        unsigned int nibble;

        if (c >= '0' && c <= '9') {
            nibble = static_cast<unsigned int>(c - '0');
        } else if (c >= 'a' && c <= 'f') {
            nibble = static_cast<unsigned int>(c - 'a' + 10);
        } else if (c >= 'A' && c <= 'F') {
            nibble = static_cast<unsigned int>(c - 'A' + 10);
        } else {
            // not a hex digit: leave `cur` pointing at it
            break;
        }

        // shift in the next 4 bits (same as 16 * value + nibble)
        value = (value << 4) | nibble;
        ++cur;
    }

    out = value;
    return cur;
}
49
50enum jtokentype getJsonToken(std::string &tokenVal, unsigned int &consumed,
51 const char *raw, const char *end) {
52 tokenVal.clear();
53 consumed = 0;
54
55 const char *rawStart = raw;
56
57 // skip whitespace
58 while (raw < end && (json_isspace(*raw))) {
59 raw++;
60 }
61
62 if (raw >= end) {
63 return JTOK_NONE;
64 }
65
66 switch (*raw) {
67 case '{':
68 raw++;
69 consumed = (raw - rawStart);
70 return JTOK_OBJ_OPEN;
71 case '}':
72 raw++;
73 consumed = (raw - rawStart);
74 return JTOK_OBJ_CLOSE;
75 case '[':
76 raw++;
77 consumed = (raw - rawStart);
78 return JTOK_ARR_OPEN;
79 case ']':
80 raw++;
81 consumed = (raw - rawStart);
82 return JTOK_ARR_CLOSE;
83
84 case ':':
85 raw++;
86 consumed = (raw - rawStart);
87 return JTOK_COLON;
88 case ',':
89 raw++;
90 consumed = (raw - rawStart);
91 return JTOK_COMMA;
92
93 case 'n':
94 case 't':
95 case 'f':
96 if (!strncmp(raw, "null", 4)) {
97 raw += 4;
98 consumed = (raw - rawStart);
99 return JTOK_KW_NULL;
100 } else if (!strncmp(raw, "true", 4)) {
101 raw += 4;
102 consumed = (raw - rawStart);
103 return JTOK_KW_TRUE;
104 } else if (!strncmp(raw, "false", 5)) {
105 raw += 5;
106 consumed = (raw - rawStart);
107 return JTOK_KW_FALSE;
108 } else {
109 return JTOK_ERR;
110 }
111
112 case '-':
113 case '0':
114 case '1':
115 case '2':
116 case '3':
117 case '4':
118 case '5':
119 case '6':
120 case '7':
121 case '8':
122 case '9': {
123 // part 1: int
124 std::string numStr;
125
126 const char *first = raw;
127
128 const char *firstDigit = first;
129 if (!json_isdigit(*firstDigit)) {
130 firstDigit++;
131 }
132 if ((*firstDigit == '0') && json_isdigit(firstDigit[1])) {
133 return JTOK_ERR;
134 }
135
136 // copy first char
137 numStr += *raw;
138 raw++;
139
140 if ((*first == '-') && (raw < end) && (!json_isdigit(*raw))) {
141 return JTOK_ERR;
142 }
143
144 // copy digits
145 while (raw < end && json_isdigit(*raw)) {
146 numStr += *raw;
147 raw++;
148 }
149
150 // part 2: frac
151 if (raw < end && *raw == '.') {
152 // copy .
153 numStr += *raw;
154 raw++;
155
156 if (raw >= end || !json_isdigit(*raw)) {
157 return JTOK_ERR;
158 }
159 // copy digits
160 while (raw < end && json_isdigit(*raw)) {
161 numStr += *raw;
162 raw++;
163 }
164 }
165
166 // part 3: exp
167 if (raw < end && (*raw == 'e' || *raw == 'E')) {
168 numStr += *raw; // copy E
169 raw++;
170
171 if (raw < end && (*raw == '-' || *raw == '+')) { // copy +/-
172 numStr += *raw;
173 raw++;
174 }
175
176 if (raw >= end || !json_isdigit(*raw)) {
177 return JTOK_ERR;
178 }
179 // copy digits
180 while (raw < end && json_isdigit(*raw)) {
181 numStr += *raw;
182 raw++;
183 }
184 }
185
186 tokenVal = numStr;
187 consumed = (raw - rawStart);
188 return JTOK_NUMBER;
189 }
190
191 case '"': {
192 // skip "
193 raw++;
194
195 std::string valStr;
196 JSONUTF8StringFilter writer(valStr);
197
198 while (true) {
199 if (raw >= end || (uint8_t)*raw < 0x20) {
200 return JTOK_ERR;
201 } else if (*raw == '\\') {
202 // skip backslash
203 raw++;
204 if (raw >= end) {
205 return JTOK_ERR;
206 }
207
208 switch (*raw) {
209 case '"':
210 writer.push_back('\"');
211 break;
212 case '\\':
213 writer.push_back('\\');
214 break;
215 case '/':
216 writer.push_back('/');
217 break;
218 case 'b':
219 writer.push_back('\b');
220 break;
221 case 'f':
222 writer.push_back('\f');
223 break;
224 case 'n':
225 writer.push_back('\n');
226 break;
227 case 'r':
228 writer.push_back('\r');
229 break;
230 case 't':
231 writer.push_back('\t');
232 break;
233
234 case 'u': {
235 unsigned int codepoint;
236 if (raw + 1 + 4 >= end ||
237 hatoui(raw + 1, raw + 1 + 4, codepoint) !=
238 raw + 1 + 4) {
239 return JTOK_ERR;
240 }
241 writer.push_back_u(codepoint);
242 raw += 4;
243 break;
244 }
245 default:
246 return JTOK_ERR;
247 }
248
249 // skip esc'd char
250 raw++;
251 } else if (*raw == '"') {
252 // skip "
253 raw++;
254 // stop scanning
255 break;
256 } else {
257 writer.push_back(static_cast<uint8_t>(*raw));
258 raw++;
259 }
260 }
261
262 if (!writer.finalize()) {
263 return JTOK_ERR;
264 }
265 tokenVal = valStr;
266 consumed = (raw - rawStart);
267 return JTOK_STRING;
268 }
269
270 default:
271 return JTOK_ERR;
272 }
273}
274
// Bit flags describing what UniValue::read() will accept as the next token.
enum expect_bits : unsigned {
    // inside an object: next token must be a key string or '}'
    EXP_OBJ_NAME = 0x01,
    // after an object key: next token must be ':'
    EXP_COLON = 0x02,
    // inside an array: next token must start a value or be ']'
    EXP_ARR_VALUE = 0x04,
    // after ':': next token must start a value
    EXP_VALUE = 0x08,
    // after a completed value: next token must not start a value
    EXP_NOT_VALUE = 0x10,
};
282
// Shorthand for testing, setting and clearing EXP_* flags. All three operate
// on a variable named `expectMask` that must be in scope at the point of use;
// `bit` is the flag name without its EXP_ prefix.
#define expect(bit) ((expectMask) & (EXP_##bit))
#define setExpect(bit) ((expectMask) |= (EXP_##bit))
#define clearExpect(bit) ((expectMask) &= ~(EXP_##bit))
286
287bool UniValue::read(std::string_view str_in) {
288 clear();
289
290 uint32_t expectMask = 0;
291 std::vector<UniValue *> stack;
292
293 std::string tokenVal;
294 unsigned int consumed;
295 enum jtokentype tok = JTOK_NONE;
296 enum jtokentype last_tok = JTOK_NONE;
297 const char *raw{str_in.data()};
298 const char *end{raw + str_in.size()};
299 do {
300 last_tok = tok;
301
302 tok = getJsonToken(tokenVal, consumed, raw, end);
303 if (tok == JTOK_NONE || tok == JTOK_ERR) {
304 goto return_fail;
305 }
306 raw += consumed;
307
308 bool isValueOpen = jsonTokenIsValue(tok) || tok == JTOK_OBJ_OPEN ||
309 tok == JTOK_ARR_OPEN;
310
311 if (expect(VALUE)) {
312 if (!isValueOpen) {
313 goto return_fail;
314 }
315 clearExpect(VALUE);
316 } else if (expect(ARR_VALUE)) {
317 bool isArrValue = isValueOpen || (tok == JTOK_ARR_CLOSE);
318 if (!isArrValue) {
319 goto return_fail;
320 }
321
322 clearExpect(ARR_VALUE);
323 } else if (expect(OBJ_NAME)) {
324 bool isObjName = (tok == JTOK_OBJ_CLOSE || tok == JTOK_STRING);
325 if (!isObjName) {
326 goto return_fail;
327 }
328 } else if (expect(COLON)) {
329 if (tok != JTOK_COLON) {
330 goto return_fail;
331 }
332 clearExpect(COLON);
333 } else if (!expect(COLON) && (tok == JTOK_COLON)) {
334 goto return_fail;
335 }
336
337 if (expect(NOT_VALUE)) {
338 if (isValueOpen) {
339 goto return_fail;
340 }
341 clearExpect(NOT_VALUE);
342 }
343
344 switch (tok) {
345 case JTOK_OBJ_OPEN:
346 case JTOK_ARR_OPEN: {
347 VType utyp = (tok == JTOK_OBJ_OPEN ? VOBJ : VARR);
348 if (!stack.size()) {
349 if (utyp == VOBJ) {
350 setObject();
351 } else {
352 setArray();
353 }
354 stack.push_back(this);
355 } else {
356 UniValue tmpVal(utyp);
357 UniValue *top = stack.back();
358 top->values.push_back(tmpVal);
359
360 UniValue *newTop = &(top->values.back());
361 stack.push_back(newTop);
362 }
363
364 if (stack.size() > MAX_JSON_DEPTH) {
365 goto return_fail;
366 }
367
368 if (utyp == VOBJ) {
369 setExpect(OBJ_NAME);
370 } else {
371 setExpect(ARR_VALUE);
372 }
373 break;
374 }
375
376 case JTOK_OBJ_CLOSE:
377 case JTOK_ARR_CLOSE: {
378 if (!stack.size() || (last_tok == JTOK_COMMA)) {
379 goto return_fail;
380 }
381
382 VType utyp = (tok == JTOK_OBJ_CLOSE ? VOBJ : VARR);
383 UniValue *top = stack.back();
384 if (utyp != top->getType()) {
385 goto return_fail;
386 }
387
388 stack.pop_back();
389 clearExpect(OBJ_NAME);
390 setExpect(NOT_VALUE);
391 break;
392 }
393
394 case JTOK_COLON: {
395 if (!stack.size()) {
396 goto return_fail;
397 }
398
399 UniValue *top = stack.back();
400 if (top->getType() != VOBJ) {
401 goto return_fail;
402 }
403
404 setExpect(VALUE);
405 break;
406 }
407
408 case JTOK_COMMA: {
409 if (!stack.size() || (last_tok == JTOK_COMMA) ||
410 (last_tok == JTOK_ARR_OPEN)) {
411 goto return_fail;
412 }
413
414 UniValue *top = stack.back();
415 if (top->getType() == VOBJ) {
416 setExpect(OBJ_NAME);
417 } else {
418 setExpect(ARR_VALUE);
419 }
420 break;
421 }
422
423 case JTOK_KW_NULL:
424 case JTOK_KW_TRUE:
425 case JTOK_KW_FALSE: {
426 UniValue tmpVal;
427 switch (tok) {
428 case JTOK_KW_NULL:
429 // do nothing more
430 break;
431 case JTOK_KW_TRUE:
432 tmpVal.setBool(true);
433 break;
434 case JTOK_KW_FALSE:
435 tmpVal.setBool(false);
436 break;
437 default:
438 /* impossible */
439 break;
440 }
441
442 if (!stack.size()) {
443 *this = tmpVal;
444 break;
445 }
446
447 UniValue *top = stack.back();
448 top->values.push_back(tmpVal);
449
450 setExpect(NOT_VALUE);
451 break;
452 }
453
454 case JTOK_NUMBER: {
455 UniValue tmpVal(VNUM, tokenVal);
456 if (!stack.size()) {
457 *this = tmpVal;
458 break;
459 }
460
461 UniValue *top = stack.back();
462 top->values.push_back(tmpVal);
463
464 setExpect(NOT_VALUE);
465 break;
466 }
467
468 case JTOK_STRING: {
469 if (expect(OBJ_NAME)) {
470 UniValue *top = stack.back();
471 top->keys.push_back(tokenVal);
472 clearExpect(OBJ_NAME);
473 setExpect(COLON);
474 } else {
475 UniValue tmpVal(VSTR, tokenVal);
476 if (!stack.size()) {
477 *this = tmpVal;
478 break;
479 }
480 UniValue *top = stack.back();
481 top->values.push_back(tmpVal);
482 }
483
484 setExpect(NOT_VALUE);
485 break;
486 }
487
488 default:
489 goto return_fail;
490 }
491 } while (!stack.empty());
492
493 /* Check that nothing follows the initial construct (parsed above). */
494 tok = getJsonToken(tokenVal, consumed, raw, end);
495 if (tok != JTOK_NONE) {
496 goto return_fail;
497 }
498
499 return true;
500
501return_fail:
502 clear();
503 return false;
504}
Filter that generates and validates UTF-8, as well as collates UTF-16 surrogate pairs as specified in...
void push_back_u(unsigned int codepoint_)
void push_back(uint8_t ch)
void push_back(UniValue val)
Definition: univalue.cpp:96
enum VType getType() const
Definition: univalue.h:88
@ VOBJ
Definition: univalue.h:31
@ VSTR
Definition: univalue.h:33
@ VARR
Definition: univalue.h:32
@ VNUM
Definition: univalue.h:34
void setArray()
Definition: univalue.cpp:86
void clear()
Definition: univalue.cpp:18
void setBool(bool val)
Definition: univalue.cpp:29
std::vector< UniValue > values
Definition: univalue.h:130
std::vector< std::string > keys
Definition: univalue.h:129
bool read(std::string_view raw)
void setObject()
Definition: univalue.cpp:91
static bool jsonTokenIsValue(enum jtokentype jtt)
Definition: univalue.h:191
static bool json_isspace(int ch)
Definition: univalue.h:207
jtokentype
Definition: univalue.h:170
@ JTOK_OBJ_CLOSE
Definition: univalue.h:174
@ JTOK_STRING
Definition: univalue.h:183
@ JTOK_COLON
Definition: univalue.h:177
@ JTOK_OBJ_OPEN
Definition: univalue.h:173
@ JTOK_NUMBER
Definition: univalue.h:182
@ JTOK_KW_NULL
Definition: univalue.h:179
@ JTOK_COMMA
Definition: univalue.h:178
@ JTOK_ARR_CLOSE
Definition: univalue.h:176
@ JTOK_KW_TRUE
Definition: univalue.h:180
@ JTOK_ARR_OPEN
Definition: univalue.h:175
@ JTOK_KW_FALSE
Definition: univalue.h:181
@ JTOK_ERR
Definition: univalue.h:171
@ JTOK_NONE
Definition: univalue.h:172
static bool json_isdigit(int ch)
#define clearExpect(bit)
static constexpr size_t MAX_JSON_DEPTH
enum jtokentype getJsonToken(std::string &tokenVal, unsigned int &consumed, const char *raw, const char *end)
#define expect(bit)
expect_bits
@ EXP_ARR_VALUE
@ EXP_NOT_VALUE
@ EXP_COLON
@ EXP_VALUE
@ EXP_OBJ_NAME
static const char * hatoui(const char *first, const char *last, unsigned int &out)
#define setExpect(bit)