Bitcoin ABC 0.33.0
P2P Digital Currency
strencodings.cpp
Go to the documentation of this file.
1// Copyright (c) 2009-2010 Satoshi Nakamoto
2// Copyright (c) 2009-2016 The Bitcoin Core developers
3// Distributed under the MIT software license, see the accompanying
4// file COPYING or http://www.opensource.org/licenses/mit-license.php.
5
6#include <util/strencodings.h>
7#include <util/string.h>
8
9#include <tinyformat.h>
10
11#include <crypto/hex_base.h>
12#include <span.h>
13
14#include <array>
15#include <cstdlib>
16#include <cstring>
17#include <optional>
18
/** ASCII letters (both cases) followed by the decimal digits. */
static const std::string CHARS_ALPHA_NUM =
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";

/**
 * Allowed-character sets used by SanitizeString(). The array is indexed by
 * the `rule` argument; the comment on each entry names the rule it serves.
 */
static const std::string SAFE_CHARS[] = {
    // SAFE_CHARS_DEFAULT
    CHARS_ALPHA_NUM + " .,;-_/:?@()",
    // SAFE_CHARS_UA_COMMENT
    CHARS_ALPHA_NUM + " .,;-_?@",
    // SAFE_CHARS_FILENAME
    CHARS_ALPHA_NUM + ".-_",
    // SAFE_CHARS_URI
    CHARS_ALPHA_NUM + "!*'();:@&=+$,/?#[]-_.~%",
};

/**
 * Remove unsafe chars.
 *
 * @param str   Text to filter.
 * @param rule  Index into SAFE_CHARS selecting the allowed-character set.
 *              It is used as an array index without any range check.
 * @return A copy of `str` keeping, in order, only the characters that occur
 *         in the selected set.
 */
std::string SanitizeString(std::string_view str, int rule) {
    const std::string &allowed = SAFE_CHARS[rule];
    std::string kept;
    for (const char ch : str) {
        if (allowed.find(ch) == std::string::npos) {
            // Not in the allowed set: drop it.
            continue;
        }
        kept += ch;
    }
    return kept;
}
42
43bool IsHex(std::string_view str) {
44 for (char c : str) {
45 if (HexDigit(c) < 0) {
46 return false;
47 }
48 }
49 return (str.size() > 0) && (str.size() % 2 == 0);
50}
51
52bool IsHexNumber(std::string_view str) {
53 if (str.substr(0, 2) == "0x") {
54 str.remove_prefix(2);
55 }
56 for (char c : str) {
57 if (HexDigit(c) < 0) {
58 return false;
59 }
60 }
61 // Return false for empty string or "0x".
62 return str.size() > 0;
63}
64
65template <typename Byte>
66std::optional<std::vector<Byte>> TryParseHex(std::string_view str) {
67 std::vector<Byte> vch;
68 auto it = str.begin();
69 while (it != str.end()) {
70 if (IsSpace(*it)) {
71 ++it;
72 continue;
73 }
74 auto c1 = HexDigit(*(it++));
75 if (it == str.end()) {
76 return std::nullopt;
77 }
78 auto c2 = HexDigit(*(it++));
79 if (c1 < 0 || c2 < 0) {
80 return std::nullopt;
81 }
82 vch.push_back(Byte(c1 << 4) | Byte(c2));
83 }
84 return vch;
85}
86template std::vector<std::byte> ParseHex(std::string_view);
87template std::vector<uint8_t> ParseHex(std::string_view);
88
89bool SplitHostPort(std::string_view in, uint16_t &portOut,
90 std::string &hostOut) {
91 bool valid = false;
92 size_t colon = in.find_last_of(':');
93 // if a : is found, and it either follows a [...], or no other : is in the
94 // string, treat it as port separator
95 bool fHaveColon = colon != in.npos;
96 // if there is a colon, and in[0]=='[', colon is not 0, so in[colon-1] is
97 // safe
98 bool fBracketed = fHaveColon && (in[0] == '[' && in[colon - 1] == ']');
99 bool fMultiColon =
100 fHaveColon && (in.find_last_of(':', colon - 1) != in.npos);
101 if (fHaveColon && (colon == 0 || fBracketed || !fMultiColon)) {
102 uint16_t n;
103 if (ParseUInt16(in.substr(colon + 1), &n)) {
104 in = in.substr(0, colon);
105 portOut = n;
106 valid = (portOut != 0);
107 }
108 } else {
109 valid = true;
110 }
111 if (in.size() > 0 && in[0] == '[' && in[in.size() - 1] == ']') {
112 hostOut = in.substr(1, in.size() - 2);
113 } else {
114 hostOut = in;
115 }
116
117 return valid;
118}
119
121 static const char *pbase64 =
122 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
123
124 std::string str;
125 str.reserve(((input.size() + 2) / 3) * 4);
126 ConvertBits<8, 6, true>([&](int v) { str += pbase64[v]; }, input.begin(),
127 input.end());
128 while (str.size() % 4) {
129 str += '=';
130 }
131 return str;
132}
133
134std::optional<std::vector<uint8_t>> DecodeBase64(std::string_view str) {
135 static const int8_t decode64_table[256] = {
136 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
137 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
138 -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57,
139 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6,
140 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
141 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
142 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1,
143 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
144 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
145 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
146 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
147 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
148 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
149 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
150 -1, -1, -1, -1};
151
152 if (str.size() % 4 != 0) {
153 return {};
154 }
155 /* One or two = characters at the end are permitted. */
156 if (str.size() >= 1 && str.back() == '=') {
157 str.remove_suffix(1);
158 }
159 if (str.size() >= 1 && str.back() == '=') {
160 str.remove_suffix(1);
161 }
162
163 std::vector<uint8_t> ret;
164 ret.reserve((str.size() * 3) / 4);
165 bool valid = ConvertBits<6, 8, false>(
166 [&](uint8_t c) { ret.push_back(c); }, str.begin(), str.end(),
167 [](char c) { return decode64_table[uint8_t(c)]; });
168 if (!valid) {
169 return {};
170 }
171
172 return ret;
173}
174
175std::string EncodeBase32(Span<const uint8_t> input, bool pad) {
176 static const char *pbase32 = "abcdefghijklmnopqrstuvwxyz234567";
177
178 std::string str;
179 str.reserve(((input.size() + 4) / 5) * 8);
180 ConvertBits<8, 5, true>([&](int v) { str += pbase32[v]; }, input.begin(),
181 input.end());
182 if (pad) {
183 while (str.size() % 8) {
184 str += '=';
185 }
186 }
187 return str;
188}
189
190std::string EncodeBase32(std::string_view str, bool pad) {
191 return EncodeBase32(MakeUCharSpan(str), pad);
192}
193
194std::optional<std::vector<uint8_t>> DecodeBase32(std::string_view str) {
195 static const int8_t decode32_table[256] = {
196 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
197 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
198 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29,
199 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6,
200 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
201 25, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
202 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1,
203 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
204 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
205 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
206 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
207 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
208 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
209 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
210 -1, -1, -1, -1};
211
212 if (str.size() % 8 != 0) {
213 return {};
214 }
215 /* 1, 3, 4, or 6 padding '=' suffix characters are permitted. */
216 if (str.size() >= 1 && str.back() == '=') {
217 str.remove_suffix(1);
218 }
219 if (str.size() >= 2 && str.substr(str.size() - 2) == "==") {
220 str.remove_suffix(2);
221 }
222 if (str.size() >= 1 && str.back() == '=') {
223 str.remove_suffix(1);
224 }
225 if (str.size() >= 2 && str.substr(str.size() - 2) == "==") {
226 str.remove_suffix(2);
227 }
228
229 std::vector<uint8_t> ret;
230 ret.reserve((str.size() * 5) / 8);
231 bool valid = ConvertBits<5, 8, false>(
232 [&](uint8_t c) { ret.push_back(c); }, str.begin(), str.end(),
233 [](char c) { return decode32_table[uint8_t(c)]; });
234
235 if (!valid) {
236 return {};
237 }
238
239 return ret;
240}
241
242namespace {
243template <typename T> bool ParseIntegral(std::string_view str, T *out) {
244 static_assert(std::is_integral<T>::value);
245 // Replicate the exact behavior of strtol/strtoll/strtoul/strtoull when
246 // handling leading +/- for backwards compatibility.
247 if (str.length() >= 2 && str[0] == '+' && str[1] == '-') {
248 return false;
249 }
250 const std::optional<T> opt_int =
251 ToIntegral<T>((!str.empty() && str[0] == '+') ? str.substr(1) : str);
252 if (!opt_int) {
253 return false;
254 }
255 if (out != nullptr) {
256 *out = *opt_int;
257 }
258 return true;
259}
260}; // namespace
261
262bool ParseInt32(std::string_view str, int32_t *out) {
263 return ParseIntegral<int32_t>(str, out);
264}
265
266bool ParseInt64(std::string_view str, int64_t *out) {
267 return ParseIntegral<int64_t>(str, out);
268}
269
270bool ParseUInt8(std::string_view str, uint8_t *out) {
271 return ParseIntegral<uint8_t>(str, out);
272}
273
274bool ParseUInt16(std::string_view str, uint16_t *out) {
275 return ParseIntegral<uint16_t>(str, out);
276}
277
278bool ParseUInt32(std::string_view str, uint32_t *out) {
279 return ParseIntegral<uint32_t>(str, out);
280}
281
282bool ParseUInt64(std::string_view str, uint64_t *out) {
283 return ParseIntegral<uint64_t>(str, out);
284}
285
/**
 * Format a paragraph of text to a fixed width, adding spaces for indentation
 * to any added line.
 *
 * Existing newlines in `in` are kept. A line that does not fit is broken at
 * the last space or newline within the remaining width; if there is none, the
 * whole word is emitted and the break happens at the next space or newline.
 *
 * @param in      Text to wrap.
 * @param width   Target line width.
 * @param indent  Number of spaces emitted after each inserted line break.
 * @return The wrapped text.
 */
std::string FormatParagraph(std::string_view in, size_t width, size_t indent) {
    std::string formatted;
    size_t pos = 0;
    // Width already consumed on the current output line by inserted
    // indentation.
    size_t used = 0;
    while (pos < in.size()) {
        size_t eol = in.find_first_of('\n', pos);
        if (eol == std::string_view::npos) {
            eol = in.size();
        }
        const size_t line_len = eol - pos;
        const size_t room = width - used;

        if (line_len <= room) {
            // The rest of this input line fits; copy it with its newline.
            formatted += in.substr(pos, line_len + 1);
            pos = eol + 1;
            used = 0;
            continue;
        }

        size_t brk = in.find_last_of(" \n", pos + room);
        if (brk == std::string_view::npos || brk < pos) {
            // No place to break; just include the entire word and move on
            brk = in.find_first_of("\n ", pos);
            if (brk == std::string_view::npos) {
                // End of the string, just add it and break
                formatted += in.substr(pos);
                break;
            }
        }
        formatted += in.substr(pos, brk - pos);
        formatted += "\n";
        if (in[brk] == '\n') {
            used = 0;
        } else if (indent) {
            formatted.append(indent, ' ');
            used = indent;
        }
        pos = brk + 1;
    }
    return formatted;
}
324
/** Upper bound for mantissa: 10^18 - 1. */
static const int64_t UPPER_BOUND = 1000000000000000000LL - 1LL;

/**
 * Helper function for ParseFixedPoint.
 *
 * Feeds one decimal digit into the mantissa. A '0' is only counted in
 * `mantissa_tzeros`. A non-zero digit first multiplies the mantissa by ten
 * once for every counted zero plus once for itself, then adds the digit and
 * resets the zero counter.
 *
 * @return false if a multiplication would push the mantissa past
 *         UPPER_BOUND, true otherwise. On failure the mantissa keeps
 *         whatever multiplications were already applied.
 */
static inline bool ProcessMantissaDigit(char ch, int64_t &mantissa,
                                        int &mantissa_tzeros) {
    if (ch == '0') {
        ++mantissa_tzeros;
        return true;
    }
    for (int step = 0; step <= mantissa_tzeros; ++step) {
        // overflow
        if (mantissa > (UPPER_BOUND / 10LL)) {
            return false;
        }
        mantissa *= 10;
    }
    mantissa += ch - '0';
    mantissa_tzeros = 0;
    return true;
}
354
355bool ParseFixedPoint(std::string_view val, int decimals, int64_t *amount_out) {
356 int64_t mantissa = 0;
357 int64_t exponent = 0;
358 int mantissa_tzeros = 0;
359 bool mantissa_sign = false;
360 bool exponent_sign = false;
361 int ptr = 0;
362 int end = val.size();
363 int point_ofs = 0;
364
365 if (ptr < end && val[ptr] == '-') {
366 mantissa_sign = true;
367 ++ptr;
368 }
369 if (ptr < end) {
370 if (val[ptr] == '0') {
371 // pass single 0
372 ++ptr;
373 } else if (val[ptr] >= '1' && val[ptr] <= '9') {
374 while (ptr < end && IsDigit(val[ptr])) {
375 if (!ProcessMantissaDigit(val[ptr], mantissa,
376 mantissa_tzeros)) {
377 // overflow
378 return false;
379 }
380 ++ptr;
381 }
382 } else {
383 // missing expected digit
384 return false;
385 }
386 } else {
387 // empty string or loose '-'
388 return false;
389 }
390 if (ptr < end && val[ptr] == '.') {
391 ++ptr;
392 if (ptr < end && IsDigit(val[ptr])) {
393 while (ptr < end && IsDigit(val[ptr])) {
394 if (!ProcessMantissaDigit(val[ptr], mantissa,
395 mantissa_tzeros)) {
396 // overflow
397 return false;
398 }
399 ++ptr;
400 ++point_ofs;
401 }
402 } else {
403 // missing expected digit
404 return false;
405 }
406 }
407 if (ptr < end && (val[ptr] == 'e' || val[ptr] == 'E')) {
408 ++ptr;
409 if (ptr < end && val[ptr] == '+') {
410 ++ptr;
411 } else if (ptr < end && val[ptr] == '-') {
412 exponent_sign = true;
413 ++ptr;
414 }
415 if (ptr < end && IsDigit(val[ptr])) {
416 while (ptr < end && IsDigit(val[ptr])) {
417 if (exponent > (UPPER_BOUND / 10LL)) {
418 // overflow
419 return false;
420 }
421 exponent = exponent * 10 + val[ptr] - '0';
422 ++ptr;
423 }
424 } else {
425 // missing expected digit
426 return false;
427 }
428 }
429 if (ptr != end) {
430 // trailing garbage
431 return false;
432 }
433 // finalize exponent
434 if (exponent_sign) {
435 exponent = -exponent;
436 }
437 exponent = exponent - point_ofs + mantissa_tzeros;
438
439 // finalize mantissa
440 if (mantissa_sign) {
441 mantissa = -mantissa;
442 }
443
444 // convert to one 64-bit fixed-point value
445 exponent += decimals;
446 if (exponent < 0) {
447 // cannot represent values smaller than 10^-decimals
448 return false;
449 }
450 if (exponent >= 18) {
451 // cannot represent values larger than or equal to 10^(18-decimals)
452 return false;
453 }
454
455 for (int i = 0; i < exponent; ++i) {
456 if (mantissa > (UPPER_BOUND / 10LL) ||
457 mantissa < -(UPPER_BOUND / 10LL)) {
458 // overflow
459 return false;
460 }
461 mantissa *= 10;
462 }
463 if (mantissa > UPPER_BOUND || mantissa < -UPPER_BOUND) {
464 // overflow
465 return false;
466 }
467
468 if (amount_out) {
469 *amount_out = mantissa;
470 }
471
472 return true;
473}
474
475std::string ToLower(std::string_view str) {
476 std::string r;
477 for (auto ch : str) {
478 r += ToLower(ch);
479 }
480 return r;
481}
482
483std::string ToUpper(std::string_view str) {
484 std::string r;
485 for (auto ch : str) {
486 r += ToUpper(ch);
487 }
488 return r;
489}
490
491std::string Capitalize(std::string str) {
492 if (str.empty()) {
493 return str;
494 }
495 str[0] = ToUpper(str.front());
496 return str;
497}
constexpr std::size_t size() const noexcept
Definition: span.h:210
constexpr C * begin() const noexcept
Definition: span.h:200
constexpr C * end() const noexcept
Definition: span.h:201
signed char HexDigit(char c)
Definition: hex_base.cpp:64
constexpr auto MakeUCharSpan(V &&v) -> decltype(UCharSpanCast(Span{std::forward< V >(v)}))
Like the Span constructor, but for (const) uint8_t member types only.
Definition: span.h:350
constexpr bool IsDigit(char c)
Tests if the given character is a decimal digit.
Definition: strencodings.h:133
constexpr bool IsSpace(char c) noexcept
Tests if the given character is a whitespace character.
Definition: strencodings.h:149
std::string Capitalize(std::string str)
Capitalizes the first character of the given string.
static const std::string SAFE_CHARS[]
bool IsHexNumber(std::string_view str)
Return true if the string is a hex number, optionally prefixed with "0x".
bool ParseInt32(std::string_view str, int32_t *out)
Convert string to signed 32-bit integer with strict parse error feedback.
std::string EncodeBase64(Span< const uint8_t > input)
bool ParseUInt16(std::string_view str, uint16_t *out)
Convert decimal string to unsigned 16-bit integer with strict parse error feedback.
std::string ToUpper(std::string_view str)
Returns the uppercase equivalent of the given string.
template std::vector< std::byte > ParseHex(std::string_view)
std::string EncodeBase32(Span< const uint8_t > input, bool pad)
Base32 encode.
bool ParseFixedPoint(std::string_view val, int decimals, int64_t *amount_out)
Parse number as fixed point according to JSON number syntax.
bool ParseInt64(std::string_view str, int64_t *out)
Convert string to signed 64-bit integer with strict parse error feedback.
bool ParseUInt8(std::string_view str, uint8_t *out)
Convert decimal string to unsigned 8-bit integer with strict parse error feedback.
bool ParseUInt64(std::string_view str, uint64_t *out)
Convert decimal string to unsigned 64-bit integer with strict parse error feedback.
static bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantissa_tzeros)
Helper function for ParseFixedPoint.
bool IsHex(std::string_view str)
Returns true if each character in str is a hex character, and has an even number of hex digits.
std::string FormatParagraph(std::string_view in, size_t width, size_t indent)
Format a paragraph of text to a fixed width, adding spaces for indentation to any added line.
static const int64_t UPPER_BOUND
Upper bound for mantissa.
std::optional< std::vector< uint8_t > > DecodeBase64(std::string_view str)
bool SplitHostPort(std::string_view in, uint16_t &portOut, std::string &hostOut)
Splits socket address string into host string and port value.
bool ParseUInt32(std::string_view str, uint32_t *out)
Convert decimal string to unsigned 32-bit integer with strict parse error feedback.
std::optional< std::vector< Byte > > TryParseHex(std::string_view str)
Parse the hex string into bytes (uint8_t or std::byte).
std::string ToLower(std::string_view str)
Returns the lowercase equivalent of the given string.
std::optional< std::vector< uint8_t > > DecodeBase32(std::string_view str)
std::string SanitizeString(std::string_view str, int rule)
Remove unsafe chars.
static const std::string CHARS_ALPHA_NUM