Update gtest-printers.cc

Extract the conditions of the if-statements in function "bool IsValidUTF8()" into separate functions, e.g. "bool Is2ByteChar()".
This commit is contained in:
desosa-9 2020-03-29 16:35:42 +02:00 committed by GitHub
parent 67cc66080d
commit be4b693607
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -375,6 +375,29 @@ bool ContainsUnprintableControlCodes(const char* str, size_t length) {
// Returns true when `t` lies in the range 0x80..0xbf (inclusive), i.e. when it
// can serve as a trail (continuation) byte of a multi-byte UTF-8 sequence.
bool IsUTF8TrailByte(unsigned char t) { return 0x80 <= t && t<= 0xbf; } bool IsUTF8TrailByte(unsigned char t) { return 0x80 <= t && t<= 0xbf; }
// Returns true when `lead` together with the byte at s[i] forms a well-formed
// 2-byte UTF-8 sequence.
//
//   lead   - the lead byte of the candidate sequence.
//   i      - index in `s` of the first (and only) trail byte.
//   length - number of bytes available in `s`.
//   s      - the buffer being validated.
//
// Both ends of the lead-byte range are checked here (0xc2..0xdf), matching the
// 3- and 4-byte helpers. Leads below 0xc2 are trail bytes or non-shortest
// forms, so the helper must not rely on the caller having rejected them first.
bool Is2ByteChar(const unsigned char lead, const size_t i, const size_t length, const unsigned char *s) {
  return 0xc2 <= lead && lead <= 0xdf &&
         // s[i] is read only after it is known to lie inside the buffer.
         (i + 1) <= length && IsUTF8TrailByte(s[i]);
}
// Returns true when `lead` together with the bytes at s[i] and s[i + 1] forms
// a well-formed 3-byte UTF-8 sequence. `i` is the index in `s` of the first
// trail byte and `length` is the number of bytes available in `s`.
bool Is3ByteChar(const unsigned char lead, const size_t i, const size_t length, const unsigned char *s) {
  // A 3-byte sequence starts with a lead byte in 0xe0..0xef.
  if (lead < 0xe0 || lead > 0xef) return false;
  // Both trail bytes must lie inside the buffer before either one is read.
  if ((i + 2) > length) return false;
  if (!IsUTF8TrailByte(s[i]) || !IsUTF8TrailByte(s[i + 1])) return false;
  // 0xe0 followed by a byte below 0xa0 would be a non-shortest form.
  if (lead == 0xe0 && s[i] < 0xa0) return false;
  // 0xed followed by 0xa0 or above would encode a surrogate.
  return !(lead == 0xed && s[i] >= 0xa0);
}
// Returns true when `lead` together with the bytes at s[i], s[i + 1] and
// s[i + 2] forms a well-formed 4-byte UTF-8 sequence. `i` is the index in `s`
// of the first trail byte and `length` is the number of bytes available in `s`.
bool Is4ByteChar(const unsigned char lead, const size_t i, const size_t length, const unsigned char *s) {
  // A 4-byte sequence starts with a lead byte in 0xf0..0xf4.
  if (lead < 0xf0 || lead > 0xf4) return false;
  // All three trail bytes must lie inside the buffer before any is read.
  if ((i + 3) > length) return false;
  for (size_t offset = 0; offset < 3; ++offset) {
    if (!IsUTF8TrailByte(s[i + offset])) return false;
  }
  // 0xf0 followed by a byte below 0x90 would be a non-shortest form.
  if (lead == 0xf0 && s[i] < 0x90) return false;
  // 0xf4 followed by 0x90 or above would encode a value past U+10FFFF.
  return !(lead == 0xf4 && s[i] >= 0x90);
}
bool IsValidUTF8(const char* str, size_t length) { bool IsValidUTF8(const char* str, size_t length) {
const unsigned char *s = reinterpret_cast<const unsigned char *>(str); const unsigned char *s = reinterpret_cast<const unsigned char *>(str);
@ -386,23 +409,12 @@ bool IsValidUTF8(const char* str, size_t length) {
} }
if (lead < 0xc2) { if (lead < 0xc2) {
return false; // trail byte or non-shortest form return false; // trail byte or non-shortest form
} else if (lead <= 0xdf && (i + 1) <= length && IsUTF8TrailByte(s[i])) { } else if (Is2ByteChar(lead, i, length, s)) {
++i; // 2-byte character ++i;
} else if (0xe0 <= lead && lead <= 0xef && (i + 2) <= length && } else if (Is3ByteChar(lead, i, length, s)) {
IsUTF8TrailByte(s[i]) && i += 2;
IsUTF8TrailByte(s[i + 1]) && } else if (Is4ByteChar(lead, i, length, s)) {
// check for non-shortest form and surrogate i += 3;
(lead != 0xe0 || s[i] >= 0xa0) &&
(lead != 0xed || s[i] < 0xa0)) {
i += 2; // 3-byte character
} else if (0xf0 <= lead && lead <= 0xf4 && (i + 3) <= length &&
IsUTF8TrailByte(s[i]) &&
IsUTF8TrailByte(s[i + 1]) &&
IsUTF8TrailByte(s[i + 2]) &&
// check for non-shortest form
(lead != 0xf0 || s[i] >= 0x90) &&
(lead != 0xf4 || s[i] < 0x90)) {
i += 3; // 4-byte character
} else { } else {
return false; return false;
} }