Update gtest-printers.cc
Extract the conditions of the if-statements in the function "bool IsValidUTF8()" into separate functions, e.g. "bool Is2ByteChar()".
This commit is contained in:
parent
67cc66080d
commit
be4b693607
|
@ -375,6 +375,29 @@ bool ContainsUnprintableControlCodes(const char* str, size_t length) {
|
||||||
|
|
||||||
// Returns true iff `t` lies in the range 0x80..0xBF, i.e. it can serve as a
// UTF-8 trail (continuation) byte.
bool IsUTF8TrailByte(unsigned char t) {
  const bool below_range = t < 0x80;
  const bool above_range = t > 0xbf;
  return !(below_range || above_range);
}
|
// Returns true iff `t` lies in the range 0x80..0xBF, i.e. it can serve as a
// UTF-8 trail (continuation) byte.
bool IsUTF8TrailByte(unsigned char t) {
  const bool below_range = t < 0x80;
  const bool above_range = t > 0xbf;
  return !(below_range || above_range);
}
|
||||||
|
|
||||||
|
bool Is2ByteChar(const unsigned char lead, const size_t i, const size_t length, const unsigned char *s) {
|
||||||
|
return lead <= 0xdf && (i + 1) <= length && IsUTF8TrailByte(s[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Is3ByteChar(const unsigned char lead, const size_t i, const size_t length, const unsigned char *s) {
|
||||||
|
return 0xe0 <= lead && lead <= 0xef && (i + 2) <= length &&
|
||||||
|
IsUTF8TrailByte(s[i]) &&
|
||||||
|
IsUTF8TrailByte(s[i + 1]) &&
|
||||||
|
// check for non-shortest form and surrogate
|
||||||
|
(lead != 0xe0 || s[i] >= 0xa0) &&
|
||||||
|
(lead != 0xed || s[i] < 0xa0);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Is4ByteChar(const unsigned char lead, const size_t i, const size_t length, const unsigned char *s) {
|
||||||
|
return 0xf0 <= lead && lead <= 0xf4 && (i + 3) <= length &&
|
||||||
|
IsUTF8TrailByte(s[i]) &&
|
||||||
|
IsUTF8TrailByte(s[i + 1]) &&
|
||||||
|
IsUTF8TrailByte(s[i + 2]) &&
|
||||||
|
// check for non-shortest form
|
||||||
|
(lead != 0xf0 || s[i] >= 0x90) &&
|
||||||
|
(lead != 0xf4 || s[i] < 0x90);
|
||||||
|
}
|
||||||
|
|
||||||
bool IsValidUTF8(const char* str, size_t length) {
|
bool IsValidUTF8(const char* str, size_t length) {
|
||||||
const unsigned char *s = reinterpret_cast<const unsigned char *>(str);
|
const unsigned char *s = reinterpret_cast<const unsigned char *>(str);
|
||||||
|
|
||||||
|
@ -386,23 +409,12 @@ bool IsValidUTF8(const char* str, size_t length) {
|
||||||
}
|
}
|
||||||
if (lead < 0xc2) {
|
if (lead < 0xc2) {
|
||||||
return false; // trail byte or non-shortest form
|
return false; // trail byte or non-shortest form
|
||||||
} else if (lead <= 0xdf && (i + 1) <= length && IsUTF8TrailByte(s[i])) {
|
} else if (Is2ByteChar(lead, i, length, s)) {
|
||||||
++i; // 2-byte character
|
++i;
|
||||||
} else if (0xe0 <= lead && lead <= 0xef && (i + 2) <= length &&
|
} else if (Is3ByteChar(lead, i, length, s)) {
|
||||||
IsUTF8TrailByte(s[i]) &&
|
i += 2;
|
||||||
IsUTF8TrailByte(s[i + 1]) &&
|
} else if (Is4ByteChar(lead, i, length, s)) {
|
||||||
// check for non-shortest form and surrogate
|
i += 3;
|
||||||
(lead != 0xe0 || s[i] >= 0xa0) &&
|
|
||||||
(lead != 0xed || s[i] < 0xa0)) {
|
|
||||||
i += 2; // 3-byte character
|
|
||||||
} else if (0xf0 <= lead && lead <= 0xf4 && (i + 3) <= length &&
|
|
||||||
IsUTF8TrailByte(s[i]) &&
|
|
||||||
IsUTF8TrailByte(s[i + 1]) &&
|
|
||||||
IsUTF8TrailByte(s[i + 2]) &&
|
|
||||||
// check for non-shortest form
|
|
||||||
(lead != 0xf0 || s[i] >= 0x90) &&
|
|
||||||
(lead != 0xf4 || s[i] < 0x90)) {
|
|
||||||
i += 3; // 4-byte character
|
|
||||||
} else {
|
} else {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user