Update gtest-printers.cc
Distill the conditions of the if-statements of function "bool IsValidUTF8()" into separate functions, e.g. "bool Is2ByteChar()".
This commit is contained in:
parent
67cc66080d
commit
be4b693607
|
@ -375,6 +375,29 @@ bool ContainsUnprintableControlCodes(const char* str, size_t length) {
|
|||
|
||||
// Returns true if `t` is a UTF-8 trail (continuation) byte, i.e. its two
// most significant bits are "10", which is exactly the range 0x80..0xbf.
bool IsUTF8TrailByte(unsigned char t) { return (t & 0xc0) == 0x80; }
|
||||
|
||||
bool Is2ByteChar(const unsigned char lead, const size_t i, const size_t length, const unsigned char *s) {
|
||||
return lead <= 0xdf && (i + 1) <= length && IsUTF8TrailByte(s[i]);
|
||||
}
|
||||
|
||||
bool Is3ByteChar(const unsigned char lead, const size_t i, const size_t length, const unsigned char *s) {
|
||||
return 0xe0 <= lead && lead <= 0xef && (i + 2) <= length &&
|
||||
IsUTF8TrailByte(s[i]) &&
|
||||
IsUTF8TrailByte(s[i + 1]) &&
|
||||
// check for non-shortest form and surrogate
|
||||
(lead != 0xe0 || s[i] >= 0xa0) &&
|
||||
(lead != 0xed || s[i] < 0xa0);
|
||||
}
|
||||
|
||||
bool Is4ByteChar(const unsigned char lead, const size_t i, const size_t length, const unsigned char *s) {
|
||||
return 0xf0 <= lead && lead <= 0xf4 && (i + 3) <= length &&
|
||||
IsUTF8TrailByte(s[i]) &&
|
||||
IsUTF8TrailByte(s[i + 1]) &&
|
||||
IsUTF8TrailByte(s[i + 2]) &&
|
||||
// check for non-shortest form
|
||||
(lead != 0xf0 || s[i] >= 0x90) &&
|
||||
(lead != 0xf4 || s[i] < 0x90);
|
||||
}
|
||||
|
||||
bool IsValidUTF8(const char* str, size_t length) {
|
||||
const unsigned char *s = reinterpret_cast<const unsigned char *>(str);
|
||||
|
||||
|
@ -386,23 +409,12 @@ bool IsValidUTF8(const char* str, size_t length) {
|
|||
}
|
||||
if (lead < 0xc2) {
|
||||
return false; // trail byte or non-shortest form
|
||||
} else if (lead <= 0xdf && (i + 1) <= length && IsUTF8TrailByte(s[i])) {
|
||||
++i; // 2-byte character
|
||||
} else if (0xe0 <= lead && lead <= 0xef && (i + 2) <= length &&
|
||||
IsUTF8TrailByte(s[i]) &&
|
||||
IsUTF8TrailByte(s[i + 1]) &&
|
||||
// check for non-shortest form and surrogate
|
||||
(lead != 0xe0 || s[i] >= 0xa0) &&
|
||||
(lead != 0xed || s[i] < 0xa0)) {
|
||||
i += 2; // 3-byte character
|
||||
} else if (0xf0 <= lead && lead <= 0xf4 && (i + 3) <= length &&
|
||||
IsUTF8TrailByte(s[i]) &&
|
||||
IsUTF8TrailByte(s[i + 1]) &&
|
||||
IsUTF8TrailByte(s[i + 2]) &&
|
||||
// check for non-shortest form
|
||||
(lead != 0xf0 || s[i] >= 0x90) &&
|
||||
(lead != 0xf4 || s[i] < 0x90)) {
|
||||
i += 3; // 4-byte character
|
||||
} else if (Is2ByteChar(lead, i, length, s)) {
|
||||
++i;
|
||||
} else if (Is3ByteChar(lead, i, length, s)) {
|
||||
i += 2;
|
||||
} else if (Is4ByteChar(lead, i, length, s)) {
|
||||
i += 3;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user