From be4b69360706159d0ecb4f91f3aa08d1e14a23de Mon Sep 17 00:00:00 2001 From: desosa-9 <62608283+desosa-9@users.noreply.github.com> Date: Sun, 29 Mar 2020 16:35:42 +0200 Subject: [PATCH] Update gtest-printers.cc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Distill the conditions of the if-statements of function "bool IsValidUTF8()" into separate functions, e.g. “bool Is2ByteChar()” --- googletest/src/gtest-printers.cc | 46 ++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/googletest/src/gtest-printers.cc b/googletest/src/gtest-printers.cc index 4e1ccad8..8fc7a491 100644 --- a/googletest/src/gtest-printers.cc +++ b/googletest/src/gtest-printers.cc @@ -375,6 +375,29 @@ bool ContainsUnprintableControlCodes(const char* str, size_t length) { bool IsUTF8TrailByte(unsigned char t) { return 0x80 <= t && t<= 0xbf; } +bool Is2ByteChar(const unsigned char lead, const size_t i, const size_t length, const unsigned char *s) { + return lead <= 0xdf && (i + 1) <= length && IsUTF8TrailByte(s[i]); +} + +bool Is3ByteChar(const unsigned char lead, const size_t i, const size_t length, const unsigned char *s) { + return 0xe0 <= lead && lead <= 0xef && (i + 2) <= length && + IsUTF8TrailByte(s[i]) && + IsUTF8TrailByte(s[i + 1]) && + // check for non-shortest form and surrogate + (lead != 0xe0 || s[i] >= 0xa0) && + (lead != 0xed || s[i] < 0xa0); +} + +bool Is4ByteChar(const unsigned char lead, const size_t i, const size_t length, const unsigned char *s) { + return 0xf0 <= lead && lead <= 0xf4 && (i + 3) <= length && + IsUTF8TrailByte(s[i]) && + IsUTF8TrailByte(s[i + 1]) && + IsUTF8TrailByte(s[i + 2]) && + // check for non-shortest form + (lead != 0xf0 || s[i] >= 0x90) && + (lead != 0xf4 || s[i] < 0x90); +} + bool IsValidUTF8(const char* str, size_t length) { const unsigned char *s = reinterpret_cast<const unsigned char *>(str); @@ -386,23 +409,12 @@ bool IsValidUTF8(const char* str, size_t length) { } if (lead < 0xc2) { return 
false; // trail byte or non-shortest form - } else if (lead <= 0xdf && (i + 1) <= length && IsUTF8TrailByte(s[i])) { - ++i; // 2-byte character - } else if (0xe0 <= lead && lead <= 0xef && (i + 2) <= length && - IsUTF8TrailByte(s[i]) && - IsUTF8TrailByte(s[i + 1]) && - // check for non-shortest form and surrogate - (lead != 0xe0 || s[i] >= 0xa0) && - (lead != 0xed || s[i] < 0xa0)) { - i += 2; // 3-byte character - } else if (0xf0 <= lead && lead <= 0xf4 && (i + 3) <= length && - IsUTF8TrailByte(s[i]) && - IsUTF8TrailByte(s[i + 1]) && - IsUTF8TrailByte(s[i + 2]) && - // check for non-shortest form - (lead != 0xf0 || s[i] >= 0x90) && - (lead != 0xf4 || s[i] < 0x90)) { - i += 3; // 4-byte character + } else if (Is2ByteChar(lead, i, length, s)) { + ++i; + } else if (Is3ByteChar(lead, i, length, s)) { + i += 2; + } else if (Is4ByteChar(lead, i, length, s)) { + i += 3; } else { return false; }