diff --git a/googletest/docs/AdvancedGuide.md b/googletest/docs/AdvancedGuide.md index ccb087c0..e1df20d5 100644 --- a/googletest/docs/AdvancedGuide.md +++ b/googletest/docs/AdvancedGuide.md @@ -1951,6 +1951,17 @@ variable to `0` has the same effect. _Availability:_ Linux, Windows, Mac. (In Google Test 1.3.0 and lower, the default behavior is that the elapsed time is **not** printed.) +**Availability**: Linux, Windows, Mac. + +#### Suppressing UTF-8 Text Output + +In case of assertion failures, gUnit prints expected and actual values of type +`string` both as hex-encoded strings as well as in readable UTF-8 text if they +contain valid non-ASCII UTF-8 characters. If you want to suppress the UTF-8 text +because, for example, you don't have an UTF-8 compatible output medium, run the +test program with `--gunit_print_utf8=0` or set the `GUNIT_PRINT_UTF8` +environment variable to `0`. + ### Generating an XML Report ### Google Test can emit a detailed XML report to a file in addition to its normal diff --git a/googletest/include/gtest/gtest.h b/googletest/include/gtest/gtest.h index 93b755fb..01994e6d 100644 --- a/googletest/include/gtest/gtest.h +++ b/googletest/include/gtest/gtest.h @@ -115,6 +115,9 @@ GTEST_DECLARE_string_(output); // test. GTEST_DECLARE_bool_(print_time); +// This flags control whether Google Test prints UTF8 characters as text. +GTEST_DECLARE_bool_(print_utf8); + // This flag specifies the random number seed. GTEST_DECLARE_int32_(random_seed); diff --git a/googletest/src/gtest-internal-inl.h b/googletest/src/gtest-internal-inl.h index 5ec0af9b..099761ad 100644 --- a/googletest/src/gtest-internal-inl.h +++ b/googletest/src/gtest-internal-inl.h @@ -86,6 +86,7 @@ const char kFilterFlag[] = "filter"; const char kListTestsFlag[] = "list_tests"; const char kOutputFlag[] = "output"; const char kPrintTimeFlag[] = "print_time"; +const char kPrintUTF8Flag[] = "print_utf8"; const char kRandomSeedFlag[] = "random_seed"; const char kRepeatFlag[] = "repeat"; const char kShuffleFlag[] = "shuffle"; @@ -166,6 +167,7 @@ class GTestFlagSaver { list_tests_ = GTEST_FLAG(list_tests); output_ = GTEST_FLAG(output); print_time_ = GTEST_FLAG(print_time); + print_utf8_ = GTEST_FLAG(print_utf8); random_seed_ = GTEST_FLAG(random_seed); repeat_ = GTEST_FLAG(repeat); shuffle_ = GTEST_FLAG(shuffle); @@ -187,6 +189,7 @@ class GTestFlagSaver { GTEST_FLAG(list_tests) = list_tests_; GTEST_FLAG(output) = output_; GTEST_FLAG(print_time) = print_time_; + GTEST_FLAG(print_utf8) = print_utf8_; GTEST_FLAG(random_seed) = random_seed_; GTEST_FLAG(repeat) = repeat_; GTEST_FLAG(shuffle) = shuffle_; @@ -208,6 +211,7 @@ class GTestFlagSaver { bool list_tests_; std::string output_; bool print_time_; + bool print_utf8_; internal::Int32 random_seed_; internal::Int32 repeat_; bool shuffle_; diff --git a/googletest/src/gtest-printers.cc b/googletest/src/gtest-printers.cc index dd67f645..1bdf243d 100644 --- a/googletest/src/gtest-printers.cc +++ b/googletest/src/gtest-printers.cc @@ -43,12 +43,13 @@ // defines Foo. #include "gtest/gtest-printers.h" -#include #include +#include #include #include // NOLINT #include #include "gtest/internal/gtest-port.h" +#include "src/gtest-internal-inl.h" namespace testing { @@ -262,11 +263,12 @@ template GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_ GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_ GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_ -static void PrintCharsAsStringTo( +static CharFormat PrintCharsAsStringTo( const CharType* begin, size_t len, ostream* os) { const char* const kQuoteBegin = sizeof(CharType) == 1 ? "\"" : "L\""; *os << kQuoteBegin; bool is_previous_hex = false; + CharFormat print_format = kAsIs; for (size_t index = 0; index < len; ++index) { const CharType cur = begin[index]; if (is_previous_hex && IsXDigit(cur)) { @@ -276,8 +278,13 @@ static void PrintCharsAsStringTo( *os << "\" " << kQuoteBegin; } is_previous_hex = PrintAsStringLiteralTo(cur, os) == kHexEscape; + // Remember if any characters required hex escaping. + if (is_previous_hex) { + print_format = kHexEscape; + } } *os << "\""; + return print_format; } // Prints a (const) char/wchar_t array of 'len' elements, starting at address @@ -347,15 +354,88 @@ void PrintTo(const wchar_t* s, ostream* os) { } #endif // wchar_t is native +namespace { + +bool ContainsUnprintableControlCodes(const char* str, size_t length) { + for (size_t i = 0; i < length; i++) { + char ch = *str++; + if (std::iscntrl(ch)) { + switch (ch) { + case '\t': + case '\n': + case '\r': + break; + default: + return true; + } + } + } + return false; +} + +bool IsUTF8TrailByte(unsigned char t) { return 0x80 <= t && t<= 0xbf; } + +bool IsValidUTF8(const char* str, size_t length) { + const unsigned char *s = reinterpret_cast(str); + + for (size_t i = 0; i < length;) { + unsigned char lead = s[i++]; + + if (lead <= 0x7f) { + continue; // single-byte character (ASCII) 0..7F + } + if (lead < 0xc2) { + return false; // trail byte or non-shortest form + } else if (lead <= 0xdf && (i + 1) <= length && IsUTF8TrailByte(s[i])) { + ++i; // 2-byte character + } else if (0xe0 <= lead && lead <= 0xef && (i + 2) <= length && + IsUTF8TrailByte(s[i]) && + IsUTF8TrailByte(s[i + 1]) && + // check for non-shortest form and surrogate + (lead != 0xe0 || s[i] >= 0xa0) && + (lead != 0xed || s[i] < 0xa0)) { + i += 2; // 3-byte character + } else if (0xf0 <= lead && lead <= 0xf4 && (i + 3) <= length && + IsUTF8TrailByte(s[i]) && + IsUTF8TrailByte(s[i + 1]) && + IsUTF8TrailByte(s[i + 2]) && + // check for non-shortest form + (lead != 0xf0 || s[i] >= 0x90) && + (lead != 0xf4 || s[i] < 0x90)) { + i += 3; // 4-byte character + } else { + return false; + } + } + return true; +} + +void ConditionalPrintAsText(const char* str, size_t length, ostream* os) { + if (!ContainsUnprintableControlCodes(str, length) && + IsValidUTF8(str, length)) { + *os << "\n As Text: \"" << str << "\""; + } +} + +} // anonymous namespace + // Prints a ::string object. #if GTEST_HAS_GLOBAL_STRING void PrintStringTo(const ::string& s, ostream* os) { - PrintCharsAsStringTo(s.data(), s.size(), os); + if (PrintCharsAsStringTo(s.data(), s.size(), os) == kHexEscape) { + if (GTEST_FLAG(print_utf8)) { + ConditionalPrintAsText(s.data(), s.size(), os); + } + } } #endif // GTEST_HAS_GLOBAL_STRING void PrintStringTo(const ::std::string& s, ostream* os) { - PrintCharsAsStringTo(s.data(), s.size(), os); + if (PrintCharsAsStringTo(s.data(), s.size(), os) == kHexEscape) { + if (GTEST_FLAG(print_utf8)) { + ConditionalPrintAsText(s.data(), s.size(), os); + } + } } // Prints a ::wstring object. diff --git a/googletest/src/gtest.cc b/googletest/src/gtest.cc index 3435f9cb..2c25f838 100644 --- a/googletest/src/gtest.cc +++ b/googletest/src/gtest.cc @@ -246,6 +246,12 @@ GTEST_DEFINE_bool_( "True iff " GTEST_NAME_ " should display elapsed time in text output."); +GTEST_DEFINE_bool_( + print_utf8, + internal::BoolFromGTestEnv("print_utf8", true), + "True iff " GTEST_NAME_ + " prints UTF8 characters as text."); + GTEST_DEFINE_int32_( random_seed, internal::Int32FromGTestEnv("random_seed", 0), @@ -5230,6 +5236,7 @@ static bool ParseGoogleTestFlag(const char* const arg) { ParseBoolFlag(arg, kListTestsFlag, >EST_FLAG(list_tests)) || ParseStringFlag(arg, kOutputFlag, >EST_FLAG(output)) || ParseBoolFlag(arg, kPrintTimeFlag, >EST_FLAG(print_time)) || + ParseBoolFlag(arg, kPrintUTF8Flag, >EST_FLAG(print_utf8)) || ParseInt32Flag(arg, kRandomSeedFlag, >EST_FLAG(random_seed)) || ParseInt32Flag(arg, kRepeatFlag, >EST_FLAG(repeat)) || ParseBoolFlag(arg, kShuffleFlag, >EST_FLAG(shuffle)) || diff --git a/googletest/test/gtest-printers_test.cc b/googletest/test/gtest-printers_test.cc index aff97a2a..a8c82a8a 100644 --- a/googletest/test/gtest-printers_test.cc +++ b/googletest/test/gtest-printers_test.cc @@ -1552,6 +1552,78 @@ TEST(PrintToStringTest, WorksForCharArrayWithEmbeddedNul) { EXPECT_PRINT_TO_STRING_(mutable_str_with_nul, "\"hello\\0 world\""); } + TEST(PrintToStringTest, ContainsNonLatin) { + // Sanity test with valid UTF-8. Prints both in hex and as text. + std::string non_ascii_str = ::std::string("오전 4:30"); + EXPECT_PRINT_TO_STRING_(non_ascii_str, + "\"\\xEC\\x98\\xA4\\xEC\\xA0\\x84 4:30\"\n" + " As Text: \"오전 4:30\""); + non_ascii_str = ::std::string("From ä — ẑ"); + EXPECT_PRINT_TO_STRING_(non_ascii_str, + "\"From \\xC3\\xA4 \\xE2\\x80\\x94 \\xE1\\xBA\\x91\"" + "\n As Text: \"From ä — ẑ\""); +} + +TEST(IsValidUTF8Test, IllFormedUTF8) { + // The following test strings are ill-formed UTF-8 and are printed + // as hex only (or ASCII, in case of ASCII bytes) because IsValidUTF8() is + // expected to fail, thus output does not contain "As Text:". + + static const char *const kTestdata[][2] = { + // 2-byte lead byte followed by a single-byte character. + {"\xC3\x74", "\"\\xC3t\""}, + // Valid 2-byte character followed by an orphan trail byte. + {"\xC3\x84\xA4", "\"\\xC3\\x84\\xA4\""}, + // Lead byte without trail byte. + {"abc\xC3", "\"abc\\xC3\""}, + // 3-byte lead byte, single-byte character, orphan trail byte. + {"x\xE2\x70\x94", "\"x\\xE2p\\x94\""}, + // Truncated 3-byte character. + {"\xE2\x80", "\"\\xE2\\x80\""}, + // Truncated 3-byte character followed by valid 2-byte char. + {"\xE2\x80\xC3\x84", "\"\\xE2\\x80\\xC3\\x84\""}, + // Truncated 3-byte character followed by a single-byte character. + {"\xE2\x80\x7A", "\"\\xE2\\x80z\""}, + // 3-byte lead byte followed by valid 3-byte character. + {"\xE2\xE2\x80\x94", "\"\\xE2\\xE2\\x80\\x94\""}, + // 4-byte lead byte followed by valid 3-byte character. + {"\xF0\xE2\x80\x94", "\"\\xF0\\xE2\\x80\\x94\""}, + // Truncated 4-byte character. + {"\xF0\xE2\x80", "\"\\xF0\\xE2\\x80\""}, + // Invalid UTF-8 byte sequences embedded in other chars. + {"abc\xE2\x80\x94\xC3\x74xyc", "\"abc\\xE2\\x80\\x94\\xC3txyc\""}, + {"abc\xC3\x84\xE2\x80\xC3\x84xyz", + "\"abc\\xC3\\x84\\xE2\\x80\\xC3\\x84xyz\""}, + // Non-shortest UTF-8 byte sequences are also ill-formed. + // The classics: xC0, xC1 lead byte. + {"\xC0\x80", "\"\\xC0\\x80\""}, + {"\xC1\x81", "\"\\xC1\\x81\""}, + // Non-shortest sequences. + {"\xE0\x80\x80", "\"\\xE0\\x80\\x80\""}, + {"\xf0\x80\x80\x80", "\"\\xF0\\x80\\x80\\x80\""}, + // Last valid code point before surrogate range, should be printed as text, + // too. + {"\xED\x9F\xBF", "\"\\xED\\x9F\\xBF\"\n As Text: \"퟿\""}, + // Start of surrogate lead. Surrogates are not printed as text. + {"\xED\xA0\x80", "\"\\xED\\xA0\\x80\""}, + // Last non-private surrogate lead. + {"\xED\xAD\xBF", "\"\\xED\\xAD\\xBF\""}, + // First private-use surrogate lead. + {"\xED\xAE\x80", "\"\\xED\\xAE\\x80\""}, + // Last private-use surrogate lead. + {"\xED\xAF\xBF", "\"\\xED\\xAF\\xBF\""}, + // Mid-point of surrogate trail. + {"\xED\xB3\xBF", "\"\\xED\\xB3\\xBF\""}, + // First valid code point after surrogate range, should be printed as text, + // too. + {"\xEE\x80\x80", "\"\\xEE\\x80\\x80\"\n As Text: \"\""} + }; + + for (int i = 0; i < sizeof(kTestdata)/sizeof(kTestdata[0]); ++i) { + EXPECT_PRINT_TO_STRING_(kTestdata[i][0], kTestdata[i][1]); + } +} + #undef EXPECT_PRINT_TO_STRING_ TEST(UniversalTersePrintTest, WorksForNonReference) {