// problems/utf-8-validation.cpp
//
// Solution and tests for the problem "393. UTF-8 Validation".
//
// Provenance (recovered from the git format-patch mail this file was extracted from):
//   Commit:  fb8f4944c8eb37d3b1149d665cd6117404085a94
//   Author:  Matej Focko
//   Date:    Wed, 14 Sep 2022 10:31:37 +0200
//   Subject: problems(cpp): add „393. UTF-8 Validation“
//   Signed-off-by: Matej Focko
//
// NOTE(review): the extraction stripped everything between angle brackets, so
// the names of the seven original #include directives and all template
// arguments were lost. The includes below are what the visible code requires;
// confirm against the repository copy.

#include <array>
#include <cstddef>
#include <exception>
#include <optional>
#include <string>
#include <vector>

namespace {

/// Exception for code paths that are not implemented yet.
/// Not thrown anywhere in this file.
class todo : public std::exception {
    std::string cause;

public:
    todo()
        : cause("Not yet implemented!")
    {
    }

    /// @param excuse text appended to the "Not yet implemented: " prefix.
    explicit todo(std::string&& excuse)
        : cause("Not yet implemented: " + excuse)
    {
    }

    /// @return the stored message; the pointer stays valid as long as this object lives.
    const char* what() const noexcept override
    {
        return cause.c_str();
    }
};

/// @return true when `value` fits into a single unsigned byte (0..255).
constexpr bool is_byte(int value) noexcept
{
    return value >= 0 && value <= 0xFF;
}

/// Determines how many bytes the UTF-8 sequence introduced by `first` occupies.
///
/// @param first candidate leading byte.
/// @return 1..4 when the top bits of `first` are 0, 110, 1110 or 11110
///         respectively; empty when `first` is not a byte value or matches
///         none of those prefixes (e.g. a lone continuation byte 10xxxxxx).
auto expected_size(int first) -> std::optional<int>
{
    struct Leader {
        int prefix_bits; // how many top bits form the prefix
        int pattern;     // value of those bits
    };
    // Entry i describes the leader of an (i + 1)-byte sequence.
    static constexpr std::array<Leader, 4> LEADERS = { {
        { 1, 0b0 },
        { 3, 0b110 },
        { 4, 0b1110 },
        { 5, 0b11110 },
    } };

    // Shifting a negative or oversized int could match a prefix by accident,
    // so anything that is not a real byte is rejected up front.
    if (!is_byte(first)) {
        return std::nullopt;
    }

    for (std::size_t i = 0; i < LEADERS.size(); ++i) {
        const auto& leader = LEADERS[i];
        // `first` is within 0..255 here, so the shift leaves exactly the prefix bits.
        if ((first >> (8 - leader.prefix_bits)) == leader.pattern) {
            return static_cast<int>(i) + 1;
        }
    }

    return std::nullopt;
}

}

class Solution {
    /// Top two bits of every continuation byte: 10xxxxxx.
    static constexpr int CONTINUATION_BYTE = 2;

public:
    /// Checks that `data` is a sequence of structurally valid UTF-8 characters.
    ///
    /// Each element is treated as one byte. An element outside 0..255 makes
    /// the input invalid. Only the byte structure is checked (leader prefix,
    /// sequence length, continuation prefixes); overlong encodings and code
    /// point ranges are not inspected.
    ///
    /// @param data bytes to validate, one per element.
    /// @return true when every sequence is complete and well-formed; true for
    ///         empty input.
    auto validUtf8(const std::vector<int>& data) -> bool
    {
        const std::size_t size = data.size();

        for (std::size_t i = 0; i < size;) {
            // Corrupted or out-of-range leading byte.
            const auto length = expected_size(data[i]);
            if (!length.has_value()) {
                return false;
            }

            // The sequence must not run past the end of the input.
            // `i < size` holds here, so the subtraction cannot wrap around.
            const auto needed = static_cast<std::size_t>(*length);
            if (needed > size - i) {
                return false;
            }

            // Every byte after the leader must look like 10xxxxxx.
            for (std::size_t j = 1; j < needed; ++j) {
                const int byte = data[i + j];
                if (!is_byte(byte) || (byte >> 6) != CONTINUATION_BYTE) {
                    return false;
                }
            }

            i += needed;
        }

        return true;
    }
};

#pragma region tests

// The tests (and `main`) are built only when GoogleTest is on the include
// path, so the solution above can still be compiled without it.
#if __has_include(<gtest/gtest.h>)

#include <gtest/gtest.h>

TEST(examples, valid)
{
    Solution s;
    ASSERT_TRUE(s.validUtf8(std::vector<int> { 197, 130, 1 }));
}

TEST(examples, invalid)
{
    Solution s;
    ASSERT_FALSE(s.validUtf8(std::vector<int> { 235, 140, 4 }));
}

TEST(valid, ascii_byte)
{
    Solution s;
    ASSERT_TRUE(s.validUtf8(std::vector<int> { 64 }));
}

TEST(valid, four_byte_sequence)
{
    Solution s;
    ASSERT_TRUE(s.validUtf8(std::vector<int> { 240, 159, 152, 128 }));
}

TEST(invalid, just_one_byte)
{
    Solution s;
    // 2 << 7 == 256, which does not fit into a single byte.
    ASSERT_FALSE(s.validUtf8(std::vector<int> { 2 << 7 }));
}

TEST(invalid, truncated_sequence)
{
    Solution s;
    ASSERT_FALSE(s.validUtf8(std::vector<int> { 197 }));
}

TEST(invalid, negative_value)
{
    Solution s;
    ASSERT_FALSE(s.validUtf8(std::vector<int> { -191 }));
}

int main(int argc, char** argv)
{
    ::testing::InitGoogleTest(&argc, argv);
    return RUN_ALL_TESTS();
}

#endif

#pragma endregion /* tests */