LeetCode/cpp/utf-8-validation.cpp

122 lines
2.6 KiB
C++
Raw Normal View History

#include <array>
#include <exception>
#include <iostream>
#include <optional>
#include <sstream>
#include <string>
#include <vector>
namespace {
class todo : public std::exception {
std::string cause;
public:
todo()
: cause("Not yet implemented!")
{
}
todo(std::string&& excuse)
: cause("Not yet implemented: " + excuse)
{
}
virtual const char* what() const throw()
{
return cause.c_str();
}
};
auto expected_size(int first) -> std::optional<std::size_t> {
static constexpr std::array<int, 4> HEADERS = {
0, 6, 14, 30
};
for (auto i = 0; i < 4; i++) {
auto mask_length = 1 + i + (i > 0);
auto mask = (1 << (mask_length)) - 1;
if (((first >> (8 - mask_length)) & mask) == HEADERS[i]) {
return {i + 1};
}
}
return {};
}
}
// Structural UTF-8 validator.
//
// Checks that the input splits into sequences of 1 to 4 bytes, each made of
// a recognised lead byte followed by the right number of continuation bytes.
// It does not look at the decoded code points, so overlong encodings and
// out-of-range code points are not detected.
class Solution {
    // Value of the top two bits (0b10) of every continuation byte.
    static constexpr int CONTINUATION_BYTE = 2;

public:
    // Returns true when `data`, read as one byte per element, is a
    // well-formed run of UTF-8 sequences. An empty input is valid.
    // Any element outside [0, 255] makes the input invalid.
    auto validUtf8(const std::vector<int>& data) -> bool
    {
        const std::size_t total = data.size();

        for (std::size_t i = 0; i < total;) {
            const int lead = data[i];

            // The lead must be a real byte value; negative values were
            // previously able to slip through as single-byte characters.
            if (lead < 0 || lead >= (1 << 8)) {
                return false;
            }

            const auto expected_length = expected_size(lead);
            if (!expected_length.has_value()) {
                // Not a valid lead byte (e.g. a stray continuation byte).
                return false;
            }
            const std::size_t length = *expected_length;

            // The sequence must not run past the end of the input.
            // `total - i` cannot wrap because i < total here.
            if (length > total - i) {
                return false;
            }

            // Every remaining byte of the sequence must look like 0b10xxxxxx.
            for (std::size_t j = 1; j < length; ++j) {
                const int next = data[i + j];
                if (next < 0 || (next >> 6) != CONTINUATION_BYTE) {
                    return false;
                }
            }

            i += length;
        }
        return true;
    }
};
#pragma region tests
#include <gtest/gtest.h>
// 197 (0b11000101) opens a two-byte sequence, 130 (0b10000010) is its
// continuation byte, and 1 is a single-byte character: valid input.
TEST(examples, valid)
{
    const std::vector<int> bytes { 197, 130, 1 };
    Solution solver;
    ASSERT_TRUE(solver.validUtf8(bytes));
}
// 235 (0b11101011) opens a three-byte sequence and 140 (0b10001100) is a
// continuation byte, but 4 (0b00000100) is not, so the input is invalid.
TEST(examples, invalid)
{
    const std::vector<int> bytes { 235, 140, 4 };
    Solution solver;
    ASSERT_FALSE(solver.validUtf8(bytes));
}
// A lone value below 128 is a complete one-byte sequence.
TEST(valid, ascii_byte)
{
    const std::vector<int> bytes { 64 };
    Solution solver;
    ASSERT_TRUE(solver.validUtf8(bytes));
}
// 256 is the smallest value that no longer fits in one byte; a single such
// element must be reported as invalid.
TEST(invalid, just_one_byte)
{
    const std::vector<int> bytes { 1 << 8 };
    Solution solver;
    ASSERT_FALSE(solver.validUtf8(bytes));
}
// Test-runner entry point: hands the command-line arguments to GoogleTest,
// runs every registered test, and uses the result as the exit status.
int main(int argc, char** argv)
{
    ::testing::InitGoogleTest(&argc, argv);
    const int exit_status = RUN_ALL_TESTS();
    return exit_status;
}
#pragma endregion /* tests */