Lukhnos: Optimized KeyValueBlobReader.

This commit is contained in:
ShikiSuen 2022-01-23 23:15:29 +08:00
parent a713cb324c
commit 8b9e5ca57e
2 changed files with 144 additions and 130 deletions

View File

@ -10,116 +10,118 @@
namespace vChewing {
// Parses the next key-value pair from the blob and returns the reader state.
//
// A pair is a run of content characters (the key), at least one blank (space
// or tab), and a second run of content characters (the value). Anything after
// the value on the same line is skipped. Blank characters, line breaks and
// lines whose first content character is '#' are skipped before the key.
//
// Parameters:
//   out - optional; when non-null and a pair is found, receives the pair. The
//         string_views are built from pointers taken from current_, so they
//         refer to the blob's characters rather than to a copy.
//
// Returns:
//   HAS_PAIR when a pair was parsed; ERROR when the reader was already in the
//   error state or the input after a key is malformed (every `goto error`
//   below); otherwise the non-CAN_CONTINUE state reported by the skip helpers
//   (END in the helpers visible in this file) when the input runs out before
//   a key starts.
//
// NOTE(review): this span looks like a rendered diff -- the old and the
// reformatted versions of the same lines appear interleaved without +/-
// markers. Confirm against the repository before building from it.
KeyValueBlobReader::State KeyValueBlobReader::Next(KeyValue* out) {
// Character-class predicates used by the skip helpers.
static auto new_line = [](char c) { return c == '\n' || c == '\r'; };
static auto blank = [](char c) { return c == ' ' || c == '\t'; };
static auto blank_or_newline = [](char c) { return blank(c) || new_line(c); };
static auto content_char = [](char c) {
return !blank(c) && !new_line(c);
};
KeyValueBlobReader::State KeyValueBlobReader::Next(KeyValue* out)
{
// Character-class predicates used by the skip helpers.
static auto new_line = [](char c) { return c == '\n' || c == '\r'; };
static auto blank = [](char c) { return c == ' ' || c == '\t'; };
static auto blank_or_newline
= [](char c) { return blank(c) || new_line(c); };
static auto content_char = [](char c) { return !blank(c) && !new_line(c); };
// Once an error has been recorded, keep reporting it without parsing further.
if (state_ == State::ERROR) {
if (state_ == State::ERROR) {
return state_;
}
const char* key_begin = nullptr;
size_t key_length = 0;
const char* value_begin = nullptr;
size_t value_length = 0;
// Advance to the first character of the next key, skipping blank characters,
// line breaks and whole comment lines.
while (true) {
state_ = SkipUntilNot(blank_or_newline);
if (state_ != State::CAN_CONTINUE) {
return state_;
}
// Check if it's a comment line; if so, read until end of line.
if (*current_ != '#') {
break;
}
state_ = SkipUntil(new_line);
if (state_ != State::CAN_CONTINUE) {
return state_;
}
}
// No need to check whether *current_ is a content_char, since content_char
// is defined as not blank and not new_line.
key_begin = current_;
state_ = SkipUntilNot(content_char);
// Input ended right after (or inside) the key: a key without a value is an
// error.
if (state_ != State::CAN_CONTINUE) {
goto error;
}
key_length = current_ - key_begin;
// There should be at least one blank character after the key string.
if (!blank(*current_)) {
goto error;
}
state_ = SkipUntilNot(blank);
if (state_ != State::CAN_CONTINUE) {
goto error;
}
// After the blanks the value must start on the same line; a line break here
// means the value is missing.
if (!content_char(*current_)) {
goto error;
}
value_begin = current_;
// The value must only contain content characters; blanks are not allowed.
// Also, there's no need to check the state after this, since we will always
// emit the value. This also avoids the situation where trailing spaces in a
// line would become part of the value.
SkipUntilNot(content_char);
value_length = current_ - value_begin;
// Unconditionally skip until the end of the line. This prevents the case
// like "foo bar baz\n" where baz should not be treated as the next key.
SkipUntil(new_line);
if (out != nullptr) {
*out = KeyValue { std::string_view { key_begin, key_length },
std::string_view { value_begin, value_length } };
}
state_ = State::HAS_PAIR;
return state_;
}
const char* key_begin = nullptr;
size_t key_length = 0;
const char* value_begin = nullptr;
size_t value_length = 0;
// Advance to the first character of the next key, skipping blank characters,
// line breaks and whole comment lines.
while (true) {
state_ = SkipUntilNot(blank_or_newline);
if (state_ != State::CAN_CONTINUE) {
return state_;
}
// Check if it's a comment line; if so, read until end of line.
if (*current_ != '#') {
break;
}
state_ = SkipUntil(new_line);
if (state_ != State::CAN_CONTINUE) {
return state_;
}
}
// No need to check whether *current_ is a content_char, since content_char
// is defined as not blank and not new_line.
key_begin = current_;
state_ = SkipUntilNot(content_char);
// Input ended right after (or inside) the key: a key without a value is an
// error.
if (state_ != State::CAN_CONTINUE) {
goto error;
}
key_length = current_ - key_begin;
// There should be at least one blank character after the key string.
if (!blank(*current_)) {
goto error;
}
state_ = SkipUntilNot(blank);
if (state_ != State::CAN_CONTINUE) {
goto error;
}
// After the blanks the value must start on the same line; a line break here
// means the value is missing.
if (!content_char(*current_)) {
goto error;
}
value_begin = current_;
// The value must only contain content characters; blanks are not allowed.
// Also, there's no need to check the state after this, since we will always
// emit the value. This also avoids the situation where trailing spaces in a
// line would become part of the value.
SkipUntilNot(content_char);
value_length = current_ - value_begin;
// Unconditionally skip until the end of the line. This prevents the case
// like "foo bar baz\n" where baz should not be treated as the next key.
SkipUntil(new_line);
if (out != nullptr) {
*out = KeyValue{
std::string_view{key_begin, key_length},
std::string_view{value_begin, value_length}};
}
state_ = State::HAS_PAIR;
return state_;
// Shared failure exit: record the error in state_ so that later calls return
// ERROR immediately.
error:
state_ = State::ERROR;
return State::ERROR;
state_ = State::ERROR;
return state_;
}
// Advances current_ past every character for which `f` returns true.
//
// Returns CAN_CONTINUE as soon as *current_ is a character for which `f` is
// false (current_ is left pointing at it). Returns END when current_ reaches
// end_ or a NUL character first.
//
// NOTE(review): the old and the reformatted versions of these lines appear
// interleaved here (rendered diff without +/- markers) -- confirm against the
// repository.
KeyValueBlobReader::State KeyValueBlobReader::SkipUntilNot(
const std::function<bool(char)>& f) {
// A NUL character stops the scan just like reaching end_.
while (current_ != end_ &&* current_) {
if (!f(*current_)) {
return State::CAN_CONTINUE;
const std::function<bool(char)>& f)
{
// A NUL character stops the scan just like reaching end_.
while (current_ != end_ && *current_) {
if (!f(*current_)) {
return State::CAN_CONTINUE;
}
++current_;
}
++current_;
}
return State::END;
return State::END;
}
// Advances current_ until it points at a character for which `f` returns
// true.
//
// Returns CAN_CONTINUE when such a character is found (current_ is left
// pointing at it, not past it). Returns END when current_ reaches end_ or a
// NUL character first.
//
// NOTE(review): the old and the reformatted versions of these lines appear
// interleaved here (rendered diff without +/- markers) -- confirm against the
// repository.
KeyValueBlobReader::State KeyValueBlobReader::SkipUntil(
const std::function<bool(char)>& f) {
// A NUL character stops the scan just like reaching end_.
while (current_ != end_ &&* current_) {
if (f(*current_)) {
return State::CAN_CONTINUE;
const std::function<bool(char)>& f)
{
// A NUL character stops the scan just like reaching end_.
while (current_ != end_ && *current_) {
if (f(*current_)) {
return State::CAN_CONTINUE;
}
++current_;
}
++current_;
}
return State::END;
return State::END;
}
// Writes `kv` to `os` in the form "(key: <key>, value: <value>)" and returns
// `os` so that insertions can be chained.
//
// NOTE(review): the old and the reformatted versions of these lines appear
// interleaved here (rendered diff without +/- markers) -- confirm against the
// repository.
std::ostream& operator<<(std::ostream& os,
const KeyValueBlobReader::KeyValue& kv) {
os << "(key: " << kv.key << ", value: " << kv.value << ")";
return os;
std::ostream& operator<<(
std::ostream& os, const KeyValueBlobReader::KeyValue& kv)
{
os << "(key: " << kv.key << ", value: " << kv.value << ")";
return os;
}
} // namespace vChewing
} // namespace vChewing

View File

@ -31,49 +31,61 @@
namespace vChewing {
// Reader that walks a character blob and yields key-value pairs one at a time
// through Next(). The reader stores only pointers into the blob (current_ and
// end_); no copy of the data is made by the constructor.
//
// NOTE(review): the old and the reformatted versions of these lines appear
// interleaved here (rendered diff without +/- markers) -- confirm against the
// repository.
class KeyValueBlobReader {
public:
// State reported by Next() and tracked internally in state_.
enum class State : int {
// There are no more key-value pairs in this blob.
END = 0,
// The reader has produced a new key-value pair.
HAS_PAIR = 1,
// An error is encountered and the parsing stopped.
ERROR = -1,
// Internal-only state: the parser can continue parsing.
CAN_CONTINUE = 2
};
public:
// State reported by Next() and tracked internally in state_.
enum class State : int {
// There are no more key-value pairs in this blob.
END = 0,
// The reader has produced a new key-value pair.
HAS_PAIR = 1,
// An error is encountered and the parsing stopped.
ERROR = -1,
// Internal-only state: the parser can continue parsing.
CAN_CONTINUE = 2
};
// A key and its value, both held as non-owning string_views.
struct KeyValue {
// Default-constructs a pair whose key and value are both empty strings.
constexpr KeyValue() : key(""), value("") {}
// Constructs a pair from the given key and value views.
constexpr KeyValue(std::string_view k, std::string_view v)
: key(k), value(v) {}
// A key and its value, both held as non-owning string_views.
struct KeyValue {
// Default-constructs a pair whose key and value are both empty strings.
constexpr KeyValue()
: key("")
, value("")
{
}
// Constructs a pair from the given key and value views.
constexpr KeyValue(std::string_view k, std::string_view v)
: key(k)
, value(v)
{
}
// Two pairs are equal when both their keys and their values compare equal.
bool operator==(const KeyValue& another) const {
return key == another.key && value == another.value;
// Two pairs are equal when both their keys and their values compare equal.
bool operator==(const KeyValue& another) const
{
return key == another.key && value == another.value;
}
std::string_view key;
std::string_view value;
};
// Creates a reader over the `size` characters starting at `blob`. Only the
// begin and one-past-the-end pointers are stored.
KeyValueBlobReader(const char* blob, size_t size)
: current_(blob)
, end_(blob + size)
{
}
std::string_view key;
std::string_view value;
};
// Parse the next key-value pair and return the state of the reader. If
// `out` is passed, out will be set to the produced key-value pair if there
// is one.
State Next(KeyValue* out = nullptr);
// Creates a reader over the `size` characters starting at `blob`. Only the
// begin and one-past-the-end pointers are stored.
KeyValueBlobReader(const char* blob, size_t size)
: current_(blob), end_(blob + size) {}
private:
// Advance current_ until `f` becomes true (SkipUntil) or stops being true
// (SkipUntilNot) for the character at current_.
State SkipUntil(const std::function<bool(char)>& f);
State SkipUntilNot(const std::function<bool(char)>& f);
// Parse the next key-value pair and return the state of the reader. If `out`
// is passed, out will be set to the produced key-value pair if there is one.
State Next(KeyValue* out = nullptr);
private:
// Advance current_ until `f` becomes true (SkipUntil) or stops being true
// (SkipUntilNot) for the character at current_.
State SkipUntil(const std::function<bool(char)>& f);
State SkipUntilNot(const std::function<bool(char)>& f);
// Read cursor into the blob.
const char* current_;
// One past the last character of the blob (blob + size).
const char* end_;
// State most recently recorded by Next(); starts as CAN_CONTINUE.
State state_ = State::CAN_CONTINUE;
// Read cursor into the blob.
const char* current_;
// One past the last character of the blob (blob + size).
const char* end_;
// State most recently recorded by Next(); starts as CAN_CONTINUE.
State state_ = State::CAN_CONTINUE;
};
// Stream insertion for KeyValue; the definition shown earlier in this file
// prints "(key: <key>, value: <value>)".
std::ostream& operator<<(std::ostream&, const KeyValueBlobReader::KeyValue&);
} // namespace vChewing
} // namespace vChewing
#endif // SOURCE_ENGINE_KEYVALUEBLOBREADER_H_
#endif // SOURCE_ENGINE_KEYVALUEBLOBREADER_H_