192 lines
6.1 KiB
C++
192 lines
6.1 KiB
C++
#pragma once
|
|
|
|
#include "azure/storage/blobs/blob_options.hpp"
|
|
|
|
#include <azure/core/io/body_stream.hpp>
|
|
|
|
#include <map>
|
|
#include <memory>
|
|
#include <type_traits>
|
|
|
|
namespace Azure {
|
|
namespace Storage {
|
|
namespace Blobs {
|
|
namespace _detail {
|
|
enum class AvroDatumType {
|
|
String,
|
|
Bytes,
|
|
Int,
|
|
Long,
|
|
Float,
|
|
Double,
|
|
Bool,
|
|
Null,
|
|
Record,
|
|
Enum,
|
|
Array,
|
|
Map,
|
|
Union,
|
|
Fixed,
|
|
};
|
|
|
|
class AvroStreamReader final {
|
|
public:
|
|
// position of a vector that lives through vector resizing
|
|
struct ReaderPos final {
|
|
const std::vector<uint8_t>* BufferPtr = nullptr;
|
|
size_t Offset = 0;
|
|
};
|
|
explicit AvroStreamReader(Core::IO::BodyStream& stream) : m_stream(&stream), m_pos{&m_streambuffer, 0} {}
|
|
AvroStreamReader(const AvroStreamReader&) = delete;
|
|
AvroStreamReader& operator=(const AvroStreamReader&) = delete;
|
|
|
|
int64_t ParseInt(const Core::Context& context);
|
|
void Advance(size_t n, const Core::Context& context);
|
|
// Read at least n bytes from m_stream and append data to m_streambuffer. Return number of bytes
|
|
// available in m_streambuffer;
|
|
size_t Preload(size_t n, const Core::Context& context);
|
|
size_t TryPreload(size_t n, const Core::Context& context);
|
|
// discards data that's before m_pos
|
|
void Discard();
|
|
|
|
private:
|
|
size_t AvailableBytes() const { return m_streambuffer.size() - m_pos.Offset; }
|
|
|
|
private:
|
|
Core::IO::BodyStream* m_stream;
|
|
std::vector<uint8_t> m_streambuffer;
|
|
ReaderPos m_pos;
|
|
|
|
friend class AvroDatum;
|
|
};
|
|
|
|
class AvroSchema final {
|
|
public:
|
|
static const AvroSchema StringSchema;
|
|
static const AvroSchema BytesSchema;
|
|
static const AvroSchema IntSchema;
|
|
static const AvroSchema LongSchema;
|
|
static const AvroSchema FloatSchema;
|
|
static const AvroSchema DoubleSchema;
|
|
static const AvroSchema BoolSchema;
|
|
static const AvroSchema NullSchema;
|
|
static AvroSchema RecordSchema(std::string name, const std::vector<std::pair<std::string, AvroSchema>>& fieldsSchema);
|
|
static AvroSchema ArraySchema(AvroSchema elementSchema);
|
|
static AvroSchema MapSchema(AvroSchema elementSchema);
|
|
static AvroSchema UnionSchema(std::vector<AvroSchema> schemas);
|
|
static AvroSchema FixedSchema(std::string name, int64_t size);
|
|
|
|
const std::string& Name() const { return m_name; }
|
|
AvroDatumType Type() const { return m_type; }
|
|
const std::vector<std::string>& FieldNames() const { return m_status->m_keys; }
|
|
AvroSchema ItemSchema() const { return m_status->m_schemas[0]; }
|
|
const std::vector<AvroSchema>& FieldSchemas() const { return m_status->m_schemas; }
|
|
size_t Size() const { return static_cast<size_t>(m_status->m_size); }
|
|
|
|
private:
|
|
explicit AvroSchema(AvroDatumType type) : m_type(type) {}
|
|
|
|
private:
|
|
AvroDatumType m_type;
|
|
std::string m_name;
|
|
|
|
struct SharedStatus {
|
|
std::vector<std::string> m_keys;
|
|
std::vector<AvroSchema> m_schemas;
|
|
int64_t m_size = 0;
|
|
};
|
|
std::shared_ptr<SharedStatus> m_status;
|
|
};
|
|
|
|
class AvroDatum final {
|
|
public:
|
|
AvroDatum() : m_schema(AvroSchema::NullSchema) {}
|
|
explicit AvroDatum(AvroSchema schema) : m_schema(std::move(schema)) {}
|
|
|
|
void Fill(AvroStreamReader& reader, const Core::Context& context);
|
|
void Fill(AvroStreamReader::ReaderPos& data);
|
|
|
|
const AvroSchema& Schema() const { return m_schema; }
|
|
|
|
template <class T>
|
|
T Value() const;
|
|
struct StringView {
|
|
const uint8_t* Data = nullptr;
|
|
size_t Length = 0;
|
|
};
|
|
|
|
private:
|
|
AvroSchema m_schema;
|
|
AvroStreamReader::ReaderPos m_data;
|
|
};
|
|
|
|
using AvroMap = std::map<std::string, AvroDatum>;
|
|
|
|
class AvroRecord final {
|
|
public:
|
|
bool HasField(const std::string& key) const { return FindField(key) != m_keys->size(); }
|
|
const AvroDatum& Field(const std::string& key) const { return m_values.at(FindField(key)); }
|
|
AvroDatum& Field(const std::string& key) { return m_values.at(FindField(key)); }
|
|
const AvroDatum& FieldAt(size_t i) const { return m_values.at(i); }
|
|
AvroDatum& FieldAt(size_t i) { return m_values.at(i); }
|
|
|
|
private:
|
|
size_t FindField(const std::string& key) const {
|
|
auto i = find(m_keys->begin(), m_keys->end(), key);
|
|
return i - m_keys->begin();
|
|
}
|
|
const std::vector<std::string>* m_keys = nullptr;
|
|
std::vector<AvroDatum> m_values;
|
|
|
|
friend class AvroDatum;
|
|
};
|
|
|
|
class AvroObjectContainerReader final {
|
|
public:
|
|
explicit AvroObjectContainerReader(Core::IO::BodyStream& stream);
|
|
|
|
bool End() const { return m_eof; }
|
|
// Calling Next() will invalidates the previous AvroDatum returned by this function and all
|
|
// AvroDatums propagated from there.
|
|
AvroDatum Next(const Core::Context& context) { return NextImpl(m_objectSchema.get(), context); }
|
|
|
|
private:
|
|
AvroDatum NextImpl(const AvroSchema* schema, const Core::Context& context);
|
|
|
|
private:
|
|
std::unique_ptr<AvroStreamReader> m_reader;
|
|
std::unique_ptr<AvroSchema> m_objectSchema;
|
|
std::string m_syncMarker;
|
|
int64_t m_remainingObjectInCurrentBlock = 0;
|
|
bool m_eof = false;
|
|
};
|
|
|
|
class AvroStreamParser final : public Core::IO::BodyStream {
|
|
public:
|
|
explicit AvroStreamParser(std::unique_ptr<Azure::Core::IO::BodyStream> inner,
|
|
std::function<void(int64_t, int64_t)> progressCallback,
|
|
std::function<void(BlobQueryError)> errorCallback)
|
|
: m_inner(std::move(inner)),
|
|
m_parser(*m_inner),
|
|
m_progressCallback(std::move(progressCallback)),
|
|
m_errorCallback(std::move(errorCallback)) {}
|
|
|
|
int64_t Length() const override { return -1; }
|
|
void Rewind() override { this->m_inner->Rewind(); }
|
|
|
|
private:
|
|
size_t OnRead(uint8_t* buffer, size_t count, const Azure::Core::Context& context) override;
|
|
|
|
private:
|
|
std::unique_ptr<Azure::Core::IO::BodyStream> m_inner;
|
|
AvroObjectContainerReader m_parser;
|
|
std::function<void(int64_t, int64_t)> m_progressCallback;
|
|
std::function<void(BlobQueryError)> m_errorCallback;
|
|
AvroDatum::StringView m_parserBuffer;
|
|
};
|
|
|
|
} // namespace _detail
|
|
} // namespace Blobs
|
|
} // namespace Storage
|
|
} // namespace Azure
|